"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/amd/llvm/ac_nir_to_llvm.c" (16 Sep 2020, 183359 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "ac_nir_to_llvm.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20.1.7_vs_20.1.8.

    1 /*
    2  * Copyright © 2016 Bas Nieuwenhuizen
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  */
   23 
   24 #include <llvm/Config/llvm-config.h>
   25 
   26 #include "ac_nir_to_llvm.h"
   27 #include "ac_llvm_build.h"
   28 #include "ac_llvm_util.h"
   29 #include "ac_binary.h"
   30 #include "sid.h"
   31 #include "nir/nir.h"
   32 #include "nir/nir_deref.h"
   33 #include "util/bitscan.h"
   34 #include "util/u_math.h"
   35 #include "ac_shader_abi.h"
   36 #include "ac_shader_util.h"
   37 
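       /* Per-shader translation state used while converting a NIR shader to LLVM IR. */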
   38 struct ac_nir_context {
   39     struct ac_llvm_context ac;
   40     struct ac_shader_abi *abi;
   41     const struct ac_shader_args *args;
   42 
   43     gl_shader_stage stage;
   44     shader_info *info;
   45 
   46     LLVMValueRef *ssa_defs;
   47 
   48     LLVMValueRef scratch;
   49     LLVMValueRef constant_data;
   50 
   51     struct hash_table *defs;
   52     struct hash_table *phis;
   53     struct hash_table *vars;
   54 
   55     LLVMValueRef main_function;
   56     LLVMBasicBlockRef continue_block;
   57     LLVMBasicBlockRef break_block;
   58 
   59     int num_locals;
   60     LLVMValueRef *locals;
   61 };
   62 
   63 static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
   64                        nir_deref_instr *deref_instr,
   65                        const nir_instr *instr,
   66                        bool image);
   67 
   68 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
   69                      nir_deref_instr *deref_instr,
   70                      enum ac_descriptor_type desc_type,
   71                      const nir_instr *instr,
   72                      LLVMValueRef index,
   73                      bool image, bool write);
   74 
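       /* Store each element of 'vec' through the matching pointer in 'values',
        * stepping through 'values' by 'value_stride' pointers per element. */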
   75 static void
   76 build_store_values_extended(struct ac_llvm_context *ac,
   77                  LLVMValueRef *values,
   78                  unsigned value_count,
   79                  unsigned value_stride,
   80                  LLVMValueRef vec)
   81 {
   82     LLVMBuilderRef builder = ac->builder;
   83     unsigned i;
   84 
   85     for (i = 0; i < value_count; i++) {
   86         LLVMValueRef ptr = values[i * value_stride];
   87         LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
   88         LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
   89         LLVMBuildStore(builder, value, ptr);
   90     }
   91 }
   92 
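       /* Return the LLVM integer type (or vector of it) matching the bit size and
        * component count of a NIR SSA def. */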
   93 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
   94                                 const nir_ssa_def *def)
   95 {
   96     LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
   97     if (def->num_components > 1) {
   98         type = LLVMVectorType(type, def->num_components);
   99     }
  100     return type;
  101 }
  102 
  103 static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
  104 {
  105     assert(src.is_ssa);
  106     return nir->ssa_defs[src.ssa->index];
  107 }
  108 
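       /* Index into the shared-memory (LDS) base pointer and cast the result to a
        * pointer to an integer of 'bit_size' bits. */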
  109 static LLVMValueRef
  110 get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size)
  111 {
  112     LLVMValueRef ptr = get_src(ctx, src);
  113     ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
  114     int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
  115 
  116     LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);
  117 
  118     return LLVMBuildBitCast(ctx->ac.builder, ptr,
  119                 LLVMPointerType(type, addr_space), "");
  120 }
  121 
  122 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
  123                                    const struct nir_block *b)
  124 {
  125     struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
  126     return (LLVMBasicBlockRef)entry->data;
  127 }
  128 
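       /* Fetch an ALU source and apply its swizzle, extracting, splatting or
        * shuffling as needed to produce 'num_components' components. */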
  129 static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
  130                                 nir_alu_src src,
  131                                 unsigned num_components)
  132 {
  133     LLVMValueRef value = get_src(ctx, src.src);
  134     bool need_swizzle = false;
  135 
  136     assert(value);
  137     unsigned src_components = ac_get_llvm_num_components(value);
  138     for (unsigned i = 0; i < num_components; ++i) {
  139         assert(src.swizzle[i] < src_components);
  140         if (src.swizzle[i] != i)
  141             need_swizzle = true;
  142     }
  143 
  144     if (need_swizzle || num_components != src_components) {
  145         LLVMValueRef masks[] = {
  146             LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
  147             LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
  148             LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
  149             LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
  150 
  151         if (src_components > 1 && num_components == 1) {
  152             value = LLVMBuildExtractElement(ctx->ac.builder, value,
  153                                             masks[0], "");
  154         } else if (src_components == 1 && num_components > 1) {
  155             LLVMValueRef values[] = {value, value, value, value};
  156             value = ac_build_gather_values(&ctx->ac, values, num_components);
  157         } else {
  158             LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
  159             value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
  160                                                swizzle, "");
  161         }
  162     }
  163     assert(!src.negate);
  164     assert(!src.abs);
  165     return value;
  166 }
  167 
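       /* Integer comparison returning a 32-bit NIR boolean (0 or 0xffffffff). */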
  168 static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
  169                                  LLVMIntPredicate pred, LLVMValueRef src0,
  170                                  LLVMValueRef src1)
  171 {
  172     LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
  173     return LLVMBuildSelect(ctx->builder, result,
  174                            LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
  175                            ctx->i32_0, "");
  176 }
  177 
  178 static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
  179                                    LLVMRealPredicate pred, LLVMValueRef src0,
  180                                    LLVMValueRef src1)
  181 {
  182     LLVMValueRef result;
  183     src0 = ac_to_float(ctx, src0);
  184     src1 = ac_to_float(ctx, src1);
  185     result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
  186     return LLVMBuildSelect(ctx->builder, result,
  187                            LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
  188                    ctx->i32_0, "");
  189 }
  190 
  191 static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
  192                      const char *intrin,
  193                      LLVMTypeRef result_type,
  194                      LLVMValueRef src0)
  195 {
  196     char name[64];
  197     LLVMValueRef params[] = {
  198         ac_to_float(ctx, src0),
  199     };
  200 
  201     ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
  202                          ac_get_elem_bits(ctx, result_type));
  203     assert(length < sizeof(name));
  204     return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
  205 }
  206 
  207 static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
  208                        const char *intrin,
  209                        LLVMTypeRef result_type,
  210                        LLVMValueRef src0, LLVMValueRef src1)
  211 {
  212     char name[64];
  213     LLVMValueRef params[] = {
  214         ac_to_float(ctx, src0),
  215         ac_to_float(ctx, src1),
  216     };
  217 
  218     ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
  219                          ac_get_elem_bits(ctx, result_type));
  220     assert(length < sizeof(name));
  221     return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
  222 }
  223 
  224 static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
  225                      const char *intrin,
  226                      LLVMTypeRef result_type,
  227                      LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
  228 {
  229     char name[64];
  230     LLVMValueRef params[] = {
  231         ac_to_float(ctx, src0),
  232         ac_to_float(ctx, src1),
  233         ac_to_float(ctx, src2),
  234     };
  235 
  236     ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
  237                          ac_get_elem_bits(ctx, result_type));
  238     assert(length < sizeof(name));
  239     return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
  240 }
  241 
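       /* Select between src1 and src2 based on a 32-bit NIR boolean, coercing
        * mixed integer/pointer operands to a common type first. */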
  242 static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
  243                    LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
  244 {
  245     LLVMTypeRef src1_type = LLVMTypeOf(src1);
  246     LLVMTypeRef src2_type = LLVMTypeOf(src2);
  247 
  248     assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
  249 
  250     if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
  251         LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
  252         src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
  253     } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
  254            LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
  255         src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
  256     }
  257 
  258     LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
  259                        ctx->i32_0, "");
  260     return LLVMBuildSelect(ctx->builder, v,
  261                    ac_to_integer_or_pointer(ctx, src1),
  262                    ac_to_integer_or_pointer(ctx, src2), "");
  263 }
  264 
  265 static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
  266                   LLVMValueRef src0)
  267 {
  268     return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
  269 }
  270 
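       /* Use an LLVM add/sub-with-overflow intrinsic and return the carry/borrow
        * bit zero-extended to i32. */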
  271 static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
  272                     const char *intrin,
  273                     LLVMValueRef src0, LLVMValueRef src1)
  274 {
  275     LLVMTypeRef ret_type;
  276     LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
  277     LLVMValueRef res;
  278     LLVMValueRef params[] = { src0, src1 };
  279     ret_type = LLVMStructTypeInContext(ctx->context, types,
  280                        2, true);
  281 
  282     res = ac_build_intrinsic(ctx, intrin, ret_type,
  283                  params, 2, AC_FUNC_ATTR_READNONE);
  284 
  285     res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
  286     res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
  287     return res;
  288 }
  289 
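       /* Convert a 32-bit NIR boolean (0 / ~0) into 0.0 or 1.0 at the requested
        * float bit size. */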
  290 static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
  291                  LLVMValueRef src0,
  292                  unsigned bitsize)
  293 {
  294     LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0,
  295                        LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""),
  296                        "");
  297     result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, "");
  298 
  299     switch (bitsize) {
  300     case 16:
  301         return LLVMBuildFPTrunc(ctx->builder, result, ctx->f16, "");
  302     case 32:
  303         return result;
  304     case 64:
  305         return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
  306     default:
  307         unreachable("Unsupported bit size.");
  308     }
  309 }
  310 
  311 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
  312                  LLVMValueRef src0)
  313 {
  314     src0 = ac_to_float(ctx, src0);
  315     LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
  316     return LLVMBuildSExt(ctx->builder,
  317                  LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""),
  318                  ctx->i32, "");
  319 }
  320 
  321 static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
  322                  LLVMValueRef src0,
  323                  unsigned bitsize)
  324 {
  325     LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
  326 
  327     switch (bitsize) {
  328     case 8:
  329         return LLVMBuildTrunc(ctx->builder, result, ctx->i8, "");
  330     case 16:
  331         return LLVMBuildTrunc(ctx->builder, result, ctx->i16, "");
  332     case 32:
  333         return result;
  334     case 64:
  335         return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
  336     default:
  337         unreachable("Unsupported bit size.");
  338     }
  339 }
  340 
  341 static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
  342                  LLVMValueRef src0)
  343 {
  344     LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
  345     return LLVMBuildSExt(ctx->builder,
  346                  LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""),
  347                  ctx->i32, "");
  348 }
  349 
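       /* Quantize a 32-bit float to half precision: convert to f16 and back,
        * flushing values that are denormal in f16 to zero. */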
  350 static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx,
  351                    LLVMValueRef src0)
  352 {
  353     LLVMValueRef result;
  354     LLVMValueRef cond = NULL;
  355 
  356     src0 = ac_to_float(ctx, src0);
  357     result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
  358 
  359     if (ctx->chip_class >= GFX8) {
  360         LLVMValueRef args[2];
   361         /* Check if the result is a denormal and flush it to 0 if so. */
  362         args[0] = result;
  363         args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
  364         cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
  365     }
  366 
  367     /* need to convert back up to f32 */
  368     result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
  369 
  370     if (ctx->chip_class >= GFX8)
  371         result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
  372     else {
  373         /* for GFX6-GFX7 */
   374         /* 0x38800000 is the smallest half float value (2^-14) in 32-bit float,
  375          * so compare the result and flush to 0 if it's smaller.
  376          */
  377         LLVMValueRef temp, cond2;
  378         temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
  379         cond = LLVMBuildFCmp(ctx->builder, LLVMRealOGT,
  380                      LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
  381                      temp, "");
  382         cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE,
  383                       temp, ctx->f32_0, "");
  384         cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
  385         result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
  386     }
  387     return result;
  388 }
  389 
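       /* Return the high 32 bits of the unsigned 32x32 -> 64-bit product. */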
  390 static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
  391                    LLVMValueRef src0, LLVMValueRef src1)
  392 {
  393     LLVMValueRef dst64, result;
  394     src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
  395     src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
  396 
  397     dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
  398     dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
  399     result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
  400     return result;
  401 }
  402 
  403 static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
  404                    LLVMValueRef src0, LLVMValueRef src1)
  405 {
  406     LLVMValueRef dst64, result;
  407     src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
  408     src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
  409 
  410     dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
  411     dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
  412     result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
  413     return result;
  414 }
  415 
  416 static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx,
  417                  LLVMValueRef bits, LLVMValueRef offset)
  418 {
  419     /* mask = ((1 << bits) - 1) << offset */
  420     return LLVMBuildShl(ctx->builder,
  421                 LLVMBuildSub(ctx->builder,
  422                      LLVMBuildShl(ctx->builder,
  423                               ctx->i32_1,
  424                               bits, ""),
  425                      ctx->i32_1, ""),
  426                 offset, "");
  427 }
  428 
  429 static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx,
  430                      LLVMValueRef mask, LLVMValueRef insert,
  431                      LLVMValueRef base)
  432 {
  433     /* Calculate:
  434      *   (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
  435      * Use the right-hand side, which the LLVM backend can convert to V_BFI.
  436      */
  437     return LLVMBuildXor(ctx->builder, base,
  438                 LLVMBuildAnd(ctx->builder, mask,
  439                      LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
  440 }
  441 
  442 static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx,
  443                    LLVMValueRef src0,
  444                    LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
  445                             LLVMValueRef args[2]))
  446 {
  447     LLVMValueRef comp[2];
  448 
  449     src0 = ac_to_float(ctx, src0);
  450     comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
  451     comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
  452 
  453     return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
  454 }
  455 
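       /* Unpack two half floats from a 32-bit word and extend them to a vec2 of f32. */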
  456 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
  457                       LLVMValueRef src0)
  458 {
  459     LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
  460     LLVMValueRef temps[2], val;
  461     int i;
  462 
  463     for (i = 0; i < 2; i++) {
  464         val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
  465         val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
  466         val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
  467         temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
  468     }
  469     return ac_build_gather_values(ctx, temps, 2);
  470 }
  471 
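       /* Emit a screen-space derivative (DDX/DDY, fine or coarse) via ac_build_ddxy. */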
  472 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
  473                   nir_op op,
  474                   LLVMValueRef src0)
  475 {
  476     unsigned mask;
  477     int idx;
  478     LLVMValueRef result;
  479 
  480     if (op == nir_op_fddx_fine)
  481         mask = AC_TID_MASK_LEFT;
  482     else if (op == nir_op_fddy_fine)
  483         mask = AC_TID_MASK_TOP;
  484     else
  485         mask = AC_TID_MASK_TOP_LEFT;
  486 
   487     /* For DDX we want the next X pixel, for DDY the next Y pixel. */
  488     if (op == nir_op_fddx_fine ||
  489         op == nir_op_fddx_coarse ||
  490         op == nir_op_fddx)
  491         idx = 1;
  492     else
  493         idx = 2;
  494 
  495     result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
  496     return result;
  497 }
  498 
  499 struct waterfall_context {
  500     LLVMBasicBlockRef phi_bb[2];
  501     bool use_waterfall;
  502 };
  503 
  504 /* To deal with divergent descriptors we can create a loop that handles all
  505  * lanes with the same descriptor on a given iteration (henceforth a
  506  * waterfall loop).
  507  *
   508  * These helpers create the beginning and end of the loop, leaving the
   509  * caller to implement the body.
   510  *
   511  * params:
   512  *  - ctx is the usual nir context
   513  *  - wctx is a temporary struct containing some loop info. Can be left uninitialized.
   514  *  - value is the possibly divergent value for which we build the loop
  515  *  - divergent is whether value is actually divergent. If false we just pass
  516  *     things through.
  517  */
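       /* A minimal usage sketch (names are illustrative, not from this file):
        *
        *    struct waterfall_context wctx;
        *    LLVMValueRef index = enter_waterfall(ctx, &wctx, divergent_index, divergent);
        *    ... emit the loop body using the now-uniform 'index' ...
        *    result = exit_waterfall(ctx, &wctx, result);
        */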
  518 static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx,
  519                     struct waterfall_context *wctx,
  520                     LLVMValueRef value, bool divergent)
  521 {
   522     /* If the app claims the value is divergent but it is constant, we can
   523      * end up with a dynamic index of NULL. */
  524     if (!value)
  525         divergent = false;
  526 
  527     wctx->use_waterfall = divergent;
  528     if (!divergent)
  529         return value;
  530 
  531     ac_build_bgnloop(&ctx->ac, 6000);
  532 
  533     LLVMValueRef scalar_value = ac_build_readlane(&ctx->ac, value, NULL);
  534 
  535     LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, value,
  536                         scalar_value, "uniform_active");
  537 
  538     wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
  539     ac_build_ifcc(&ctx->ac, active, 6001);
  540 
  541     return scalar_value;
  542 }
  543 
  544 static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx,
  545                    struct waterfall_context *wctx,
  546                    LLVMValueRef value)
  547 {
  548     LLVMValueRef ret = NULL;
  549     LLVMValueRef phi_src[2];
  550     LLVMValueRef cc_phi_src[2] = {
  551         LLVMConstInt(ctx->ac.i32, 0, false),
  552         LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
  553     };
  554 
  555     if (!wctx->use_waterfall)
  556         return value;
  557 
  558     wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);
  559 
  560     ac_build_endif(&ctx->ac, 6001);
  561 
  562     if (value) {
  563         phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
  564         phi_src[1] = value;
  565 
  566         ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
  567     }
  568 
  569     /*
  570      * By using the optimization barrier on the exit decision, we decouple
  571      * the operations from the break, and hence avoid LLVM hoisting the
   572      * operation into the break block.
  573      */
  574     LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
  575     ac_build_optimization_barrier(&ctx->ac, &cc);
  576 
  577     LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
  578     ac_build_ifcc(&ctx->ac, active, 6002);
  579     ac_build_break(&ctx->ac);
  580     ac_build_endif(&ctx->ac, 6002);
  581 
  582     ac_build_endloop(&ctx->ac, 6000);
  583     return ret;
  584 }
  585 
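       /* Translate a single NIR ALU instruction to LLVM IR and record the result in
        * the SSA def table. */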
  586 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
  587 {
  588     LLVMValueRef src[4], result = NULL;
  589     unsigned num_components = instr->dest.dest.ssa.num_components;
  590     unsigned src_components;
  591     LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
  592 
  593     assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
  594     switch (instr->op) {
  595     case nir_op_vec2:
  596     case nir_op_vec3:
  597     case nir_op_vec4:
  598         src_components = 1;
  599         break;
  600     case nir_op_pack_half_2x16:
  601     case nir_op_pack_snorm_2x16:
  602     case nir_op_pack_unorm_2x16:
  603         src_components = 2;
  604         break;
  605     case nir_op_unpack_half_2x16:
  606         src_components = 1;
  607         break;
  608     case nir_op_cube_face_coord:
  609     case nir_op_cube_face_index:
  610         src_components = 3;
  611         break;
  612     default:
  613         src_components = num_components;
  614         break;
  615     }
  616     for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
  617         src[i] = get_alu_src(ctx, instr->src[i], src_components);
  618 
  619     switch (instr->op) {
  620     case nir_op_mov:
  621         result = src[0];
  622         break;
  623     case nir_op_fneg:
  624             src[0] = ac_to_float(&ctx->ac, src[0]);
  625         result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
  626         if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
   627             /* fneg will be optimized by the backend compiler with the
   628              * sign bit removed via XOR. This is probably an LLVM bug.
  629              */
  630             result = ac_build_canonicalize(&ctx->ac, result,
  631                                instr->dest.dest.ssa.bit_size);
  632         }
  633         break;
  634     case nir_op_ineg:
  635         result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
  636         break;
  637     case nir_op_inot:
  638         result = LLVMBuildNot(ctx->ac.builder, src[0], "");
  639         break;
  640     case nir_op_iadd:
  641         result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
  642         break;
  643     case nir_op_fadd:
  644         src[0] = ac_to_float(&ctx->ac, src[0]);
  645         src[1] = ac_to_float(&ctx->ac, src[1]);
  646         result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
  647         break;
  648     case nir_op_fsub:
  649         src[0] = ac_to_float(&ctx->ac, src[0]);
  650         src[1] = ac_to_float(&ctx->ac, src[1]);
  651         result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
  652         break;
  653     case nir_op_isub:
  654         result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
  655         break;
  656     case nir_op_imul:
  657         result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
  658         break;
  659     case nir_op_imod:
  660         result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
  661         break;
  662     case nir_op_umod:
  663         result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
  664         break;
  665     case nir_op_fmod:
   666         /* lower_fmod only lowers 16-bit and 32-bit fmod */
  667         assert(instr->dest.dest.ssa.bit_size == 64);
  668         src[0] = ac_to_float(&ctx->ac, src[0]);
  669         src[1] = ac_to_float(&ctx->ac, src[1]);
  670         result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
  671         result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
  672                                       ac_to_float_type(&ctx->ac, def_type), result);
  673         result = LLVMBuildFMul(ctx->ac.builder, src[1] , result, "");
  674         result = LLVMBuildFSub(ctx->ac.builder, src[0], result, "");
  675         break;
  676     case nir_op_irem:
  677         result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
  678         break;
  679     case nir_op_idiv:
  680         result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
  681         break;
  682     case nir_op_udiv:
  683         result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
  684         break;
  685     case nir_op_fmul:
  686         src[0] = ac_to_float(&ctx->ac, src[0]);
  687         src[1] = ac_to_float(&ctx->ac, src[1]);
  688         result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
  689         break;
  690     case nir_op_frcp:
  691         /* For doubles, we need precise division to pass GLCTS. */
  692         if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
  693             ac_get_type_size(def_type) == 8) {
  694             result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1,
  695                            ac_to_float(&ctx->ac, src[0]), "");
  696         } else {
  697             result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
  698                               ac_to_float_type(&ctx->ac, def_type), src[0]);
  699         }
  700         if (ctx->abi->clamp_div_by_zero)
  701             result = ac_build_fmin(&ctx->ac, result,
  702                            LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
  703         break;
  704     case nir_op_iand:
  705         result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
  706         break;
  707     case nir_op_ior:
  708         result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
  709         break;
  710     case nir_op_ixor:
  711         result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
  712         break;
  713     case nir_op_ishl:
  714         if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
  715             src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
  716                            LLVMTypeOf(src[0]), "");
  717         else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
  718             src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
  719                         LLVMTypeOf(src[0]), "");
  720         result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
  721         break;
  722     case nir_op_ishr:
  723         if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
  724             src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
  725                            LLVMTypeOf(src[0]), "");
  726         else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
  727             src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
  728                         LLVMTypeOf(src[0]), "");
  729         result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
  730         break;
  731     case nir_op_ushr:
  732         if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
  733             src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
  734                            LLVMTypeOf(src[0]), "");
  735         else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
  736             src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
  737                         LLVMTypeOf(src[0]), "");
  738         result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
  739         break;
  740     case nir_op_ilt32:
  741         result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
  742         break;
  743     case nir_op_ine32:
  744         result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
  745         break;
  746     case nir_op_ieq32:
  747         result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
  748         break;
  749     case nir_op_ige32:
  750         result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
  751         break;
  752     case nir_op_ult32:
  753         result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
  754         break;
  755     case nir_op_uge32:
  756         result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
  757         break;
  758     case nir_op_feq32:
  759         result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
  760         break;
  761     case nir_op_fne32:
  762         result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
  763         break;
  764     case nir_op_flt32:
  765         result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
  766         break;
  767     case nir_op_fge32:
  768         result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
  769         break;
  770     case nir_op_fabs:
  771         result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
  772                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
  773         if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
   774             /* fabs will be optimized by the backend compiler with the
   775              * sign bit removed via AND.
  776              */
  777             result = ac_build_canonicalize(&ctx->ac, result,
  778                                instr->dest.dest.ssa.bit_size);
  779         }
  780         break;
  781     case nir_op_iabs:
  782         result = emit_iabs(&ctx->ac, src[0]);
  783         break;
  784     case nir_op_imax:
  785         result = ac_build_imax(&ctx->ac, src[0], src[1]);
  786         break;
  787     case nir_op_imin:
  788         result = ac_build_imin(&ctx->ac, src[0], src[1]);
  789         break;
  790     case nir_op_umax:
  791         result = ac_build_umax(&ctx->ac, src[0], src[1]);
  792         break;
  793     case nir_op_umin:
  794         result = ac_build_umin(&ctx->ac, src[0], src[1]);
  795         break;
  796     case nir_op_isign:
  797         result = ac_build_isign(&ctx->ac, src[0],
  798                     instr->dest.dest.ssa.bit_size);
  799         break;
  800     case nir_op_fsign:
  801         src[0] = ac_to_float(&ctx->ac, src[0]);
  802         result = ac_build_fsign(&ctx->ac, src[0],
  803                     instr->dest.dest.ssa.bit_size);
  804         break;
  805     case nir_op_ffloor:
  806         result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
  807                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
  808         break;
  809     case nir_op_ftrunc:
  810         result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
  811                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
  812         break;
  813     case nir_op_fceil:
  814         result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
  815                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
  816         break;
  817     case nir_op_fround_even:
  818         result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
  819                                       ac_to_float_type(&ctx->ac, def_type),src[0]);
  820         break;
  821     case nir_op_ffract:
  822         src[0] = ac_to_float(&ctx->ac, src[0]);
  823         result = ac_build_fract(&ctx->ac, src[0],
  824                     instr->dest.dest.ssa.bit_size);
  825         break;
  826     case nir_op_fsin:
  827         result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
  828                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
  829         break;
  830     case nir_op_fcos:
  831         result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
  832                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
  833         break;
  834     case nir_op_fsqrt:
  835         result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
  836                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
  837         break;
  838     case nir_op_fexp2:
  839         result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
  840                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
  841         break;
  842     case nir_op_flog2:
  843         result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
  844                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
  845         break;
  846     case nir_op_frsq:
  847         result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
  848                           ac_to_float_type(&ctx->ac, def_type), src[0]);
  849         if (ctx->abi->clamp_div_by_zero)
  850             result = ac_build_fmin(&ctx->ac, result,
  851                            LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
  852         break;
  853     case nir_op_frexp_exp:
  854         src[0] = ac_to_float(&ctx->ac, src[0]);
  855         result = ac_build_frexp_exp(&ctx->ac, src[0],
  856                         ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
  857         if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
  858             result = LLVMBuildSExt(ctx->ac.builder, result,
  859                            ctx->ac.i32, "");
  860         break;
  861     case nir_op_frexp_sig:
  862         src[0] = ac_to_float(&ctx->ac, src[0]);
  863         result = ac_build_frexp_mant(&ctx->ac, src[0],
  864                          instr->dest.dest.ssa.bit_size);
  865         break;
  866     case nir_op_fpow:
  867         result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
  868                                       ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
  869         break;
  870     case nir_op_fmax:
  871         result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
  872                                       ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
  873         if (ctx->ac.chip_class < GFX9 &&
  874             instr->dest.dest.ssa.bit_size == 32) {
  875             /* Only pre-GFX9 chips do not flush denorms. */
  876             result = ac_build_canonicalize(&ctx->ac, result,
  877                                instr->dest.dest.ssa.bit_size);
  878         }
  879         break;
  880     case nir_op_fmin:
  881         result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
  882                                       ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
  883         if (ctx->ac.chip_class < GFX9 &&
  884             instr->dest.dest.ssa.bit_size == 32) {
  885             /* Only pre-GFX9 chips do not flush denorms. */
  886             result = ac_build_canonicalize(&ctx->ac, result,
  887                                instr->dest.dest.ssa.bit_size);
  888         }
  889         break;
  890     case nir_op_ffma:
  891         /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
  892         result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
  893                           ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
  894         break;
  895     case nir_op_ldexp:
  896         src[0] = ac_to_float(&ctx->ac, src[0]);
  897         if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
  898             result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
  899         else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
  900             result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
  901         else
  902             result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
  903         break;
  904     case nir_op_bfm:
  905         result = emit_bfm(&ctx->ac, src[0], src[1]);
  906         break;
  907     case nir_op_bitfield_select:
  908         result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
  909         break;
  910     case nir_op_ubfe:
  911         result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
  912         break;
  913     case nir_op_ibfe:
  914         result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
  915         break;
  916     case nir_op_bitfield_reverse:
  917         result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
  918         break;
  919     case nir_op_bit_count:
  920         result = ac_build_bit_count(&ctx->ac, src[0]);
  921         break;
  922     case nir_op_vec2:
  923     case nir_op_vec3:
  924     case nir_op_vec4:
  925         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
  926             src[i] = ac_to_integer(&ctx->ac, src[i]);
  927         result = ac_build_gather_values(&ctx->ac, src, num_components);
  928         break;
  929     case nir_op_f2i8:
  930     case nir_op_f2i16:
  931     case nir_op_f2i32:
  932     case nir_op_f2i64:
  933         src[0] = ac_to_float(&ctx->ac, src[0]);
  934         result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
  935         break;
  936     case nir_op_f2u8:
  937     case nir_op_f2u16:
  938     case nir_op_f2u32:
  939     case nir_op_f2u64:
  940         src[0] = ac_to_float(&ctx->ac, src[0]);
  941         result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
  942         break;
  943     case nir_op_i2f16:
  944     case nir_op_i2f32:
  945     case nir_op_i2f64:
  946         result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
  947         break;
  948     case nir_op_u2f16:
  949     case nir_op_u2f32:
  950     case nir_op_u2f64:
  951         result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
  952         break;
  953     case nir_op_f2f16_rtz:
  954         src[0] = ac_to_float(&ctx->ac, src[0]);
  955         if (LLVMTypeOf(src[0]) == ctx->ac.f64)
  956             src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
  957         LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
  958         result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
  959         result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
  960         break;
  961     case nir_op_f2f16_rtne:
  962     case nir_op_f2f16:
  963     case nir_op_f2f32:
  964     case nir_op_f2f64:
  965         src[0] = ac_to_float(&ctx->ac, src[0]);
  966         if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
  967             result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
  968         else
  969             result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
  970         break;
  971     case nir_op_u2u8:
  972     case nir_op_u2u16:
  973     case nir_op_u2u32:
  974     case nir_op_u2u64:
  975         if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
  976             result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
  977         else
  978             result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
  979         break;
  980     case nir_op_i2i8:
  981     case nir_op_i2i16:
  982     case nir_op_i2i32:
  983     case nir_op_i2i64:
  984         if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
  985             result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
  986         else
  987             result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
  988         break;
  989     case nir_op_b32csel:
  990         result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
  991         break;
  992     case nir_op_find_lsb:
  993         result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
  994         break;
  995     case nir_op_ufind_msb:
  996         result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
  997         break;
  998     case nir_op_ifind_msb:
  999         result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
 1000         break;
 1001     case nir_op_uadd_carry:
 1002         result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
 1003         break;
 1004     case nir_op_usub_borrow:
 1005         result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
 1006         break;
 1007     case nir_op_b2f16:
 1008     case nir_op_b2f32:
 1009     case nir_op_b2f64:
 1010         result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
 1011         break;
 1012     case nir_op_f2b32:
 1013         result = emit_f2b(&ctx->ac, src[0]);
 1014         break;
 1015     case nir_op_b2i8:
 1016     case nir_op_b2i16:
 1017     case nir_op_b2i32:
 1018     case nir_op_b2i64:
 1019         result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
 1020         break;
 1021     case nir_op_i2b32:
 1022         result = emit_i2b(&ctx->ac, src[0]);
 1023         break;
 1024     case nir_op_fquantize2f16:
 1025         result = emit_f2f16(&ctx->ac, src[0]);
 1026         break;
 1027     case nir_op_umul_high:
 1028         result = emit_umul_high(&ctx->ac, src[0], src[1]);
 1029         break;
 1030     case nir_op_imul_high:
 1031         result = emit_imul_high(&ctx->ac, src[0], src[1]);
 1032         break;
 1033     case nir_op_pack_half_2x16:
 1034         result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
 1035         break;
 1036     case nir_op_pack_snorm_2x16:
 1037         result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
 1038         break;
 1039     case nir_op_pack_unorm_2x16:
 1040         result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
 1041         break;
 1042     case nir_op_unpack_half_2x16:
 1043         result = emit_unpack_half_2x16(&ctx->ac, src[0]);
 1044         break;
 1045     case nir_op_fddx:
 1046     case nir_op_fddy:
 1047     case nir_op_fddx_fine:
 1048     case nir_op_fddy_fine:
 1049     case nir_op_fddx_coarse:
 1050     case nir_op_fddy_coarse:
 1051         result = emit_ddxy(ctx, instr->op, src[0]);
 1052         break;
 1053 
 1054     case nir_op_unpack_64_2x32_split_x: {
 1055         assert(ac_get_llvm_num_components(src[0]) == 1);
 1056         LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
 1057                             ctx->ac.v2i32,
 1058                             "");
 1059         result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
 1060                          ctx->ac.i32_0, "");
 1061         break;
 1062     }
 1063 
 1064     case nir_op_unpack_64_2x32_split_y: {
 1065         assert(ac_get_llvm_num_components(src[0]) == 1);
 1066         LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
 1067                             ctx->ac.v2i32,
 1068                             "");
 1069         result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
 1070                          ctx->ac.i32_1, "");
 1071         break;
 1072     }
 1073 
 1074     case nir_op_pack_64_2x32_split: {
 1075         LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
 1076         result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
 1077         break;
 1078     }
 1079 
 1080     case nir_op_pack_32_2x16_split: {
 1081         LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
 1082         result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
 1083         break;
 1084     }
 1085 
 1086     case nir_op_unpack_32_2x16_split_x: {
 1087         LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
 1088                             ctx->ac.v2i16,
 1089                             "");
 1090         result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
 1091                          ctx->ac.i32_0, "");
 1092         break;
 1093     }
 1094 
 1095     case nir_op_unpack_32_2x16_split_y: {
 1096         LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
 1097                             ctx->ac.v2i16,
 1098                             "");
 1099         result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
 1100                          ctx->ac.i32_1, "");
 1101         break;
 1102     }
 1103 
 1104     case nir_op_cube_face_coord: {
 1105         src[0] = ac_to_float(&ctx->ac, src[0]);
 1106         LLVMValueRef results[2];
 1107         LLVMValueRef in[3];
 1108         for (unsigned chan = 0; chan < 3; chan++)
 1109             in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
 1110         results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
 1111                         ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
 1112         results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
 1113                         ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
 1114         LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema",
 1115                              ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
 1116         results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
 1117         results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
 1118         LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
 1119         results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
 1120         results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
 1121         result = ac_build_gather_values(&ctx->ac, results, 2);
 1122         break;
 1123     }
 1124 
 1125     case nir_op_cube_face_index: {
 1126         src[0] = ac_to_float(&ctx->ac, src[0]);
 1127         LLVMValueRef in[3];
 1128         for (unsigned chan = 0; chan < 3; chan++)
 1129             in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
 1130         result = ac_build_intrinsic(&ctx->ac,  "llvm.amdgcn.cubeid",
 1131                         ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
 1132         break;
 1133     }
 1134 
 1135     case nir_op_fmin3:
 1136         result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
 1137                         ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
 1138         result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
 1139                         ac_to_float_type(&ctx->ac, def_type), result, src[2]);
 1140         break;
 1141     case nir_op_umin3:
 1142         result = ac_build_umin(&ctx->ac, src[0], src[1]);
 1143         result = ac_build_umin(&ctx->ac, result, src[2]);
 1144         break;
 1145     case nir_op_imin3:
 1146         result = ac_build_imin(&ctx->ac, src[0], src[1]);
 1147         result = ac_build_imin(&ctx->ac, result, src[2]);
 1148         break;
 1149     case nir_op_fmax3:
 1150         result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
 1151                         ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
 1152         result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
 1153                         ac_to_float_type(&ctx->ac, def_type), result, src[2]);
 1154         break;
 1155     case nir_op_umax3:
 1156         result = ac_build_umax(&ctx->ac, src[0], src[1]);
 1157         result = ac_build_umax(&ctx->ac, result, src[2]);
 1158         break;
 1159     case nir_op_imax3:
 1160         result = ac_build_imax(&ctx->ac, src[0], src[1]);
 1161         result = ac_build_imax(&ctx->ac, result, src[2]);
 1162         break;
 1163     case nir_op_fmed3: {
 1164         src[0] = ac_to_float(&ctx->ac, src[0]);
 1165         src[1] = ac_to_float(&ctx->ac, src[1]);
 1166         src[2] = ac_to_float(&ctx->ac, src[2]);
 1167         result = ac_build_fmed3(&ctx->ac, src[0], src[1], src[2],
 1168                     instr->dest.dest.ssa.bit_size);
 1169         break;
 1170     }
 1171     case nir_op_imed3: {
 1172         LLVMValueRef tmp1 = ac_build_imin(&ctx->ac, src[0], src[1]);
 1173         LLVMValueRef tmp2 = ac_build_imax(&ctx->ac, src[0], src[1]);
 1174         tmp2 = ac_build_imin(&ctx->ac, tmp2, src[2]);
 1175         result = ac_build_imax(&ctx->ac, tmp1, tmp2);
 1176         break;
 1177     }
 1178     case nir_op_umed3: {
 1179         LLVMValueRef tmp1 = ac_build_umin(&ctx->ac, src[0], src[1]);
 1180         LLVMValueRef tmp2 = ac_build_umax(&ctx->ac, src[0], src[1]);
 1181         tmp2 = ac_build_umin(&ctx->ac, tmp2, src[2]);
 1182         result = ac_build_umax(&ctx->ac, tmp1, tmp2);
 1183         break;
 1184     }
 1185 
 1186     default:
 1187         fprintf(stderr, "Unknown NIR alu instr: ");
 1188         nir_print_instr(&instr->instr, stderr);
 1189         fprintf(stderr, "\n");
 1190         abort();
 1191     }
 1192 
 1193     if (result) {
 1194         assert(instr->dest.dest.is_ssa);
 1195         result = ac_to_integer_or_pointer(&ctx->ac, result);
 1196         ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
 1197     }
 1198 }
 1199 
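       /* Materialize a NIR load_const as LLVM integer constants (a vector when the
        * def has more than one component). */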
 1200 static void visit_load_const(struct ac_nir_context *ctx,
 1201                              const nir_load_const_instr *instr)
 1202 {
 1203     LLVMValueRef values[4], value = NULL;
 1204     LLVMTypeRef element_type =
 1205         LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
 1206 
 1207     for (unsigned i = 0; i < instr->def.num_components; ++i) {
 1208         switch (instr->def.bit_size) {
 1209         case 8:
 1210             values[i] = LLVMConstInt(element_type,
 1211                                      instr->value[i].u8, false);
 1212             break;
 1213         case 16:
 1214             values[i] = LLVMConstInt(element_type,
 1215                                      instr->value[i].u16, false);
 1216             break;
 1217         case 32:
 1218             values[i] = LLVMConstInt(element_type,
 1219                                      instr->value[i].u32, false);
 1220             break;
 1221         case 64:
 1222             values[i] = LLVMConstInt(element_type,
 1223                                      instr->value[i].u64, false);
 1224             break;
 1225         default:
 1226             fprintf(stderr,
 1227                     "unsupported nir load_const bit_size: %d\n",
 1228                     instr->def.bit_size);
 1229             abort();
 1230         }
 1231     }
 1232     if (instr->def.num_components > 1) {
 1233         value = LLVMConstVector(values, instr->def.num_components);
 1234     } else
 1235         value = values[0];
 1236 
 1237     ctx->ssa_defs[instr->def.index] = value;
 1238 }
 1239 
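       /* Read the buffer size from word 2 of the descriptor; on GFX8 it is converted
        * from bytes to elements when 'in_elements' is set. */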
 1240 static LLVMValueRef
 1241 get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
 1242 {
 1243     LLVMValueRef size =
 1244         LLVMBuildExtractElement(ctx->ac.builder, descriptor,
 1245                     LLVMConstInt(ctx->ac.i32, 2, false), "");
 1246 
 1247     /* GFX8 only */
 1248     if (ctx->ac.chip_class == GFX8 && in_elements) {
 1249         /* On GFX8, the descriptor contains the size in bytes,
 1250          * but TXQ must return the size in elements.
 1251          * The stride is always non-zero for resources using TXQ.
 1252          */
 1253         LLVMValueRef stride =
 1254             LLVMBuildExtractElement(ctx->ac.builder, descriptor,
 1255                         ctx->ac.i32_1, "");
 1256         stride = LLVMBuildLShr(ctx->ac.builder, stride,
 1257                        LLVMConstInt(ctx->ac.i32, 16, false), "");
 1258         stride = LLVMBuildAnd(ctx->ac.builder, stride,
 1259                       LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
 1260 
 1261         size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
 1262     }
 1263     return size;
 1264 }
 1265 
 1266 /* Gather4 should follow the same rules as bilinear filtering, but the hardware
 1267  * incorrectly forces nearest filtering if the texture format is integer.
 1268  * The only effect it has on Gather4, which always returns 4 texels for
 1269  * bilinear filtering, is that the final coordinates are off by 0.5 of
 1270  * the texel size.
 1271  *
 1272  * The workaround is to subtract 0.5 from the unnormalized coordinates,
 1273  * or (0.5 / size) from the normalized coordinates.
 1274  *
 1275  * However, cube textures with 8_8_8_8 data formats require a different
 1276  * workaround of overriding the num format to USCALED/SSCALED. This would lose
 1277  * precision in 32-bit data formats, so it needs to be applied dynamically at
 1278  * runtime. In this case, return an i1 value that indicates whether the
 1279  * descriptor was overridden (and hence a fixup of the sampler result is needed).
 1280  */
 1281 static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
 1282                       nir_variable *var,
 1283                       struct ac_image_args *args,
 1284                       const nir_tex_instr *instr)
 1285 {
 1286     const struct glsl_type *type = glsl_without_array(var->type);
 1287     enum glsl_base_type stype = glsl_get_sampler_result_type(type);
 1288     LLVMValueRef wa_8888 = NULL;
 1289     LLVMValueRef half_texel[2];
 1290     LLVMValueRef result;
 1291 
 1292     assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT);
 1293 
 1294     if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
 1295         LLVMValueRef formats;
 1296         LLVMValueRef data_format;
 1297         LLVMValueRef wa_formats;
 1298 
 1299         formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
 1300 
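              /* DATA_FORMAT is bits [25:20] of dword 1; the workaround only
               * applies to the 8_8_8_8 data format. */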
 1301         data_format = LLVMBuildLShr(ctx->builder, formats,
 1302                         LLVMConstInt(ctx->i32, 20, false), "");
 1303         data_format = LLVMBuildAnd(ctx->builder, data_format,
 1304                        LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
 1305         wa_8888 = LLVMBuildICmp(
 1306             ctx->builder, LLVMIntEQ, data_format,
 1307             LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
 1308             "");
 1309 
 1310         uint32_t wa_num_format =
 1311             stype == GLSL_TYPE_UINT ?
 1312             S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) :
 1313             S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
 1314         wa_formats = LLVMBuildAnd(ctx->builder, formats,
 1315                       LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false),
 1316                       "");
 1317         wa_formats = LLVMBuildOr(ctx->builder, wa_formats,
 1318                     LLVMConstInt(ctx->i32, wa_num_format, false), "");
 1319 
 1320         formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
 1321         args->resource = LLVMBuildInsertElement(
 1322             ctx->builder, args->resource, formats, ctx->i32_1, "");
 1323     }
 1324 
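          /* RECT textures use unnormalized coordinates, so the offset is simply
           * -0.5 texels.  Otherwise query the texture size to compute -0.5 / size
           * (unless the sampler turns out to use unnormalized coordinates at
           * runtime). */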
 1325     if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
 1326         assert(!wa_8888);
 1327         half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
 1328     } else {
 1329         struct ac_image_args resinfo = {};
 1330         LLVMBasicBlockRef bbs[2];
 1331 
 1332         LLVMValueRef unnorm = NULL;
 1333         LLVMValueRef default_offset = ctx->f32_0;
 1334         if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D &&
 1335             !instr->is_array) {
 1336             /* In Vulkan, whether the sampler uses unnormalized
 1337              * coordinates or not is a dynamic property of the
 1338              * sampler. Hence, to figure out whether or not we
 1339              * need to divide by the texture size, we need to test
 1340              * the sampler at runtime. This tests the bit set by
 1341              * radv_init_sampler().
 1342              */
 1343             LLVMValueRef sampler0 =
 1344                 LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
 1345             sampler0 = LLVMBuildLShr(ctx->builder, sampler0,
 1346                          LLVMConstInt(ctx->i32, 15, false), "");
 1347             sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
 1348             unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
 1349             default_offset = LLVMConstReal(ctx->f32, -0.5);
 1350         }
 1351 
 1352         bbs[0] = LLVMGetInsertBlock(ctx->builder);
 1353         if (wa_8888 || unnorm) {
 1354             assert(!(wa_8888 && unnorm));
 1355             LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
 1356             /* Skip the texture size query entirely if we don't need it. */
 1357             ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
 1358             bbs[1] = LLVMGetInsertBlock(ctx->builder);
 1359         }
 1360 
 1361         /* Query the texture size. */
 1362         resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
 1363         resinfo.opcode = ac_image_get_resinfo;
 1364         resinfo.dmask = 0xf;
 1365         resinfo.lod = ctx->i32_0;
 1366         resinfo.resource = args->resource;
 1367         resinfo.attributes = AC_FUNC_ATTR_READNONE;
 1368         LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);
 1369 
 1370         /* Compute -0.5 / size. */
 1371         for (unsigned c = 0; c < 2; c++) {
 1372             half_texel[c] =
 1373                 LLVMBuildExtractElement(ctx->builder, size,
 1374                             LLVMConstInt(ctx->i32, c, 0), "");
 1375             half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
 1376             half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
 1377             half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
 1378                               LLVMConstReal(ctx->f32, -0.5), "");
 1379         }
 1380 
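              /* If the size query was wrapped in a conditional, merge the
               * skipped-path default offset with the computed one via a phi. */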
 1381         if (wa_8888 || unnorm) {
 1382             ac_build_endif(ctx, 2000);
 1383 
 1384             for (unsigned c = 0; c < 2; c++) {
 1385                 LLVMValueRef values[2] = { default_offset, half_texel[c] };
 1386                 half_texel[c] = ac_build_phi(ctx, ctx->f32, 2,
 1387                                  values, bbs);
 1388             }
 1389         }
 1390     }
 1391 
 1392     for (unsigned c = 0; c < 2; c++) {
 1393         LLVMValueRef tmp;
 1394         tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
 1395         args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
 1396     }
 1397 
 1398     args->attributes = AC_FUNC_ATTR_READNONE;
 1399     result = ac_build_image_opcode(ctx, args);
 1400 
 1401     if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
 1402         LLVMValueRef tmp, tmp2;
 1403 
 1404         /* if the cube workaround is in place, f2i the result. */
 1405         for (unsigned c = 0; c < 4; c++) {
 1406             tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
 1407             if (stype == GLSL_TYPE_UINT)
 1408                 tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
 1409             else
 1410                 tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
 1411             tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
 1412             tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
 1413             tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
 1414             tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
 1415             result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
 1416         }
 1417     }
 1418     return result;
 1419 }
 1420 
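      /* Return the texture deref source of a tex instruction, if present. */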
 1421 static nir_deref_instr *get_tex_texture_deref(const nir_tex_instr *instr)
 1422 {
 1423     nir_deref_instr *texture_deref_instr = NULL;
 1424 
 1425     for (unsigned i = 0; i < instr->num_srcs; i++) {
 1426         switch (instr->src[i].src_type) {
 1427         case nir_tex_src_texture_deref:
 1428             texture_deref_instr = nir_src_as_deref(instr->src[i].src);
 1429             break;
 1430         default:
 1431             break;
 1432         }
 1433     }
 1434     return texture_deref_instr;
 1435 }
 1436 
 1437 static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 1438                     const nir_tex_instr *instr,
 1439                     struct ac_image_args *args)
 1440 {
 1441     if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
 1442         unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
 1443 
 1444         return ac_build_buffer_load_format(&ctx->ac,
 1445                                        args->resource,
 1446                                        args->coords[0],
 1447                                        ctx->ac.i32_0,
 1448                                        util_last_bit(mask),
 1449                                        0, true);
 1450     }
 1451 
 1452     args->opcode = ac_image_sample;
 1453 
 1454     switch (instr->op) {
 1455     case nir_texop_txf:
 1456     case nir_texop_txf_ms:
 1457     case nir_texop_samples_identical:
 1458         args->opcode = args->level_zero ||
 1459                    instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
 1460                     ac_image_load : ac_image_load_mip;
 1461         args->level_zero = false;
 1462         break;
 1463     case nir_texop_txs:
 1464     case nir_texop_query_levels:
 1465         args->opcode = ac_image_get_resinfo;
 1466         if (!args->lod)
 1467             args->lod = ctx->ac.i32_0;
 1468         args->level_zero = false;
 1469         break;
 1470     case nir_texop_tex:
 1471         if (ctx->stage != MESA_SHADER_FRAGMENT) {
 1472             assert(!args->lod);
 1473             args->level_zero = true;
 1474         }
 1475         break;
 1476     case nir_texop_tg4:
 1477         args->opcode = ac_image_gather4;
 1478         args->level_zero = true;
 1479         break;
 1480     case nir_texop_lod:
 1481         args->opcode = ac_image_get_lod;
 1482         break;
 1483     case nir_texop_fragment_fetch:
 1484     case nir_texop_fragment_mask_fetch:
 1485         args->opcode = ac_image_load;
 1486         args->level_zero = false;
 1487         break;
 1488     default:
 1489         break;
 1490     }
 1491 
 1492     if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) {
 1493         nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
 1494         nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr);
 1495         const struct glsl_type *type = glsl_without_array(var->type);
 1496         enum glsl_base_type stype = glsl_get_sampler_result_type(type);
 1497         if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
 1498             return lower_gather4_integer(&ctx->ac, var, args, instr);
 1499         }
 1500     }
 1501 
 1502     /* Fixup for GFX9 which allocates 1D textures as 2D. */
 1503     if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) {
 1504         if ((args->dim == ac_image_2darray ||
 1505              args->dim == ac_image_2d) && !args->coords[1]) {
 1506             args->coords[1] = ctx->ac.i32_0;
 1507         }
 1508     }
 1509 
 1510     args->attributes = AC_FUNC_ATTR_READNONE;
 1511     bool cs_derivs = ctx->stage == MESA_SHADER_COMPUTE &&
 1512              ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
 1513     if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
 1514         /* Prevent texture instructions with implicit derivatives from being
 1515          * sunk into branches. */
 1516         switch (instr->op) {
 1517         case nir_texop_tex:
 1518         case nir_texop_txb:
 1519         case nir_texop_lod:
 1520             args->attributes |= AC_FUNC_ATTR_CONVERGENT;
 1521             break;
 1522         default:
 1523             break;
 1524         }
 1525     }
 1526 
 1527     return ac_build_image_opcode(&ctx->ac, args);
 1528 }
 1529 
 1530 static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx,
 1531                                                   nir_intrinsic_instr *instr)
 1532 {
 1533     LLVMValueRef ptr = get_src(ctx, instr->src[0]);
 1534     LLVMValueRef index = get_src(ctx, instr->src[1]);
 1535 
 1536     LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
 1537     LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
 1538     return result;
 1539 }
 1540 
 1541 static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
 1542                                              nir_intrinsic_instr *instr)
 1543 {
 1544     LLVMValueRef ptr, addr;
 1545     LLVMValueRef src0 = get_src(ctx, instr->src[0]);
 1546     unsigned index = nir_intrinsic_base(instr);
 1547 
 1548     addr = LLVMConstInt(ctx->ac.i32, index, 0);
 1549     addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
 1550 
 1551     /* Load constant values from user SGPRs when possible, otherwise
 1552      * fall back to the default path that loads directly from memory.
 1553      */
 1554     if (LLVMIsConstant(src0) &&
 1555         instr->dest.ssa.bit_size == 32) {
 1556         unsigned count = instr->dest.ssa.num_components;
 1557         unsigned offset = index;
 1558 
 1559         offset += LLVMConstIntGetZExtValue(src0);
 1560         offset /= 4;
 1561 
 1562         offset -= ctx->args->base_inline_push_consts;
 1563 
 1564         unsigned num_inline_push_consts = ctx->args->num_inline_push_consts;
 1565         if (offset + count <= num_inline_push_consts) {
 1566             LLVMValueRef push_constants[num_inline_push_consts];
 1567             for (unsigned i = 0; i < num_inline_push_consts; i++)
 1568                 push_constants[i] = ac_get_arg(&ctx->ac,
 1569                                    ctx->args->inline_push_consts[i]);
 1570             return ac_build_gather_values(&ctx->ac,
 1571                               push_constants + offset,
 1572                               count);
 1573         }
 1574     }
 1575 
 1576     ptr = LLVMBuildGEP(ctx->ac.builder,
 1577                ac_get_arg(&ctx->ac, ctx->args->push_constants), &addr, 1, "");
 1578 
 1579     if (instr->dest.ssa.bit_size == 8) {
 1580         unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
 1581         LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords);
 1582         ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
 1583         LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
 1584 
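              /* Shift the loaded dword pair right by the byte offset with
               * v_alignbyte, so the requested bytes land in the low bits of
               * the result. */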
 1585         LLVMValueRef params[3];
 1586         if (load_dwords > 1) {
 1587             LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), "");
 1588             params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
 1589             params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
 1590         } else {
 1591             res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
 1592             params[0] = ctx->ac.i32_0;
 1593             params[1] = res;
 1594         }
 1595         params[2] = addr;
 1596         res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);
 1597 
 1598         res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
 1599         if (instr->dest.ssa.num_components > 1)
 1600             res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), "");
 1601         return res;
 1602     } else if (instr->dest.ssa.bit_size == 16) {
 1603         unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
 1604         LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
 1605         ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
 1606         LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
 1607         res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
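              /* cond is bit 1 of the byte address: when set, pick the shuffle
               * that starts one 16-bit element later. */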
 1608         LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
 1609         cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
 1610         LLVMValueRef mask[] = { LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
 1611                     LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
 1612                     LLVMConstInt(ctx->ac.i32, 4, false)};
 1613         LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
 1614         LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
 1615         LLVMValueRef shuffle_aligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
 1616         LLVMValueRef shuffle_unaligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
 1617         res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
 1618         return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
 1619     }
 1620 
 1621     ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));
 1622 
 1623     return LLVMBuildLoad(ctx->ac.builder, ptr, "");
 1624 }
 1625 
 1626 static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
 1627                                           const nir_intrinsic_instr *instr)
 1628 {
 1629     LLVMValueRef index = get_src(ctx, instr->src[0]);
 1630 
 1631     return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);
 1632 }
 1633 
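      /* Expand every set bit i of 'mask' into 'multiplier' consecutive set bits
       * starting at bit i * multiplier, e.g. widen_mask(0x5, 2) == 0x33.  Used
       * to widen a writemask when 64-bit components are split into 32-bit ones.
       */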
 1634 static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
 1635 {
 1636     uint32_t new_mask = 0;
 1637     for(unsigned i = 0; i < 32 && (1u << i) <= mask; ++i)
 1638         if (mask & (1u << i))
 1639             new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
 1640     return new_mask;
 1641 }
 1642 
 1643 static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
 1644                                          unsigned start, unsigned count)
 1645 {
 1646     LLVMValueRef mask[] = {
 1647     ctx->i32_0, ctx->i32_1,
 1648     LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false) };
 1649 
 1650     unsigned src_elements = ac_get_llvm_num_components(src);
 1651 
 1652     if (count == src_elements) {
 1653         assert(start == 0);
 1654         return src;
 1655     } else if (count == 1) {
 1656         assert(start < src_elements);
 1657         return LLVMBuildExtractElement(ctx->builder, src, mask[start],  "");
 1658     } else {
 1659         assert(start + count <= src_elements);
 1660         assert(count <= 4);
 1661         LLVMValueRef swizzle = LLVMConstVector(&mask[start], count);
 1662         return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
 1663     }
 1664 }
 1665 
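      /* Translate NIR access qualifiers into the GLC/SLC cache-policy bits
       * used by the buffer/image intrinsics. */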
 1666 static unsigned get_cache_policy(struct ac_nir_context *ctx,
 1667                  enum gl_access_qualifier access,
 1668                  bool may_store_unaligned,
 1669                  bool writeonly_memory)
 1670 {
 1671     unsigned cache_policy = 0;
 1672 
 1673     /* GFX6 has a TC L1 bug causing corruption of 8-bit/16-bit stores.  All
 1674      * store opcodes not aligned to a dword are affected. The only way to
 1675      * get unaligned stores is through shader images.
 1676      */
 1677     if (((may_store_unaligned && ctx->ac.chip_class == GFX6) ||
 1678          /* If this is write-only, don't keep data in L1 to prevent
 1679           * evicting L1 cache lines that may be needed by other
 1680           * instructions.
 1681           */
 1682          writeonly_memory ||
 1683          access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
 1684         cache_policy |= ac_glc;
 1685     }
 1686 
 1687     if (access & ACCESS_STREAM_CACHE_POLICY)
 1688         cache_policy |= ac_slc;
 1689 
 1690     return cache_policy;
 1691 }
 1692 
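      /* Start a waterfall loop over the SSBO index if the access is flagged
       * non-uniform, so the descriptor index is uniform inside the loop. */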
 1693 static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx,
 1694                      struct waterfall_context *wctx,
 1695                      const nir_intrinsic_instr *instr,
 1696                      nir_src src)
 1697 {
 1698     return enter_waterfall(ctx, wctx, get_src(ctx, src),
 1699                    nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
 1700 }
 1701 
 1702 static void visit_store_ssbo(struct ac_nir_context *ctx,
 1703                              nir_intrinsic_instr *instr)
 1704 {
 1705     if (ctx->ac.postponed_kill) {
 1706         LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
 1707                                                   ctx->ac.postponed_kill, "");
 1708         ac_build_ifcc(&ctx->ac, cond, 7000);
 1709     }
 1710 
 1711     LLVMValueRef src_data = get_src(ctx, instr->src[0]);
 1712     int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
 1713     unsigned writemask = nir_intrinsic_write_mask(instr);
 1714     enum gl_access_qualifier access = nir_intrinsic_access(instr);
 1715     bool writeonly_memory = access & ACCESS_NON_READABLE;
 1716     unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
 1717 
 1718     struct waterfall_context wctx;
 1719     LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
 1720 
 1721     LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true);
 1722     LLVMValueRef base_data = src_data;
 1723     base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
 1724     LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
 1725 
 1726     while (writemask) {
 1727         int start, count;
 1728         LLVMValueRef data, offset;
 1729         LLVMTypeRef data_type;
 1730 
 1731         u_bit_scan_consecutive_range(&writemask, &start, &count);
 1732 
 1733         /* Due to a limitation in LLVM < 9, split 3-element
 1734          * writes into a 2-element and a 1-element write. */
 1735         if (count == 3 &&
 1736             (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) {
 1737             writemask |= 1 << (start + 2);
 1738             count = 2;
 1739         }
 1740         int num_bytes = count * elem_size_bytes; /* count in bytes */
 1741 
 1742         /* We can only store 4 dwords at a time;
 1743          * more than that can only happen for 64-bit vectors. */
 1744         if (num_bytes > 16) {
 1745             writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
 1746             count = 2;
 1747             num_bytes = 16;
 1748         }
 1749 
 1750         /* Check the alignment of 16-bit stores. */
 1751         if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
 1752             writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
 1753             count = 1;
 1754             num_bytes = 2;
 1755         }
 1756 
 1757         /* Due to alignment issues, split stores of 8-bit/16-bit
 1758          * vectors.
 1759          */
 1760         if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
 1761             writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
 1762             count = 1;
 1763             num_bytes = elem_size_bytes;
 1764         }
 1765 
 1766         data = extract_vector_range(&ctx->ac, base_data, start, count);
 1767 
 1768         offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
 1769                       LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
 1770 
 1771         if (num_bytes == 1) {
 1772             ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data,
 1773                             offset, ctx->ac.i32_0,
 1774                             cache_policy);
 1775         } else if (num_bytes == 2) {
 1776             ac_build_tbuffer_store_short(&ctx->ac, rsrc, data,
 1777                              offset, ctx->ac.i32_0,
 1778                              cache_policy);
 1779         } else {
 1780             int num_channels = num_bytes / 4;
 1781 
 1782             switch (num_bytes) {
 1783             case 16: /* v4f32 */
 1784                 data_type = ctx->ac.v4f32;
 1785                 break;
 1786             case 12: /* v3f32 */
 1787                 data_type = ctx->ac.v3f32;
 1788                 break;
 1789             case 8: /* v2f32 */
 1790                 data_type = ctx->ac.v2f32;
 1791                 break;
 1792             case 4: /* f32 */
 1793                 data_type = ctx->ac.f32;
 1794                 break;
 1795             default:
 1796                 unreachable("Malformed vector store.");
 1797             }
 1798             data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
 1799 
 1800             ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
 1801                             num_channels, offset,
 1802                             ctx->ac.i32_0, 0,
 1803                             cache_policy);
 1804         }
 1805     }
 1806 
 1807     exit_waterfall(ctx, &wctx, NULL);
 1808 
 1809     if (ctx->ac.postponed_kill)
 1810         ac_build_endif(&ctx->ac, 7000);
 1811 }
 1812 
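      /* Emulate a 64-bit SSBO compare-and-swap by reconstructing a global
       * address from the descriptor's base pointer plus the offset and issuing
       * a global atomic cmpxchg.  With robust buffer access, the atomic is
       * skipped and 0 is returned when the offset is out of bounds.
       */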
 1813 static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
 1814                                            LLVMValueRef descriptor,
 1815                        LLVMValueRef offset,
 1816                        LLVMValueRef compare,
 1817                        LLVMValueRef exchange)
 1818 {
 1819     LLVMBasicBlockRef start_block = NULL, then_block = NULL;
 1820     if (ctx->abi->robust_buffer_access) {
 1821         LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
 1822 
 1823         LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
 1824         start_block = LLVMGetInsertBlock(ctx->ac.builder);
 1825 
 1826         ac_build_ifcc(&ctx->ac, cond, -1);
 1827 
 1828         then_block = LLVMGetInsertBlock(ctx->ac.builder);
 1829     }
 1830 
 1831     LLVMValueRef ptr_parts[2] = {
 1832         ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
 1833         LLVMBuildAnd(ctx->ac.builder,
 1834                      ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
 1835                      LLVMConstInt(ctx->ac.i32, 65535, 0), "")
 1836     };
 1837 
 1838     ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
 1839     ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, "");
 1840 
 1841     offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");
 1842 
 1843     LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
 1844     ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
 1845     ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
 1846     ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL), "");
 1847 
 1848     LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
 1849     result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
 1850 
 1851     if (ctx->abi->robust_buffer_access) {
 1852         ac_build_endif(&ctx->ac, -1);
 1853 
 1854         LLVMBasicBlockRef incoming_blocks[2] = {
 1855             start_block,
 1856             then_block,
 1857         };
 1858 
 1859         LLVMValueRef incoming_values[2] = {
 1860             LLVMConstInt(ctx->ac.i64, 0, 0),
 1861             result,
 1862         };
 1863         LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
 1864         LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
 1865         return ret;
 1866     } else {
 1867         return result;
 1868     }
 1869 }
 1870 
 1871 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
 1872                                       nir_intrinsic_instr *instr)
 1873 {
 1874     if (ctx->ac.postponed_kill) {
 1875         LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
 1876                                                   ctx->ac.postponed_kill, "");
 1877         ac_build_ifcc(&ctx->ac, cond, 7001);
 1878     }
 1879 
 1880     LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
 1881     const char *op;
 1882     char name[64], type[8];
 1883     LLVMValueRef params[6], descriptor;
 1884     LLVMValueRef result;
 1885     int arg_count = 0;
 1886 
 1887     struct waterfall_context wctx;
 1888     LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
 1889 
 1890     switch (instr->intrinsic) {
 1891     case nir_intrinsic_ssbo_atomic_add:
 1892         op = "add";
 1893         break;
 1894     case nir_intrinsic_ssbo_atomic_imin:
 1895         op = "smin";
 1896         break;
 1897     case nir_intrinsic_ssbo_atomic_umin:
 1898         op = "umin";
 1899         break;
 1900     case nir_intrinsic_ssbo_atomic_imax:
 1901         op = "smax";
 1902         break;
 1903     case nir_intrinsic_ssbo_atomic_umax:
 1904         op = "umax";
 1905         break;
 1906     case nir_intrinsic_ssbo_atomic_and:
 1907         op = "and";
 1908         break;
 1909     case nir_intrinsic_ssbo_atomic_or:
 1910         op = "or";
 1911         break;
 1912     case nir_intrinsic_ssbo_atomic_xor:
 1913         op = "xor";
 1914         break;
 1915     case nir_intrinsic_ssbo_atomic_exchange:
 1916         op = "swap";
 1917         break;
 1918     case nir_intrinsic_ssbo_atomic_comp_swap:
 1919         op = "cmpswap";
 1920         break;
 1921     default:
 1922         abort();
 1923     }
 1924 
 1925     descriptor = ctx->abi->load_ssbo(ctx->abi,
 1926                                      rsrc_base,
 1927                                      true);
 1928 
 1929     if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap &&
 1930         return_type == ctx->ac.i64) {
 1931         result = emit_ssbo_comp_swap_64(ctx, descriptor,
 1932                             get_src(ctx, instr->src[1]),
 1933                             get_src(ctx, instr->src[2]),
 1934                             get_src(ctx, instr->src[3]));
 1935     } else {
 1936         if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
 1937             params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
 1938         }
 1939         params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
 1940         params[arg_count++] = descriptor;
 1941 
 1942         if (LLVM_VERSION_MAJOR >= 9) {
 1943             /* XXX: The new raw/struct atomic intrinsics are buggy with
 1944              * LLVM 8, see r358579.
 1945              */
 1946             params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
 1947             params[arg_count++] = ctx->ac.i32_0; /* soffset */
 1948             params[arg_count++] = ctx->ac.i32_0; /* slc */
 1949 
 1950             ac_build_type_name_for_intr(return_type, type, sizeof(type));
 1951             snprintf(name, sizeof(name),
 1952                      "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
 1953         } else {
 1954             params[arg_count++] = ctx->ac.i32_0; /* vindex */
 1955             params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
 1956             params[arg_count++] = ctx->ac.i1false; /* slc */
 1957 
 1958             assert(return_type == ctx->ac.i32);
 1959             snprintf(name, sizeof(name),
 1960                      "llvm.amdgcn.buffer.atomic.%s", op);
 1961         }
 1962 
 1963         result = ac_build_intrinsic(&ctx->ac, name, return_type, params,
 1964                                     arg_count, 0);
 1965     }
 1966 
 1967     result = exit_waterfall(ctx, &wctx, result);
 1968     if (ctx->ac.postponed_kill)
 1969         ac_build_endif(&ctx->ac, 7001);
 1970     return result;
 1971 }
 1972 
 1973 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
 1974                                       nir_intrinsic_instr *instr)
 1975 {
 1976     struct waterfall_context wctx;
 1977     LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
 1978 
 1979     int elem_size_bytes = instr->dest.ssa.bit_size / 8;
 1980     int num_components = instr->num_components;
 1981     enum gl_access_qualifier access = nir_intrinsic_access(instr);
 1982     unsigned cache_policy = get_cache_policy(ctx, access, false, false);
 1983 
 1984     LLVMValueRef offset = get_src(ctx, instr->src[1]);
 1985     LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false);
 1986     LLVMValueRef vindex = ctx->ac.i32_0;
 1987 
 1988     LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
 1989     LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
 1990 
 1991     LLVMValueRef results[4];
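          /* Split the load into chunks of at most 16 bytes, falling back to
           * single elements when sub-dword values are not dword-aligned. */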
 1992     for (int i = 0; i < num_components;) {
 1993         int num_elems = num_components - i;
 1994         if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
 1995             num_elems = 1;
 1996         if (num_elems * elem_size_bytes > 16)
 1997             num_elems = 16 / elem_size_bytes;
 1998         int load_bytes = num_elems * elem_size_bytes;
 1999 
 2000         LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
 2001 
 2002         LLVMValueRef ret;
 2003 
 2004         if (load_bytes == 1) {
 2005             ret = ac_build_tbuffer_load_byte(&ctx->ac,
 2006                               rsrc,
 2007                               offset,
 2008                               ctx->ac.i32_0,
 2009                               immoffset,
 2010                               cache_policy);
 2011         } else if (load_bytes == 2) {
 2012             ret = ac_build_tbuffer_load_short(&ctx->ac,
 2013                              rsrc,
 2014                              offset,
 2015                              ctx->ac.i32_0,
 2016                              immoffset,
 2017                              cache_policy);
 2018         } else {
 2019             int num_channels = util_next_power_of_two(load_bytes) / 4;
 2020             bool can_speculate = access & ACCESS_CAN_REORDER;
 2021 
 2022             ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels,
 2023                            vindex, offset, immoffset, 0,
 2024                            cache_policy, can_speculate, false);
 2025         }
 2026 
 2027         LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
 2028         ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
 2029         ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
 2030 
 2031         LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
 2032         ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
 2033 
 2034         for (unsigned j = 0; j < num_elems; j++) {
 2035             results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
 2036         }
 2037         i += num_elems;
 2038     }
 2039 
 2040     LLVMValueRef ret =  ac_build_gather_values(&ctx->ac, results, num_components);
 2041     return exit_waterfall(ctx, &wctx, ret);
 2042 }
 2043 
 2044 static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx,
 2045                     struct waterfall_context *wctx,
 2046                     const nir_intrinsic_instr *instr)
 2047 {
 2048     return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),
 2049                    nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
 2050 }
 2051 
 2052 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
 2053                                           nir_intrinsic_instr *instr)
 2054 {
 2055     struct waterfall_context wctx;
 2056     LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);
 2057 
 2058     LLVMValueRef ret;
 2059     LLVMValueRef rsrc = rsrc_base;
 2060     LLVMValueRef offset = get_src(ctx, instr->src[1]);
 2061     int num_components = instr->num_components;
 2062 
 2063     if (ctx->abi->load_ubo)
 2064         rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
 2065 
 2066     if (instr->dest.ssa.bit_size == 64)
 2067         num_components *= 2;
 2068 
 2069     if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) {
 2070         unsigned load_bytes = instr->dest.ssa.bit_size / 8;
 2071         LLVMValueRef results[num_components];
 2072         for (unsigned i = 0; i < num_components; ++i) {
 2073             LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32,
 2074                                   load_bytes * i, 0);
 2075 
 2076             if (load_bytes == 1) {
 2077                 results[i] = ac_build_tbuffer_load_byte(&ctx->ac,
 2078                                     rsrc,
 2079                                     offset,
 2080                                     ctx->ac.i32_0,
 2081                                     immoffset,
 2082                                     0);
 2083             } else {
 2084                 assert(load_bytes == 2);
 2085                 results[i] = ac_build_tbuffer_load_short(&ctx->ac,
 2086                                      rsrc,
 2087                                      offset,
 2088                                      ctx->ac.i32_0,
 2089                                      immoffset,
 2090                                      0);
 2091             }
 2092         }
 2093         ret = ac_build_gather_values(&ctx->ac, results, num_components);
 2094     } else {
 2095         ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
 2096                        NULL, 0, 0, true, true);
 2097 
 2098         ret = ac_trim_vector(&ctx->ac, ret, num_components);
 2099     }
 2100 
 2101     ret = LLVMBuildBitCast(ctx->ac.builder, ret,
 2102                             get_def_type(ctx, &instr->dest.ssa), "");
 2103 
 2104     return exit_waterfall(ctx, &wctx, ret);
 2105 }
 2106 
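      /* Walk an input/output deref chain and split the offset (counted in
       * attribute slots) into a constant part and an optional indirect LLVM
       * value; optionally also return the outer per-vertex array index.
       */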
 2107 static void
 2108 get_deref_offset(struct ac_nir_context *ctx, nir_deref_instr *instr,
 2109                  bool vs_in, unsigned *vertex_index_out,
 2110                  LLVMValueRef *vertex_index_ref,
 2111                  unsigned *const_out, LLVMValueRef *indir_out)
 2112 {
 2113     nir_variable *var = nir_deref_instr_get_variable(instr);
 2114     nir_deref_path path;
 2115     unsigned idx_lvl = 1;
 2116 
 2117     nir_deref_path_init(&path, instr, NULL);
 2118 
 2119     if (vertex_index_out != NULL || vertex_index_ref != NULL) {
 2120         if (vertex_index_ref) {
 2121             *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index);
 2122             if (vertex_index_out)
 2123                 *vertex_index_out = 0;
 2124         } else {
 2125             *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
 2126         }
 2127         ++idx_lvl;
 2128     }
 2129 
 2130     uint32_t const_offset = 0;
 2131     LLVMValueRef offset = NULL;
 2132 
 2133     if (var->data.compact) {
 2134         assert(instr->deref_type == nir_deref_type_array);
 2135         const_offset = nir_src_as_uint(instr->arr.index);
 2136         goto out;
 2137     }
 2138 
 2139     for (; path.path[idx_lvl]; ++idx_lvl) {
 2140         const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
 2141         if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
 2142             unsigned index = path.path[idx_lvl]->strct.index;
 2143 
 2144             for (unsigned i = 0; i < index; i++) {
 2145                 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
 2146                 const_offset += glsl_count_attribute_slots(ft, vs_in);
 2147             }
 2148         } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
 2149             unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
 2150             if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
 2151                 const_offset += size *
 2152                     nir_src_as_uint(path.path[idx_lvl]->arr.index);
 2153             } else {
 2154                 LLVMValueRef array_off = LLVMBuildMul(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, size, 0),
 2155                                       get_src(ctx, path.path[idx_lvl]->arr.index), "");
 2156                 if (offset)
 2157                     offset = LLVMBuildAdd(ctx->ac.builder, offset, array_off, "");
 2158                 else
 2159                     offset = array_off;
 2160             }
 2161         } else
 2162             unreachable("Unhandled deref type in get_deref_offset");
 2163     }
 2164 
 2165 out:
 2166     nir_deref_path_finish(&path);
 2167 
 2168     if (const_offset && offset)
 2169         offset = LLVMBuildAdd(ctx->ac.builder, offset,
 2170                       LLVMConstInt(ctx->ac.i32, const_offset, 0),
 2171                       "");
 2172 
 2173     *const_out = const_offset;
 2174     *indir_out = offset;
 2175 }
 2176 
 2177 static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
 2178                        nir_intrinsic_instr *instr,
 2179                        bool load_inputs)
 2180 {
 2181     LLVMValueRef result;
 2182     LLVMValueRef vertex_index = NULL;
 2183     LLVMValueRef indir_index = NULL;
 2184     unsigned const_index = 0;
 2185 
 2186     nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
 2187 
 2188     unsigned location = var->data.location;
 2189     unsigned driver_location = var->data.driver_location;
 2190     const bool is_patch = var->data.patch ||
 2191                   var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
 2192                   var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
 2193     const bool is_compact = var->data.compact;
 2194 
 2195     get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
 2196                      false, NULL, is_patch ? NULL : &vertex_index,
 2197                      &const_index, &indir_index);
 2198 
 2199     LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
 2200 
 2201     LLVMTypeRef src_component_type;
 2202     if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
 2203         src_component_type = LLVMGetElementType(dest_type);
 2204     else
 2205         src_component_type = dest_type;
 2206 
 2207     result = ctx->abi->load_tess_varyings(ctx->abi, src_component_type,
 2208                           vertex_index, indir_index,
 2209                           const_index, location, driver_location,
 2210                           var->data.location_frac,
 2211                           instr->num_components,
 2212                           is_patch, is_compact, load_inputs);
 2213     if (instr->dest.ssa.bit_size == 16) {
 2214         result = ac_to_integer(&ctx->ac, result);
 2215         result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
 2216     }
 2217     return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
 2218 }
 2219 
 2220 static unsigned
 2221 type_scalar_size_bytes(const struct glsl_type *type)
 2222 {
 2223     assert(glsl_type_is_vector_or_scalar(type) ||
 2224            glsl_type_is_matrix(type));
 2225     return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
 2226 }
 2227 
 2228 static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 2229                    nir_intrinsic_instr *instr)
 2230 {
 2231     nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
 2232     nir_variable *var = nir_deref_instr_get_variable(deref);
 2233 
 2234     LLVMValueRef values[8];
 2235     int idx = 0;
 2236     int ve = instr->dest.ssa.num_components;
 2237     unsigned comp = 0;
 2238     LLVMValueRef indir_index;
 2239     LLVMValueRef ret;
 2240     unsigned const_index;
 2241     unsigned stride = 4;
 2242     int mode = deref->mode;
 2243 
 2244     if (var) {
 2245         bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
 2246             var->data.mode == nir_var_shader_in;
 2247         idx = var->data.driver_location;
 2248         comp = var->data.location_frac;
 2249         mode = var->data.mode;
 2250 
 2251         get_deref_offset(ctx, deref, vs_in, NULL, NULL,
 2252                  &const_index, &indir_index);
 2253 
 2254         if (var->data.compact) {
 2255             stride = 1;
 2256             const_index += comp;
 2257             comp = 0;
 2258         }
 2259     }
 2260 
 2261     if (instr->dest.ssa.bit_size == 64 &&
 2262         (deref->mode == nir_var_shader_in ||
 2263          deref->mode == nir_var_shader_out ||
 2264          deref->mode == nir_var_function_temp))
 2265         ve *= 2;
 2266 
 2267     switch (mode) {
 2268     case nir_var_shader_in:
 2269         if (ctx->stage == MESA_SHADER_TESS_CTRL ||
 2270             ctx->stage == MESA_SHADER_TESS_EVAL) {
 2271             return load_tess_varyings(ctx, instr, true);
 2272         }
 2273 
 2274         if (ctx->stage == MESA_SHADER_GEOMETRY) {
 2275             LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
 2276             LLVMValueRef indir_index;
 2277             unsigned const_index, vertex_index;
 2278             get_deref_offset(ctx, deref, false, &vertex_index, NULL,
 2279                              &const_index, &indir_index);
 2280             assert(indir_index == NULL);
 2281 
 2282             return ctx->abi->load_inputs(ctx->abi, var->data.location,
 2283                              var->data.driver_location,
 2284                              var->data.location_frac,
 2285                              instr->num_components, vertex_index, const_index, type);
 2286         }
 2287 
 2288         for (unsigned chan = comp; chan < ve + comp; chan++) {
 2289             if (indir_index) {
 2290                 unsigned count = glsl_count_attribute_slots(
 2291                         var->type,
 2292                         ctx->stage == MESA_SHADER_VERTEX);
 2293                 count -= chan / 4;
 2294                 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 2295                         &ctx->ac, ctx->abi->inputs + idx + chan, count,
 2296                         stride, false, true);
 2297 
 2298                 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
 2299                                        tmp_vec,
 2300                                        indir_index, "");
 2301             } else
 2302                 values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
 2303         }
 2304         break;
 2305     case nir_var_function_temp:
 2306         for (unsigned chan = 0; chan < ve; chan++) {
 2307             if (indir_index) {
 2308                 unsigned count = glsl_count_attribute_slots(
 2309                     var->type, false);
 2310                 count -= chan / 4;
 2311                 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 2312                         &ctx->ac, ctx->locals + idx + chan, count,
 2313                         stride, true, true);
 2314 
 2315                 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
 2316                                        tmp_vec,
 2317                                        indir_index, "");
 2318             } else {
 2319                 values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
 2320             }
 2321         }
 2322         break;
 2323     case nir_var_shader_out:
 2324         if (ctx->stage == MESA_SHADER_TESS_CTRL) {
 2325             return load_tess_varyings(ctx, instr, false);
 2326         }
 2327 
 2328         if (ctx->stage == MESA_SHADER_FRAGMENT &&
 2329             var->data.fb_fetch_output &&
 2330             ctx->abi->emit_fbfetch)
 2331             return ctx->abi->emit_fbfetch(ctx->abi);
 2332 
 2333         for (unsigned chan = comp; chan < ve + comp; chan++) {
 2334             if (indir_index) {
 2335                 unsigned count = glsl_count_attribute_slots(
 2336                         var->type, false);
 2337                 count -= chan / 4;
 2338                 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 2339                         &ctx->ac, ctx->abi->outputs + idx + chan, count,
 2340                         stride, true, true);
 2341 
 2342                 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
 2343                                        tmp_vec,
 2344                                        indir_index, "");
 2345             } else {
 2346                 values[chan] = LLVMBuildLoad(ctx->ac.builder,
 2347                              ctx->abi->outputs[idx + chan + const_index * stride],
 2348                              "");
 2349             }
 2350         }
 2351         break;
 2352     case nir_var_mem_global:  {
 2353         LLVMValueRef address = get_src(ctx, instr->src[0]);
 2354         LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
 2355         unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
 2356         unsigned natural_stride = type_scalar_size_bytes(deref->type);
 2357         unsigned stride = explicit_stride ? explicit_stride : natural_stride;
 2358         int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8;
 2359         bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
 2360 
 2361         if (stride != natural_stride || split_loads) {
 2362             if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
 2363                 result_type = LLVMGetElementType(result_type);
 2364 
 2365             LLVMTypeRef ptr_type = LLVMPointerType(result_type,
 2366                                    LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
 2367             address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
 2368 
 2369             for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
 2370                 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
 2371                 values[i] = LLVMBuildLoad(ctx->ac.builder,
 2372                                           ac_build_gep_ptr(&ctx->ac, address, offset), "");
 2373             }
 2374             return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
 2375         } else {
 2376             LLVMTypeRef ptr_type =  LLVMPointerType(result_type,
 2377                                                     LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
 2378             address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
 2379             LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
 2380             return val;
 2381         }
 2382     }
 2383     default:
 2384         unreachable("unhandled variable mode");
 2385     }
 2386     ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
 2387     return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
 2388 }
 2389 
 2390 static void
 2391 visit_store_var(struct ac_nir_context *ctx,
 2392         nir_intrinsic_instr *instr)
 2393 {
 2394     if (ctx->ac.postponed_kill) {
 2395         LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
 2396                                                   ctx->ac.postponed_kill, "");
 2397         ac_build_ifcc(&ctx->ac, cond, 7002);
 2398     }
 2399 
 2400     nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
 2401     nir_variable *var = nir_deref_instr_get_variable(deref);
 2402 
 2403     LLVMValueRef temp_ptr, value;
 2404     int idx = 0;
 2405     unsigned comp = 0;
 2406     LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
 2407     int writemask = instr->const_index[0];
 2408     LLVMValueRef indir_index;
 2409     unsigned const_index;
 2410 
 2411     if (var) {
 2412         get_deref_offset(ctx, deref, false,
 2413                          NULL, NULL, &const_index, &indir_index);
 2414         idx = var->data.driver_location;
 2415         comp = var->data.location_frac;
 2416 
 2417         if (var->data.compact) {
 2418             const_index += comp;
 2419             comp = 0;
 2420         }
 2421     }
 2422 
 2423     if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 &&
 2424         (deref->mode == nir_var_shader_out ||
 2425          deref->mode == nir_var_function_temp)) {
 2426 
 2427         src = LLVMBuildBitCast(ctx->ac.builder, src,
 2428                                LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
 2429                                "");
 2430 
 2431         writemask = widen_mask(writemask, 2);
 2432     }
 2433 
 2434     writemask = writemask << comp;
 2435 
 2436     switch (deref->mode) {
 2437     case nir_var_shader_out:
 2438 
 2439         if (ctx->stage == MESA_SHADER_TESS_CTRL) {
 2440             LLVMValueRef vertex_index = NULL;
 2441             LLVMValueRef indir_index = NULL;
 2442             unsigned const_index = 0;
 2443             const bool is_patch = var->data.patch ||
 2444                           var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
 2445                           var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
 2446 
 2447             get_deref_offset(ctx, deref, false, NULL,
 2448                              is_patch ? NULL : &vertex_index,
 2449                              &const_index, &indir_index);
 2450 
 2451             ctx->abi->store_tcs_outputs(ctx->abi, var,
 2452                             vertex_index, indir_index,
 2453                             const_index, src, writemask);
 2454             break;
 2455         }
 2456 
 2457         for (unsigned chan = 0; chan < 8; chan++) {
 2458             int stride = 4;
 2459             if (!(writemask & (1 << chan)))
 2460                 continue;
 2461 
 2462             value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
 2463 
 2464             if (var->data.compact)
 2465                 stride = 1;
 2466             if (indir_index) {
 2467                 unsigned count = glsl_count_attribute_slots(
 2468                         var->type, false);
 2469                 count -= chan / 4;
 2470                 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 2471                         &ctx->ac, ctx->abi->outputs + idx + chan, count,
 2472                         stride, true, true);
 2473 
 2474                 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
 2475                                      value, indir_index, "");
 2476                 build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan,
 2477                                 count, stride, tmp_vec);
 2478 
 2479             } else {
 2480                 temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride];
 2481 
 2482                 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
 2483             }
 2484         }
 2485         break;
 2486     case nir_var_function_temp:
 2487         for (unsigned chan = 0; chan < 8; chan++) {
 2488             if (!(writemask & (1 << chan)))
 2489                 continue;
 2490 
 2491             value = ac_llvm_extract_elem(&ctx->ac, src, chan);
 2492             if (indir_index) {
 2493                 unsigned count = glsl_count_attribute_slots(
 2494                     var->type, false);
 2495                 count -= chan / 4;
 2496                 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 2497                     &ctx->ac, ctx->locals + idx + chan, count,
 2498                     4, true, true);
 2499 
 2500                 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
 2501                                  value, indir_index, "");
 2502                 build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
 2503                                 count, 4, tmp_vec);
 2504             } else {
 2505                 temp_ptr = ctx->locals[idx + chan + const_index * 4];
 2506 
 2507                 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
 2508             }
 2509         }
 2510         break;
 2511 
 2512     case nir_var_mem_global: {
 2513         int writemask = instr->const_index[0];
 2514         LLVMValueRef address = get_src(ctx, instr->src[0]);
 2515         LLVMValueRef val = get_src(ctx, instr->src[1]);
 2516 
 2517         unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
 2518         unsigned natural_stride = type_scalar_size_bytes(deref->type);
 2519         unsigned stride = explicit_stride ? explicit_stride : natural_stride;
 2520         int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8;
 2521         bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
 2522 
 2523         LLVMTypeRef ptr_type =  LLVMPointerType(LLVMTypeOf(val),
 2524                             LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
 2525         address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
 2526 
 2527         if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
 2528             stride == natural_stride && !split_stores) {
 2529             LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
 2530                                                    LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
 2531             address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, "");
 2532 
 2533             val = LLVMBuildBitCast(ctx->ac.builder, val,
 2534                                    LLVMGetElementType(LLVMTypeOf(address)), "");
 2535             LLVMBuildStore(ctx->ac.builder, val, address);
 2536         } else {
 2537             LLVMTypeRef val_type = LLVMTypeOf(val);
 2538             if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
 2539                 val_type = LLVMGetElementType(val_type);
 2540 
 2541             LLVMTypeRef ptr_type = LLVMPointerType(val_type,
 2542                                    LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
 2543             address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type, "");
 2544             for (unsigned chan = 0; chan < 4; chan++) {
 2545                 if (!(writemask & (1 << chan)))
 2546                     continue;
 2547 
 2548                 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan * stride / natural_stride, 0);
 2549 
 2550                 LLVMValueRef ptr = ac_build_gep_ptr(&ctx->ac, address, offset);
 2551                 LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
 2552                                     chan);
 2553                 src = LLVMBuildBitCast(ctx->ac.builder, src,
 2554                                        LLVMGetElementType(LLVMTypeOf(ptr)), "");
 2555                 LLVMBuildStore(ctx->ac.builder, src, ptr);
 2556             }
 2557         }
 2558         break;
 2559     }
 2560     default:
 2561         abort();
 2562         break;
 2563     }
 2564 
 2565     if (ctx->ac.postponed_kill)
 2566         ac_build_endif(&ctx->ac, 7002);
 2567 }
 2568 
 2569 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
 2570 {
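          /* Number of coordinate components needed to address the image,
           * e.g. 2D -> (x, y), 2D array -> (x, y, layer); multisampled
           * images also count the sample index, and buffers use a single
           * element index.
           */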
 2571     switch (dim) {
 2572     case GLSL_SAMPLER_DIM_BUF:
 2573         return 1;
 2574     case GLSL_SAMPLER_DIM_1D:
 2575         return array ? 2 : 1;
 2576     case GLSL_SAMPLER_DIM_2D:
 2577         return array ? 3 : 2;
 2578     case GLSL_SAMPLER_DIM_MS:
 2579         return array ? 4 : 3;
 2580     case GLSL_SAMPLER_DIM_3D:
 2581     case GLSL_SAMPLER_DIM_CUBE:
 2582         return 3;
 2583     case GLSL_SAMPLER_DIM_RECT:
 2584     case GLSL_SAMPLER_DIM_SUBPASS:
 2585         return 2;
 2586     case GLSL_SAMPLER_DIM_SUBPASS_MS:
 2587         return 3;
 2588     default:
 2589         break;
 2590     }
 2591     return 0;
 2592 }
 2593 
 2594 static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
 2595                             LLVMValueRef coord_x, LLVMValueRef coord_y,
 2596                             LLVMValueRef coord_z,
 2597                             LLVMValueRef sample_index,
 2598                             LLVMValueRef fmask_desc_ptr)
 2599 {
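          /* For compressed MSAA surfaces, the FMASK stores per pixel which
           * physical sample slot each logical sample maps to.  Build the
           * (x, y[, layer], sample) address, let ac_apply_fmask_to_sample()
           * do the lookup, and return the remapped index from the sample
           * channel.
           */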
 2600     unsigned sample_chan = coord_z ? 3 : 2;
 2601     LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};
 2602     addr[sample_chan] = sample_index;
 2603 
 2604     ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL);
 2605     return addr[sample_chan];
 2606 }
 2607 
 2608 static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)
 2609 {
 2610     assert(instr->src[0].is_ssa);
 2611     return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
 2612 }
 2613 
 2614 static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
 2615                                          const nir_intrinsic_instr *instr,
 2616                                          LLVMValueRef dynamic_index,
 2617                                          enum ac_descriptor_type desc_type,
 2618                                          bool write)
 2619 {
 2620     nir_deref_instr *deref_instr =
 2621         instr->src[0].ssa->parent_instr->type == nir_instr_type_deref ?
 2622         nir_instr_as_deref(instr->src[0].ssa->parent_instr) : NULL;
 2623 
 2624     return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write);
 2625 }
 2626 
 2627 static void get_image_coords(struct ac_nir_context *ctx,
 2628                  const nir_intrinsic_instr *instr,
 2629                  LLVMValueRef dynamic_desc_index,
 2630                  struct ac_image_args *args,
 2631                  enum glsl_sampler_dim dim,
 2632                  bool is_array)
 2633 {
 2634     LLVMValueRef src0 = get_src(ctx, instr->src[1]);
 2635     LLVMValueRef masks[] = {
 2636         LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
 2637         LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
 2638     };
 2639     LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
 2640 
 2641     int count;
 2642     ASSERTED bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
 2643                       dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
 2644     bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
 2645               dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
 2646     bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
 2647     assert(!add_frag_pos && "Input attachments should be lowered by this point.");
 2648     count = image_type_to_components_count(dim, is_array);
 2649 
 2650     if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
 2651               instr->intrinsic == nir_intrinsic_bindless_image_load)) {
 2652         LLVMValueRef fmask_load_address[3];
 2653 
 2654         fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
 2655         fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
 2656         if (is_array)
 2657             fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
 2658         else
 2659             fmask_load_address[2] = NULL;
 2660 
 2661         sample_index = adjust_sample_index_using_fmask(&ctx->ac,
 2662                                    fmask_load_address[0],
 2663                                    fmask_load_address[1],
 2664                                    fmask_load_address[2],
 2665                                    sample_index,
 2666                                    get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
 2667                                         AC_DESC_FMASK, &instr->instr, dynamic_desc_index, true, false));
 2668     }
 2669     if (count == 1 && !gfx9_1d) {
 2670         if (instr->src[1].ssa->num_components)
 2671             args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
 2672         else
 2673             args->coords[0] = src0;
 2674     } else {
 2675         int chan;
 2676         if (is_ms)
 2677             count--;
 2678         for (chan = 0; chan < count; ++chan) {
 2679             args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
 2680         }
 2681 
 2682         if (gfx9_1d) {
 2683             if (is_array) {
 2684                 args->coords[2] = args->coords[1];
 2685                 args->coords[1] = ctx->ac.i32_0;
 2686             } else
 2687                 args->coords[1] = ctx->ac.i32_0;
 2688             count++;
 2689         }
 2690         if (ctx->ac.chip_class == GFX9 &&
 2691             dim == GLSL_SAMPLER_DIM_2D &&
 2692             !is_array) {
 2693             /* The hw can't bind a slice of a 3D image as a 2D
 2694              * image, because it ignores BASE_ARRAY if the target
 2695              * is 3D. The workaround is to read BASE_ARRAY and set
 2696              * it as the 3rd address operand for all 2D images.
 2697              */
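                  /* Dword 5 of the image descriptor is where BASE_ARRAY
                   * lives, hence the extract at index 5 and the
                   * S_008F24_BASE_ARRAY mask below.
                   */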
 2698             LLVMValueRef first_layer, const5, mask;
 2699 
 2700             const5 = LLVMConstInt(ctx->ac.i32, 5, 0);
 2701             mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0);
 2702             first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, "");
 2703             first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, "");
 2704 
 2705             args->coords[count] = first_layer;
 2706             count++;
 2707         }
 2708 
 2709 
 2710         if (is_ms) {
 2711             args->coords[count] = sample_index;
 2712             count++;
 2713         }
 2714     }
 2715 }
 2716 
 2717 static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
 2718                                                 const nir_intrinsic_instr *instr,
 2719                         LLVMValueRef dynamic_index,
 2720                         bool write, bool atomic)
 2721 {
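          /* GFX9 + LLVM < 9 workaround for buffer atomics: dword 1 of the
           * buffer descriptor carries the stride in its upper 16 bits and
           * dword 2 carries the element count (NUM_RECORDS); the code below
           * rewrites the element count to max(element count, stride) before
           * the descriptor is used.
           */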
 2722     LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, write);
 2723     if (ctx->ac.chip_class == GFX9 && LLVM_VERSION_MAJOR < 9 && atomic) {
 2724         LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
 2725         LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
 2726         stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
 2727 
 2728         LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
 2729                                                       LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
 2730                                                       elem_count, stride, "");
 2731 
 2732         rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
 2733                                       LLVMConstInt(ctx->ac.i32, 2, 0), "");
 2734     }
 2735     return rsrc;
 2736 }
 2737 
 2738 static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx,
 2739                       struct waterfall_context *wctx,
 2740                       const nir_intrinsic_instr *instr)
 2741 {
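          /* When the access is flagged ACCESS_NON_UNIFORM, the descriptor
           * index may differ between lanes of a wave; enter_waterfall()
           * presumably emits a loop over the distinct per-lane values so
           * that each iteration sees a uniform index, and exit_waterfall()
           * later collects the result.
           */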
 2742     nir_deref_instr *deref_instr = NULL;
 2743 
 2744     if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref)
 2745         deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
 2746 
 2747     LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);
 2748     return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
 2749 }
 2750 
 2751 static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 2752                      const nir_intrinsic_instr *instr,
 2753                      bool bindless)
 2754 {
 2755     LLVMValueRef res;
 2756 
 2757     enum glsl_sampler_dim dim;
 2758     enum gl_access_qualifier access;
 2759     bool is_array;
 2760     if (bindless) {
 2761         dim = nir_intrinsic_image_dim(instr);
 2762         access = nir_intrinsic_access(instr);
 2763         is_array = nir_intrinsic_image_array(instr);
 2764     } else {
 2765         const nir_deref_instr *image_deref = get_image_deref(instr);
 2766         const struct glsl_type *type = image_deref->type;
 2767         const nir_variable *var = nir_deref_instr_get_variable(image_deref);
 2768         dim = glsl_get_sampler_dim(type);
 2769         access = var->data.access;
 2770         is_array = glsl_sampler_type_is_array(type);
 2771     }
 2772 
 2773     struct waterfall_context wctx;
 2774     LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
 2775 
 2776     struct ac_image_args args = {};
 2777 
 2778     args.cache_policy = get_cache_policy(ctx, access, false, false);
 2779 
 2780     if (dim == GLSL_SAMPLER_DIM_BUF) {
 2781         unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
 2782         unsigned num_channels = util_last_bit(mask);
 2783         LLVMValueRef rsrc, vindex;
 2784 
 2785         rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, false, false);
 2786         vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 2787                          ctx->ac.i32_0, "");
 2788 
 2789         bool can_speculate = access & ACCESS_CAN_REORDER;
 2790         res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
 2791                           ctx->ac.i32_0, num_channels,
 2792                           args.cache_policy,
 2793                           can_speculate);
 2794         res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
 2795 
 2796         res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
 2797         res = ac_to_integer(&ctx->ac, res);
 2798     } else {
 2799         bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
 2800 
 2801         args.opcode = level_zero ? ac_image_load : ac_image_load_mip;
 2802         args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
 2803         get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
 2804         args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
 2805         if (!level_zero)
 2806             args.lod = get_src(ctx, instr->src[3]);
 2807         args.dmask = 15;
 2808         args.attributes = AC_FUNC_ATTR_READONLY;
 2809 
 2810         res = ac_build_image_opcode(&ctx->ac, &args);
 2811     }
 2812     return exit_waterfall(ctx, &wctx, res);
 2813 }
 2814 
 2815 static void visit_image_store(struct ac_nir_context *ctx,
 2816                   const nir_intrinsic_instr *instr,
 2817                   bool bindless)
 2818 {
 2819     if (ctx->ac.postponed_kill) {
 2820         LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
 2821                                                   ctx->ac.postponed_kill, "");
 2822         ac_build_ifcc(&ctx->ac, cond, 7003);
 2823     }
 2824 
 2825     enum glsl_sampler_dim dim;
 2826     enum gl_access_qualifier access;
 2827     bool is_array;
 2828 
 2829     if (bindless) {
 2830         dim = nir_intrinsic_image_dim(instr);
 2831         access = nir_intrinsic_access(instr);
 2832         is_array = nir_intrinsic_image_array(instr);
 2833     } else {
 2834         const nir_deref_instr *image_deref = get_image_deref(instr);
 2835         const struct glsl_type *type = image_deref->type;
 2836         const nir_variable *var = nir_deref_instr_get_variable(image_deref);
 2837         dim = glsl_get_sampler_dim(type);
 2838         access = var->data.access;
 2839         is_array = glsl_sampler_type_is_array(type);
 2840     }
 2841 
 2842     struct waterfall_context wctx;
 2843     LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
 2844 
 2845     bool writeonly_memory = access & ACCESS_NON_READABLE;
 2846     struct ac_image_args args = {};
 2847 
 2848     args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
 2849 
 2850     if (dim == GLSL_SAMPLER_DIM_BUF) {
 2851         LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, false);
 2852         LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
 2853         unsigned src_channels = ac_get_llvm_num_components(src);
 2854         LLVMValueRef vindex;
 2855 
 2856         if (src_channels == 3)
 2857             src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
 2858 
 2859         vindex = LLVMBuildExtractElement(ctx->ac.builder,
 2860                          get_src(ctx, instr->src[1]),
 2861                          ctx->ac.i32_0, "");
 2862 
 2863         ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
 2864                          ctx->ac.i32_0, src_channels,
 2865                          args.cache_policy);
 2866     } else {
 2867         bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
 2868 
 2869         args.opcode = level_zero ? ac_image_store : ac_image_store_mip;
 2870         args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
 2871         args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
 2872         get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
 2873         args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
 2874         if (!level_zero)
 2875             args.lod = get_src(ctx, instr->src[4]);
 2876         args.dmask = 15;
 2877 
 2878         ac_build_image_opcode(&ctx->ac, &args);
 2879     }
 2880 
 2881     exit_waterfall(ctx, &wctx, NULL);
 2882     if (ctx->ac.postponed_kill)
 2883         ac_build_endif(&ctx->ac, 7003);
 2884 }
 2885 
 2886 static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
 2887                      const nir_intrinsic_instr *instr,
 2888                      bool bindless)
 2889 {
 2890     if (ctx->ac.postponed_kill) {
 2891         LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
 2892                                                   ctx->ac.postponed_kill, "");
 2893         ac_build_ifcc(&ctx->ac, cond, 7004);
 2894     }
 2895 
 2896     LLVMValueRef params[7];
 2897     int param_count = 0;
 2898 
 2899     bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
 2900                instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
 2901     const char *atomic_name;
 2902     char intrinsic_name[64];
 2903     enum ac_atomic_op atomic_subop;
 2904     ASSERTED int length;
 2905 
 2906     enum glsl_sampler_dim dim;
 2907     bool is_array;
 2908     if (bindless) {
 2909         if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_imin ||
 2910             instr->intrinsic == nir_intrinsic_bindless_image_atomic_umin ||
 2911             instr->intrinsic == nir_intrinsic_bindless_image_atomic_imax ||
 2912             instr->intrinsic == nir_intrinsic_bindless_image_atomic_umax) {
 2913             ASSERTED const GLenum format = nir_intrinsic_format(instr);
 2914             assert(format == GL_R32UI || format == GL_R32I);
 2915         }
 2916         dim = nir_intrinsic_image_dim(instr);
 2917         is_array = nir_intrinsic_image_array(instr);
 2918     } else {
 2919         const struct glsl_type *type = get_image_deref(instr)->type;
 2920         dim = glsl_get_sampler_dim(type);
 2921         is_array = glsl_sampler_type_is_array(type);
 2922     }
 2923 
 2924     struct waterfall_context wctx;
 2925     LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
 2926 
 2927     switch (instr->intrinsic) {
 2928     case nir_intrinsic_bindless_image_atomic_add:
 2929     case nir_intrinsic_image_deref_atomic_add:
 2930         atomic_name = "add";
 2931         atomic_subop = ac_atomic_add;
 2932         break;
 2933     case nir_intrinsic_bindless_image_atomic_imin:
 2934     case nir_intrinsic_image_deref_atomic_imin:
 2935         atomic_name = "smin";
 2936         atomic_subop = ac_atomic_smin;
 2937         break;
 2938     case nir_intrinsic_bindless_image_atomic_umin:
 2939     case nir_intrinsic_image_deref_atomic_umin:
 2940         atomic_name = "umin";
 2941         atomic_subop = ac_atomic_umin;
 2942         break;
 2943     case nir_intrinsic_bindless_image_atomic_imax:
 2944     case nir_intrinsic_image_deref_atomic_imax:
 2945         atomic_name = "smax";
 2946         atomic_subop = ac_atomic_smax;
 2947         break;
 2948     case nir_intrinsic_bindless_image_atomic_umax:
 2949     case nir_intrinsic_image_deref_atomic_umax:
 2950         atomic_name = "umax";
 2951         atomic_subop = ac_atomic_umax;
 2952         break;
 2953     case nir_intrinsic_bindless_image_atomic_and:
 2954     case nir_intrinsic_image_deref_atomic_and:
 2955         atomic_name = "and";
 2956         atomic_subop = ac_atomic_and;
 2957         break;
 2958     case nir_intrinsic_bindless_image_atomic_or:
 2959     case nir_intrinsic_image_deref_atomic_or:
 2960         atomic_name = "or";
 2961         atomic_subop = ac_atomic_or;
 2962         break;
 2963     case nir_intrinsic_bindless_image_atomic_xor:
 2964     case nir_intrinsic_image_deref_atomic_xor:
 2965         atomic_name = "xor";
 2966         atomic_subop = ac_atomic_xor;
 2967         break;
 2968     case nir_intrinsic_bindless_image_atomic_exchange:
 2969     case nir_intrinsic_image_deref_atomic_exchange:
 2970         atomic_name = "swap";
 2971         atomic_subop = ac_atomic_swap;
 2972         break;
 2973     case nir_intrinsic_bindless_image_atomic_comp_swap:
 2974     case nir_intrinsic_image_deref_atomic_comp_swap:
 2975         atomic_name = "cmpswap";
 2976         atomic_subop = 0; /* not used */
 2977         break;
 2978     case nir_intrinsic_bindless_image_atomic_inc_wrap:
 2979     case nir_intrinsic_image_deref_atomic_inc_wrap: {
 2980         atomic_name = "inc";
 2981         atomic_subop = ac_atomic_inc_wrap;
 2982         /* ATOMIC_INC instruction does:
 2983          *      value = (value + 1) % (data + 1)
 2984          * but we want:
 2985          *      value = (value + 1) % data
 2986          * So replace 'data' by 'data - 1'.
 2987          */
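              /* Worked example: for a wrap value data = 4 we want
               *      value = (value + 1) % 4,
               * and passing data - 1 = 3 makes the hardware compute
               *      value = (value + 1) % (3 + 1),
               * which is the same thing.
               */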
 2988         ctx->ssa_defs[instr->src[3].ssa->index] =
 2989             LLVMBuildSub(ctx->ac.builder,
 2990                      ctx->ssa_defs[instr->src[3].ssa->index],
 2991                      ctx->ac.i32_1, "");
 2992         break;
 2993     }
 2994     case nir_intrinsic_bindless_image_atomic_dec_wrap:
 2995     case nir_intrinsic_image_deref_atomic_dec_wrap:
 2996         atomic_name = "dec";
 2997         atomic_subop = ac_atomic_dec_wrap;
 2998         break;
 2999     default:
 3000         abort();
 3001     }
 3002 
 3003     if (cmpswap)
 3004         params[param_count++] = get_src(ctx, instr->src[4]);
 3005     params[param_count++] = get_src(ctx, instr->src[3]);
 3006 
 3007     LLVMValueRef result;
 3008     if (dim == GLSL_SAMPLER_DIM_BUF) {
 3009         params[param_count++] = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, true);
 3010         params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 3011                                 ctx->ac.i32_0, ""); /* vindex */
 3012         params[param_count++] = ctx->ac.i32_0; /* voffset */
 3013         if (LLVM_VERSION_MAJOR >= 9) {
 3014             /* XXX: The new raw/struct atomic intrinsics are buggy
 3015              * with LLVM 8, see r358579.
 3016              */
 3017             params[param_count++] = ctx->ac.i32_0; /* soffset */
 3018             params[param_count++] = ctx->ac.i32_0;  /* slc */
 3019 
 3020             length = snprintf(intrinsic_name, sizeof(intrinsic_name),
 3021                               "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name);
 3022         } else {
 3023             params[param_count++] = ctx->ac.i1false;  /* slc */
 3024 
 3025             length = snprintf(intrinsic_name, sizeof(intrinsic_name),
 3026                               "llvm.amdgcn.buffer.atomic.%s", atomic_name);
 3027         }
 3028 
 3029         assert(length < sizeof(intrinsic_name));
 3030         result = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32,
 3031                         params, param_count, 0);
 3032     } else {
 3033         struct ac_image_args args = {};
 3034         args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
 3035         args.atomic = atomic_subop;
 3036         args.data[0] = params[0];
 3037         if (cmpswap)
 3038             args.data[1] = params[1];
 3039         args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
 3040         get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
 3041         args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
 3042 
 3043         result = ac_build_image_opcode(&ctx->ac, &args);
 3044     }
 3045 
 3046     result = exit_waterfall(ctx, &wctx, result);
 3047     if (ctx->ac.postponed_kill)
 3048         ac_build_endif(&ctx->ac, 7004);
 3049     return result;
 3050 }
 3051 
 3052 static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
 3053                     nir_intrinsic_instr *instr)
 3054 {
 3055     struct waterfall_context wctx;
 3056     LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
 3057     LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
 3058 
 3059     LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc);
 3060 
 3061     return exit_waterfall(ctx, &wctx, ret);
 3062 }
 3063 
 3064 static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
 3065                      const nir_intrinsic_instr *instr,
 3066                      bool bindless)
 3067 {
 3068     LLVMValueRef res;
 3069 
 3070     enum glsl_sampler_dim dim;
 3071     bool is_array;
 3072     if (bindless) {
 3073         dim = nir_intrinsic_image_dim(instr);
 3074         is_array = nir_intrinsic_image_array(instr);
 3075     } else {
 3076         const struct glsl_type *type = get_image_deref(instr)->type;
 3077         dim = glsl_get_sampler_dim(type);
 3078         is_array = glsl_sampler_type_is_array(type);
 3079     }
 3080 
 3081     struct waterfall_context wctx;
 3082     LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
 3083 
 3084     if (dim == GLSL_SAMPLER_DIM_BUF) {
 3085         res =  get_buffer_size(ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true);
 3086     } else {
 3087 
 3088         struct ac_image_args args = { 0 };
 3089 
 3090         args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
 3091         args.dmask = 0xf;
 3092         args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
 3093         args.opcode = ac_image_get_resinfo;
 3094         args.lod = ctx->ac.i32_0;
 3095         args.attributes = AC_FUNC_ATTR_READNONE;
 3096 
 3097         res = ac_build_image_opcode(&ctx->ac, &args);
 3098 
 3099         LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
 3100 
 3101         if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
 3102             LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
 3103             LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
 3104             z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
 3105             res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
 3106         }
 3107 
 3108         if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
 3109             LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
 3110             res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
 3111                                          ctx->ac.i32_1, "");
 3112         }
 3113     }
 3114     return exit_waterfall(ctx, &wctx, res);
 3115 }
 3116 
 3117 static void emit_membar(struct ac_llvm_context *ac,
 3118             const nir_intrinsic_instr *instr)
 3119 {
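          /* Rough mapping of the wait flags: AC_WAIT_LGKM covers LDS/GDS
           * and scalar memory traffic, while AC_WAIT_VLOAD / AC_WAIT_VSTORE
           * cover vector memory loads and stores.  A full memory barrier
           * waits on all of them, buffer/image barriers only on the vector
           * memory counters, and a shared (LDS) barrier only on LGKM.
           */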
 3120     unsigned wait_flags = 0;
 3121 
 3122     switch (instr->intrinsic) {
 3123     case nir_intrinsic_memory_barrier:
 3124     case nir_intrinsic_group_memory_barrier:
 3125         wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
 3126         break;
 3127     case nir_intrinsic_memory_barrier_buffer:
 3128     case nir_intrinsic_memory_barrier_image:
 3129         wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
 3130         break;
 3131     case nir_intrinsic_memory_barrier_shared:
 3132         wait_flags = AC_WAIT_LGKM;
 3133         break;
 3134     default:
 3135         break;
 3136     }
 3137 
 3138     ac_build_waitcnt(ac, wait_flags);
 3139 }
 3140 
 3141 void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
 3142 {
 3143     /* GFX6 only (thanks to a hw bug workaround):
 3144      * The real barrier instruction isn't needed, because an entire patch
 3145      * always fits into a single wave.
 3146      */
 3147     if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
 3148         ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
 3149         return;
 3150     }
 3151     ac_build_s_barrier(ac);
 3152 }
 3153 
 3154 static void emit_discard(struct ac_nir_context *ctx,
 3155              const nir_intrinsic_instr *instr)
 3156 {
 3157     LLVMValueRef cond;
 3158 
 3159     if (instr->intrinsic == nir_intrinsic_discard_if) {
 3160         cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
 3161                      get_src(ctx, instr->src[0]),
 3162                      ctx->ac.i32_0, "");
 3163     } else {
 3164         assert(instr->intrinsic == nir_intrinsic_discard);
 3165         cond = ctx->ac.i1false;
 3166     }
 3167 
 3168     ac_build_kill_if_false(&ctx->ac, cond);
 3169 }
 3170 
 3171 static void emit_demote(struct ac_nir_context *ctx,
 3172             const nir_intrinsic_instr *instr)
 3173 {
 3174     LLVMValueRef cond;
 3175 
 3176     if (instr->intrinsic == nir_intrinsic_demote_if) {
 3177         cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
 3178                      get_src(ctx, instr->src[0]),
 3179                      ctx->ac.i32_0, "");
 3180     } else {
 3181         assert(instr->intrinsic == nir_intrinsic_demote);
 3182         cond = ctx->ac.i1false;
 3183     }
 3184 
 3185     /* Kill immediately while maintaining WQM. */
 3186     ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
 3187 
 3188     LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
 3189     mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
 3190     LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);
 3191     return;
 3192 }
 3193 
 3194 static LLVMValueRef
 3195 visit_load_local_invocation_index(struct ac_nir_context *ctx)
 3196 {
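          /* Judging by the masks used here and in visit_load_subgroup_id()
           * and visit_load_num_subgroups() below, tg_size packs the wave
           * index within the workgroup in bits [11:6] and the wave count in
           * bits [5:0].  Masking with 0xfc0 thus gives wave_index * 64; for
           * wave32 this is halved, so adding the lane id yields the flat
           * local invocation index.
           */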
 3197     LLVMValueRef result;
 3198     LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
 3199     result = LLVMBuildAnd(ctx->ac.builder,
 3200                   ac_get_arg(&ctx->ac, ctx->args->tg_size),
 3201                   LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
 3202 
 3203     if (ctx->ac.wave_size == 32)
 3204         result = LLVMBuildLShr(ctx->ac.builder, result,
 3205                        LLVMConstInt(ctx->ac.i32, 1, false), "");
 3206 
 3207     return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
 3208 }
 3209 
 3210 static LLVMValueRef
 3211 visit_load_subgroup_id(struct ac_nir_context *ctx)
 3212 {
 3213     if (ctx->stage == MESA_SHADER_COMPUTE) {
 3214         LLVMValueRef result;
 3215         result = LLVMBuildAnd(ctx->ac.builder,
 3216                       ac_get_arg(&ctx->ac, ctx->args->tg_size),
 3217                 LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
 3218         return LLVMBuildLShr(ctx->ac.builder, result,  LLVMConstInt(ctx->ac.i32, 6, false), "");
 3219     } else {
 3220         return LLVMConstInt(ctx->ac.i32, 0, false);
 3221     }
 3222 }
 3223 
 3224 static LLVMValueRef
 3225 visit_load_num_subgroups(struct ac_nir_context *ctx)
 3226 {
 3227     if (ctx->stage == MESA_SHADER_COMPUTE) {
 3228         return LLVMBuildAnd(ctx->ac.builder,
 3229                     ac_get_arg(&ctx->ac, ctx->args->tg_size),
 3230                             LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
 3231     } else {
 3232         return LLVMConstInt(ctx->ac.i32, 1, false);
 3233     }
 3234 }
 3235 
 3236 static LLVMValueRef
 3237 visit_first_invocation(struct ac_nir_context *ctx)
 3238 {
 3239     LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
 3240     const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
 3241 
 3242     /* The second argument is whether cttz(0) should be defined, but we do not care. */
 3243     LLVMValueRef args[] = {active_set, ctx->ac.i1false};
 3244     LLVMValueRef result =  ac_build_intrinsic(&ctx->ac, intr,
 3245                                               ctx->ac.iN_wavemask, args, 2,
 3246                                               AC_FUNC_ATTR_NOUNWIND |
 3247                                               AC_FUNC_ATTR_READNONE);
 3248 
 3249     return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
 3250 }
 3251 
 3252 static LLVMValueRef
 3253 visit_load_shared(struct ac_nir_context *ctx,
 3254            const nir_intrinsic_instr *instr)
 3255 {
 3256     LLVMValueRef values[4], derived_ptr, index, ret;
 3257 
 3258     LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
 3259                       instr->dest.ssa.bit_size);
 3260 
 3261     for (int chan = 0; chan < instr->num_components; chan++) {
 3262         index = LLVMConstInt(ctx->ac.i32, chan, 0);
 3263         derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
 3264         values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
 3265     }
 3266 
 3267     ret = ac_build_gather_values(&ctx->ac, values, instr->num_components);
 3268     return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
 3269 }
 3270 
 3271 static void
 3272 visit_store_shared(struct ac_nir_context *ctx,
 3273            const nir_intrinsic_instr *instr)
 3274 {
 3275     LLVMValueRef derived_ptr, data, index;
 3276     LLVMBuilderRef builder = ctx->ac.builder;
 3277 
 3278     LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1],
 3279                       instr->src[0].ssa->bit_size);
 3280     LLVMValueRef src = get_src(ctx, instr->src[0]);
 3281 
 3282     int writemask = nir_intrinsic_write_mask(instr);
 3283     for (int chan = 0; chan < 4; chan++) {
 3284         if (!(writemask & (1 << chan))) {
 3285             continue;
 3286         }
 3287         data = ac_llvm_extract_elem(&ctx->ac, src, chan);
 3288         index = LLVMConstInt(ctx->ac.i32, chan, 0);
 3289         derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
 3290         LLVMBuildStore(builder, data, derived_ptr);
 3291     }
 3292 }
 3293 
 3294 static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
 3295                      const nir_intrinsic_instr *instr,
 3296                      LLVMValueRef ptr, int src_idx)
 3297 {
 3298     if (ctx->ac.postponed_kill) {
 3299         LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
 3300                                                   ctx->ac.postponed_kill, "");
 3301         ac_build_ifcc(&ctx->ac, cond, 7005);
 3302     }
 3303 
 3304     LLVMValueRef result;
 3305     LLVMValueRef src = get_src(ctx, instr->src[src_idx]);
 3306 
 3307     const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
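          /* The "-one-as" scopes (available since LLVM 9) appear to order
           * only accesses within the one address space being touched, which
           * is all that is needed here; older LLVM only has the plain
           * scopes.
           */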
 3308 
 3309     if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref) {
 3310         nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
 3311         if (deref->mode == nir_var_mem_global) {
 3312             /* use "singlethread" sync scope to implement relaxed ordering */
 3313             sync_scope = LLVM_VERSION_MAJOR >= 9 ? "singlethread-one-as" : "singlethread";
 3314 
 3315             LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(src), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)));
 3316             ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type , "");
 3317         }
 3318     }
 3319 
 3320     if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap ||
 3321         instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) {
 3322         LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
 3323         result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);
 3324         result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
 3325     } else {
 3326         LLVMAtomicRMWBinOp op;
 3327         switch (instr->intrinsic) {
 3328         case nir_intrinsic_shared_atomic_add:
 3329         case nir_intrinsic_deref_atomic_add:
 3330             op = LLVMAtomicRMWBinOpAdd;
 3331             break;
 3332         case nir_intrinsic_shared_atomic_umin:
 3333         case nir_intrinsic_deref_atomic_umin:
 3334             op = LLVMAtomicRMWBinOpUMin;
 3335             break;
 3336         case nir_intrinsic_shared_atomic_umax:
 3337         case nir_intrinsic_deref_atomic_umax:
 3338             op = LLVMAtomicRMWBinOpUMax;
 3339             break;
 3340         case nir_intrinsic_shared_atomic_imin:
 3341         case nir_intrinsic_deref_atomic_imin:
 3342             op = LLVMAtomicRMWBinOpMin;
 3343             break;
 3344         case nir_intrinsic_shared_atomic_imax:
 3345         case nir_intrinsic_deref_atomic_imax:
 3346             op = LLVMAtomicRMWBinOpMax;
 3347             break;
 3348         case nir_intrinsic_shared_atomic_and:
 3349         case nir_intrinsic_deref_atomic_and:
 3350             op = LLVMAtomicRMWBinOpAnd;
 3351             break;
 3352         case nir_intrinsic_shared_atomic_or:
 3353         case nir_intrinsic_deref_atomic_or:
 3354             op = LLVMAtomicRMWBinOpOr;
 3355             break;
 3356         case nir_intrinsic_shared_atomic_xor:
 3357         case nir_intrinsic_deref_atomic_xor:
 3358             op = LLVMAtomicRMWBinOpXor;
 3359             break;
 3360         case nir_intrinsic_shared_atomic_exchange:
 3361         case nir_intrinsic_deref_atomic_exchange:
 3362             op = LLVMAtomicRMWBinOpXchg;
 3363             break;
 3364         default:
 3365             return NULL;
 3366         }
 3367 
 3368         result = ac_build_atomic_rmw(&ctx->ac, op, ptr, ac_to_integer(&ctx->ac, src), sync_scope);
 3369     }
 3370 
 3371     if (ctx->ac.postponed_kill)
 3372         ac_build_endif(&ctx->ac, 7005);
 3373     return result;
 3374 }
 3375 
 3376 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
 3377 {
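          /* When the shader runs per-sample, frag_pos is evaluated at the
           * sample location, so its fractional part is the sample position
           * within the pixel in [0, 1), which is what gl_SamplePosition
           * reports.
           */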
 3378     LLVMValueRef values[2];
 3379     LLVMValueRef pos[2];
 3380 
 3381     pos[0] = ac_to_float(&ctx->ac,
 3382                  ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]));
 3383     pos[1] = ac_to_float(&ctx->ac,
 3384                  ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]));
 3385 
 3386     values[0] = ac_build_fract(&ctx->ac, pos[0], 32);
 3387     values[1] = ac_build_fract(&ctx->ac, pos[1], 32);
 3388     return ac_build_gather_values(&ctx->ac, values, 2);
 3389 }
 3390 
 3391 static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx,
 3392                     enum glsl_interp_mode interp, unsigned location)
 3393 {
 3394     switch (interp) {
 3395     case INTERP_MODE_FLAT:
 3396     default:
 3397         return NULL;
 3398     case INTERP_MODE_SMOOTH:
 3399     case INTERP_MODE_NONE:
 3400         if (location == INTERP_CENTER)
 3401             return ac_get_arg(&ctx->ac, ctx->args->persp_center);
 3402         else if (location == INTERP_CENTROID)
 3403             return ctx->abi->persp_centroid;
 3404         else if (location == INTERP_SAMPLE)
 3405             return ac_get_arg(&ctx->ac, ctx->args->persp_sample);
 3406         break;
 3407     case INTERP_MODE_NOPERSPECTIVE:
 3408         if (location == INTERP_CENTER)
 3409             return ac_get_arg(&ctx->ac, ctx->args->linear_center);
 3410         else if (location == INTERP_CENTROID)
 3411             return ctx->abi->linear_centroid;
 3412         else if (location == INTERP_SAMPLE)
 3413             return ac_get_arg(&ctx->ac, ctx->args->linear_sample);
 3414         break;
 3415     }
 3416     return NULL;
 3417 }
 3418 
 3419 static LLVMValueRef barycentric_center(struct ac_nir_context *ctx,
 3420                        unsigned mode)
 3421 {
 3422     LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
 3423     return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
 3424 }
 3425 
 3426 static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx,
 3427                        unsigned mode,
 3428                        LLVMValueRef offset)
 3429 {
 3430     LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
 3431     LLVMValueRef src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, ""));
 3432     LLVMValueRef src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, ""));
 3433 
 3434     LLVMValueRef ij_out[2];
 3435     LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
 3436 
 3437     /*
 3438      * take the I then J parameters, and the DDX/Y for it, and
 3439      * calculate the IJ inputs for the interpolator.
 3440      * temp1 = ddx * offset/sample.x + I;
 3441      * interp_param.I = ddy * offset/sample.y + temp1;
 3442      * temp1 = ddx * offset/sample.x + J;
 3443      * interp_param.J = ddy * offset/sample.y + temp1;
 3444      */
 3445     for (unsigned i = 0; i < 2; i++) {
 3446         LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
 3447         LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
 3448         LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
 3449                                   ddxy_out, ix_ll, "");
 3450         LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
 3451                                   ddxy_out, iy_ll, "");
 3452         LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
 3453                                  interp_param, ix_ll, "");
 3454         LLVMValueRef temp1, temp2;
 3455 
 3456         interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el,
 3457                          ctx->ac.f32, "");
 3458 
 3459         temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);
 3460         temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);
 3461 
 3462         ij_out[i] = LLVMBuildBitCast(ctx->ac.builder,
 3463                          temp2, ctx->ac.i32, "");
 3464     }
 3465     interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
 3466     return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
 3467 }
 3468 
 3469 static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx,
 3470                      unsigned mode)
 3471 {
 3472     LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID);
 3473     return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
 3474 }
 3475 
 3476 static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx,
 3477                       unsigned mode,
 3478                       LLVMValueRef sample_id)
 3479 {
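          /* load_sample_position() returns the sample position within the
           * pixel in [0, 1); subtracting 0.5 turns it into an offset from
           * the pixel center, the form barycentric_offset() expects (the
           * same convention interpolateAtOffset() uses).
           */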
 3480     if (ctx->abi->interp_at_sample_force_center)
 3481         return barycentric_center(ctx, mode);
 3482 
 3483     LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
 3484 
 3485     /* fetch the position of this sample within the pixel */
 3486     LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id);
 3487 
 3488     LLVMValueRef src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_0, "");
 3489     src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");
 3490     LLVMValueRef src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_1, "");
 3491     src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
 3492     LLVMValueRef coords[] = { src_c0, src_c1 };
 3493     LLVMValueRef offset = ac_build_gather_values(&ctx->ac, coords, 2);
 3494 
 3495     return barycentric_offset(ctx, mode, offset);
 3496 }
 3497 
 3498 
 3499 static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx,
 3500                        unsigned mode)
 3501 {
 3502     LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE);
 3503     return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
 3504 }
 3505 
 3506 static LLVMValueRef barycentric_model(struct ac_nir_context *ctx)
 3507 {
 3508     return LLVMBuildBitCast(ctx->ac.builder,
 3509                 ac_get_arg(&ctx->ac, ctx->args->pull_model),
 3510                 ctx->ac.v3i32, "");
 3511 }
 3512 
 3513 static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx,
 3514                         LLVMValueRef interp_param,
 3515                         unsigned index, unsigned comp_start,
 3516                         unsigned num_components,
 3517                         unsigned bitsize)
 3518 {
 3519     LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
 3520 
 3521     interp_param = LLVMBuildBitCast(ctx->ac.builder,
 3522                 interp_param, ctx->ac.v2f32, "");
 3523     LLVMValueRef i = LLVMBuildExtractElement(
 3524         ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
 3525     LLVMValueRef j = LLVMBuildExtractElement(
 3526         ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
 3527 
 3528     LLVMValueRef values[4];
 3529     assert(bitsize == 16 || bitsize == 32);
 3530     for (unsigned comp = 0; comp < num_components; comp++) {
 3531         LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false);
 3532         if (bitsize == 16) {
 3533             values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, attr_number,
 3534                                   ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
 3535         } else {
 3536             values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
 3537                               ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
 3538         }
 3539     }
 3540 
 3541     return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
 3542 }
 3543 
 3544 static LLVMValueRef load_input(struct ac_nir_context *ctx,
 3545                    nir_intrinsic_instr *instr)
 3546 {
 3547     unsigned offset_idx = instr->intrinsic == nir_intrinsic_load_input ? 0 : 1;
 3548 
 3549     /* We only lower inputs for fragment shaders ATM */
 3550     ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[offset_idx]);
 3551     assert(offset);
 3552     assert(offset[0].i32 == 0);
 3553 
 3554     unsigned component = nir_intrinsic_component(instr);
 3555     unsigned index = nir_intrinsic_base(instr);
 3556     unsigned vertex_id = 2; /* P0 */
 3557 
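          /* v_interp_mov selects among P10 (0), P20 (1) and P0 (2); the
           * switch below remaps the per-primitive vertex indices 0/1/2 used
           * by NIR to those hardware encodings (2, 0 and 1 respectively).
           */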
 3558     if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
 3559         nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);
 3560 
 3561         switch (src0[0].i32) {
 3562         case 0:
 3563             vertex_id = 2;
 3564             break;
 3565         case 1:
 3566             vertex_id = 0;
 3567             break;
 3568         case 2:
 3569             vertex_id = 1;
 3570             break;
 3571         default:
 3572             unreachable("Invalid vertex index");
 3573         }
 3574     }
 3575 
 3576     LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
 3577     LLVMValueRef values[8];
 3578 
 3579     /* Each component of a 64-bit value takes up two GL-level channels. */
 3580     unsigned num_components = instr->dest.ssa.num_components;
 3581     unsigned bit_size = instr->dest.ssa.bit_size;
 3582     unsigned channels =
 3583         bit_size == 64 ? num_components * 2 : num_components;
 3584 
 3585     for (unsigned chan = 0; chan < channels; chan++) {
 3586         if (component + chan > 4)
 3587             attr_number = LLVMConstInt(ctx->ac.i32, index + 1, false);
 3588         LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
 3589         values[chan] = ac_build_fs_interp_mov(&ctx->ac,
 3590                               LLVMConstInt(ctx->ac.i32, vertex_id, false),
 3591                               llvm_chan,
 3592                               attr_number,
 3593                               ac_get_arg(&ctx->ac, ctx->args->prim_mask));
 3594         values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
 3595         values[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
 3596                                bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, "");
 3597     }
 3598 
 3599     LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, channels);
 3600     if (bit_size == 64) {
 3601         LLVMTypeRef type = num_components == 1 ? ctx->ac.i64 :
 3602             LLVMVectorType(ctx->ac.i64, num_components);
 3603         result = LLVMBuildBitCast(ctx->ac.builder, result, type, "");
 3604     }
 3605     return result;
 3606 }
 3607 
 3608 static void visit_intrinsic(struct ac_nir_context *ctx,
 3609                             nir_intrinsic_instr *instr)
 3610 {
 3611     LLVMValueRef result = NULL;
 3612 
 3613     switch (instr->intrinsic) {
 3614     case nir_intrinsic_ballot:
 3615         result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
 3616         if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
 3617             result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
 3618         break;
 3619     case nir_intrinsic_read_invocation:
 3620         result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]),
 3621                 get_src(ctx, instr->src[1]));
 3622         break;
 3623     case nir_intrinsic_read_first_invocation:
 3624         result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
 3625         break;
 3626     case nir_intrinsic_load_subgroup_invocation:
 3627         result = ac_get_thread_id(&ctx->ac);
 3628         break;
 3629     case nir_intrinsic_load_work_group_id: {
 3630         LLVMValueRef values[3];
 3631 
 3632         for (int i = 0; i < 3; i++) {
 3633             values[i] = ctx->args->workgroup_ids[i].used ?
 3634                     ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i]) : ctx->ac.i32_0;
 3635         }
 3636 
 3637         result = ac_build_gather_values(&ctx->ac, values, 3);
 3638         break;
 3639     }
 3640     case nir_intrinsic_load_base_vertex:
 3641     case nir_intrinsic_load_first_vertex:
 3642         result = ctx->abi->load_base_vertex(ctx->abi);
 3643         break;
 3644     case nir_intrinsic_load_local_group_size:
 3645         result = ctx->abi->load_local_group_size(ctx->abi);
 3646         break;
 3647     case nir_intrinsic_load_vertex_id:
 3648         result = LLVMBuildAdd(ctx->ac.builder,
 3649                       ac_get_arg(&ctx->ac, ctx->args->vertex_id),
 3650                       ac_get_arg(&ctx->ac, ctx->args->base_vertex), "");
 3651         break;
 3652     case nir_intrinsic_load_vertex_id_zero_base: {
 3653         result = ctx->abi->vertex_id;
 3654         break;
 3655     }
 3656     case nir_intrinsic_load_local_invocation_id: {
 3657         result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
 3658         break;
 3659     }
 3660     case nir_intrinsic_load_base_instance:
 3661         result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
 3662         break;
 3663     case nir_intrinsic_load_draw_id:
 3664         result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
 3665         break;
 3666     case nir_intrinsic_load_view_index:
 3667         result = ac_get_arg(&ctx->ac, ctx->args->view_index);
 3668         break;
 3669     case nir_intrinsic_load_invocation_id:
 3670         if (ctx->stage == MESA_SHADER_TESS_CTRL) {
 3671             result = ac_unpack_param(&ctx->ac,
 3672                          ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids),
 3673                          8, 5);
 3674         } else {
 3675             if (ctx->ac.chip_class >= GFX10) {
 3676                 result = LLVMBuildAnd(ctx->ac.builder,
 3677                               ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
 3678                               LLVMConstInt(ctx->ac.i32, 127, 0), "");
 3679             } else {
 3680                 result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
 3681             }
 3682         }
 3683         break;
 3684     case nir_intrinsic_load_primitive_id:
 3685         if (ctx->stage == MESA_SHADER_GEOMETRY) {
 3686             result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
 3687         } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
 3688             result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
 3689         } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
 3690             result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
 3691         } else
 3692             fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
 3693         break;
 3694     case nir_intrinsic_load_sample_id:
 3695         result = ac_unpack_param(&ctx->ac,
 3696                      ac_get_arg(&ctx->ac, ctx->args->ancillary),
 3697                      8, 4);
 3698         break;
 3699     case nir_intrinsic_load_sample_pos:
 3700         result = load_sample_pos(ctx);
 3701         break;
 3702     case nir_intrinsic_load_sample_mask_in:
 3703         result = ctx->abi->load_sample_mask_in(ctx->abi);
 3704         break;
 3705     case nir_intrinsic_load_frag_coord: {
 3706         LLVMValueRef values[4] = {
 3707             ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]),
 3708             ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
 3709             ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
 3710             ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
 3711                       ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))
 3712         };
 3713         result = ac_to_integer(&ctx->ac,
 3714                                ac_build_gather_values(&ctx->ac, values, 4));
 3715         break;
 3716     }
 3717     case nir_intrinsic_load_layer_id:
 3718         result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
 3719         break;
 3720     case nir_intrinsic_load_front_face:
 3721         result = ac_get_arg(&ctx->ac, ctx->args->front_face);
 3722         break;
 3723     case nir_intrinsic_load_helper_invocation:
 3724         result = ac_build_load_helper_invocation(&ctx->ac);
 3725         break;
 3726     case nir_intrinsic_is_helper_invocation:
 3727         result = ac_build_is_helper_invocation(&ctx->ac);
 3728         break;
 3729     case nir_intrinsic_load_color0:
 3730         result = ctx->abi->color0;
 3731         break;
 3732     case nir_intrinsic_load_color1:
 3733         result = ctx->abi->color1;
 3734         break;
 3735     case nir_intrinsic_load_user_data_amd:
 3736         assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
 3737         result = ctx->abi->user_data;
 3738         break;
 3739     case nir_intrinsic_load_instance_id:
 3740         result = ctx->abi->instance_id;
 3741         break;
 3742     case nir_intrinsic_load_num_work_groups:
 3743         result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
 3744         break;
 3745     case nir_intrinsic_load_local_invocation_index:
 3746         result = visit_load_local_invocation_index(ctx);
 3747         break;
 3748     case nir_intrinsic_load_subgroup_id:
 3749         result = visit_load_subgroup_id(ctx);
 3750         break;
 3751     case nir_intrinsic_load_num_subgroups:
 3752         result = visit_load_num_subgroups(ctx);
 3753         break;
 3754     case nir_intrinsic_first_invocation:
 3755         result = visit_first_invocation(ctx);
 3756         break;
 3757     case nir_intrinsic_load_push_constant:
 3758         result = visit_load_push_constant(ctx, instr);
 3759         break;
 3760     case nir_intrinsic_vulkan_resource_index: {
 3761         LLVMValueRef index = get_src(ctx, instr->src[0]);
 3762         unsigned desc_set = nir_intrinsic_desc_set(instr);
 3763         unsigned binding = nir_intrinsic_binding(instr);
 3764 
 3765         result = ctx->abi->load_resource(ctx->abi, index, desc_set,
 3766                          binding);
 3767         break;
 3768     }
 3769     case nir_intrinsic_vulkan_resource_reindex:
 3770         result = visit_vulkan_resource_reindex(ctx, instr);
 3771         break;
 3772     case nir_intrinsic_store_ssbo:
 3773         visit_store_ssbo(ctx, instr);
 3774         break;
 3775     case nir_intrinsic_load_ssbo:
 3776         result = visit_load_buffer(ctx, instr);
 3777         break;
 3778     case nir_intrinsic_ssbo_atomic_add:
 3779     case nir_intrinsic_ssbo_atomic_imin:
 3780     case nir_intrinsic_ssbo_atomic_umin:
 3781     case nir_intrinsic_ssbo_atomic_imax:
 3782     case nir_intrinsic_ssbo_atomic_umax:
 3783     case nir_intrinsic_ssbo_atomic_and:
 3784     case nir_intrinsic_ssbo_atomic_or:
 3785     case nir_intrinsic_ssbo_atomic_xor:
 3786     case nir_intrinsic_ssbo_atomic_exchange:
 3787     case nir_intrinsic_ssbo_atomic_comp_swap:
 3788         result = visit_atomic_ssbo(ctx, instr);
 3789         break;
 3790     case nir_intrinsic_load_ubo:
 3791         result = visit_load_ubo_buffer(ctx, instr);
 3792         break;
 3793     case nir_intrinsic_get_buffer_size:
 3794         result = visit_get_buffer_size(ctx, instr);
 3795         break;
 3796     case nir_intrinsic_load_deref:
 3797         result = visit_load_var(ctx, instr);
 3798         break;
 3799     case nir_intrinsic_store_deref:
 3800         visit_store_var(ctx, instr);
 3801         break;
 3802     case nir_intrinsic_load_shared:
 3803         result = visit_load_shared(ctx, instr);
 3804         break;
 3805     case nir_intrinsic_store_shared:
 3806         visit_store_shared(ctx, instr);
 3807         break;
 3808     case nir_intrinsic_bindless_image_samples:
 3809     case nir_intrinsic_image_deref_samples:
 3810         result = visit_image_samples(ctx, instr);
 3811         break;
 3812     case nir_intrinsic_bindless_image_load:
 3813         result = visit_image_load(ctx, instr, true);
 3814         break;
 3815     case nir_intrinsic_image_deref_load:
 3816         result = visit_image_load(ctx, instr, false);
 3817         break;
 3818     case nir_intrinsic_bindless_image_store:
 3819         visit_image_store(ctx, instr, true);
 3820         break;
 3821     case nir_intrinsic_image_deref_store:
 3822         visit_image_store(ctx, instr, false);
 3823         break;
 3824     case nir_intrinsic_bindless_image_atomic_add:
 3825     case nir_intrinsic_bindless_image_atomic_imin:
 3826     case nir_intrinsic_bindless_image_atomic_umin:
 3827     case nir_intrinsic_bindless_image_atomic_imax:
 3828     case nir_intrinsic_bindless_image_atomic_umax:
 3829     case nir_intrinsic_bindless_image_atomic_and:
 3830     case nir_intrinsic_bindless_image_atomic_or:
 3831     case nir_intrinsic_bindless_image_atomic_xor:
 3832     case nir_intrinsic_bindless_image_atomic_exchange:
 3833     case nir_intrinsic_bindless_image_atomic_comp_swap:
 3834     case nir_intrinsic_bindless_image_atomic_inc_wrap:
 3835     case nir_intrinsic_bindless_image_atomic_dec_wrap:
 3836         result = visit_image_atomic(ctx, instr, true);
 3837         break;
 3838     case nir_intrinsic_image_deref_atomic_add:
 3839     case nir_intrinsic_image_deref_atomic_imin:
 3840     case nir_intrinsic_image_deref_atomic_umin:
 3841     case nir_intrinsic_image_deref_atomic_imax:
 3842     case nir_intrinsic_image_deref_atomic_umax:
 3843     case nir_intrinsic_image_deref_atomic_and:
 3844     case nir_intrinsic_image_deref_atomic_or:
 3845     case nir_intrinsic_image_deref_atomic_xor:
 3846     case nir_intrinsic_image_deref_atomic_exchange:
 3847     case nir_intrinsic_image_deref_atomic_comp_swap:
 3848     case nir_intrinsic_image_deref_atomic_inc_wrap:
 3849     case nir_intrinsic_image_deref_atomic_dec_wrap:
 3850         result = visit_image_atomic(ctx, instr, false);
 3851         break;
 3852     case nir_intrinsic_bindless_image_size:
 3853         result = visit_image_size(ctx, instr, true);
 3854         break;
 3855     case nir_intrinsic_image_deref_size:
 3856         result = visit_image_size(ctx, instr, false);
 3857         break;
 3858     case nir_intrinsic_shader_clock:
 3859         result = ac_build_shader_clock(&ctx->ac);
 3860         break;
 3861     case nir_intrinsic_discard:
 3862     case nir_intrinsic_discard_if:
 3863         emit_discard(ctx, instr);
 3864         break;
 3865     case nir_intrinsic_demote:
 3866     case nir_intrinsic_demote_if:
 3867         emit_demote(ctx, instr);
 3868         break;
 3869     case nir_intrinsic_memory_barrier:
 3870     case nir_intrinsic_group_memory_barrier:
 3871     case nir_intrinsic_memory_barrier_buffer:
 3872     case nir_intrinsic_memory_barrier_image:
 3873     case nir_intrinsic_memory_barrier_shared:
 3874         emit_membar(&ctx->ac, instr);
 3875         break;
 3876     case nir_intrinsic_memory_barrier_tcs_patch:
 3877         break;
 3878     case nir_intrinsic_control_barrier:
 3879         ac_emit_barrier(&ctx->ac, ctx->stage);
 3880         break;
 3881     case nir_intrinsic_shared_atomic_add:
 3882     case nir_intrinsic_shared_atomic_imin:
 3883     case nir_intrinsic_shared_atomic_umin:
 3884     case nir_intrinsic_shared_atomic_imax:
 3885     case nir_intrinsic_shared_atomic_umax:
 3886     case nir_intrinsic_shared_atomic_and:
 3887     case nir_intrinsic_shared_atomic_or:
 3888     case nir_intrinsic_shared_atomic_xor:
 3889     case nir_intrinsic_shared_atomic_exchange:
 3890     case nir_intrinsic_shared_atomic_comp_swap: {
 3891         LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
 3892                           instr->src[1].ssa->bit_size);
 3893         result = visit_var_atomic(ctx, instr, ptr, 1);
 3894         break;
 3895     }
 3896     case nir_intrinsic_deref_atomic_add:
 3897     case nir_intrinsic_deref_atomic_imin:
 3898     case nir_intrinsic_deref_atomic_umin:
 3899     case nir_intrinsic_deref_atomic_imax:
 3900     case nir_intrinsic_deref_atomic_umax:
 3901     case nir_intrinsic_deref_atomic_and:
 3902     case nir_intrinsic_deref_atomic_or:
 3903     case nir_intrinsic_deref_atomic_xor:
 3904     case nir_intrinsic_deref_atomic_exchange:
 3905     case nir_intrinsic_deref_atomic_comp_swap: {
 3906         LLVMValueRef ptr = get_src(ctx, instr->src[0]);
 3907         result = visit_var_atomic(ctx, instr, ptr, 1);
 3908         break;
 3909     }
 3910     case nir_intrinsic_load_barycentric_pixel:
 3911         result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
 3912         break;
 3913     case nir_intrinsic_load_barycentric_centroid:
 3914         result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
 3915         break;
 3916     case nir_intrinsic_load_barycentric_sample:
 3917         result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
 3918         break;
 3919     case nir_intrinsic_load_barycentric_model:
 3920         result = barycentric_model(ctx);
 3921         break;
 3922     case nir_intrinsic_load_barycentric_at_offset: {
 3923         LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
 3924         result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
 3925         break;
 3926     }
 3927     case nir_intrinsic_load_barycentric_at_sample: {
 3928         LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
 3929         result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
 3930         break;
 3931     }
 3932     case nir_intrinsic_load_interpolated_input: {
 3933         /* We assume any indirect loads have been lowered away */
 3934         ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
 3935         assert(offset);
 3936         assert(offset[0].i32 == 0);
 3937 
 3938         LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
 3939         unsigned index = nir_intrinsic_base(instr);
 3940         unsigned component = nir_intrinsic_component(instr);
 3941         result = load_interpolated_input(ctx, interp_param, index,
 3942                          component,
 3943                          instr->dest.ssa.num_components,
 3944                          instr->dest.ssa.bit_size);
 3945         break;
 3946     }
 3947     case nir_intrinsic_load_input:
 3948     case nir_intrinsic_load_input_vertex:
 3949         result = load_input(ctx, instr);
 3950         break;
 3951     case nir_intrinsic_emit_vertex:
 3952         ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
 3953         break;
 3954     case nir_intrinsic_emit_vertex_with_counter: {
 3955         unsigned stream = nir_intrinsic_stream_id(instr);
 3956         LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
 3957         ctx->abi->emit_vertex_with_counter(ctx->abi, stream,
 3958                            next_vertex,
 3959                            ctx->abi->outputs);
 3960         break;
 3961     }
 3962     case nir_intrinsic_end_primitive:
 3963     case nir_intrinsic_end_primitive_with_counter:
 3964         ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
 3965         break;
 3966     case nir_intrinsic_load_tess_coord:
 3967         result = ctx->abi->load_tess_coord(ctx->abi);
 3968         break;
 3969     case nir_intrinsic_load_tess_level_outer:
 3970         result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
 3971         break;
 3972     case nir_intrinsic_load_tess_level_inner:
 3973         result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
 3974         break;
 3975     case nir_intrinsic_load_tess_level_outer_default:
 3976         result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
 3977         break;
 3978     case nir_intrinsic_load_tess_level_inner_default:
 3979         result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
 3980         break;
 3981     case nir_intrinsic_load_patch_vertices_in:
 3982         result = ctx->abi->load_patch_vertices_in(ctx->abi);
 3983         break;
 3984     case nir_intrinsic_vote_all: {
 3985         LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
 3986         result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
 3987         break;
 3988     }
 3989     case nir_intrinsic_vote_any: {
 3990         LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
 3991         result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
 3992         break;
 3993     }
 3994     case nir_intrinsic_shuffle:
 3995         if (ctx->ac.chip_class == GFX8 ||
 3996             ctx->ac.chip_class == GFX9 ||
 3997             (ctx->ac.chip_class == GFX10 && ctx->ac.wave_size == 32)) {
 3998             result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
 3999                           get_src(ctx, instr->src[1]));
 4000         } else {
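                  /* Fallback: ac_build_shuffle (used above) relies on ds_bpermute, which
                   * presumably is not usable for this chip/wave-size combination, so
                   * emulate the shuffle with a waterfall loop over the possibly divergent
                   * index and llvm.amdgcn.readlane; readlane is a 32-bit operation, hence
                   * the zext/trunc around it. */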
 4001             LLVMValueRef src = get_src(ctx, instr->src[0]);
 4002             LLVMValueRef index = get_src(ctx, instr->src[1]);
 4003             LLVMTypeRef type = LLVMTypeOf(src);
 4004             struct waterfall_context wctx;
 4005             LLVMValueRef index_val;
 4006 
 4007             index_val = enter_waterfall(ctx, &wctx, index, true);
 4008 
 4009             src = LLVMBuildZExt(ctx->ac.builder, src,
 4010                         ctx->ac.i32, "");
 4011 
 4012             result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
 4013                             ctx->ac.i32,
 4014                             (LLVMValueRef []) { src, index_val }, 2,
 4015                             AC_FUNC_ATTR_READNONE |
 4016                             AC_FUNC_ATTR_CONVERGENT);
 4017 
 4018             result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
 4019 
 4020             result = exit_waterfall(ctx, &wctx, result);
 4021         }
 4022         break;
 4023     case nir_intrinsic_reduce:
 4024         result = ac_build_reduce(&ctx->ac,
 4025                 get_src(ctx, instr->src[0]),
 4026                 instr->const_index[0],
 4027                 instr->const_index[1]);
 4028         break;
 4029     case nir_intrinsic_inclusive_scan:
 4030         result = ac_build_inclusive_scan(&ctx->ac,
 4031                 get_src(ctx, instr->src[0]),
 4032                 instr->const_index[0]);
 4033         break;
 4034     case nir_intrinsic_exclusive_scan:
 4035         result = ac_build_exclusive_scan(&ctx->ac,
 4036                 get_src(ctx, instr->src[0]),
 4037                 instr->const_index[0]);
 4038         break;
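          /* The quad operations below map onto ac_build_quad_swizzle; its four small
           * integer arguments select, for each lane of a quad, which quad lane (0-3)
           * to read from. */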
 4039     case nir_intrinsic_quad_broadcast: {
 4040         unsigned lane = nir_src_as_uint(instr->src[1]);
 4041         result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
 4042                 lane, lane, lane, lane);
 4043         break;
 4044     }
 4045     case nir_intrinsic_quad_swap_horizontal:
 4046         result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
 4047         break;
 4048     case nir_intrinsic_quad_swap_vertical:
 4049         result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
 4050         break;
 4051     case nir_intrinsic_quad_swap_diagonal:
 4052         result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
 4053         break;
 4054     case nir_intrinsic_quad_swizzle_amd: {
 4055         uint32_t mask = nir_intrinsic_swizzle_mask(instr);
 4056         result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
 4057                            mask & 0x3, (mask >> 2) & 0x3,
 4058                            (mask >> 4) & 0x3, (mask >> 6) & 0x3);
 4059         break;
 4060     }
 4061     case nir_intrinsic_masked_swizzle_amd: {
 4062         uint32_t mask = nir_intrinsic_swizzle_mask(instr);
 4063         result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
 4064         break;
 4065     }
 4066     case nir_intrinsic_write_invocation_amd:
 4067         result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
 4068                         get_src(ctx, instr->src[1]),
 4069                         get_src(ctx, instr->src[2]));
 4070         break;
 4071     case nir_intrinsic_mbcnt_amd:
 4072         result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
 4073         break;
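          /* Scratch access: index into the ctx->scratch backing array with
           * ac_build_gep0, then bitcast the element pointer to the component/vector
           * type actually being accessed before loading or storing. */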
 4074     case nir_intrinsic_load_scratch: {
 4075         LLVMValueRef offset = get_src(ctx, instr->src[0]);
 4076         LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
 4077                          offset);
 4078         LLVMTypeRef comp_type =
 4079             LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
 4080         LLVMTypeRef vec_type =
 4081             instr->dest.ssa.num_components == 1 ? comp_type :
 4082             LLVMVectorType(comp_type, instr->dest.ssa.num_components);
 4083         unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
 4084         ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
 4085                        LLVMPointerType(vec_type, addr_space), "");
 4086         result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
 4087         break;
 4088     }
 4089     case nir_intrinsic_store_scratch: {
 4090         LLVMValueRef offset = get_src(ctx, instr->src[1]);
 4091         LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
 4092                          offset);
 4093         LLVMTypeRef comp_type =
 4094             LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
 4095         unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
 4096         ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
 4097                        LLVMPointerType(comp_type, addr_space), "");
 4098         LLVMValueRef src = get_src(ctx, instr->src[0]);
 4099         unsigned wrmask = nir_intrinsic_write_mask(instr);
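              /* Honour the NIR write mask: split it into runs of consecutive
               * components and emit one (scalar or vector) store per run. */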
 4100         while (wrmask) {
 4101             int start, count;
 4102             u_bit_scan_consecutive_range(&wrmask, &start, &count);
 4103
 4104             LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
 4105             LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
 4106             LLVMTypeRef vec_type =
 4107                 count == 1 ? comp_type : LLVMVectorType(comp_type, count);
 4108             offset_ptr = LLVMBuildBitCast(ctx->ac.builder,
 4109                               offset_ptr,
 4110                               LLVMPointerType(vec_type, addr_space),
 4111                               "");
 4112             LLVMValueRef offset_src =
 4113                 ac_extract_components(&ctx->ac, src, start, count);
 4114             LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
 4115         }
 4116         break;
 4117     }
 4118     case nir_intrinsic_load_constant: {
 4119         unsigned base = nir_intrinsic_base(instr);
 4120         unsigned range = nir_intrinsic_range(instr);
 4121 
 4122         LLVMValueRef offset = get_src(ctx, instr->src[0]);
 4123         offset = LLVMBuildAdd(ctx->ac.builder, offset,
 4124                       LLVMConstInt(ctx->ac.i32, base, false), "");
 4125 
 4126         /* Clamp the offset to avoid out-of-bounds accesses, because global
 4127          * instructions can't handle them.
 4128          */
 4129         LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
 4130         LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
 4131                           offset, size, "");
 4132         offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");
 4133 
 4134         LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data,
 4135                          offset);
 4136         LLVMTypeRef comp_type =
 4137             LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
 4138         LLVMTypeRef vec_type =
 4139             instr->dest.ssa.num_components == 1 ? comp_type :
 4140             LLVMVectorType(comp_type, instr->dest.ssa.num_components);
 4141         unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
 4142         ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
 4143                        LLVMPointerType(vec_type, addr_space), "");
 4144         result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
 4145         break;
 4146     }
 4147     default:
 4148         fprintf(stderr, "Unknown intrinsic: ");
 4149         nir_print_instr(&instr->instr, stderr);
 4150         fprintf(stderr, "\n");
 4151         break;
 4152     }
 4153     if (result) {
 4154         ctx->ssa_defs[instr->dest.ssa.index] = result;
 4155     }
 4156 }
 4157 
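      /* Fetch the 32-bit descriptor index of a bindless uniform.  The handles
       * appear to live in UBO 0 of the driver ABI (ctx->abi->load_ubo), at
       * base_index * 4 bytes plus 8 bytes per array element, since bindless
       * handles are 64 bits wide. */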
 4158 static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx,
 4159                             unsigned base_index,
 4160                             unsigned constant_index,
 4161                             LLVMValueRef dynamic_index)
 4162 {
 4163     LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);
 4164     LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
 4165                       LLVMConstInt(ctx->ac.i32, constant_index, 0), "");
 4166 
 4167     /* Bindless uniforms are 64 bits wide, so multiply the index by 8 */
 4168     index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), "");
 4169     offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
 4170 
 4171     LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0);
 4172 
 4173     LLVMValueRef ret = ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset,
 4174                         NULL, 0, 0, true, true);
 4175 
 4176     return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
 4177 }
 4178 
 4179 struct sampler_desc_address {
 4180     unsigned descriptor_set;
 4181     unsigned base_index; /* binding in vulkan */
 4182     unsigned constant_index;
 4183     LLVMValueRef dynamic_index;
 4184     bool image;
 4185     bool bindless;
 4186 };
 4187 
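      /* Flatten a sampler/image reference (deref chain or bindless handle) into a
       * descriptor set / binding pair, a constant array offset and an optional
       * dynamic index; the descriptor itself is not loaded here. */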
 4188 static struct sampler_desc_address
 4189 get_sampler_desc_internal(struct ac_nir_context *ctx,
 4190               nir_deref_instr *deref_instr,
 4191               const nir_instr *instr,
 4192               bool image)
 4193 {
 4194     LLVMValueRef index = NULL;
 4195     unsigned constant_index = 0;
 4196     unsigned descriptor_set;
 4197     unsigned base_index;
 4198     bool bindless = false;
 4199 
 4200     if (!deref_instr) {
 4201         descriptor_set = 0;
 4202         if (image) {
 4203             nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr);
 4204             base_index = 0;
 4205             bindless = true;
 4206             index = get_src(ctx, img_instr->src[0]);
 4207         } else {
 4208             nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
 4209             int sampSrcIdx = nir_tex_instr_src_index(tex_instr,
 4210                                  nir_tex_src_sampler_handle);
 4211             if (sampSrcIdx != -1) {
 4212                 base_index = 0;
 4213                 bindless = true;
 4214                 index = get_src(ctx, tex_instr->src[sampSrcIdx].src);
 4215             } else {
 4216                 assert(tex_instr && !image);
 4217                 base_index = tex_instr->sampler_index;
 4218             }
 4219         }
 4220     } else {
 4221         while (deref_instr->deref_type != nir_deref_type_var) {
 4222             if (deref_instr->deref_type == nir_deref_type_array) {
 4223                 unsigned array_size = glsl_get_aoa_size(deref_instr->type);
 4224                 if (!array_size)
 4225                     array_size = 1;
 4226 
 4227                 if (nir_src_is_const(deref_instr->arr.index)) {
 4228                     constant_index += array_size * nir_src_as_uint(deref_instr->arr.index);
 4229                 } else {
 4230                     LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);
 4231 
 4232                     indirect = LLVMBuildMul(ctx->ac.builder, indirect,
 4233                         LLVMConstInt(ctx->ac.i32, array_size, false), "");
 4234 
 4235                     if (!index)
 4236                         index = indirect;
 4237                     else
 4238                         index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
 4239                 }
 4240 
 4241                 deref_instr = nir_src_as_deref(deref_instr->parent);
 4242             } else if (deref_instr->deref_type == nir_deref_type_struct) {
 4243                 unsigned sidx = deref_instr->strct.index;
 4244                 deref_instr = nir_src_as_deref(deref_instr->parent);
 4245                 constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);
 4246             } else {
 4247                 unreachable("Unsupported deref type");
 4248             }
 4249         }
 4250         descriptor_set = deref_instr->var->data.descriptor_set;
 4251 
 4252         if (deref_instr->var->data.bindless) {
 4253             /* For now just assert on unhandled variable types */
 4254             assert(deref_instr->var->data.mode == nir_var_uniform);
 4255 
 4256             base_index = deref_instr->var->data.driver_location;
 4257             bindless = true;
 4258 
 4259             index = index ? index : ctx->ac.i32_0;
 4260             index = get_bindless_index_from_uniform(ctx, base_index,
 4261                                 constant_index, index);
 4262         } else
 4263             base_index = deref_instr->var->data.binding;
 4264     }
 4265     return (struct sampler_desc_address) {
 4266         .descriptor_set = descriptor_set,
 4267         .base_index = base_index,
 4268         .constant_index = constant_index,
 4269         .dynamic_index = index,
 4270         .image = image,
 4271         .bindless = bindless,
 4272     };
 4273 }
 4274 
 4275 /* Extract any possibly divergent index into a separate value that can be fed
 4276  * into get_sampler_desc with the same arguments. */
 4277 static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
 4278                        nir_deref_instr *deref_instr,
 4279                        const nir_instr *instr,
 4280                        bool image)
 4281 {
 4282     struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
 4283     return addr.dynamic_index;
 4284 }
 4285 
 4286 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
 4287                      nir_deref_instr *deref_instr,
 4288                      enum ac_descriptor_type desc_type,
 4289                      const nir_instr *instr,
 4290                      LLVMValueRef index,
 4291                      bool image, bool write)
 4292 {
 4293     struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
 4294     return ctx->abi->load_sampler_desc(ctx->abi,
 4295                       addr.descriptor_set,
 4296                       addr.base_index,
 4297                       addr.constant_index, index,
 4298                       desc_type, addr.image, write, addr.bindless);
 4299 }
 4300 
 4301 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
 4302  *
 4303  * GFX6-GFX7:
 4304  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
 4305  *   filtering manually. The driver sets img7 to a mask clearing
 4306  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
 4307  *     s_and_b32 samp0, samp0, img7
 4308  *
 4309  * GFX8:
 4310  *   The ANISO_OVERRIDE sampler field enables this fix in TA.
 4311  */
 4312 static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
 4313                                            LLVMValueRef res, LLVMValueRef samp)
 4314 {
 4315     LLVMBuilderRef builder = ctx->ac.builder;
 4316     LLVMValueRef img7, samp0;
 4317 
 4318     if (ctx->ac.chip_class >= GFX8)
 4319         return samp;
 4320 
 4321     img7 = LLVMBuildExtractElement(builder, res,
 4322                                    LLVMConstInt(ctx->ac.i32, 7, 0), "");
 4323     samp0 = LLVMBuildExtractElement(builder, samp,
 4324                                     LLVMConstInt(ctx->ac.i32, 0, 0), "");
 4325     samp0 = LLVMBuildAnd(builder, samp0, img7, "");
 4326     return LLVMBuildInsertElement(builder, samp, samp0,
 4327                                   LLVMConstInt(ctx->ac.i32, 0, 0), "");
 4328 }
 4329 
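      /* Resolve the resource, sampler and (when needed) FMASK descriptors for a
       * texture instruction, entering waterfall loops for any non-uniform
       * texture/sampler indices. */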
 4330 static void tex_fetch_ptrs(struct ac_nir_context *ctx,
 4331                nir_tex_instr *instr,
 4332                struct waterfall_context *wctx,
 4333                LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
 4334                LLVMValueRef *fmask_ptr)
 4335 {
 4336     nir_deref_instr *texture_deref_instr = NULL;
 4337     nir_deref_instr *sampler_deref_instr = NULL;
 4338     int plane = -1;
 4339 
 4340     for (unsigned i = 0; i < instr->num_srcs; i++) {
 4341         switch (instr->src[i].src_type) {
 4342         case nir_tex_src_texture_deref:
 4343             texture_deref_instr = nir_src_as_deref(instr->src[i].src);
 4344             break;
 4345         case nir_tex_src_sampler_deref:
 4346             sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
 4347             break;
 4348         case nir_tex_src_plane:
 4349             plane = nir_src_as_int(instr->src[i].src);
 4350             break;
 4351         default:
 4352             break;
 4353         }
 4354     }
 4355 
 4356     LLVMValueRef texture_dynamic_index = get_sampler_desc_index(ctx, texture_deref_instr,
 4357                                     &instr->instr, false);
 4358     if (!sampler_deref_instr)
 4359         sampler_deref_instr = texture_deref_instr;
 4360 
 4361     LLVMValueRef sampler_dynamic_index = get_sampler_desc_index(ctx, sampler_deref_instr,
 4362                                     &instr->instr, false);
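          /* Divergent descriptor indices must be made wave-uniform before the
           * descriptor loads; enter_waterfall opens a loop over the distinct index
           * values, which the caller is responsible for closing with exit_waterfall. */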
 4363     if (instr->texture_non_uniform)
 4364         texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true);
 4365 
 4366     if (instr->sampler_non_uniform)
 4367         sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true);
 4368 
 4369     enum ac_descriptor_type main_descriptor = instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
 4370 
 4371     if (plane >= 0) {
 4372         assert(instr->op != nir_texop_txf_ms &&
 4373                instr->op != nir_texop_samples_identical);
 4374         assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
 4375 
 4376         main_descriptor = AC_DESC_PLANE_0 + plane;
 4377     }
 4378 
 4379     if (instr->op == nir_texop_fragment_mask_fetch) {
 4380         /* The fragment mask is fetched from the compressed
 4381          * multisampled surface.
 4382          */
 4383         main_descriptor = AC_DESC_FMASK;
 4384     }
 4385 
 4386     *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr,
 4387                                 texture_dynamic_index, false, false);
 4388 
 4389     if (samp_ptr) {
 4390         *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr,
 4391                                      sampler_dynamic_index, false, false);
 4392         if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
 4393             *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
 4394     }
 4395     if (fmask_ptr && (instr->op == nir_texop_txf_ms ||
 4396                       instr->op == nir_texop_samples_identical))
 4397         *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK,
 4398                                       &instr->instr, texture_dynamic_index, false, false);
 4399 }
 4400 
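      /* Array layers are selected by rounding the slice coordinate to the nearest
       * integer, so round it in float and convert back to an integer. */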
 4401 static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
 4402                       LLVMValueRef coord)
 4403 {
 4404     coord = ac_to_float(ctx, coord);
 4405     coord = ac_build_round(ctx, coord);
 4406     coord = ac_to_integer(ctx, coord);
 4407     return coord;
 4408 }
 4409 
 4410 static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 4411 {
 4412     LLVMValueRef result = NULL;
 4413     struct ac_image_args args = { 0 };
 4414     LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
 4415     LLVMValueRef ddx = NULL, ddy = NULL;
 4416     unsigned offset_src = 0;
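          /* wctx[0]/wctx[1] track the waterfall loops that tex_fetch_ptrs may open
           * for a non-uniform texture and sampler index, respectively. */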
 4417     struct waterfall_context wctx[2] = {{{0}}};
 4418 
 4419     tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr);
 4420 
 4421     for (unsigned i = 0; i < instr->num_srcs; i++) {
 4422         switch (instr->src[i].src_type) {
 4423         case nir_tex_src_coord: {
 4424             LLVMValueRef coord = get_src(ctx, instr->src[i].src);
 4425             for (unsigned chan = 0; chan < instr->coord_components; ++chan)
 4426                 args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
 4427             break;
 4428         }
 4429         case nir_tex_src_projector:
 4430             break;
 4431         case nir_tex_src_comparator:
 4432             if (instr->is_shadow) {
 4433                 args.compare = get_src(ctx, instr->src[i].src);
 4434                 args.compare = ac_to_float(&ctx->ac, args.compare);
 4435             }
 4436             break;
 4437         case nir_tex_src_offset:
 4438             args.offset = get_src(ctx, instr->src[i].src);
 4439             offset_src = i;
 4440             break;
 4441         case nir_tex_src_bias:
 4442             if (instr->op == nir_texop_txb)
 4443                 args.bias = get_src(ctx, instr->src[i].src);
 4444             break;
 4445         case nir_tex_src_lod: {
 4446             if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
 4447                 args.level_zero = true;
 4448             else
 4449                 args.lod = get_src(ctx, instr->src[i].src);
 4450             break;
 4451         }
 4452         case nir_tex_src_ms_index:
 44