"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/radeonsi/si_shader_llvm.c" (16 Sep 2020, 26121 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "si_shader_llvm.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright 2016 Advanced Micro Devices, Inc.
    3  * All Rights Reserved.
    4  *
    5  * Permission is hereby granted, free of charge, to any person obtaining a
    6  * copy of this software and associated documentation files (the "Software"),
    7  * to deal in the Software without restriction, including without limitation
    8  * on the rights to use, copy, modify, merge, publish, distribute, sub
    9  * license, and/or sell copies of the Software, and to permit persons to whom
   10  * the Software is furnished to do so, subject to the following conditions:
   11  *
   12  * The above copyright notice and this permission notice (including the next
   13  * paragraph) shall be included in all copies or substantial portions of the
   14  * Software.
   15  *
   16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
   19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
   20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
   23  */
   24 
   25 #include "ac_nir_to_llvm.h"
   26 #include "ac_rtld.h"
   27 #include "si_pipe.h"
   28 #include "si_shader_internal.h"
   29 #include "sid.h"
   30 #include "tgsi/tgsi_from_mesa.h"
   31 #include "util/u_memory.h"
   32 
   33 struct si_llvm_diagnostics {
   34    struct pipe_debug_callback *debug;
   35    unsigned retval;
   36 };
   37 
   38 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
   39 {
   40    struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
   41    LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
   42    const char *severity_str = NULL;
   43 
   44    switch (severity) {
   45    case LLVMDSError:
   46       severity_str = "error";
   47       break;
   48    case LLVMDSWarning:
   49       severity_str = "warning";
   50       break;
   51    case LLVMDSRemark:
   52    case LLVMDSNote:
   53    default:
   54       return;
   55    }
   56 
   57    char *description = LLVMGetDiagInfoDescription(di);
   58 
   59    pipe_debug_message(diag->debug, SHADER_INFO, "LLVM diagnostic (%s): %s", severity_str,
   60                       description);
   61 
   62    if (severity == LLVMDSError) {
   63       diag->retval = 1;
   64       fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
   65    }
   66 
   67    LLVMDisposeMessage(description);
   68 }
   69 
   70 bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
   71                      struct ac_shader_config *conf, struct ac_llvm_compiler *compiler,
   72                      struct ac_llvm_context *ac, struct pipe_debug_callback *debug,
   73                      enum pipe_shader_type shader_type, const char *name, bool less_optimized)
   74 {
   75    unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
   76 
   77    if (si_can_dump_shader(sscreen, shader_type)) {
   78       fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
   79 
   80       if (!(sscreen->debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
   81          fprintf(stderr, "%s LLVM IR:\n\n", name);
   82          ac_dump_module(ac->module);
   83          fprintf(stderr, "\n");
   84       }
   85    }
   86 
   87    if (sscreen->record_llvm_ir) {
   88       char *ir = LLVMPrintModuleToString(ac->module);
   89       binary->llvm_ir_string = strdup(ir);
   90       LLVMDisposeMessage(ir);
   91    }
   92 
   93    if (!si_replace_shader(count, binary)) {
   94       struct ac_compiler_passes *passes = compiler->passes;
   95 
   96       if (ac->wave_size == 32)
   97          passes = compiler->passes_wave32;
   98       else if (less_optimized && compiler->low_opt_passes)
   99          passes = compiler->low_opt_passes;
  100 
  101       struct si_llvm_diagnostics diag = {debug};
  102       LLVMContextSetDiagnosticHandler(ac->context, si_diagnostic_handler, &diag);
  103 
  104       if (!ac_compile_module_to_elf(passes, ac->module, (char **)&binary->elf_buffer,
  105                                     &binary->elf_size))
  106          diag.retval = 1;
  107 
  108       if (diag.retval != 0) {
  109          pipe_debug_message(debug, SHADER_INFO, "LLVM compilation failed");
  110          return false;
  111       }
  112    }
  113 
  114    struct ac_rtld_binary rtld;
  115    if (!ac_rtld_open(&rtld, (struct ac_rtld_open_info){
  116                                .info = &sscreen->info,
  117                                .shader_type = tgsi_processor_to_shader_stage(shader_type),
  118                                .wave_size = ac->wave_size,
  119                                .num_parts = 1,
  120                                .elf_ptrs = &binary->elf_buffer,
  121                                .elf_sizes = &binary->elf_size}))
  122       return false;
  123 
  124    bool ok = ac_rtld_read_config(&rtld, conf);
  125    ac_rtld_close(&rtld);
  126    return ok;
  127 }
  128 
  129 void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen,
  130                           struct ac_llvm_compiler *compiler, unsigned wave_size)
  131 {
  132    memset(ctx, 0, sizeof(*ctx));
  133    ctx->screen = sscreen;
  134    ctx->compiler = compiler;
  135 
  136    ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class, sscreen->info.family,
  137                         AC_FLOAT_MODE_DEFAULT_OPENGL, wave_size, 64);
  138 }
  139 
  140 void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types,
  141                          unsigned num_return_elems, unsigned max_workgroup_size)
  142 {
  143    LLVMTypeRef ret_type;
  144    enum ac_llvm_calling_convention call_conv;
  145    enum pipe_shader_type real_shader_type;
  146 
  147    if (num_return_elems)
  148       ret_type = LLVMStructTypeInContext(ctx->ac.context, return_types, num_return_elems, true);
  149    else
  150       ret_type = ctx->ac.voidt;
  151 
  152    real_shader_type = ctx->type;
  153 
  154    /* LS is merged into HS (TCS), and ES is merged into GS. */
  155    if (ctx->screen->info.chip_class >= GFX9) {
  156       if (ctx->shader->key.as_ls)
  157          real_shader_type = PIPE_SHADER_TESS_CTRL;
  158       else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
  159          real_shader_type = PIPE_SHADER_GEOMETRY;
  160    }
  161 
  162    switch (real_shader_type) {
  163    case PIPE_SHADER_VERTEX:
  164    case PIPE_SHADER_TESS_EVAL:
  165       call_conv = AC_LLVM_AMDGPU_VS;
  166       break;
  167    case PIPE_SHADER_TESS_CTRL:
  168       call_conv = AC_LLVM_AMDGPU_HS;
  169       break;
  170    case PIPE_SHADER_GEOMETRY:
  171       call_conv = AC_LLVM_AMDGPU_GS;
  172       break;
  173    case PIPE_SHADER_FRAGMENT:
  174       call_conv = AC_LLVM_AMDGPU_PS;
  175       break;
  176    case PIPE_SHADER_COMPUTE:
  177       call_conv = AC_LLVM_AMDGPU_CS;
  178       break;
  179    default:
  180       unreachable("Unhandle shader type");
  181    }
  182 
  183    /* Setup the function */
  184    ctx->return_type = ret_type;
  185    ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name, ret_type, ctx->ac.module);
  186    ctx->return_value = LLVMGetUndef(ctx->return_type);
  187 
  188    if (ctx->screen->info.address32_hi) {
  189       ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-32bit-address-high-bits",
  190                                            ctx->screen->info.address32_hi);
  191    }
  192 
  193    LLVMAddTargetDependentFunctionAttr(ctx->main_fn, "no-signed-zeros-fp-math", "true");
  194 
  195    ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
  196 }
  197 
  198 void si_llvm_optimize_module(struct si_shader_context *ctx)
  199 {
  200    /* Dump LLVM IR before any optimization passes */
  201    if (ctx->screen->debug_flags & DBG(PREOPT_IR) && si_can_dump_shader(ctx->screen, ctx->type))
  202       LLVMDumpModule(ctx->ac.module);
  203 
  204    /* Run the pass */
  205    LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
  206    LLVMDisposeBuilder(ctx->ac.builder);
  207 }
  208 
  209 void si_llvm_dispose(struct si_shader_context *ctx)
  210 {
  211    LLVMDisposeModule(ctx->ac.module);
  212    LLVMContextDispose(ctx->ac.context);
  213    ac_llvm_context_dispose(&ctx->ac);
  214 }
  215 
  216 /**
  217  * Load a dword from a constant buffer.
  218  */
  219 LLVMValueRef si_buffer_load_const(struct si_shader_context *ctx, LLVMValueRef resource,
  220                                   LLVMValueRef offset)
  221 {
  222    return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL, 0, 0, true, true);
  223 }
  224 
  225 void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
  226 {
  227    if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
  228       LLVMBuildRetVoid(ctx->ac.builder);
  229    else
  230       LLVMBuildRet(ctx->ac.builder, ret);
  231 }
  232 
  233 LLVMValueRef si_insert_input_ret(struct si_shader_context *ctx, LLVMValueRef ret,
  234                                  struct ac_arg param, unsigned return_index)
  235 {
  236    return LLVMBuildInsertValue(ctx->ac.builder, ret, ac_get_arg(&ctx->ac, param), return_index, "");
  237 }
  238 
  239 LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret,
  240                                        struct ac_arg param, unsigned return_index)
  241 {
  242    LLVMBuilderRef builder = ctx->ac.builder;
  243    LLVMValueRef p = ac_get_arg(&ctx->ac, param);
  244 
  245    return LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, p), return_index, "");
  246 }
  247 
  248 LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
  249                                  struct ac_arg param, unsigned return_index)
  250 {
  251    LLVMBuilderRef builder = ctx->ac.builder;
  252    LLVMValueRef ptr = ac_get_arg(&ctx->ac, param);
  253    ptr = LLVMBuildPtrToInt(builder, ptr, ctx->ac.i32, "");
  254    return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
  255 }
  256 
  257 LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
  258 {
  259    LLVMValueRef ptr[2], list;
  260    bool merged_shader = si_is_merged_shader(ctx->shader);
  261 
  262    ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
  263    list =
  264       LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
  265    return list;
  266 }
  267 
  268 LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx, LLVMTypeRef type,
  269                                    LLVMValueRef val1, LLVMValueRef val2)
  270 {
  271    LLVMValueRef values[2] = {
  272       ac_to_integer(&ctx->ac, val1),
  273       ac_to_integer(&ctx->ac, val2),
  274    };
  275    LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
  276    return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
  277 }
  278 
  279 void si_llvm_emit_barrier(struct si_shader_context *ctx)
  280 {
  281    /* GFX6 only (thanks to a hw bug workaround):
  282     * The real barrier instruction isn’t needed, because an entire patch
  283     * always fits into a single wave.
  284     */
  285    if (ctx->screen->info.chip_class == GFX6 && ctx->type == PIPE_SHADER_TESS_CTRL) {
  286       ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
  287       return;
  288    }
  289 
  290    ac_build_s_barrier(&ctx->ac);
  291 }
  292 
  293 /* Ensure that the esgs ring is declared.
  294  *
  295  * We declare it with 64KB alignment as a hint that the
  296  * pointer value will always be 0.
  297  */
  298 void si_llvm_declare_esgs_ring(struct si_shader_context *ctx)
  299 {
  300    if (ctx->esgs_ring)
  301       return;
  302 
  303    assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
  304 
  305    ctx->esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
  306                                                 "esgs_ring", AC_ADDR_SPACE_LDS);
  307    LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
  308    LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
  309 }
  310 
  311 void si_init_exec_from_input(struct si_shader_context *ctx, struct ac_arg param, unsigned bitoffset)
  312 {
  313    LLVMValueRef args[] = {
  314       ac_get_arg(&ctx->ac, param),
  315       LLVMConstInt(ctx->ac.i32, bitoffset, 0),
  316    };
  317    ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.init.exec.from.input", ctx->ac.voidt, args, 2,
  318                       AC_FUNC_ATTR_CONVERGENT);
  319 }
  320 
  321 /**
  322  * Get the value of a shader input parameter and extract a bitfield.
  323  */
  324 static LLVMValueRef unpack_llvm_param(struct si_shader_context *ctx, LLVMValueRef value,
  325                                       unsigned rshift, unsigned bitwidth)
  326 {
  327    if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
  328       value = ac_to_integer(&ctx->ac, value);
  329 
  330    if (rshift)
  331       value = LLVMBuildLShr(ctx->ac.builder, value, LLVMConstInt(ctx->ac.i32, rshift, 0), "");
  332 
  333    if (rshift + bitwidth < 32) {
  334       unsigned mask = (1 << bitwidth) - 1;
  335       value = LLVMBuildAnd(ctx->ac.builder, value, LLVMConstInt(ctx->ac.i32, mask, 0), "");
  336    }
  337 
  338    return value;
  339 }
  340 
  341 LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg param, unsigned rshift,
  342                              unsigned bitwidth)
  343 {
  344    LLVMValueRef value = ac_get_arg(&ctx->ac, param);
  345 
  346    return unpack_llvm_param(ctx, value, rshift, bitwidth);
  347 }
  348 
  349 LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle)
  350 {
  351    if (swizzle > 0)
  352       return ctx->ac.i32_0;
  353 
  354    switch (ctx->type) {
  355    case PIPE_SHADER_VERTEX:
  356       return ac_get_arg(&ctx->ac, ctx->vs_prim_id);
  357    case PIPE_SHADER_TESS_CTRL:
  358       return ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id);
  359    case PIPE_SHADER_TESS_EVAL:
  360       return ac_get_arg(&ctx->ac, ctx->args.tes_patch_id);
  361    case PIPE_SHADER_GEOMETRY:
  362       return ac_get_arg(&ctx->ac, ctx->args.gs_prim_id);
  363    default:
  364       assert(0);
  365       return ctx->ac.i32_0;
  366    }
  367 }
  368 
  369 LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi)
  370 {
  371    struct si_shader_context *ctx = si_shader_context_from_abi(abi);
  372 
  373    LLVMValueRef values[3];
  374    LLVMValueRef result;
  375    unsigned i;
  376    unsigned *properties = ctx->shader->selector->info.properties;
  377 
  378    if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
  379       unsigned sizes[3] = {properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
  380                            properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
  381                            properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]};
  382 
  383       for (i = 0; i < 3; ++i)
  384          values[i] = LLVMConstInt(ctx->ac.i32, sizes[i], 0);
  385 
  386       result = ac_build_gather_values(&ctx->ac, values, 3);
  387    } else {
  388       result = ac_get_arg(&ctx->ac, ctx->block_size);
  389    }
  390 
  391    return result;
  392 }
  393 
  394 void si_llvm_declare_compute_memory(struct si_shader_context *ctx)
  395 {
  396    struct si_shader_selector *sel = ctx->shader->selector;
  397    unsigned lds_size = sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE];
  398 
  399    LLVMTypeRef i8p = LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS);
  400    LLVMValueRef var;
  401 
  402    assert(!ctx->ac.lds);
  403 
  404    var = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i8, lds_size),
  405                                      "compute_lds", AC_ADDR_SPACE_LDS);
  406    LLVMSetAlignment(var, 64 * 1024);
  407 
  408    ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
  409 }
  410 
  411 bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
  412 {
  413    if (nir->info.stage == MESA_SHADER_VERTEX) {
  414       si_llvm_load_vs_inputs(ctx, nir);
  415    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
  416       unsigned colors_read = ctx->shader->selector->info.colors_read;
  417       LLVMValueRef main_fn = ctx->main_fn;
  418 
  419       LLVMValueRef undef = LLVMGetUndef(ctx->ac.f32);
  420 
  421       unsigned offset = SI_PARAM_POS_FIXED_PT + 1;
  422 
  423       if (colors_read & 0x0f) {
  424          unsigned mask = colors_read & 0x0f;
  425          LLVMValueRef values[4];
  426          values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
  427          values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
  428          values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
  429          values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
  430          ctx->abi.color0 = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
  431       }
  432       if (colors_read & 0xf0) {
  433          unsigned mask = (colors_read & 0xf0) >> 4;
  434          LLVMValueRef values[4];
  435          values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
  436          values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
  437          values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
  438          values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
  439          ctx->abi.color1 = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
  440       }
  441 
  442       ctx->abi.interp_at_sample_force_center =
  443          ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center;
  444    } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
  445       if (nir->info.cs.user_data_components_amd) {
  446          ctx->abi.user_data = ac_get_arg(&ctx->ac, ctx->cs_user_data);
  447          ctx->abi.user_data = ac_build_expand_to_vec4(&ctx->ac, ctx->abi.user_data,
  448                                                       nir->info.cs.user_data_components_amd);
  449       }
  450    }
  451 
  452    ctx->abi.inputs = &ctx->inputs[0];
  453    ctx->abi.clamp_shadow_reference = true;
  454    ctx->abi.robust_buffer_access = true;
  455    ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero;
  456 
  457    if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) {
  458       assert(gl_shader_stage_is_compute(nir->info.stage));
  459       si_llvm_declare_compute_memory(ctx);
  460    }
  461    ac_nir_translate(&ctx->ac, &ctx->abi, &ctx->args, nir);
  462 
  463    return true;
  464 }
  465 
  466 /**
  467  * Given a list of shader part functions, build a wrapper function that
  468  * runs them in sequence to form a monolithic shader.
  469  */
  470 void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts,
  471                                unsigned num_parts, unsigned main_part,
  472                                unsigned next_shader_first_part)
  473 {
  474    LLVMBuilderRef builder = ctx->ac.builder;
  475    /* PS epilog has one arg per color component; gfx9 merged shader
  476     * prologs need to forward 40 SGPRs.
  477     */
  478    LLVMValueRef initial[AC_MAX_ARGS], out[AC_MAX_ARGS];
  479    LLVMTypeRef function_type;
  480    unsigned num_first_params;
  481    unsigned num_out, initial_num_out;
  482    ASSERTED unsigned num_out_sgpr;         /* used in debug checks */
  483    ASSERTED unsigned initial_num_out_sgpr; /* used in debug checks */
  484    unsigned num_sgprs, num_vgprs;
  485    unsigned gprs;
  486 
  487    memset(&ctx->args, 0, sizeof(ctx->args));
  488 
  489    for (unsigned i = 0; i < num_parts; ++i) {
  490       ac_add_function_attr(ctx->ac.context, parts[i], -1, AC_FUNC_ATTR_ALWAYSINLINE);
  491       LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
  492    }
  493 
  494    /* The parameters of the wrapper function correspond to those of the
  495     * first part in terms of SGPRs and VGPRs, but we use the types of the
  496     * main part to get the right types. This is relevant for the
  497     * dereferenceable attribute on descriptor table pointers.
  498     */
  499    num_sgprs = 0;
  500    num_vgprs = 0;
  501 
  502    function_type = LLVMGetElementType(LLVMTypeOf(parts[0]));
  503    num_first_params = LLVMCountParamTypes(function_type);
  504 
  505    for (unsigned i = 0; i < num_first_params; ++i) {
  506       LLVMValueRef param = LLVMGetParam(parts[0], i);
  507 
  508       if (ac_is_sgpr_param(param)) {
  509          assert(num_vgprs == 0);
  510          num_sgprs += ac_get_type_size(LLVMTypeOf(param)) / 4;
  511       } else {
  512          num_vgprs += ac_get_type_size(LLVMTypeOf(param)) / 4;
  513       }
  514    }
  515 
  516    gprs = 0;
  517    while (gprs < num_sgprs + num_vgprs) {
  518       LLVMValueRef param = LLVMGetParam(parts[main_part], ctx->args.arg_count);
  519       LLVMTypeRef type = LLVMTypeOf(param);
  520       unsigned size = ac_get_type_size(type) / 4;
  521 
  522       /* This is going to get casted anyways, so we don't have to
  523        * have the exact same type. But we do have to preserve the
  524        * pointer-ness so that LLVM knows about it.
  525        */
  526       enum ac_arg_type arg_type = AC_ARG_INT;
  527       if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
  528          type = LLVMGetElementType(type);
  529 
  530          if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
  531             if (LLVMGetVectorSize(type) == 4)
  532                arg_type = AC_ARG_CONST_DESC_PTR;
  533             else if (LLVMGetVectorSize(type) == 8)
  534                arg_type = AC_ARG_CONST_IMAGE_PTR;
  535             else
  536                assert(0);
  537          } else if (type == ctx->ac.f32) {
  538             arg_type = AC_ARG_CONST_FLOAT_PTR;
  539          } else {
  540             assert(0);
  541          }
  542       }
  543 
  544       ac_add_arg(&ctx->args, gprs < num_sgprs ? AC_ARG_SGPR : AC_ARG_VGPR, size, arg_type, NULL);
  545 
  546       assert(ac_is_sgpr_param(param) == (gprs < num_sgprs));
  547       assert(gprs + size <= num_sgprs + num_vgprs &&
  548              (gprs >= num_sgprs || gprs + size <= num_sgprs));
  549 
  550       gprs += size;
  551    }
  552 
  553    /* Prepare the return type. */
  554    unsigned num_returns = 0;
  555    LLVMTypeRef returns[AC_MAX_ARGS], last_func_type, return_type;
  556 
  557    last_func_type = LLVMGetElementType(LLVMTypeOf(parts[num_parts - 1]));
  558    return_type = LLVMGetReturnType(last_func_type);
  559 
  560    switch (LLVMGetTypeKind(return_type)) {
  561    case LLVMStructTypeKind:
  562       num_returns = LLVMCountStructElementTypes(return_type);
  563       assert(num_returns <= ARRAY_SIZE(returns));
  564       LLVMGetStructElementTypes(return_type, returns);
  565       break;
  566    case LLVMVoidTypeKind:
  567       break;
  568    default:
  569       unreachable("unexpected type");
  570    }
  571 
  572    si_llvm_create_func(ctx, "wrapper", returns, num_returns,
  573                        si_get_max_workgroup_size(ctx->shader));
  574 
  575    if (si_is_merged_shader(ctx->shader))
  576       ac_init_exec_full_mask(&ctx->ac);
  577 
  578    /* Record the arguments of the function as if they were an output of
  579     * a previous part.
  580     */
  581    num_out = 0;
  582    num_out_sgpr = 0;
  583 
  584    for (unsigned i = 0; i < ctx->args.arg_count; ++i) {
  585       LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
  586       LLVMTypeRef param_type = LLVMTypeOf(param);
  587       LLVMTypeRef out_type = ctx->args.args[i].file == AC_ARG_SGPR ? ctx->ac.i32 : ctx->ac.f32;
  588       unsigned size = ac_get_type_size(param_type) / 4;
  589 
  590       if (size == 1) {
  591          if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
  592             param = LLVMBuildPtrToInt(builder, param, ctx->ac.i32, "");
  593             param_type = ctx->ac.i32;
  594          }
  595 
  596          if (param_type != out_type)
  597             param = LLVMBuildBitCast(builder, param, out_type, "");
  598          out[num_out++] = param;
  599       } else {
  600          LLVMTypeRef vector_type = LLVMVectorType(out_type, size);
  601 
  602          if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
  603             param = LLVMBuildPtrToInt(builder, param, ctx->ac.i64, "");
  604             param_type = ctx->ac.i64;
  605          }
  606 
  607          if (param_type != vector_type)
  608             param = LLVMBuildBitCast(builder, param, vector_type, "");
  609 
  610          for (unsigned j = 0; j < size; ++j)
  611             out[num_out++] =
  612                LLVMBuildExtractElement(builder, param, LLVMConstInt(ctx->ac.i32, j, 0), "");
  613       }
  614 
  615       if (ctx->args.args[i].file == AC_ARG_SGPR)
  616          num_out_sgpr = num_out;
  617    }
  618 
  619    memcpy(initial, out, sizeof(out));
  620    initial_num_out = num_out;
  621    initial_num_out_sgpr = num_out_sgpr;
  622 
  623    /* Now chain the parts. */
  624    LLVMValueRef ret = NULL;
  625    for (unsigned part = 0; part < num_parts; ++part) {
  626       LLVMValueRef in[AC_MAX_ARGS];
  627       LLVMTypeRef ret_type;
  628       unsigned out_idx = 0;
  629       unsigned num_params = LLVMCountParams(parts[part]);
  630 
  631       /* Merged shaders are executed conditionally depending
  632        * on the number of enabled threads passed in the input SGPRs. */
  633       if (si_is_multi_part_shader(ctx->shader) && part == 0) {
  634          LLVMValueRef ena, count = initial[3];
  635 
  636          count = LLVMBuildAnd(builder, count, LLVMConstInt(ctx->ac.i32, 0x7f, 0), "");
  637          ena = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), count, "");
  638          ac_build_ifcc(&ctx->ac, ena, 6506);
  639       }
  640 
  641       /* Derive arguments for the next part from outputs of the
  642        * previous one.
  643        */
  644       for (unsigned param_idx = 0; param_idx < num_params; ++param_idx) {
  645          LLVMValueRef param;
  646          LLVMTypeRef param_type;
  647          bool is_sgpr;
  648          unsigned param_size;
  649          LLVMValueRef arg = NULL;
  650 
  651          param = LLVMGetParam(parts[part], param_idx);
  652          param_type = LLVMTypeOf(param);
  653          param_size = ac_get_type_size(param_type) / 4;
  654          is_sgpr = ac_is_sgpr_param(param);
  655 
  656          if (is_sgpr) {
  657             ac_add_function_attr(ctx->ac.context, parts[part], param_idx + 1, AC_FUNC_ATTR_INREG);
  658          } else if (out_idx < num_out_sgpr) {
  659             /* Skip returned SGPRs the current part doesn't
  660              * declare on the input. */
  661             out_idx = num_out_sgpr;
  662          }
  663 
  664          assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
  665 
  666          if (param_size == 1)
  667             arg = out[out_idx];
  668          else
  669             arg = ac_build_gather_values(&ctx->ac, &out[out_idx], param_size);
  670 
  671          if (LLVMTypeOf(arg) != param_type) {
  672             if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
  673                if (LLVMGetPointerAddressSpace(param_type) == AC_ADDR_SPACE_CONST_32BIT) {
  674                   arg = LLVMBuildBitCast(builder, arg, ctx->ac.i32, "");
  675                   arg = LLVMBuildIntToPtr(builder, arg, param_type, "");
  676                } else {
  677                   arg = LLVMBuildBitCast(builder, arg, ctx->ac.i64, "");
  678                   arg = LLVMBuildIntToPtr(builder, arg, param_type, "");
  679                }
  680             } else {
  681                arg = LLVMBuildBitCast(builder, arg, param_type, "");
  682             }
  683          }
  684 
  685          in[param_idx] = arg;
  686          out_idx += param_size;
  687       }
  688 
  689       ret = ac_build_call(&ctx->ac, parts[part], in, num_params);
  690 
  691       if (si_is_multi_part_shader(ctx->shader) && part + 1 == next_shader_first_part) {
  692          ac_build_endif(&ctx->ac, 6506);
  693 
  694          /* The second half of the merged shader should use
  695           * the inputs from the toplevel (wrapper) function,
  696           * not the return value from the last call.
  697           *
  698           * That's because the last call was executed condi-
  699           * tionally, so we can't consume it in the main
  700           * block.
  701           */
  702          memcpy(out, initial, sizeof(initial));
  703          num_out = initial_num_out;
  704          num_out_sgpr = initial_num_out_sgpr;
  705          continue;
  706       }
  707 
  708       /* Extract the returned GPRs. */
  709       ret_type = LLVMTypeOf(ret);
  710       num_out = 0;
  711       num_out_sgpr = 0;
  712 
  713       if (LLVMGetTypeKind(ret_type) != LLVMVoidTypeKind) {
  714          assert(LLVMGetTypeKind(ret_type) == LLVMStructTypeKind);
  715 
  716          unsigned ret_size = LLVMCountStructElementTypes(ret_type);
  717 
  718          for (unsigned i = 0; i < ret_size; ++i) {
  719             LLVMValueRef val = LLVMBuildExtractValue(builder, ret, i, "");
  720 
  721             assert(num_out < ARRAY_SIZE(out));
  722             out[num_out++] = val;
  723 
  724             if (LLVMTypeOf(val) == ctx->ac.i32) {
  725                assert(num_out_sgpr + 1 == num_out);
  726                num_out_sgpr = num_out;
  727             }
  728          }
  729       }
  730    }
  731 
  732    /* Return the value from the last part. */
  733    if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
  734       LLVMBuildRetVoid(builder);
  735    else
  736       LLVMBuildRet(builder, ret);
  737 }