"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/broadcom/compiler/vir.c" (16 Sep 2020, 41696 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "vir.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code-changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright © 2016-2017 Broadcom
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  */
   23 
   24 #include "broadcom/common/v3d_device_info.h"
   25 #include "v3d_compiler.h"
   26 #include "util/u_prim.h"
   27 
   28 int
   29 vir_get_nsrc(struct qinst *inst)
   30 {
   31         switch (inst->qpu.type) {
   32         case V3D_QPU_INSTR_TYPE_BRANCH:
   33                 return 0;
   34         case V3D_QPU_INSTR_TYPE_ALU:
   35                 if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
   36                         return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
   37                 else
   38                         return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
   39         }
   40 
   41         return 0;
   42 }
   43 
   44 /**
   45  * Returns whether the instruction has any side effects that must be
   46  * preserved.
   47  */
   48 bool
   49 vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
   50 {
   51         switch (inst->qpu.type) {
   52         case V3D_QPU_INSTR_TYPE_BRANCH:
   53                 return true;
   54         case V3D_QPU_INSTR_TYPE_ALU:
   55                 switch (inst->qpu.alu.add.op) {
   56                 case V3D_QPU_A_SETREVF:
   57                 case V3D_QPU_A_SETMSF:
   58                 case V3D_QPU_A_VPMSETUP:
   59                 case V3D_QPU_A_STVPMV:
   60                 case V3D_QPU_A_STVPMD:
   61                 case V3D_QPU_A_STVPMP:
   62                 case V3D_QPU_A_VPMWT:
   63                 case V3D_QPU_A_TMUWT:
   64                         return true;
   65                 default:
   66                         break;
   67                 }
   68 
   69                 switch (inst->qpu.alu.mul.op) {
   70                 case V3D_QPU_M_MULTOP:
   71                         return true;
   72                 default:
   73                         break;
   74                 }
   75         }
   76 
   77         if (inst->qpu.sig.ldtmu ||
   78             inst->qpu.sig.ldvary ||
   79             inst->qpu.sig.ldtlbu ||
   80             inst->qpu.sig.ldtlb ||
   81             inst->qpu.sig.wrtmuc ||
   82             inst->qpu.sig.thrsw) {
   83                 return true;
   84         }
   85 
   86         return false;
   87 }
   88 
   89 bool
   90 vir_is_raw_mov(struct qinst *inst)
   91 {
   92         if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
   93             (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
   94              inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
   95                 return false;
   96         }
   97 
   98         if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
   99             inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
  100                 return false;
  101         }
  102 
  103         if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
  104             inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE ||
  105             inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
  106             inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) {
  107                 return false;
  108         }
  109 
  110         if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
  111             inst->qpu.flags.mc != V3D_QPU_COND_NONE)
  112                 return false;
  113 
  114         return true;
  115 }
  116 
  117 bool
  118 vir_is_add(struct qinst *inst)
  119 {
  120         return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
  121                 inst->qpu.alu.add.op != V3D_QPU_A_NOP);
  122 }
  123 
  124 bool
  125 vir_is_mul(struct qinst *inst)
  126 {
  127         return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
  128                 inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
  129 }
  130 
  131 bool
  132 vir_is_tex(struct qinst *inst)
  133 {
  134         if (inst->dst.file == QFILE_MAGIC)
  135                 return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
  136 
  137         if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
  138             inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
  139                 return true;
  140         }
  141 
  142         return false;
  143 }
  144 
  145 bool
  146 vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
  147 {
  148         for (int i = 0; i < vir_get_nsrc(inst); i++) {
  149                 switch (inst->src[i].file) {
  150                 case QFILE_VPM:
  151                         return true;
  152                 default:
  153                         break;
  154                 }
  155         }
  156 
  157         if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
  158                                   inst->qpu.sig.ldtlb ||
  159                                   inst->qpu.sig.ldtlbu ||
  160                                   inst->qpu.sig.ldvpm)) {
  161                 return true;
  162         }
  163 
  164         return false;
  165 }
  166 
  167 bool
  168 vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
  169 {
  170         switch (inst->dst.file) {
  171         case QFILE_MAGIC:
  172                 switch (inst->dst.index) {
  173                 case V3D_QPU_WADDR_RECIP:
  174                 case V3D_QPU_WADDR_RSQRT:
  175                 case V3D_QPU_WADDR_EXP:
  176                 case V3D_QPU_WADDR_LOG:
  177                 case V3D_QPU_WADDR_SIN:
  178                         return true;
  179                 }
  180                 break;
  181         default:
  182                 break;
  183         }
  184 
  185         if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
  186                 return true;
  187 
  188         return false;
  189 }
  190 
  191 void
  192 vir_set_unpack(struct qinst *inst, int src,
  193                enum v3d_qpu_input_unpack unpack)
  194 {
  195         assert(src == 0 || src == 1);
  196 
  197         if (vir_is_add(inst)) {
  198                 if (src == 0)
  199                         inst->qpu.alu.add.a_unpack = unpack;
  200                 else
  201                         inst->qpu.alu.add.b_unpack = unpack;
  202         } else {
  203                 assert(vir_is_mul(inst));
  204                 if (src == 0)
  205                         inst->qpu.alu.mul.a_unpack = unpack;
  206                 else
  207                         inst->qpu.alu.mul.b_unpack = unpack;
  208         }
  209 }
  210 
  211 void
  212 vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
  213 {
  214         if (vir_is_add(inst)) {
  215                 inst->qpu.flags.ac = cond;
  216         } else {
  217                 assert(vir_is_mul(inst));
  218                 inst->qpu.flags.mc = cond;
  219         }
  220 }
  221 
  222 void
  223 vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
  224 {
  225         if (vir_is_add(inst)) {
  226                 inst->qpu.flags.apf = pf;
  227         } else {
  228                 assert(vir_is_mul(inst));
  229                 inst->qpu.flags.mpf = pf;
  230         }
  231 }
  232 
  233 void
  234 vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf)
  235 {
  236         if (vir_is_add(inst)) {
  237                 inst->qpu.flags.auf = uf;
  238         } else {
  239                 assert(vir_is_mul(inst));
  240                 inst->qpu.flags.muf = uf;
  241         }
  242 }
  243 
/* NOTE(review): this helper is compiled out (#if 0) and references the old
 * VC4-style QPU_PACK_* enums rather than the V3D_QPU_* enums used elsewhere
 * in this file -- it appears to be kept only as a porting reference.
 * Consider deleting it if nothing is ever expected to revive it.
 */
#if 0
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
        } else {
                switch (inst->dst.pack) {
                case QPU_PACK_A_NOP:
                case QPU_PACK_A_8888:
                case QPU_PACK_A_8888_SAT:
                case QPU_PACK_A_32_SAT:
                        return 0xf;
                case QPU_PACK_A_8A:
                case QPU_PACK_A_8A_SAT:
                        return 0x1;
                case QPU_PACK_A_8B:
                case QPU_PACK_A_8B_SAT:
                        return 0x2;
                case QPU_PACK_A_8C:
                case QPU_PACK_A_8C_SAT:
                        return 0x4;
                case QPU_PACK_A_8D:
                case QPU_PACK_A_8D_SAT:
                        return 0x8;
                case QPU_PACK_A_16A:
                case QPU_PACK_A_16A_SAT:
                        return 0x3;
                case QPU_PACK_A_16B:
                case QPU_PACK_A_16B_SAT:
                        return 0xc;
                }
        }
        unreachable("Bad pack field");
}
#endif
  292 
  293 struct qreg
  294 vir_get_temp(struct v3d_compile *c)
  295 {
  296         struct qreg reg;
  297 
  298         reg.file = QFILE_TEMP;
  299         reg.index = c->num_temps++;
  300 
  301         if (c->num_temps > c->defs_array_size) {
  302                 uint32_t old_size = c->defs_array_size;
  303                 c->defs_array_size = MAX2(old_size * 2, 16);
  304 
  305                 c->defs = reralloc(c, c->defs, struct qinst *,
  306                                    c->defs_array_size);
  307                 memset(&c->defs[old_size], 0,
  308                        sizeof(c->defs[0]) * (c->defs_array_size - old_size));
  309 
  310                 c->spillable = reralloc(c, c->spillable,
  311                                         BITSET_WORD,
  312                                         BITSET_WORDS(c->defs_array_size));
  313                 for (int i = old_size; i < c->defs_array_size; i++)
  314                         BITSET_SET(c->spillable, i);
  315         }
  316 
  317         return reg;
  318 }
  319 
  320 struct qinst *
  321 vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
  322 {
  323         struct qinst *inst = calloc(1, sizeof(*inst));
  324 
  325         inst->qpu = v3d_qpu_nop();
  326         inst->qpu.alu.add.op = op;
  327 
  328         inst->dst = dst;
  329         inst->src[0] = src0;
  330         inst->src[1] = src1;
  331         inst->uniform = ~0;
  332 
  333         return inst;
  334 }
  335 
  336 struct qinst *
  337 vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
  338 {
  339         struct qinst *inst = calloc(1, sizeof(*inst));
  340 
  341         inst->qpu = v3d_qpu_nop();
  342         inst->qpu.alu.mul.op = op;
  343 
  344         inst->dst = dst;
  345         inst->src[0] = src0;
  346         inst->src[1] = src1;
  347         inst->uniform = ~0;
  348 
  349         return inst;
  350 }
  351 
/* Builds (but does not emit) a conditional branch instruction with a
 * relative destination.
 */
struct qinst *
vir_branch_inst(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
        inst->qpu.branch.cond = cond;
        inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
        /* Both the immediate and uniform destinations are encoded as
         * PC-relative offsets.
         */
        inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
        inst->qpu.branch.ub = true;
        inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;

        inst->dst = vir_nop_reg();
        /* Reserve a uniform slot holding constant 0 for the branch target --
         * presumably patched to the real offset later; confirm at the
         * QPU-emit stage.
         */
        inst->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT, 0);

        return inst;
}
  370 
/* Inserts the instruction into the IR at the current cursor position and
 * advances the cursor past it.
 */
static void
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
        switch (c->cursor.mode) {
        case vir_cursor_add:
                list_add(&inst->link, c->cursor.link);
                break;
        case vir_cursor_addtail:
                list_addtail(&inst->link, c->cursor.link);
                break;
        }

        /* Subsequent emits continue right after this instruction. */
        c->cursor = vir_after_inst(inst);
        /* Any modification of the instruction stream invalidates previously
         * computed live intervals.
         */
        c->live_intervals_valid = false;
}
  386 
  387 /* Updates inst to write to a new temporary, emits it, and notes the def. */
  388 struct qreg
  389 vir_emit_def(struct v3d_compile *c, struct qinst *inst)
  390 {
  391         assert(inst->dst.file == QFILE_NULL);
  392 
  393         /* If we're emitting an instruction that's a def, it had better be
  394          * writing a register.
  395          */
  396         if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
  397                 assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
  398                        v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
  399                 assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
  400                        v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
  401         }
  402 
  403         inst->dst = vir_get_temp(c);
  404 
  405         if (inst->dst.file == QFILE_TEMP)
  406                 c->defs[inst->dst.index] = inst;
  407 
  408         vir_emit(c, inst);
  409 
  410         return inst->dst;
  411 }
  412 
  413 struct qinst *
  414 vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
  415 {
  416         if (inst->dst.file == QFILE_TEMP)
  417                 c->defs[inst->dst.index] = NULL;
  418 
  419         vir_emit(c, inst);
  420 
  421         return inst;
  422 }
  423 
  424 struct qblock *
  425 vir_new_block(struct v3d_compile *c)
  426 {
  427         struct qblock *block = rzalloc(c, struct qblock);
  428 
  429         list_inithead(&block->instructions);
  430 
  431         block->predecessors = _mesa_set_create(block,
  432                                                _mesa_hash_pointer,
  433                                                _mesa_key_pointer_equal);
  434 
  435         block->index = c->next_block_index++;
  436 
  437         return block;
  438 }
  439 
  440 void
  441 vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
  442 {
  443         c->cur_block = block;
  444         c->cursor = vir_after_block(block);
  445         list_addtail(&block->link, &c->blocks);
  446 }
  447 
/* Returns the program's entry basic block (the first in emission order). */
struct qblock *
vir_entry_block(struct v3d_compile *c)
{
        return list_first_entry(&c->blocks, struct qblock, link);
}
  453 
/* Returns the program's exit basic block (the last in emission order). */
struct qblock *
vir_exit_block(struct v3d_compile *c)
{
        return list_last_entry(&c->blocks, struct qblock, link);
}
  459 
  460 void
  461 vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
  462 {
  463         _mesa_set_add(successor->predecessors, predecessor);
  464         if (predecessor->successors[0]) {
  465                 assert(!predecessor->successors[1]);
  466                 predecessor->successors[1] = successor;
  467         } else {
  468                 predecessor->successors[0] = successor;
  469         }
  470 }
  471 
  472 const struct v3d_compiler *
  473 v3d_compiler_init(const struct v3d_device_info *devinfo)
  474 {
  475         struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
  476         if (!compiler)
  477                 return NULL;
  478 
  479         compiler->devinfo = devinfo;
  480 
  481         if (!vir_init_reg_sets(compiler)) {
  482                 ralloc_free(compiler);
  483                 return NULL;
  484         }
  485 
  486         return compiler;
  487 }
  488 
  489 void
  490 v3d_compiler_free(const struct v3d_compiler *compiler)
  491 {
  492         ralloc_free((void *)compiler);
  493 }
  494 
/* Creates a per-shader compile context, cloning the NIR shader so this
 * compile owns a mutable copy.
 */
static struct v3d_compile *
vir_compile_init(const struct v3d_compiler *compiler,
                 struct v3d_key *key,
                 nir_shader *s,
                 void (*debug_output)(const char *msg,
                                      void *debug_output_data),
                 void *debug_output_data,
                 int program_id, int variant_id)
{
        struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);

        c->compiler = compiler;
        c->devinfo = compiler->devinfo;
        c->key = key;
        c->program_id = program_id;
        c->variant_id = variant_id;
        /* Start at the maximum thread count -- presumably reduced later under
         * register pressure; confirm in the register allocator.
         */
        c->threads = 4;
        c->debug_output = debug_output;
        c->debug_output_data = debug_output_data;

        /* Clone so our lowering passes don't mutate the caller's shader. */
        s = nir_shader_clone(c, s);
        c->s = s;

        list_inithead(&c->blocks);
        vir_set_emit_block(c, vir_new_block(c));

        /* -1 presumably means "output not written by this shader". */
        c->output_position_index = -1;
        c->output_sample_mask_index = -1;

        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

        return c;
}
  529 
  530 static int
  531 type_size_vec4(const struct glsl_type *type, bool bindless)
  532 {
  533         return glsl_count_attribute_slots(type, false);
  534 }
  535 
/* Runs the NIR lowering passes that depend on the shader key's per-sampler
 * state (swizzles, coordinate clamping, 16-bit returns), then system-value
 * and scratch lowering.
 */
static void
v3d_lower_nir(struct v3d_compile *c)
{
        struct nir_lower_tex_options tex_options = {
                .lower_txd = true,
                .lower_tg4_broadcom_swizzle = true,

                .lower_rect = false, /* XXX: Use this on V3D 3.x */
                .lower_txp = ~0,
                /* Apply swizzles to all samplers. */
                .swizzle_result = ~0,
        };

        /* Lower the format swizzle and (for 32-bit returns)
         * ARB_texture_swizzle-style swizzle.
         */
        for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
                for (int j = 0; j < 4; j++)
                        tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];

                /* Per-sampler saturation bitmasks, one bit per unit. */
                if (c->key->tex[i].clamp_s)
                        tex_options.saturate_s |= 1 << i;
                if (c->key->tex[i].clamp_t)
                        tex_options.saturate_t |= 1 << i;
                if (c->key->tex[i].clamp_r)
                        tex_options.saturate_r |= 1 << i;
                if (c->key->tex[i].return_size == 16) {
                        tex_options.lower_tex_packing[i] =
                                nir_lower_tex_packing_16;
                }
        }

        /* CS textures may not have return_size reflecting the shadow state. */
        nir_foreach_variable(var, &c->s->uniforms) {
                const struct glsl_type *type = glsl_without_array(var->type);
                unsigned array_len = MAX2(glsl_get_length(var->type), 1);

                if (!glsl_type_is_sampler(type) ||
                    !glsl_sampler_type_is_shadow(type))
                        continue;

                /* Force 16-bit return packing for every element of a shadow
                 * sampler (array) -- presumably matching the hardware's
                 * shadow-compare return size; confirm against the TMU docs.
                 */
                for (int i = 0; i < array_len; i++) {
                        tex_options.lower_tex_packing[var->data.binding + i] =
                                nir_lower_tex_packing_16;
                }
        }

        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
        NIR_PASS_V(c->s, nir_lower_system_values);

        NIR_PASS_V(c->s, nir_lower_vars_to_scratch,
                   nir_var_function_temp,
                   0,
                   glsl_get_natural_size_align_bytes);
        NIR_PASS_V(c->s, v3d_nir_lower_scratch);
}
  592 
  593 static void
  594 v3d_set_prog_data_uniforms(struct v3d_compile *c,
  595                            struct v3d_prog_data *prog_data)
  596 {
  597         int count = c->num_uniforms;
  598         struct v3d_uniform_list *ulist = &prog_data->uniforms;
  599 
  600         ulist->count = count;
  601         ulist->data = ralloc_array(prog_data, uint32_t, count);
  602         memcpy(ulist->data, c->uniform_data,
  603                count * sizeof(*ulist->data));
  604         ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
  605         memcpy(ulist->contents, c->uniform_contents,
  606                count * sizeof(*ulist->contents));
  607 }
  608 
/* Fills in the VS-specific prog_data: VPM input/output segment sizing and
 * the VCM cache size.  The arithmetic here is order-sensitive: input size is
 * accumulated in rows, converted to sectors, then folded into the output
 * size for the shared-segment configuration.
 */
static void
v3d_vs_set_prog_data(struct v3d_compile *c,
                     struct v3d_vs_prog_data *prog_data)
{
        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column.  Precompute
         * the sizes for the shader record.
         */
        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
                prog_data->vpm_input_size += c->vattr_sizes[i];
        }

        prog_data->uses_vid = (c->s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_VERTEX_ID));
        prog_data->uses_iid = (c->s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_INSTANCE_ID));

        /* Vertex/instance IDs each occupy one extra VPM input row. */
        if (prog_data->uses_vid)
                prog_data->vpm_input_size++;
        if (prog_data->uses_iid)
                prog_data->vpm_input_size++;

        /* Input/output segment size are in sectors (8 rows of 32 bits per
         * channel).
         */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;

        /* Set us up for shared input/output segments.  This is apparently
         * necessary for our VCM setup to avoid varying corruption.
         */
        prog_data->separate_segments = false;
        prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
                                          prog_data->vpm_input_size);
        prog_data->vpm_input_size = 0;

        /* Compute VCM cache size.  We set up our program to take up less than
         * half of the VPM, so that any set of bin and render programs won't
         * run out of space.  We need space for at least one input segment,
         * and then allocate the rest to output segments (one for the current
         * program, the rest to VCM).  The valid range of the VCM cache size
         * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
         * batches.
         */
        assert(c->devinfo->vpm_size);
        int sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8;
        int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
        int half_vpm = vpm_size_in_sectors / 2;
        int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
        int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
        assert(vpm_output_batches >= 2);
        prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
}
  663 
/* Fills in the GS-specific prog_data: input slot map, VPM output sizing,
 * and the SIMD dispatch width chosen so the program fits in VPM memory.
 */
static void
v3d_gs_set_prog_data(struct v3d_compile *c,
                     struct v3d_gs_prog_data *prog_data)
{
        prog_data->num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        /* gl_PrimitiveIdIn is written by the GBG into the first word of the
         * VPM output header automatically and the shader will overwrite
         * it after reading it if necessary, so it doesn't add to the VPM
         * size requirements.
         */
        prog_data->uses_pid = (c->s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_PRIMITIVE_ID));

        /* Output segment size is in sectors (8 rows of 32 bits per channel) */
        prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;

        /* Compute SIMD dispatch width and update VPM output size accordingly
         * to ensure we can fit our program in memory. Available widths are
         * 16, 8, 4, 1.
         *
         * Notice that at draw time we will have to consider VPM memory
         * requirements from other stages and choose a smaller dispatch
         * width if needed to fit the program in VPM memory.
         */
        prog_data->simd_width = 16;
        while ((prog_data->simd_width > 1 && prog_data->vpm_output_size > 16) ||
               prog_data->simd_width == 2) {
                /* Halving the width halves the VPM footprint (rounded up);
                 * width 2 is skipped entirely (not an available width).
                 */
                prog_data->simd_width >>= 1;
                prog_data->vpm_output_size =
                        align(prog_data->vpm_output_size, 2) / 2;
        }
        assert(prog_data->vpm_output_size <= 16);
        assert(prog_data->simd_width != 2);

        prog_data->out_prim_type = c->s->info.gs.output_primitive;
        prog_data->num_invocations = c->s->info.gs.invocations;
}
  704 
/* Copies the FS input slot map and repacks the per-input interpolation
 * bitsets (flat/noperspective/centroid) into 24-bits-per-word flag arrays.
 */
static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
                            struct v3d_fs_prog_data *prog_data)
{
        prog_data->num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        /* Each flags word holds 24 inputs; make sure the arrays cover all
         * possible FS inputs.
         */
        STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
                      (V3D_MAX_FS_INPUTS - 1) / 24);
        for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
                if (BITSET_TEST(c->flat_shade_flags, i))
                        prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->noperspective_flags, i))
                        prog_data->noperspective_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->centroid_flags, i))
                        prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
        }
}
  726 
  727 static void
  728 v3d_fs_set_prog_data(struct v3d_compile *c,
  729                      struct v3d_fs_prog_data *prog_data)
  730 {
  731         v3d_set_fs_prog_data_inputs(c, prog_data);
  732         prog_data->writes_z = c->writes_z;
  733         prog_data->disable_ez = !c->s->info.fs.early_fragment_tests;
  734         prog_data->uses_center_w = c->uses_center_w;
  735         prog_data->uses_implicit_point_line_varyings =
  736                 c->uses_implicit_point_line_varyings;
  737         prog_data->lock_scoreboard_on_first_thrsw =
  738                 c->lock_scoreboard_on_first_thrsw;
  739 }
  740 
/* Fills in the compute-shader prog_data: only the shared ("local") memory
 * size is needed.
 */
static void
v3d_cs_set_prog_data(struct v3d_compile *c,
                     struct v3d_compute_prog_data *prog_data)
{
        prog_data->shared_size = c->s->info.cs.shared_size;
}
  747 
/* Fills in the stage-independent prog_data fields and uniforms, then
 * dispatches to the per-stage setter.
 */
static void
v3d_set_prog_data(struct v3d_compile *c,
                  struct v3d_prog_data *prog_data)
{
        prog_data->threads = c->threads;
        /* A program that never thread-switched runs as a single segment. */
        prog_data->single_seg = !c->last_thrsw;
        prog_data->spill_size = c->spill_size;
        prog_data->tmu_dirty_rcl = c->tmu_dirty_rcl;

        v3d_set_prog_data_uniforms(c, prog_data);

        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_gs_set_prog_data(c, (struct v3d_gs_prog_data *)prog_data);
                break;
        case MESA_SHADER_FRAGMENT:
                v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data);
                break;
        case MESA_SHADER_COMPUTE:
                v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data);
                break;
        default:
                unreachable("unsupported shader stage");
        }
}
  776 
/* Copies the finished QPU instruction stream into a caller-owned malloc'ed
 * buffer and destroys the compile context on success.  Returns NULL (and
 * sets *final_assembly_size) on allocation failure.
 */
static uint64_t *
v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
{
        *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);

        uint64_t *qpu_insts = malloc(*final_assembly_size);
        /* NOTE(review): on allocation failure c is not destroyed here
         * (vir_compile_destroy is only reached on success) -- confirm the
         * callers clean up the compile context when this returns NULL.
         */
        if (!qpu_insts)
                return NULL;

        memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);

        vir_compile_destroy(c);

        return qpu_insts;
}
  792 
/* Early VS NIR lowering: scalarizes I/O, prunes outputs the next stage does
 * not consume (per the shader key), and lowers I/O to offsets.
 */
static void
v3d_nir_lower_vs_early(struct v3d_compile *c)
{
        /* Split our I/O vars and dead code eliminate the unused
         * components.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
                   nir_var_shader_in | nir_var_shader_out);
        /* One 64-bit slot mask per vec4 component. */
        uint64_t used_outputs[4] = {0};
        for (int i = 0; i < c->vs_key->num_used_outputs; i++) {
                int slot = v3d_slot_get_slot(c->vs_key->used_outputs[i]);
                int comp = v3d_slot_get_component(c->vs_key->used_outputs[i]);
                used_outputs[comp] |= 1ull << slot;
        }
        NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                   &c->s->outputs, used_outputs, NULL); /* demotes to globals */
        NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in);

        /* This must go before nir_lower_io */
        if (c->vs_key->per_vertex_point_size)
                NIR_PASS_V(c->s, nir_lower_point_size, 1.0f, 0.0f);

        NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
                   type_size_vec4,
                   (nir_lower_io_options)0);
        /* clean up nir_lower_io's deref_var remains */
        NIR_PASS_V(c->s, nir_opt_dce);
}
  823 
/* Early GS-specific NIR lowering.  Mirrors v3d_nir_lower_vs_early() but
 * reads the GS key: scalarizes I/O, strips unused output components, and
 * lowers I/O variables to offset-based intrinsics.
 */
static void
v3d_nir_lower_gs_early(struct v3d_compile *c)
{
        /* Split our I/O vars and dead code eliminate the unused
         * components.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
                   nir_var_shader_in | nir_var_shader_out);
        /* Build per-component masks of used output slots: index is the
         * component (0-3), each bit is a varying slot.
         */
        uint64_t used_outputs[4] = {0};
        for (int i = 0; i < c->gs_key->num_used_outputs; i++) {
                int slot = v3d_slot_get_slot(c->gs_key->used_outputs[i]);
                int comp = v3d_slot_get_component(c->gs_key->used_outputs[i]);
                used_outputs[comp] |= 1ull << slot;
        }
        NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                   &c->s->outputs, used_outputs, NULL); /* demotes to globals */
        NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in);

        /* This must go before nir_lower_io */
        if (c->gs_key->per_vertex_point_size)
                NIR_PASS_V(c->s, nir_lower_point_size, 1.0f, 0.0f);

        NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
                   type_size_vec4,
                   (nir_lower_io_options)0);
        /* clean up nir_lower_io's deref_var remains */
        NIR_PASS_V(c->s, nir_opt_dce);
}
  854 
/* Rewrites FS color output variable types to int/uint vectors so they
 * match the integer render buffer formats recorded in the FS key.
 */
static void
v3d_fixup_fs_output_types(struct v3d_compile *c)
{
        nir_foreach_variable(var, &c->s->outputs) {
                uint32_t mask = 0;

                /* Figure out which render targets this output drives:
                 * FRAG_RESULT_COLOR applies to all RTs, DATA0-3 map to a
                 * single RT each.  Other locations (e.g. depth) keep
                 * mask == 0 and are left untouched.
                 */
                switch (var->data.location) {
                case FRAG_RESULT_COLOR:
                        mask = ~0;
                        break;
                case FRAG_RESULT_DATA0:
                case FRAG_RESULT_DATA1:
                case FRAG_RESULT_DATA2:
                case FRAG_RESULT_DATA3:
                        mask = 1 << (var->data.location - FRAG_RESULT_DATA0);
                        break;
                }

                /* Retype to a (u)int vector of the same width when any of
                 * the driven RTs is an integer format.
                 */
                if (c->fs_key->int_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_INT,
                                                 glsl_get_components(var->type));
                } else if (c->fs_key->uint_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_UINT,
                                                 glsl_get_components(var->type));
                }
        }
}
  884 
/* Early FS-specific NIR lowering: fixes up integer RB output types,
 * lowers logic ops, and promotes to early fragment tests when safe.
 */
static void
v3d_nir_lower_fs_early(struct v3d_compile *c)
{
        /* Retype color outputs before anything else inspects them. */
        if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
                v3d_fixup_fs_output_types(c);

        NIR_PASS_V(c->s, v3d_nir_lower_logic_ops, c);

        /* If the shader has no non-TLB side effects, we can promote it to
         * enabling early_fragment_tests even if the user didn't.
         */
        if (!(c->s->info.num_images ||
              c->s->info.num_ssbos)) {
                c->s->info.fs.early_fragment_tests = true;
        }
}
  901 
/* Late GS-specific NIR lowering, run after the common v3d_lower_nir(). */
static void
v3d_nir_lower_gs_late(struct v3d_compile *c)
{
        /* Lower enabled user clip planes to clip-distance writes. */
        if (c->key->ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_gs, c->key->ucp_enables,
                           false, NULL);
        }

        /* Note: GS output scalarizing must happen after nir_lower_clip_gs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
}
  913 
  914 static void
  915 v3d_nir_lower_vs_late(struct v3d_compile *c)
  916 {
  917         if (c->vs_key->clamp_color)
  918                 NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
  919 
  920         if (c->key->ucp_enables) {
  921                 NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables,
  922                            false, false, NULL);
  923                 NIR_PASS_V(c->s, nir_lower_io_to_scalar,
  924                            nir_var_shader_out);
  925         }
  926 
  927         /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
  928         NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
  929 }
  930 
/* Late FS-specific NIR lowering, run after the common v3d_lower_nir():
 * two-sided color, color clamping, alpha test, user clip planes, then
 * input scalarization.
 */
static void
v3d_nir_lower_fs_late(struct v3d_compile *c)
{
        if (c->fs_key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color);

        if (c->fs_key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (c->fs_key->alpha_test) {
                NIR_PASS_V(c->s, nir_lower_alpha_test,
                           c->fs_key->alpha_test_func,
                           false, NULL);
        }

        if (c->key->ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables,
                           false);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
}
  955 
  956 static uint32_t
  957 vir_get_max_temps(struct v3d_compile *c)
  958 {
  959         int max_ip = 0;
  960         vir_for_each_inst_inorder(inst, c)
  961                 max_ip++;
  962 
  963         uint32_t *pressure = rzalloc_array(NULL, uint32_t, max_ip);
  964 
  965         for (int t = 0; t < c->num_temps; t++) {
  966                 for (int i = c->temp_start[t]; (i < c->temp_end[t] &&
  967                                                 i < max_ip); i++) {
  968                         if (i > max_ip)
  969                                 break;
  970                         pressure[i]++;
  971                 }
  972         }
  973 
  974         uint32_t max_temps = 0;
  975         for (int i = 0; i < max_ip; i++)
  976                 max_temps = MAX2(max_temps, pressure[i]);
  977 
  978         ralloc_free(pressure);
  979 
  980         return max_temps;
  981 }
  982 
/* Main compiler entry point: runs the NIR lowering and optimization
 * pipeline for the shader's stage, translates to VIR and then QPU code,
 * fills *out_prog_data with a freshly allocated stage-specific prog_data,
 * and returns a malloc'd buffer of QPU instructions (byte size written to
 * *final_assembly_size).  The internal compile context is destroyed
 * before returning.
 */
uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                      struct v3d_key *key,
                      struct v3d_prog_data **out_prog_data,
                      nir_shader *s,
                      void (*debug_output)(const char *msg,
                                           void *debug_output_data),
                      void *debug_output_data,
                      int program_id, int variant_id,
                      uint32_t *final_assembly_size)
{
        struct v3d_prog_data *prog_data;
        struct v3d_compile *c = vir_compile_init(compiler, key, s,
                                                 debug_output, debug_output_data,
                                                 program_id, variant_id);

        /* Stash the stage-specific key on the context and allocate the
         * matching prog_data variant (compute has no stage key).
         */
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                c->vs_key = (struct v3d_vs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_vs_prog_data));
                break;
        case MESA_SHADER_GEOMETRY:
                c->gs_key = (struct v3d_gs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_gs_prog_data));
                break;
        case MESA_SHADER_FRAGMENT:
                c->fs_key = (struct v3d_fs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data));
                break;
        case MESA_SHADER_COMPUTE:
                prog_data = rzalloc_size(NULL,
                                         sizeof(struct v3d_compute_prog_data));
                break;
        default:
                unreachable("unsupported shader stage");
        }


        /* Stage-specific early lowering, before the common v3d_lower_nir(). */
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_lower_vs_early(c);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_lower_gs_early(c);
                break;
        case MESA_SHADER_FRAGMENT:
                v3d_nir_lower_fs_early(c);
                break;
        default:
                break;
        }

        v3d_lower_nir(c);

        /* Stage-specific late lowering, after the common v3d_lower_nir(). */
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_lower_vs_late(c);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_lower_gs_late(c);
                break;
        case MESA_SHADER_FRAGMENT:
                v3d_nir_lower_fs_late(c);
                break;
        default:
                break;
        }

        /* V3D-specific intrinsic lowering shared by all stages. */
        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
        NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
        NIR_PASS_V(c->s, nir_lower_idiv, nir_lower_idiv_fast);

        v3d_optimize_nir(c->s);

        /* Do late algebraic optimization to turn add(a, neg(b)) back into
         * subs, then the mandatory cleanup after algebraic.  Note that it may
         * produce fnegs, and if so then we need to keep running to squash
         * fneg(fneg(a)).
         */
        bool more_late_algebraic = true;
        while (more_late_algebraic) {
                more_late_algebraic = false;
                NIR_PASS(more_late_algebraic, c->s, nir_opt_algebraic_late);
                NIR_PASS_V(c->s, nir_opt_constant_folding);
                NIR_PASS_V(c->s, nir_copy_prop);
                NIR_PASS_V(c->s, nir_opt_dce);
                NIR_PASS_V(c->s, nir_opt_cse);
        }

        NIR_PASS_V(c->s, nir_lower_bool_to_int32);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        /* Schedule for about half our register space, to enable more shaders
         * to hit 4 threads.
         */
        NIR_PASS_V(c->s, nir_schedule, 24);

        /* NIR -> VIR -> QPU code generation. */
        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, prog_data);

        *out_prog_data = prog_data;

        /* Emit a shader-db stats line via the debug callback (and stderr
         * when V3D_DEBUG_SHADERDB is set).
         */
        char *shaderdb;
        int ret = asprintf(&shaderdb,
                           "%s shader: %d inst, %d threads, %d loops, "
                           "%d uniforms, %d max-temps, %d:%d spills:fills, "
                           "%d sfu-stalls, %d inst-and-stalls",
                           vir_get_stage_name(c),
                           c->qpu_inst_count,
                           c->threads,
                           c->loops,
                           c->num_uniforms,
                           vir_get_max_temps(c),
                           c->spills,
                           c->fills,
                           c->qpu_inst_stalled_count,
                           c->qpu_inst_count + c->qpu_inst_stalled_count);
        if (ret >= 0) {
                if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
                        fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

                c->debug_output(shaderdb, c->debug_output_data);
                free(shaderdb);
        }

       return v3d_return_qpu_insts(c, final_assembly_size);
}
 1111 
/* Unlinks and frees a VIR instruction.  Clears the def entry if the
 * instruction defined a temp, and invalidates the cached live intervals
 * since instruction positions shift.
 */
void
vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
{
        if (qinst->dst.file == QFILE_TEMP)
                c->defs[qinst->dst.index] = NULL;

        /* Removing the instruction the cursor points at would leave the
         * cursor dangling.
         */
        assert(&qinst->link != c->cursor.link);

        list_del(&qinst->link);
        free(qinst);

        c->live_intervals_valid = false;
}
 1125 
/* Follows a chain of MOVs back to the original definition of a register.
 *
 * NOTE(review): the VC4-era implementation below was never ported to VIR
 * (hence the XXX), so this currently returns the register unchanged.
 */
struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}
 1145 
/* Frees a compile context: removes every instruction from every block
 * (through vir_remove_instruction() so def bookkeeping stays consistent),
 * then releases the ralloc context that owns everything else.
 */
void
vir_compile_destroy(struct v3d_compile *c)
{
        /* Defuse the assert that we aren't removing the cursor's instruction.
         */
        c->cursor.link = NULL;

        vir_for_each_block(block, c) {
                while (!list_is_empty(&block->instructions)) {
                        struct qinst *qinst =
                                list_first_entry(&block->instructions,
                                                 struct qinst, link);
                        vir_remove_instruction(c, qinst);
                }
        }

        ralloc_free(c);
}
 1164 
 1165 uint32_t
 1166 vir_get_uniform_index(struct v3d_compile *c,
 1167                       enum quniform_contents contents,
 1168                       uint32_t data)
 1169 {
 1170         for (int i = 0; i < c->num_uniforms; i++) {
 1171                 if (c->uniform_contents[i] == contents &&
 1172                     c->uniform_data[i] == data) {
 1173                         return i;
 1174                 }
 1175         }
 1176 
 1177         uint32_t uniform = c->num_uniforms++;
 1178 
 1179         if (uniform >= c->uniform_array_size) {
 1180                 c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
 1181                                              c->uniform_array_size * 2);
 1182 
 1183                 c->uniform_data = reralloc(c, c->uniform_data,
 1184                                            uint32_t,
 1185                                            c->uniform_array_size);
 1186                 c->uniform_contents = reralloc(c, c->uniform_contents,
 1187                                                enum quniform_contents,
 1188                                                c->uniform_array_size);
 1189         }
 1190 
 1191         c->uniform_contents[uniform] = contents;
 1192         c->uniform_data[uniform] = data;
 1193 
 1194         return uniform;
 1195 }
 1196 
/* Emits an instruction that loads the given uniform and returns the temp
 * it writes.
 */
struct qreg
vir_uniform(struct v3d_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        /* The load is expressed as a NOP carrying the ldunif signal; the
         * uniform value lands in the instruction's destination temp.
         */
        struct qinst *inst = vir_NOP(c);
        inst->qpu.sig.ldunif = true;
        inst->uniform = vir_get_uniform_index(c, contents, data);
        inst->dst = vir_get_temp(c);
        c->defs[inst->dst.index] = inst;
        return inst->dst;
}
 1209 
/* Runs one VIR optimization pass and ORs its result into the caller's
 * "progress" flag.  Relies on "c", "progress", "print_opt_debug" and
 * "pass" being in scope at the expansion site (see vir_optimize()).
 */
#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)
 1223 
 1224 void
 1225 vir_optimize(struct v3d_compile *c)
 1226 {
 1227         bool print_opt_debug = false;
 1228         int pass = 1;
 1229 
 1230         while (true) {
 1231                 bool progress = false;
 1232 
 1233                 OPTPASS(vir_opt_copy_propagate);
 1234                 OPTPASS(vir_opt_redundant_flags);
 1235                 OPTPASS(vir_opt_dead_code);
 1236                 OPTPASS(vir_opt_small_immediates);
 1237 
 1238                 if (!progress)
 1239                         break;
 1240 
 1241                 pass++;
 1242         }
 1243 }
 1244 
 1245 const char *
 1246 vir_get_stage_name(struct v3d_compile *c)
 1247 {
 1248         if (c->vs_key && c->vs_key->is_coord)
 1249                 return "MESA_SHADER_VERTEX_BIN";
 1250         else if (c->gs_key && c->gs_key->is_coord)
 1251                 return "MESA_SHADER_GEOMETRY_BIN";
 1252         else
 1253                 return gl_shader_stage_name(c->s->info.stage);
 1254 }