"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/freedreno/ir3/ir3.c" (16 Sep 2020, 35127 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ir3.c", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   21  * SOFTWARE.
   22  */
   23 
   24 #include "ir3.h"
   25 
   26 #include <stdlib.h>
   27 #include <stdio.h>
   28 #include <string.h>
   29 #include <assert.h>
   30 #include <stdbool.h>
   31 #include <errno.h>
   32 
   33 #include "util/bitscan.h"
   34 #include "util/ralloc.h"
   35 #include "util/u_math.h"
   36 
   37 #include "instr-a3xx.h"
   38 #include "ir3_compiler.h"
   39 
   40 /* simple allocator to carve allocations out of an up-front allocated heap,
   41  * so that we can free everything easily in one shot.
   42  */
   43 void * ir3_alloc(struct ir3 *shader, int sz)
   44 {
   45     return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */
   46 }
   47 
   48 struct ir3 * ir3_create(struct ir3_compiler *compiler, gl_shader_stage type)
   49 {
   50     struct ir3 *shader = rzalloc(NULL, struct ir3);
   51 
   52     shader->compiler = compiler;
   53     shader->type = type;
   54 
   55     list_inithead(&shader->block_list);
   56     list_inithead(&shader->array_list);
   57 
   58     return shader;
   59 }
   60 
   61 void ir3_destroy(struct ir3 *shader)
   62 {
   63     ralloc_free(shader);
   64 }
   65 
   66 #define iassert(cond) do { \
   67     if (!(cond)) { \
   68         debug_assert(cond); \
   69         return -1; \
   70     } } while (0)
   71 
   72 #define iassert_type(reg, full) do { \
   73     if ((full)) { \
   74         iassert(!((reg)->flags & IR3_REG_HALF)); \
   75     } else { \
   76         iassert((reg)->flags & IR3_REG_HALF); \
   77     } } while (0);
   78 
   79 static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
   80         uint32_t repeat, uint32_t valid_flags)
   81 {
   82     reg_t val = { .dummy32 = 0 };
   83 
   84     if (reg->flags & ~valid_flags) {
   85         debug_printf("INVALID FLAGS: %x vs %x\n",
   86                 reg->flags, valid_flags);
   87     }
   88 
   89     if (!(reg->flags & IR3_REG_R))
   90         repeat = 0;
   91 
   92     if (reg->flags & IR3_REG_IMMED) {
   93         val.iim_val = reg->iim_val;
   94     } else {
   95         unsigned components;
   96         int16_t max;
   97 
   98         if (reg->flags & IR3_REG_RELATIV) {
   99             components = reg->size;
  100             val.idummy10 = reg->array.offset;
  101             max = (reg->array.offset + repeat + components - 1);
  102         } else {
  103             components = util_last_bit(reg->wrmask);
  104             val.comp = reg->num & 0x3;
  105             val.num  = reg->num >> 2;
  106             max = (reg->num + repeat + components - 1);
  107         }
  108 
  109         if (reg->flags & IR3_REG_CONST) {
  110             info->max_const = MAX2(info->max_const, max >> 2);
  111         } else if (val.num == 63) {
  112             /* ignore writes to dummy register r63.x */
  113         } else if (max < regid(48, 0)) {
  114             if (reg->flags & IR3_REG_HALF) {
  115                 if (info->gpu_id >= 600) {
  116                     /* starting w/ a6xx, half regs conflict with full regs: */
  117                     info->max_reg = MAX2(info->max_reg, max >> 3);
  118                 } else {
  119                     info->max_half_reg = MAX2(info->max_half_reg, max >> 2);
  120                 }
  121             } else {
  122                 info->max_reg = MAX2(info->max_reg, max >> 2);
  123             }
  124         }
  125     }
  126 
  127     return val.dummy32;
  128 }
  129 
  130 static int emit_cat0(struct ir3_instruction *instr, void *ptr,
  131         struct ir3_info *info)
  132 {
  133     instr_cat0_t *cat0 = ptr;
  134 
  135     if (info->gpu_id >= 500) {
  136         cat0->a5xx.immed = instr->cat0.immed;
  137     } else if (info->gpu_id >= 400) {
  138         cat0->a4xx.immed = instr->cat0.immed;
  139     } else {
  140         cat0->a3xx.immed = instr->cat0.immed;
  141     }
  142     cat0->repeat   = instr->repeat;
  143     cat0->ss       = !!(instr->flags & IR3_INSTR_SS);
  144     cat0->inv      = instr->cat0.inv;
  145     cat0->comp     = instr->cat0.comp;
  146     cat0->opc      = instr->opc;
  147     cat0->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
  148     cat0->sync     = !!(instr->flags & IR3_INSTR_SY);
  149     cat0->opc_cat  = 0;
  150 
  151     switch (instr->opc) {
  152     case OPC_IF:
  153     case OPC_ELSE:
  154     case OPC_ENDIF:
  155         cat0->dummy4 = 16;
  156         break;
  157     default:
  158         break;
  159     }
  160 
  161     return 0;
  162 }
  163 
  164 static int emit_cat1(struct ir3_instruction *instr, void *ptr,
  165         struct ir3_info *info)
  166 {
  167     struct ir3_register *dst = instr->regs[0];
  168     struct ir3_register *src = instr->regs[1];
  169     instr_cat1_t *cat1 = ptr;
  170 
  171     iassert(instr->regs_count == 2);
  172     iassert_type(dst, type_size(instr->cat1.dst_type) == 32);
  173     if (!(src->flags & IR3_REG_IMMED))
  174         iassert_type(src, type_size(instr->cat1.src_type) == 32);
  175 
  176     if (src->flags & IR3_REG_IMMED) {
  177         cat1->iim_val = src->iim_val;
  178         cat1->src_im  = 1;
  179     } else if (src->flags & IR3_REG_RELATIV) {
  180         cat1->off       = reg(src, info, instr->repeat,
  181                 IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV);
  182         cat1->src_rel   = 1;
  183         cat1->src_rel_c = !!(src->flags & IR3_REG_CONST);
  184     } else {
  185         cat1->src  = reg(src, info, instr->repeat,
  186                 IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF);
  187         cat1->src_c     = !!(src->flags & IR3_REG_CONST);
  188     }
  189 
  190     cat1->dst      = reg(dst, info, instr->repeat,
  191             IR3_REG_RELATIV | IR3_REG_EVEN |
  192             IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF);
  193     cat1->repeat   = instr->repeat;
  194     cat1->src_r    = !!(src->flags & IR3_REG_R);
  195     cat1->ss       = !!(instr->flags & IR3_INSTR_SS);
  196     cat1->ul       = !!(instr->flags & IR3_INSTR_UL);
  197     cat1->dst_type = instr->cat1.dst_type;
  198     cat1->dst_rel  = !!(dst->flags & IR3_REG_RELATIV);
  199     cat1->src_type = instr->cat1.src_type;
  200     cat1->even     = !!(dst->flags & IR3_REG_EVEN);
  201     cat1->pos_inf  = !!(dst->flags & IR3_REG_POS_INF);
  202     cat1->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
  203     cat1->sync     = !!(instr->flags & IR3_INSTR_SY);
  204     cat1->opc_cat  = 1;
  205 
  206     return 0;
  207 }
  208 
  209 static int emit_cat2(struct ir3_instruction *instr, void *ptr,
  210         struct ir3_info *info)
  211 {
  212     struct ir3_register *dst = instr->regs[0];
  213     struct ir3_register *src1 = instr->regs[1];
  214     struct ir3_register *src2 = instr->regs[2];
  215     instr_cat2_t *cat2 = ptr;
  216     unsigned absneg = ir3_cat2_absneg(instr->opc);
  217 
  218     iassert((instr->regs_count == 2) || (instr->regs_count == 3));
  219 
  220     if (instr->nop) {
  221         iassert(!instr->repeat);
  222         iassert(instr->nop <= 3);
  223 
  224         cat2->src1_r = instr->nop & 0x1;
  225         cat2->src2_r = (instr->nop >> 1) & 0x1;
  226     } else {
  227         cat2->src1_r = !!(src1->flags & IR3_REG_R);
  228         if (src2)
  229             cat2->src2_r = !!(src2->flags & IR3_REG_R);
  230     }
  231 
  232     if (src1->flags & IR3_REG_RELATIV) {
  233         iassert(src1->array.offset < (1 << 10));
  234         cat2->rel1.src1      = reg(src1, info, instr->repeat,
  235                 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
  236                 IR3_REG_HALF | absneg);
  237         cat2->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
  238         cat2->rel1.src1_rel  = 1;
  239     } else if (src1->flags & IR3_REG_CONST) {
  240         iassert(src1->num < (1 << 12));
  241         cat2->c1.src1   = reg(src1, info, instr->repeat,
  242                 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF |
  243                 absneg);
  244         cat2->c1.src1_c = 1;
  245     } else {
  246         iassert(src1->num < (1 << 11));
  247         cat2->src1 = reg(src1, info, instr->repeat,
  248                 IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
  249                 absneg);
  250     }
  251     cat2->src1_im  = !!(src1->flags & IR3_REG_IMMED);
  252     cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
  253     cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS));
  254 
  255     if (src2) {
  256         iassert((src2->flags & IR3_REG_IMMED) ||
  257                 !((src1->flags ^ src2->flags) & IR3_REG_HALF));
  258 
  259         if (src2->flags & IR3_REG_RELATIV) {
  260             iassert(src2->array.offset < (1 << 10));
  261             cat2->rel2.src2      = reg(src2, info, instr->repeat,
  262                     IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
  263                     IR3_REG_HALF | absneg);
  264             cat2->rel2.src2_c    = !!(src2->flags & IR3_REG_CONST);
  265             cat2->rel2.src2_rel  = 1;
  266         } else if (src2->flags & IR3_REG_CONST) {
  267             iassert(src2->num < (1 << 12));
  268             cat2->c2.src2   = reg(src2, info, instr->repeat,
  269                     IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF |
  270                     absneg);
  271             cat2->c2.src2_c = 1;
  272         } else {
  273             iassert(src2->num < (1 << 11));
  274             cat2->src2 = reg(src2, info, instr->repeat,
  275                     IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
  276                     absneg);
  277         }
  278 
  279         cat2->src2_im  = !!(src2->flags & IR3_REG_IMMED);
  280         cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
  281         cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS));
  282     }
  283 
  284     cat2->dst      = reg(dst, info, instr->repeat,
  285             IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
  286     cat2->repeat   = instr->repeat;
  287     cat2->sat      = !!(instr->flags & IR3_INSTR_SAT);
  288     cat2->ss       = !!(instr->flags & IR3_INSTR_SS);
  289     cat2->ul       = !!(instr->flags & IR3_INSTR_UL);
  290     cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF);
  291     cat2->ei       = !!(dst->flags & IR3_REG_EI);
  292     cat2->cond     = instr->cat2.condition;
  293     cat2->full     = ! (src1->flags & IR3_REG_HALF);
  294     cat2->opc      = instr->opc;
  295     cat2->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
  296     cat2->sync     = !!(instr->flags & IR3_INSTR_SY);
  297     cat2->opc_cat  = 2;
  298 
  299     return 0;
  300 }
  301 
  302 static int emit_cat3(struct ir3_instruction *instr, void *ptr,
  303         struct ir3_info *info)
  304 {
  305     struct ir3_register *dst = instr->regs[0];
  306     struct ir3_register *src1 = instr->regs[1];
  307     struct ir3_register *src2 = instr->regs[2];
  308     struct ir3_register *src3 = instr->regs[3];
  309     unsigned absneg = ir3_cat3_absneg(instr->opc);
  310     instr_cat3_t *cat3 = ptr;
  311     uint32_t src_flags = 0;
  312 
  313     switch (instr->opc) {
  314     case OPC_MAD_F16:
  315     case OPC_MAD_U16:
  316     case OPC_MAD_S16:
  317     case OPC_SEL_B16:
  318     case OPC_SEL_S16:
  319     case OPC_SEL_F16:
  320     case OPC_SAD_S16:
  321     case OPC_SAD_S32:  // really??
  322         src_flags |= IR3_REG_HALF;
  323         break;
  324     default:
  325         break;
  326     }
  327 
  328     iassert(instr->regs_count == 4);
  329     iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF));
  330     iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF));
  331     iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
  332 
  333     if (instr->nop) {
  334         iassert(!instr->repeat);
  335         iassert(instr->nop <= 3);
  336 
  337         cat3->src1_r = instr->nop & 0x1;
  338         cat3->src2_r = (instr->nop >> 1) & 0x1;
  339     } else {
  340         cat3->src1_r = !!(src1->flags & IR3_REG_R);
  341         cat3->src2_r = !!(src2->flags & IR3_REG_R);
  342     }
  343 
  344     if (src1->flags & IR3_REG_RELATIV) {
  345         iassert(src1->array.offset < (1 << 10));
  346         cat3->rel1.src1      = reg(src1, info, instr->repeat,
  347                 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
  348                 IR3_REG_HALF | absneg);
  349         cat3->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
  350         cat3->rel1.src1_rel  = 1;
  351     } else if (src1->flags & IR3_REG_CONST) {
  352         iassert(src1->num < (1 << 12));
  353         cat3->c1.src1   = reg(src1, info, instr->repeat,
  354                 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg);
  355         cat3->c1.src1_c = 1;
  356     } else {
  357         iassert(src1->num < (1 << 11));
  358         cat3->src1 = reg(src1, info, instr->repeat,
  359                 IR3_REG_R | IR3_REG_HALF | absneg);
  360     }
  361 
  362     cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
  363 
  364     cat3->src2     = reg(src2, info, instr->repeat,
  365             IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg);
  366     cat3->src2_c   = !!(src2->flags & IR3_REG_CONST);
  367     cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
  368 
  369     if (src3->flags & IR3_REG_RELATIV) {
  370         iassert(src3->array.offset < (1 << 10));
  371         cat3->rel2.src3      = reg(src3, info, instr->repeat,
  372                 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
  373                 IR3_REG_HALF | absneg);
  374         cat3->rel2.src3_c    = !!(src3->flags & IR3_REG_CONST);
  375         cat3->rel2.src3_rel  = 1;
  376     } else if (src3->flags & IR3_REG_CONST) {
  377         iassert(src3->num < (1 << 12));
  378         cat3->c2.src3   = reg(src3, info, instr->repeat,
  379                 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg);
  380         cat3->c2.src3_c = 1;
  381     } else {
  382         iassert(src3->num < (1 << 11));
  383         cat3->src3 = reg(src3, info, instr->repeat,
  384                 IR3_REG_R | IR3_REG_HALF | absneg);
  385     }
  386 
  387     cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
  388     cat3->src3_r   = !!(src3->flags & IR3_REG_R);
  389 
  390     cat3->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
  391     cat3->repeat   = instr->repeat;
  392     cat3->sat      = !!(instr->flags & IR3_INSTR_SAT);
  393     cat3->ss       = !!(instr->flags & IR3_INSTR_SS);
  394     cat3->ul       = !!(instr->flags & IR3_INSTR_UL);
  395     cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF);
  396     cat3->opc      = instr->opc;
  397     cat3->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
  398     cat3->sync     = !!(instr->flags & IR3_INSTR_SY);
  399     cat3->opc_cat  = 3;
  400 
  401     return 0;
  402 }
  403 
  404 static int emit_cat4(struct ir3_instruction *instr, void *ptr,
  405         struct ir3_info *info)
  406 {
  407     struct ir3_register *dst = instr->regs[0];
  408     struct ir3_register *src = instr->regs[1];
  409     instr_cat4_t *cat4 = ptr;
  410 
  411     iassert(instr->regs_count == 2);
  412 
  413     if (src->flags & IR3_REG_RELATIV) {
  414         iassert(src->array.offset < (1 << 10));
  415         cat4->rel.src      = reg(src, info, instr->repeat,
  416                 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
  417                 IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
  418         cat4->rel.src_c    = !!(src->flags & IR3_REG_CONST);
  419         cat4->rel.src_rel  = 1;
  420     } else if (src->flags & IR3_REG_CONST) {
  421         iassert(src->num < (1 << 12));
  422         cat4->c.src   = reg(src, info, instr->repeat,
  423                 IR3_REG_CONST | IR3_REG_FNEG | IR3_REG_FABS |
  424                 IR3_REG_R | IR3_REG_HALF);
  425         cat4->c.src_c = 1;
  426     } else {
  427         iassert(src->num < (1 << 11));
  428         cat4->src = reg(src, info, instr->repeat,
  429                 IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS |
  430                 IR3_REG_R | IR3_REG_HALF);
  431     }
  432 
  433     cat4->src_im   = !!(src->flags & IR3_REG_IMMED);
  434     cat4->src_neg  = !!(src->flags & IR3_REG_FNEG);
  435     cat4->src_abs  = !!(src->flags & IR3_REG_FABS);
  436     cat4->src_r    = !!(src->flags & IR3_REG_R);
  437 
  438     cat4->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
  439     cat4->repeat   = instr->repeat;
  440     cat4->sat      = !!(instr->flags & IR3_INSTR_SAT);
  441     cat4->ss       = !!(instr->flags & IR3_INSTR_SS);
  442     cat4->ul       = !!(instr->flags & IR3_INSTR_UL);
  443     cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF);
  444     cat4->full     = ! (src->flags & IR3_REG_HALF);
  445     cat4->opc      = instr->opc;
  446     cat4->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
  447     cat4->sync     = !!(instr->flags & IR3_INSTR_SY);
  448     cat4->opc_cat  = 4;
  449 
  450     return 0;
  451 }
  452 
  453 static int emit_cat5(struct ir3_instruction *instr, void *ptr,
  454         struct ir3_info *info)
  455 {
  456     struct ir3_register *dst = instr->regs[0];
  457     /* To simplify things when there could be zero, one, or two args other
  458      * than tex/sampler idx, we use the first src reg in the ir to hold
  459      * samp_tex hvec2:
  460      */
  461     struct ir3_register *src1;
  462     struct ir3_register *src2;
  463     instr_cat5_t *cat5 = ptr;
  464 
  465     iassert((instr->regs_count == 1) ||
  466             (instr->regs_count == 2) ||
  467             (instr->regs_count == 3) ||
  468             (instr->regs_count == 4));
  469 
  470     if (instr->flags & IR3_INSTR_S2EN) {
  471         src1 = instr->regs[2];
  472         src2 = instr->regs_count > 3 ? instr->regs[3] : NULL;
  473     } else {
  474         src1 = instr->regs_count > 1 ? instr->regs[1] : NULL;
  475         src2 = instr->regs_count > 2 ? instr->regs[2] : NULL;
  476     }
  477 
  478     assume(src1 || !src2);
  479 
  480     if (src1) {
  481         cat5->full = ! (src1->flags & IR3_REG_HALF);
  482         cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
  483     }
  484 
  485     if (src2) {
  486         iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
  487         cat5->src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
  488     }
  489 
  490     if (instr->flags & IR3_INSTR_B) {
  491         cat5->s2en_bindless.base_hi = instr->cat5.tex_base >> 1;
  492         cat5->base_lo = instr->cat5.tex_base & 1;
  493     }
  494 
  495     if (instr->flags & IR3_INSTR_S2EN) {
  496         struct ir3_register *samp_tex = instr->regs[1];
  497         iassert(samp_tex->flags & IR3_REG_HALF);
  498         cat5->s2en_bindless.src3 = reg(samp_tex, info, instr->repeat,
  499                                        (instr->flags & IR3_INSTR_B) ? 0 : IR3_REG_HALF);
  500         if (instr->flags & IR3_INSTR_B) {
  501             if (instr->flags & IR3_INSTR_A1EN) {
  502                 cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_A1_UNIFORM;
  503             } else {
  504                 cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_UNIFORM;
  505             }
  506         } else {
  507             /* TODO: This should probably be CAT5_UNIFORM, at least on a6xx,
  508              * as this is what the blob does and it is presumably faster, but
  509              * first we should confirm it is actually nonuniform and figure
  510              * out when the whole descriptor mode mechanism was introduced.
  511              */
  512             cat5->s2en_bindless.desc_mode = CAT5_NONUNIFORM;
  513         }
  514         iassert(!(instr->cat5.samp | instr->cat5.tex));
  515     } else if (instr->flags & IR3_INSTR_B) {
  516         cat5->s2en_bindless.src3 = instr->cat5.samp;
  517         if (instr->flags & IR3_INSTR_A1EN) {
  518             cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_A1_IMM;
  519         } else {
  520             cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_IMM;
  521         }
  522     } else {
  523         cat5->norm.samp = instr->cat5.samp;
  524         cat5->norm.tex  = instr->cat5.tex;
  525     }
  526 
  527     cat5->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
  528     cat5->wrmask   = dst->wrmask;
  529     cat5->type     = instr->cat5.type;
  530     cat5->is_3d    = !!(instr->flags & IR3_INSTR_3D);
  531     cat5->is_a     = !!(instr->flags & IR3_INSTR_A);
  532     cat5->is_s     = !!(instr->flags & IR3_INSTR_S);
  533     cat5->is_s2en_bindless = !!(instr->flags & (IR3_INSTR_S2EN | IR3_INSTR_B));
  534     cat5->is_o     = !!(instr->flags & IR3_INSTR_O);
  535     cat5->is_p     = !!(instr->flags & IR3_INSTR_P);
  536     cat5->opc      = instr->opc;
  537     cat5->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
  538     cat5->sync     = !!(instr->flags & IR3_INSTR_SY);
  539     cat5->opc_cat  = 5;
  540 
  541     return 0;
  542 }
  543 
  544 static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
  545         struct ir3_info *info)
  546 {
  547     struct ir3_register *src1, *src2, *ssbo;
  548     instr_cat6_a6xx_t *cat6 = ptr;
  549     bool has_dest = (instr->opc == OPC_LDIB || instr->opc == OPC_LDC);
  550 
  551     ssbo = instr->regs[1];
  552     src1 = instr->regs[2];
  553 
  554     if (has_dest) {
  555         /* the src2 field in the instruction is actually the destination
  556          * register for load instructions:
  557          */
  558         src2 = instr->regs[0];
  559     } else {
  560         src2 = instr->regs[3];
  561     }
  562 
  563     cat6->type      = instr->cat6.type;
  564     cat6->d         = instr->cat6.d - (instr->opc == OPC_LDC ? 0 : 1);
  565     cat6->typed     = instr->cat6.typed;
  566     cat6->type_size = instr->cat6.iim_val - 1;
  567     cat6->opc       = instr->opc;
  568     cat6->jmp_tgt   = !!(instr->flags & IR3_INSTR_JP);
  569     cat6->sync      = !!(instr->flags & IR3_INSTR_SY);
  570     cat6->opc_cat   = 6;
  571 
  572     cat6->src1 = reg(src1, info, instr->repeat, 0);
  573     cat6->src2 = reg(src2, info, instr->repeat, 0);
  574     cat6->ssbo = reg(ssbo, info, instr->repeat, IR3_REG_IMMED);
  575 
  576     if (instr->flags & IR3_INSTR_B) {
  577         if (ssbo->flags & IR3_REG_IMMED) {
  578             cat6->desc_mode = CAT6_BINDLESS_IMM;
  579         } else {
  580             cat6->desc_mode = CAT6_BINDLESS_UNIFORM;
  581         }
  582         cat6->base = instr->cat6.base;
  583     } else {
  584         /* TODO figure out mode for indirect SSBO index in !bindless */
  585         iassert(ssbo->flags & IR3_REG_IMMED);
  586         cat6->desc_mode = CAT6_IMM;
  587     }
  588 
  589     switch (instr->opc) {
  590     case OPC_ATOMIC_ADD:
  591     case OPC_ATOMIC_SUB:
  592     case OPC_ATOMIC_XCHG:
  593     case OPC_ATOMIC_INC:
  594     case OPC_ATOMIC_DEC:
  595     case OPC_ATOMIC_CMPXCHG:
  596     case OPC_ATOMIC_MIN:
  597     case OPC_ATOMIC_MAX:
  598     case OPC_ATOMIC_AND:
  599     case OPC_ATOMIC_OR:
  600     case OPC_ATOMIC_XOR:
  601         cat6->pad1 = 0x1;
  602         cat6->pad3 = 0xc;
  603         cat6->pad5 = 0x3;
  604         break;
  605     case OPC_STIB:
  606         cat6->pad1 = 0x0;
  607         cat6->pad3 = 0xc;
  608         cat6->pad5 = 0x2;
  609         break;
  610     case OPC_LDIB:
  611         cat6->pad1 = 0x1;
  612         cat6->pad3 = 0xc;
  613         cat6->pad5 = 0x2;
  614         break;
  615     case OPC_LDC:
  616         cat6->pad1 = 0x0;
  617         cat6->pad3 = 0x8;
  618         cat6->pad5 = 0x2;
  619         break;
  620     default:
  621         iassert(0);
  622     }
  623     cat6->pad2 = 0x0;
  624     cat6->pad4 = 0x0;
  625 
  626     return 0;
  627 }
  628 
  629 static int emit_cat6(struct ir3_instruction *instr, void *ptr,
  630         struct ir3_info *info)
  631 {
  632     struct ir3_register *dst, *src1, *src2;
  633     instr_cat6_t *cat6 = ptr;
  634 
  635     /* In a6xx we start using a new instruction encoding for some of
  636      * these instructions:
  637      */
  638     if (info->gpu_id >= 600) {
  639         switch (instr->opc) {
  640         case OPC_ATOMIC_ADD:
  641         case OPC_ATOMIC_SUB:
  642         case OPC_ATOMIC_XCHG:
  643         case OPC_ATOMIC_INC:
  644         case OPC_ATOMIC_DEC:
  645         case OPC_ATOMIC_CMPXCHG:
  646         case OPC_ATOMIC_MIN:
  647         case OPC_ATOMIC_MAX:
  648         case OPC_ATOMIC_AND:
  649         case OPC_ATOMIC_OR:
  650         case OPC_ATOMIC_XOR:
  651             /* The shared variants of these still use the old encoding: */
  652             if (!(instr->flags & IR3_INSTR_G))
  653                 break;
  654             /* fallthrough */
  655         case OPC_STIB:
  656         case OPC_LDIB:
  657         case OPC_LDC:
  658             return emit_cat6_a6xx(instr, ptr, info);
  659         default:
  660             break;
  661         }
  662     }
  663 
  664     bool type_full = type_size(instr->cat6.type) == 32;
  665 
  666     cat6->type     = instr->cat6.type;
  667     cat6->opc      = instr->opc;
  668     cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
  669     cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
  670     cat6->g        = !!(instr->flags & IR3_INSTR_G);
  671     cat6->opc_cat  = 6;
  672 
  673     switch (instr->opc) {
  674     case OPC_RESINFO:
  675     case OPC_RESFMT:
  676         iassert_type(instr->regs[0], type_full); /* dst */
  677         iassert_type(instr->regs[1], type_full); /* src1 */
  678         break;
  679     case OPC_L2G:
  680     case OPC_G2L:
  681         iassert_type(instr->regs[0], true);      /* dst */
  682         iassert_type(instr->regs[1], true);      /* src1 */
  683         break;
  684     case OPC_STG:
  685     case OPC_STL:
  686     case OPC_STP:
  687     case OPC_STLW:
  688     case OPC_STIB:
  689         /* no dst, so regs[0] is dummy */
  690         iassert_type(instr->regs[1], true);      /* dst */
  691         iassert_type(instr->regs[2], type_full); /* src1 */
  692         iassert_type(instr->regs[3], true);      /* src2 */
  693         break;
  694     default:
  695         iassert_type(instr->regs[0], type_full); /* dst */
  696         iassert_type(instr->regs[1], true);      /* src1 */
  697         if (instr->regs_count > 2)
  698             iassert_type(instr->regs[2], true);  /* src1 */
  699         break;
  700     }
  701 
  702     /* the "dst" for a store instruction is (from the perspective
  703      * of data flow in the shader, ie. register use/def, etc) in
  704      * fact a register that is read by the instruction, rather
  705      * than written:
  706      */
  707     if (is_store(instr)) {
  708         iassert(instr->regs_count >= 3);
  709 
  710         dst  = instr->regs[1];
  711         src1 = instr->regs[2];
  712         src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL;
  713     } else {
  714         iassert(instr->regs_count >= 2);
  715 
  716         dst  = instr->regs[0];
  717         src1 = instr->regs[1];
  718         src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
  719     }
  720 
  721     /* TODO we need a more comprehensive list about which instructions
  722      * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
  723      * indicate to use the src_off encoding even if offset is zero
  724      * (but then what to do about dst_off?)
  725      */
  726     if (is_atomic(instr->opc)) {
  727         instr_cat6ldgb_t *ldgb = ptr;
  728 
  729         /* maybe these two bits both determine the instruction encoding? */
  730         cat6->src_off = false;
  731 
  732         ldgb->d = instr->cat6.d - 1;
  733         ldgb->typed = instr->cat6.typed;
  734         ldgb->type_size = instr->cat6.iim_val - 1;
  735 
  736         ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
  737 
  738         if (ldgb->g) {
  739             struct ir3_register *src3 = instr->regs[3];
  740             struct ir3_register *src4 = instr->regs[4];
  741 
  742             /* first src is src_ssbo: */
  743             iassert(src1->flags & IR3_REG_IMMED);
  744             ldgb->src_ssbo = src1->uim_val;
  745 
  746             ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
  747             ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
  748             ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
  749             ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
  750 
  751             ldgb->src3 = reg(src4, info, instr->repeat, 0);
  752             ldgb->pad0 = 0x1;
  753             ldgb->pad3 = 0x1;
  754         } else {
  755             ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
  756             ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED);
  757             ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
  758             ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
  759             ldgb->pad0 = 0x1;
  760             ldgb->pad3 = 0x0;
  761         }
  762 
  763         return 0;
  764     } else if (instr->opc == OPC_LDGB) {
  765         struct ir3_register *src3 = instr->regs[3];
  766         instr_cat6ldgb_t *ldgb = ptr;
  767 
  768         /* maybe these two bits both determine the instruction encoding? */
  769         cat6->src_off = false;
  770 
  771         ldgb->d = instr->cat6.d - 1;
  772         ldgb->typed = instr->cat6.typed;
  773         ldgb->type_size = instr->cat6.iim_val - 1;
  774 
  775         ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
  776 
  777         /* first src is src_ssbo: */
  778         iassert(src1->flags & IR3_REG_IMMED);
  779         ldgb->src_ssbo = src1->uim_val;
  780 
  781         /* then next two are src1/src2: */
  782         ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
  783         ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
  784         ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
  785         ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
  786 
  787         ldgb->pad0 = 0x0;
  788         ldgb->pad3 = 0x1;
  789 
  790         return 0;
  791     } else if (instr->opc == OPC_RESINFO) {
  792         instr_cat6ldgb_t *ldgb = ptr;
  793 
  794         ldgb->d = instr->cat6.d - 1;
  795 
  796         ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
  797 
  798         /* first src is src_ssbo: */
  799         iassert(src1->flags & IR3_REG_IMMED);
  800         ldgb->src_ssbo = src1->uim_val;
  801 
  802         return 0;
  803     } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) {
  804         struct ir3_register *src3 = instr->regs[4];
  805         instr_cat6stgb_t *stgb = ptr;
  806 
  807         /* maybe these two bits both determine the instruction encoding? */
  808         cat6->src_off = true;
  809         stgb->pad3 = 0x2;
  810 
  811         stgb->d = instr->cat6.d - 1;
  812         stgb->typed = instr->cat6.typed;
  813         stgb->type_size = instr->cat6.iim_val - 1;
  814 
  815         /* first src is dst_ssbo: */
  816         iassert(dst->flags & IR3_REG_IMMED);
  817         stgb->dst_ssbo = dst->uim_val;
  818 
  819         /* then src1/src2/src3: */
  820         stgb->src1 = reg(src1, info, instr->repeat, 0);
  821         stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
  822         stgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
  823         stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
  824         stgb->src3_im = !!(src3->flags & IR3_REG_IMMED);
  825 
  826         return 0;
  827     } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) ||
  828             (instr->opc == OPC_LDL) || (instr->opc == OPC_LDLW)) {
  829         struct ir3_register *src3 = instr->regs[3];
  830         instr_cat6a_t *cat6a = ptr;
  831 
  832         cat6->src_off = true;
  833 
  834         if (instr->opc == OPC_LDG) {
  835             /* For LDG src1 can not be immediate, so src1_imm is redundant and
  836              * instead used to signal whether (when true) 'off' is a 32 bit
  837              * register or an immediate offset.
  838              */
  839             cat6a->src1 = reg(src1, info, instr->repeat, 0);
  840             cat6a->src1_im = !(src3->flags & IR3_REG_IMMED);
  841             cat6a->off = reg(src3, info, instr->repeat, IR3_REG_IMMED);
  842         } else {
  843             cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
  844             cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
  845             cat6a->off = reg(src3, info, instr->repeat, IR3_REG_IMMED);
  846             iassert(src3->flags & IR3_REG_IMMED);
  847         }
  848 
  849         /* Num components */
  850         cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
  851         cat6a->src2_im = true;
  852     } else {
  853         instr_cat6b_t *cat6b = ptr;
  854 
  855         cat6->src_off = false;
  856 
  857         cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED | IR3_REG_HALF);
  858         cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
  859         if (src2) {
  860             cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
  861             cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED);
  862         }
  863     }
  864 
  865     if (instr->cat6.dst_offset || (instr->opc == OPC_STG) ||
  866             (instr->opc == OPC_STL) || (instr->opc == OPC_STLW)) {
  867         instr_cat6c_t *cat6c = ptr;
  868         cat6->dst_off = true;
  869         cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
  870 
  871         if (instr->flags & IR3_INSTR_G) {
  872             struct ir3_register *src3 = instr->regs[4];
  873             cat6c->off = reg(src3, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
  874             if (src3->flags & IR3_REG_IMMED) {
  875                 /* Immediate offsets are in bytes... */
  876                 cat6->g = false;
  877                 cat6c->off *= 4;
  878             }
  879         } else {
  880             cat6c->off = instr->cat6.dst_offset;
  881         }
  882     } else {
  883         instr_cat6d_t *cat6d = ptr;
  884         cat6->dst_off = false;
  885         cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
  886     }
  887 
  888     return 0;
  889 }
  890 
  891 static int emit_cat7(struct ir3_instruction *instr, void *ptr,
  892         struct ir3_info *info)
  893 {
  894     instr_cat7_t *cat7 = ptr;
  895 
  896     cat7->ss      = !!(instr->flags & IR3_INSTR_SS);
  897     cat7->w       = instr->cat7.w;
  898     cat7->r       = instr->cat7.r;
  899     cat7->l       = instr->cat7.l;
  900     cat7->g       = instr->cat7.g;
  901     cat7->opc     = instr->opc;
  902     cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
  903     cat7->sync    = !!(instr->flags & IR3_INSTR_SY);
  904     cat7->opc_cat = 7;
  905 
  906     return 0;
  907 }
  908 
  909 static int (*emit[])(struct ir3_instruction *instr, void *ptr,
  910         struct ir3_info *info) = {
  911     emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
  912     emit_cat7,
  913 };
  914 
  915 void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
  916         uint32_t gpu_id)
  917 {
  918     uint32_t *ptr, *dwords;
  919 
  920     memset(info, 0, sizeof(*info));
  921     info->gpu_id        = gpu_id;
  922     info->max_reg       = -1;
  923     info->max_half_reg  = -1;
  924     info->max_const     = -1;
  925 
  926     foreach_block (block, &shader->block_list) {
  927         foreach_instr (instr, &block->instr_list) {
  928             info->sizedwords += 2;
  929         }
  930     }
  931 
  932     /* need an integer number of instruction "groups" (sets of 16
  933      * instructions on a4xx or sets of 4 instructions on a3xx),
  934      * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
  935      */
  936     if (gpu_id >= 400) {
  937         info->sizedwords = align(info->sizedwords, 16 * 2);
  938     } else {
  939         info->sizedwords = align(info->sizedwords, 4 * 2);
  940     }
  941 
  942     ptr = dwords = calloc(4, info->sizedwords);
  943 
  944     foreach_block (block, &shader->block_list) {
  945         unsigned sfu_delay = 0;
  946 
  947         foreach_instr (instr, &block->instr_list) {
  948             int ret = emit[opc_cat(instr->opc)](instr, dwords, info);
  949             if (ret)
  950                 goto fail;
  951 
  952             if ((instr->opc == OPC_BARY_F) && (instr->regs[0]->flags & IR3_REG_EI))
  953                 info->last_baryf = info->instrs_count;
  954 
  955             info->instrs_count += 1 + instr->repeat + instr->nop;
  956             info->nops_count += instr->nop;
  957             if (instr->opc == OPC_NOP)
  958                 info->nops_count += 1 + instr->repeat;
  959             if (instr->opc == OPC_MOV) {
  960                 if (instr->cat1.src_type == instr->cat1.dst_type) {
  961                     info->mov_count += 1 + instr->repeat;
  962                 } else {
  963                     info->cov_count += 1 + instr->repeat;
  964                 }
  965             }
  966             dwords += 2;
  967 
  968             if (instr->flags & IR3_INSTR_SS) {
  969                 info->ss++;
  970                 info->sstall += sfu_delay;
  971             }
  972 
  973             if (instr->flags & IR3_INSTR_SY)
  974                 info->sy++;
  975 
  976             if (is_sfu(instr)) {
  977                 sfu_delay = 10;
  978             } else if (sfu_delay > 0) {
  979                 sfu_delay--;
  980             }
  981         }
  982     }
  983 
  984     return ptr;
  985 
  986 fail:
  987     free(ptr);
  988     return NULL;
  989 }
  990 
  991 static struct ir3_register * reg_create(struct ir3 *shader,
  992         int num, int flags)
  993 {
  994     struct ir3_register *reg =
  995             ir3_alloc(shader, sizeof(struct ir3_register));
  996     reg->wrmask = 1;
  997     reg->flags = flags;
  998     reg->num = num;
  999     if (shader->compiler->gpu_id >= 600)
 1000         reg->merged = true;
 1001     return reg;
 1002 }
 1003 
 1004 static void insert_instr(struct ir3_block *block,
 1005         struct ir3_instruction *instr)
 1006 {
 1007     struct ir3 *shader = block->shader;
 1008 #ifdef DEBUG
 1009     instr->serialno = ++shader->instr_count;
 1010 #endif
 1011     list_addtail(&instr->node, &block->instr_list);
 1012 
 1013     if (is_input(instr))
 1014         array_insert(shader, shader->baryfs, instr);
 1015 }
 1016 
 1017 struct ir3_block * ir3_block_create(struct ir3 *shader)
 1018 {
 1019     struct ir3_block *block = ir3_alloc(shader, sizeof(*block));
 1020 #ifdef DEBUG
 1021     block->serialno = ++shader->block_count;
 1022 #endif
 1023     block->shader = shader;
 1024     list_inithead(&block->node);
 1025     list_inithead(&block->instr_list);
 1026     return block;
 1027 }
 1028 
 1029 static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg)
 1030 {
 1031     struct ir3_instruction *instr;
 1032     unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0]));
 1033     char *ptr = ir3_alloc(block->shader, sz);
 1034 
 1035     instr = (struct ir3_instruction *)ptr;
 1036     ptr  += sizeof(*instr);
 1037     instr->regs = (struct ir3_register **)ptr;
 1038 
 1039 #ifdef DEBUG
 1040     instr->regs_max = nreg;
 1041 #endif
 1042 
 1043     return instr;
 1044 }
 1045 
 1046 struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
 1047         opc_t opc, int nreg)
 1048 {
 1049     struct ir3_instruction *instr = instr_create(block, nreg);
 1050     instr->block = block;
 1051     instr->opc = opc;
 1052     insert_instr(block, instr);
 1053     return instr;
 1054 }
 1055 
 1056 struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc)
 1057 {
 1058     /* NOTE: we could be slightly more clever, at least for non-meta,
 1059      * and choose # of regs based on category.
 1060      */
 1061     return ir3_instr_create2(block, opc, 4);
 1062 }
 1063 
 1064 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
 1065 {
 1066     struct ir3_instruction *new_instr = instr_create(instr->block,
 1067             instr->regs_count);
 1068     struct ir3_register **regs;
 1069     unsigned i;
 1070 
 1071     regs = new_instr->regs;
 1072     *new_instr = *instr;
 1073     new_instr->regs = regs;
 1074 
 1075     insert_instr(instr->block, new_instr);
 1076 
 1077     /* clone registers: */
 1078     new_instr->regs_count = 0;
 1079     for (i = 0; i < instr->regs_count; i++) {
 1080         struct ir3_register *reg = instr->regs[i];
 1081         struct ir3_register *new_reg =
 1082                 ir3_reg_create(new_instr, reg->num, reg->flags);
 1083         *new_reg = *reg;
 1084     }
 1085 
 1086     return new_instr;
 1087 }
 1088 
 1089 /* Add a false dependency to instruction, to ensure it is scheduled first: */
 1090 void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep)
 1091 {
 1092     array_insert(instr, instr->deps, dep);
 1093 }
 1094 
 1095 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
 1096         int num, int flags)
 1097 {
 1098     struct ir3 *shader = instr->block->shader;
 1099     struct ir3_register *reg = reg_create(shader, num, flags);
 1100 #ifdef DEBUG
 1101     debug_assert(instr->regs_count < instr->regs_max);
 1102 #endif
 1103     instr->regs[instr->regs_count++] = reg;
 1104     return reg;
 1105 }
 1106 
 1107 struct ir3_register * ir3_reg_clone(struct ir3 *shader,
 1108         struct ir3_register *reg)
 1109 {
 1110     struct ir3_register *new_reg = reg_create(shader, 0, 0);
 1111     *new_reg = *reg;
 1112     return new_reg;
 1113 }
 1114 
 1115 void
 1116 ir3_instr_set_address(struct ir3_instruction *instr,
 1117         struct ir3_instruction *addr)
 1118 {
 1119     if (instr->address != addr) {
 1120         struct ir3 *ir = instr->block->shader;
 1121 
 1122         debug_assert(!instr->address);
 1123         debug_assert(instr->block == addr->block);
 1124 
 1125         instr->address = addr;
 1126         debug_assert(reg_num(addr->regs[0]) == REG_A0);
 1127         unsigned comp = reg_comp(addr->regs[0]);
 1128         if (comp == 0) {
 1129             array_insert(ir, ir->a0_users, instr);
 1130         } else {
 1131             debug_assert(comp == 1);
 1132             array_insert(ir, ir->a1_users, instr);
 1133         }
 1134     }
 1135 }
 1136 
 1137 void
 1138 ir3_block_clear_mark(struct ir3_block *block)
 1139 {
 1140     foreach_instr (instr, &block->instr_list)
 1141         instr->flags &= ~IR3_INSTR_MARK;
 1142 }
 1143 
 1144 void
 1145 ir3_clear_mark(struct ir3 *ir)
 1146 {
 1147     foreach_block (block, &ir->block_list) {
 1148         ir3_block_clear_mark(block);
 1149     }
 1150 }
 1151 
 1152 unsigned
 1153 ir3_count_instructions(struct ir3 *ir)
 1154 {
 1155     unsigned cnt = 1;
 1156     foreach_block (block, &ir->block_list) {
 1157         block->start_ip = cnt;
 1158         foreach_instr (instr, &block->instr_list) {
 1159             instr->ip = cnt++;
 1160         }
 1161         block->end_ip = cnt;
 1162     }
 1163     return cnt;
 1164 }
 1165 
 1166 /* When counting instructions for RA, we insert extra fake instructions at the
 1167  * beginning of each block, where values become live, and at the end where
 1168  * values die. This prevents problems where values live-in at the beginning or
 1169  * live-out at the end of a block from being treated as if they were
 1170  * live-in/live-out at the first/last instruction, which would be incorrect.
 1171  * In ir3_legalize these ip's are assumed to be actual ip's of the final
 1172  * program, so it would be incorrect to use this everywhere.
 1173  */
 1174 
 1175 unsigned
 1176 ir3_count_instructions_ra(struct ir3 *ir)
 1177 {
 1178     unsigned cnt = 1;
 1179     foreach_block (block, &ir->block_list) {
 1180         block->start_ip = cnt++;
 1181         foreach_instr (instr, &block->instr_list) {
 1182             instr->ip = cnt++;
 1183         }
 1184         block->end_ip = cnt++;
 1185     }
 1186     return cnt;
 1187 }
 1188 
 1189 struct ir3_array *
 1190 ir3_lookup_array(struct ir3 *ir, unsigned id)
 1191 {
 1192     foreach_array (arr, &ir->array_list)
 1193         if (arr->id == id)
 1194             return arr;
 1195     return NULL;
 1196 }
 1197 
 1198 void
 1199 ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps)
 1200 {
 1201     /* We could do this in a single pass if we can assume instructions
 1202      * are always sorted.  Which currently might not always be true.
 1203      * (In particular after ir3_group pass, but maybe other places.)
 1204      */
 1205     foreach_block (block, &ir->block_list)
 1206         foreach_instr (instr, &block->instr_list)
 1207             instr->uses = NULL;
 1208 
 1209     foreach_block (block, &ir->block_list) {
 1210         foreach_instr (instr, &block->instr_list) {
 1211             struct ir3_instruction *src;
 1212 
 1213             foreach_ssa_src_n (src, n, instr) {
 1214                 if (__is_false_dep(instr, n) && !falsedeps)
 1215                     continue;
 1216                 if (!src->uses)
 1217                     src->uses = _mesa_pointer_set_create(mem_ctx);
 1218                 _mesa_set_add(src->uses, instr);
 1219             }
 1220         }
 1221     }
 1222 }