"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/broadcom/compiler/vir_live_variables.c" (16 Sep 2020, 13384 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "vir_live_variables.c" see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright © 2012 Intel Corporation
    3  * Copyright © 2016 Broadcom
    4  *
    5  * Permission is hereby granted, free of charge, to any person obtaining a
    6  * copy of this software and associated documentation files (the "Software"),
    7  * to deal in the Software without restriction, including without limitation
    8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    9  * and/or sell copies of the Software, and to permit persons to whom the
   10  * Software is furnished to do so, subject to the following conditions:
   11  *
   12  * The above copyright notice and this permission notice (including the next
   13  * paragraph) shall be included in all copies or substantial portions of the
   14  * Software.
   15  *
   16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   22  * IN THE SOFTWARE.
   23  */
   24 
   25 #define MAX_INSTRUCTION (1 << 30)
   26 
   27 #include "util/ralloc.h"
   28 #include "util/register_allocate.h"
   29 #include "v3d_compiler.h"
   30 
   31 struct partial_update_state {
   32         struct qinst *insts[4];
   33         uint8_t channels;
   34 };
   35 
   36 static int
   37 vir_reg_to_var(struct qreg reg)
   38 {
   39         if (reg.file == QFILE_TEMP)
   40                 return reg.index;
   41 
   42         return -1;
   43 }
   44 
   45 static void
   46 vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
   47               struct qreg src)
   48 {
   49         int var = vir_reg_to_var(src);
   50         if (var == -1)
   51                 return;
   52 
   53         c->temp_start[var] = MIN2(c->temp_start[var], ip);
   54         c->temp_end[var] = MAX2(c->temp_end[var], ip);
   55 
   56         /* The use[] bitset marks when the block makes
   57          * use of a variable without having completely
   58          * defined that variable within the block.
   59          */
   60         if (!BITSET_TEST(block->def, var))
   61                 BITSET_SET(block->use, var);
   62 }
   63 
   64 static struct partial_update_state *
   65 get_partial_update_state(struct hash_table *partial_update_ht,
   66                          struct qinst *inst)
   67 {
   68         struct hash_entry *entry =
   69                 _mesa_hash_table_search(partial_update_ht,
   70                                         &inst->dst.index);
   71         if (entry)
   72                 return entry->data;
   73 
   74         struct partial_update_state *state =
   75                 rzalloc(partial_update_ht, struct partial_update_state);
   76 
   77         _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
   78 
   79         return state;
   80 }
   81 
   82 static void
   83 vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
   84               struct hash_table *partial_update_ht, struct qinst *inst)
   85 {
   86         if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
   87                 return;
   88 
   89         /* The def[] bitset marks when an initialization in a
   90          * block completely screens off previous updates of
   91          * that variable.
   92          */
   93         int var = vir_reg_to_var(inst->dst);
   94         if (var == -1)
   95                 return;
   96 
   97         c->temp_start[var] = MIN2(c->temp_start[var], ip);
   98         c->temp_end[var] = MAX2(c->temp_end[var], ip);
   99 
  100         /* Mark the block as having a (partial) def of the var. */
  101         BITSET_SET(block->defout, var);
  102 
  103         /* If we've already tracked this as a def that screens off previous
  104          * uses, or already used it within the block, there's nothing to do.
  105          */
  106         if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
  107                 return;
  108 
  109         /* Easy, common case: unconditional full register update.*/
  110         if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
  111              inst->qpu.flags.mc == V3D_QPU_COND_NONE) &&
  112             inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
  113             inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
  114                 BITSET_SET(block->def, var);
  115                 return;
  116         }
  117 
  118         /* Finally, look at the condition code and packing and mark it as a
  119          * def.  We need to make sure that we understand sequences
  120          * instructions like:
  121          *
  122          *     mov.zs t0, t1
  123          *     mov.zc t0, t2
  124          *
  125          * or:
  126          *
  127          *     mmov t0.8a, t1
  128          *     mmov t0.8b, t2
  129          *     mmov t0.8c, t3
  130          *     mmov t0.8d, t4
  131          *
  132          * as defining the temp within the block, because otherwise dst's live
  133          * range will get extended up the control flow to the top of the
  134          * program.
  135          */
  136         struct partial_update_state *state =
  137                 get_partial_update_state(partial_update_ht, inst);
  138         uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */
  139 
  140         if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
  141             inst->qpu.flags.mc == V3D_QPU_COND_NONE) {
  142                 state->channels |= mask;
  143         } else {
  144                 for (int i = 0; i < 4; i++) {
  145                         if (!(mask & (1 << i)))
  146                                 continue;
  147 
  148                         /* XXXif (state->insts[i] &&
  149                             state->insts[i]->cond ==
  150                             qpu_cond_complement(inst->cond))
  151                                 state->channels |= 1 << i;
  152                         else
  153                         */
  154                                 state->insts[i] = inst;
  155                 }
  156         }
  157 
  158         if (state->channels == 0xf)
  159                 BITSET_SET(block->def, var);
  160 }
  161 
  162 static void
  163 sf_state_clear(struct hash_table *partial_update_ht)
  164 {
  165         hash_table_foreach(partial_update_ht, entry) {
  166                 struct partial_update_state *state = entry->data;
  167 
  168                 for (int i = 0; i < 4; i++) {
  169                         if (state->insts[i] &&
  170                             (state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE ||
  171                              state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE))
  172                                 state->insts[i] = NULL;
  173                 }
  174         }
  175 }
  176 
  177 /* Sets up the def/use arrays for when variables are used-before-defined or
  178  * defined-before-used in the block.
  179  *
  180  * Also initializes the temp_start/temp_end to cover just the instruction IPs
  181  * where the variable is used, which will be extended later in
  182  * vir_compute_start_end().
  183  */
  184 static void
  185 vir_setup_def_use(struct v3d_compile *c)
  186 {
  187         struct hash_table *partial_update_ht =
  188                 _mesa_hash_table_create(c, _mesa_hash_int, _mesa_key_int_equal);
  189         int ip = 0;
  190 
  191         vir_for_each_block(block, c) {
  192                 block->start_ip = ip;
  193 
  194                 _mesa_hash_table_clear(partial_update_ht, NULL);
  195 
  196                 vir_for_each_inst(inst, block) {
  197                         for (int i = 0; i < vir_get_nsrc(inst); i++)
  198                                 vir_setup_use(c, block, ip, inst->src[i]);
  199 
  200                         vir_setup_def(c, block, ip, partial_update_ht, inst);
  201 
  202                         if (false /* XXX inst->uf */)
  203                                 sf_state_clear(partial_update_ht);
  204 
  205                         /* Payload registers: r0/1/2 contain W, centroid W,
  206                          * and Z at program start.  Register allocation will
  207                          * force their nodes to R0/1/2.
  208                          */
  209                         if (inst->src[0].file == QFILE_REG) {
  210                                 switch (inst->src[0].index) {
  211                                 case 0:
  212                                 case 1:
  213                                 case 2:
  214                                         c->temp_start[inst->dst.index] = 0;
  215                                         break;
  216                                 }
  217                         }
  218 
  219                         ip++;
  220                 }
  221                 block->end_ip = ip;
  222         }
  223 
  224         _mesa_hash_table_destroy(partial_update_ht, NULL);
  225 }
  226 
  227 static bool
  228 vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
  229 {
  230         bool cont = false;
  231 
  232         vir_for_each_block_rev(block, c) {
  233                 /* Update live_out: Any successor using the variable
  234                  * on entrance needs us to have the variable live on
  235                  * exit.
  236                  */
  237                 vir_for_each_successor(succ, block) {
  238                         for (int i = 0; i < bitset_words; i++) {
  239                                 BITSET_WORD new_live_out = (succ->live_in[i] &
  240                                                             ~block->live_out[i]);
  241                                 if (new_live_out) {
  242                                         block->live_out[i] |= new_live_out;
  243                                         cont = true;
  244                                 }
  245                         }
  246                 }
  247 
  248                 /* Update live_in */
  249                 for (int i = 0; i < bitset_words; i++) {
  250                         BITSET_WORD new_live_in = (block->use[i] |
  251                                                    (block->live_out[i] &
  252                                                     ~block->def[i]));
  253                         if (new_live_in & ~block->live_in[i]) {
  254                                 block->live_in[i] |= new_live_in;
  255                                 cont = true;
  256                         }
  257                 }
  258         }
  259 
  260         return cont;
  261 }
  262 
  263 static bool
  264 vir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words)
  265 {
  266         bool cont = false;
  267 
  268         vir_for_each_block_rev(block, c) {
  269                 /* Propagate defin/defout down the successors to produce the
  270                  * union of blocks with a reachable (partial) definition of
  271                  * the var.
  272                  *
  273                  * This keeps a conditional first write to a reg from
  274                  * extending its lifetime back to the start of the program.
  275                  */
  276                 vir_for_each_successor(succ, block) {
  277                         for (int i = 0; i < bitset_words; i++) {
  278                                 BITSET_WORD new_def = (block->defout[i] &
  279                                                        ~succ->defin[i]);
  280                                 succ->defin[i] |= new_def;
  281                                 succ->defout[i] |= new_def;
  282                                 cont |= new_def;
  283                         }
  284                 }
  285         }
  286 
  287         return cont;
  288 }
  289 
  290 /**
  291  * Extend the start/end ranges for each variable to account for the
  292  * new information calculated from control flow.
  293  */
  294 static void
  295 vir_compute_start_end(struct v3d_compile *c, int num_vars)
  296 {
  297         vir_for_each_block(block, c) {
  298                 for (int i = 0; i < num_vars; i++) {
  299                         if (BITSET_TEST(block->live_in, i) &&
  300                             BITSET_TEST(block->defin, i)) {
  301                                 c->temp_start[i] = MIN2(c->temp_start[i],
  302                                                         block->start_ip);
  303                                 c->temp_end[i] = MAX2(c->temp_end[i],
  304                                                       block->start_ip);
  305                         }
  306 
  307                         if (BITSET_TEST(block->live_out, i) &&
  308                             BITSET_TEST(block->defout, i)) {
  309                                 c->temp_start[i] = MIN2(c->temp_start[i],
  310                                                         block->end_ip);
  311                                 c->temp_end[i] = MAX2(c->temp_end[i],
  312                                                       block->end_ip);
  313                         }
  314                 }
  315         }
  316 }
  317 
  318 void
  319 vir_calculate_live_intervals(struct v3d_compile *c)
  320 {
  321         int bitset_words = BITSET_WORDS(c->num_temps);
  322 
  323         /* We may be called more than once if we've rearranged the program to
  324          * try to get register allocation to succeed.
  325          */
  326         if (c->temp_start) {
  327                 ralloc_free(c->temp_start);
  328                 ralloc_free(c->temp_end);
  329 
  330                 vir_for_each_block(block, c) {
  331                         ralloc_free(block->def);
  332                         ralloc_free(block->use);
  333                         ralloc_free(block->live_in);
  334                         ralloc_free(block->live_out);
  335                 }
  336         }
  337 
  338         c->temp_start = rzalloc_array(c, int, c->num_temps);
  339         c->temp_end = rzalloc_array(c, int, c->num_temps);
  340 
  341         for (int i = 0; i < c->num_temps; i++) {
  342                 c->temp_start[i] = MAX_INSTRUCTION;
  343                 c->temp_end[i] = -1;
  344         }
  345 
  346         vir_for_each_block(block, c) {
  347                 block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
  348                 block->defin = rzalloc_array(c, BITSET_WORD, bitset_words);
  349                 block->defout = rzalloc_array(c, BITSET_WORD, bitset_words);
  350                 block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
  351                 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
  352                 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
  353         }
  354 
  355         vir_setup_def_use(c);
  356 
  357         while (vir_live_variables_dataflow(c, bitset_words))
  358                 ;
  359 
  360         while (vir_live_variables_defin_defout_dataflow(c, bitset_words))
  361                 ;
  362 
  363         vir_compute_start_end(c, c->num_temps);
  364 
  365         c->live_intervals_valid = true;
  366 }