"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/compiler/nir/nir_opt_if.c" (16 Sep 2020, 50008 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "nir_opt_if.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20.1.7_vs_20.1.8.

    1 /*
    2  * Copyright © 2016 Intel Corporation
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  */
   23 
   24 #include "nir.h"
   25 #include "nir/nir_builder.h"
   26 #include "nir_constant_expressions.h"
   27 #include "nir_control_flow.h"
   28 #include "nir_loop_analyze.h"
   29 
   30 static nir_ssa_def *clone_alu_and_replace_src_defs(nir_builder *b,
   31                                                    const nir_alu_instr *alu,
   32                                                    nir_ssa_def **src_defs);
   33 
   34 /**
    35  * Gets the single block that jumps back to the loop header. Assumes there
    36  * is exactly one such block.
   37  */
   38 static nir_block*
   39 find_continue_block(nir_loop *loop)
   40 {
   41    nir_block *header_block = nir_loop_first_block(loop);
   42    nir_block *prev_block =
   43       nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
   44 
   45    assert(header_block->predecessors->entries == 2);
   46 
   47    set_foreach(header_block->predecessors, pred_entry) {
   48       if (pred_entry->key != prev_block)
   49          return (nir_block*)pred_entry->key;
   50    }
   51 
   52    unreachable("Continue block not found!");
   53 }
   54 
   55 /**
   56  * Does a phi have one constant value from outside a loop and one from inside?
   57  */
   58 static bool
   59 phi_has_constant_from_outside_and_one_from_inside_loop(nir_phi_instr *phi,
   60                                                        const nir_block *entry_block,
   61                                                        uint32_t *entry_val,
   62                                                        uint32_t *continue_val)
   63 {
   64    /* We already know we have exactly one continue */
   65    assert(exec_list_length(&phi->srcs) == 2);
   66 
   67    *entry_val = 0;
   68    *continue_val = 0;
   69 
    70    nir_foreach_phi_src(src, phi) {
    71       assert(src->src.is_ssa);
    72       nir_const_value *const_src = nir_src_as_const_value(src->src);
    73       if (!const_src)
    74          return false;
    75 
    76       if (src->pred != entry_block) {
    77          *continue_val = const_src[0].u32;
    78       } else {
    79          *entry_val = const_src[0].u32;
    80       }
    81    }
    82 
    83    return true;
   84 }
   85 
   86 /**
   87  * This optimization detects if statements at the tops of loops where the
   88  * condition is a phi node of two constants and moves half of the if to above
   89  * the loop and the other half of the if to the end of the loop.  A simple for
   90  * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
   91  * ends up looking something like this:
   92  *
   93  * vec1 32 ssa_0 = load_const (0x00000000)
   94  * vec1 32 ssa_1 = load_const (0xffffffff)
   95  * loop {
   96  *    block block_1:
   97  *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
   98  *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
   99  *    if ssa_3 {
  100  *       block block_2:
  101  *       vec1 32 ssa_4 = load_const (0x00000001)
  102  *       vec1 32 ssa_5 = iadd ssa_2, ssa_4
  103  *    } else {
  104  *       block block_3:
  105  *    }
  106  *    block block_4:
  107  *    vec1 32 ssa_6 = load_const (0x00000004)
  108  *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
  109  *    if ssa_7 {
  110  *       block block_5:
  111  *    } else {
  112  *       block block_6:
  113  *       break
  114  *    }
  115  *    block block_7:
  116  * }
  117  *
  118  * This turns it into something like this:
  119  *
  120  * // Stuff from block 1
  121  * // Stuff from block 3
  122  * loop {
  123  *    block block_1:
  124  *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
  125  *    vec1 32 ssa_6 = load_const (0x00000004)
  126  *    vec1 32 ssa_7 = ilt ssa_2, ssa_6
  127  *    if ssa_7 {
  128  *       block block_5:
  129  *    } else {
  130  *       block block_6:
  131  *       break
  132  *    }
  133  *    block block_7:
  134  *    // Stuff from block 1
  135  *    // Stuff from block 2
  136  *    vec1 32 ssa_4 = load_const (0x00000001)
  137  *    vec1 32 ssa_5 = iadd ssa_2, ssa_4
  138  * }
  139  */
  140 static bool
  141 opt_peel_loop_initial_if(nir_loop *loop)
  142 {
  143    nir_block *header_block = nir_loop_first_block(loop);
  144    nir_block *const prev_block =
  145       nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
  146 
  147    /* It would be insane if this were not true */
  148    assert(_mesa_set_search(header_block->predecessors, prev_block));
  149 
  150    /* The loop must have exactly one continue block which could be a block
  151     * ending in a continue instruction or the "natural" continue from the
  152     * last block in the loop back to the top.
  153     */
  154    if (header_block->predecessors->entries != 2)
  155       return false;
  156 
  157    nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
  158    if (!if_node || if_node->type != nir_cf_node_if)
  159       return false;
  160 
  161    nir_if *nif = nir_cf_node_as_if(if_node);
  162    assert(nif->condition.is_ssa);
  163 
  164    nir_ssa_def *cond = nif->condition.ssa;
  165    if (cond->parent_instr->type != nir_instr_type_phi)
  166       return false;
  167 
  168    nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
  169    if (cond->parent_instr->block != header_block)
  170       return false;
  171 
  172    uint32_t entry_val = 0, continue_val = 0;
  173    if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
  174                                                                prev_block,
  175                                                                &entry_val,
  176                                                                &continue_val))
  177       return false;
  178 
  179    /* If they both execute or both don't execute, this is a job for
  180     * nir_dead_cf, not this pass.
  181     */
  182    if ((entry_val && continue_val) || (!entry_val && !continue_val))
  183       return false;
  184 
  185    struct exec_list *continue_list, *entry_list;
  186    if (continue_val) {
  187       continue_list = &nif->then_list;
  188       entry_list = &nif->else_list;
  189    } else {
  190       continue_list = &nif->else_list;
  191       entry_list = &nif->then_list;
  192    }
  193 
  194    /* We want to be moving the contents of entry_list to above the loop so it
  195     * can't contain any break or continue instructions.
  196     */
  197    foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
  198       nir_foreach_block_in_cf_node(block, cf_node) {
  199          nir_instr *last_instr = nir_block_last_instr(block);
  200          if (last_instr && last_instr->type == nir_instr_type_jump)
  201             return false;
  202       }
  203    }
  204 
  205    /* We're about to re-arrange a bunch of blocks so make sure that we don't
  206     * have deref uses which cross block boundaries.  We don't want a deref
  207     * accidentally ending up in a phi.
  208     */
  209    nir_rematerialize_derefs_in_use_blocks_impl(
  210       nir_cf_node_get_function(&loop->cf_node));
  211 
  212    /* Before we do anything, convert the loop to LCSSA.  We're about to
  213     * replace a bunch of SSA defs with registers and this will prevent any of
  214     * it from leaking outside the loop.
  215     */
  216    nir_convert_loop_to_lcssa(loop);
  217 
  218    nir_block *after_if_block =
  219       nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
  220 
  221    /* Get rid of phis in the header block since we will be duplicating it */
  222    nir_lower_phis_to_regs_block(header_block);
  223    /* Get rid of phis after the if since dominance will change */
  224    nir_lower_phis_to_regs_block(after_if_block);
  225 
  226    /* Get rid of SSA defs in the pieces we're about to move around */
  227    nir_lower_ssa_defs_to_regs_block(header_block);
  228    nir_foreach_block_in_cf_node(block, &nif->cf_node)
  229       nir_lower_ssa_defs_to_regs_block(block);
  230 
  231    nir_cf_list header, tmp;
  232    nir_cf_extract(&header, nir_before_block(header_block),
  233                            nir_after_block(header_block));
  234 
  235    nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
  236    nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
  237    nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
  238                         nir_after_cf_list(entry_list));
  239    nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
  240 
  241    nir_cf_reinsert(&header,
  242                    nir_after_block_before_jump(find_continue_block(loop)));
  243 
  244    bool continue_list_jumps =
  245       nir_block_ends_in_jump(exec_node_data(nir_block,
  246                                             exec_list_get_tail(continue_list),
  247                                             cf_node.node));
  248 
  249    nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
  250                         nir_after_cf_list(continue_list));
  251 
  252    /* Get continue block again as the previous reinsert might have removed the
   253     * block.  Also, if both the continue list and the continue block end in
   254     * jump instructions, remove the jump from the latter, as it will not be
  255     * executed if we insert the continue list before it. */
  256 
  257    nir_block *continue_block = find_continue_block(loop);
  258 
  259    if (continue_list_jumps) {
  260       nir_instr *last_instr = nir_block_last_instr(continue_block);
  261       if (last_instr && last_instr->type == nir_instr_type_jump)
  262          nir_instr_remove(last_instr);
  263    }
  264 
  265    nir_cf_reinsert(&tmp,
  266                    nir_after_block_before_jump(continue_block));
  267 
  268    nir_cf_node_remove(&nif->cf_node);
  269 
  270    return true;
  271 }
  272 
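       /**
        * Is this ALU instruction a comparison?
        *
        * Returns true for the explicitly listed 32-bit comparison opcodes below
        * as well as for anything nir_alu_instr_is_comparison() reports as a
        * comparison.
        */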
  273 static bool
  274 alu_instr_is_comparison(const nir_alu_instr *alu)
  275 {
  276    switch (alu->op) {
  277    case nir_op_flt32:
  278    case nir_op_fge32:
  279    case nir_op_feq32:
  280    case nir_op_fne32:
  281    case nir_op_ilt32:
  282    case nir_op_ult32:
  283    case nir_op_ige32:
  284    case nir_op_uge32:
  285    case nir_op_ieq32:
  286    case nir_op_ine32:
  287       return true;
  288    default:
  289       return nir_alu_instr_is_comparison(alu);
  290    }
  291 }
  292 
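       /**
        * Is this ALU instruction a type conversion?
        *
        * Returns true for single-source opcodes whose output base type differs
        * from the base type of their input.
        */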
  293 static bool
  294 alu_instr_is_type_conversion(const nir_alu_instr *alu)
  295 {
  296    return nir_op_infos[alu->op].num_inputs == 1 &&
  297           nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) !=
  298           nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[0]);
  299 }
  300 
  301 /**
  302  * Splits ALU instructions that have a source that is a phi node
  303  *
  304  * ALU instructions in the header block of a loop that meet the following
  305  * criteria can be split.
  306  *
  307  * - The loop has no continue instructions other than the "natural" continue
  308  *   at the bottom of the loop.
  309  *
  310  * - At least one source of the instruction is a phi node from the header block.
  311  *
  312  * - The phi node selects a constant or undef from the block before the loop.
  313  *
  314  * - Any non-phi sources of the ALU instruction come from a block that
  315  *   dominates the block before the loop.  The most common failure mode for
  316  *   this check is sources that are generated in the loop header block.
  317  *
  318  * The split process splits the original ALU instruction into two, one at the
  319  * bottom of the loop and one at the block before the loop. The instruction
  320  * before the loop computes the value on the first iteration, and the
  321  * instruction at the bottom computes the value on the second, third, and so
  322  * on. A new phi node is added to the header block that selects either the
  323  * instruction before the loop or the one at the end, and uses of the original
  324  * instruction are replaced by this phi.
  325  *
  326  * The splitting transforms a loop like:
  327  *
  328  *    vec1 32 ssa_8 = load_const (0x00000001)
  329  *    vec1 32 ssa_10 = load_const (0x00000000)
  330  *    // succs: block_1
  331  *    loop {
  332  *            block block_1:
  333  *            // preds: block_0 block_4
  334  *            vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15
  335  *            vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15
  336  *            vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
  337  *            vec1 32 ssa_14 = iadd ssa_11, ssa_8
  338  *            vec1 32 ssa_15 = b32csel ssa_13, ssa_14, ssa_12
  339  *            ...
  340  *            // succs: block_1
  341  *    }
  342  *
  343  * into:
  344  *
  345  *    vec1 32 ssa_8 = load_const (0x00000001)
  346  *    vec1 32 ssa_10 = load_const (0x00000000)
  347  *    vec1 32 ssa_22 = iadd ssa_10, ssa_8
  348  *    // succs: block_1
  349  *    loop {
  350  *            block block_1:
  351  *            // preds: block_0 block_4
  352  *            vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15
  353  *            vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15
  354  *            vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
  355  *            vec1 32 ssa_21 = phi block_0: ssa_22, block_4: ssa_20
  356  *            vec1 32 ssa_15 = b32csel ssa_13, ssa_21, ssa_12
  357  *            ...
  358  *            vec1 32 ssa_20 = iadd ssa_15, ssa_8
  359  *            // succs: block_1
  360  *    }
  361  */
  362 static bool
  363 opt_split_alu_of_phi(nir_builder *b, nir_loop *loop)
  364 {
  365    bool progress = false;
  366    nir_block *header_block = nir_loop_first_block(loop);
  367    nir_block *const prev_block =
  368       nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
  369 
  370    /* It would be insane if this were not true */
  371    assert(_mesa_set_search(header_block->predecessors, prev_block));
  372 
  373    /* The loop must have exactly one continue block which could be a block
  374     * ending in a continue instruction or the "natural" continue from the
  375     * last block in the loop back to the top.
  376     */
  377    if (header_block->predecessors->entries != 2)
  378       return false;
  379 
  380    nir_foreach_instr_safe(instr, header_block) {
  381       if (instr->type != nir_instr_type_alu)
  382          continue;
  383 
  384       nir_alu_instr *const alu = nir_instr_as_alu(instr);
  385 
  386       /* nir_op_vec{2,3,4} and nir_op_mov are excluded because they can easily
  387        * lead to infinite optimization loops. Splitting comparisons can lead
   388        * to loop unrolling not recognizing loop terminators, and type
  389        * conversions also lead to regressions.
  390        */
  391       if (nir_op_is_vec(alu->op) ||
  392           alu_instr_is_comparison(alu) ||
  393           alu_instr_is_type_conversion(alu))
  394          continue;
  395 
  396       bool has_phi_src_from_prev_block = false;
  397       bool all_non_phi_exist_in_prev_block = true;
  398       bool is_prev_result_undef = true;
  399       bool is_prev_result_const = true;
  400       nir_ssa_def *prev_srcs[8];     // FINISHME: Array size?
  401       nir_ssa_def *continue_srcs[8]; // FINISHME: Array size?
  402 
  403       for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
  404          nir_instr *const src_instr = alu->src[i].src.ssa->parent_instr;
  405 
  406          /* If the source is a phi in the loop header block, then the
  407           * prev_srcs and continue_srcs will come from the different sources
  408           * of the phi.
  409           */
  410          if (src_instr->type == nir_instr_type_phi &&
  411              src_instr->block == header_block) {
  412             nir_phi_instr *const phi = nir_instr_as_phi(src_instr);
  413 
  414             /* Only strictly need to NULL out the pointers when the assertions
  415              * (below) are compiled in.  Debugging a NULL pointer deref in the
  416              * wild is easier than debugging a random pointer deref, so set
  417              * NULL unconditionally just to be safe.
  418              */
  419             prev_srcs[i] = NULL;
  420             continue_srcs[i] = NULL;
  421 
  422             nir_foreach_phi_src(src_of_phi, phi) {
  423                if (src_of_phi->pred == prev_block) {
  424                   if (src_of_phi->src.ssa->parent_instr->type !=
  425                       nir_instr_type_ssa_undef) {
  426                      is_prev_result_undef = false;
  427                   }
  428 
  429                   if (src_of_phi->src.ssa->parent_instr->type !=
  430                       nir_instr_type_load_const) {
  431                      is_prev_result_const = false;
  432                   }
  433 
  434                   prev_srcs[i] = src_of_phi->src.ssa;
  435                   has_phi_src_from_prev_block = true;
  436                } else
  437                   continue_srcs[i] = src_of_phi->src.ssa;
  438             }
  439 
  440             assert(prev_srcs[i] != NULL);
  441             assert(continue_srcs[i] != NULL);
  442          } else {
  443             /* If the source is not a phi (or a phi in a block other than the
  444              * loop header), then the value must exist in prev_block.
  445              */
  446             if (!nir_block_dominates(src_instr->block, prev_block)) {
  447                all_non_phi_exist_in_prev_block = false;
  448                break;
  449             }
  450 
  451             prev_srcs[i] = alu->src[i].src.ssa;
  452             continue_srcs[i] = alu->src[i].src.ssa;
  453          }
  454       }
  455 
  456       if (has_phi_src_from_prev_block && all_non_phi_exist_in_prev_block &&
  457           (is_prev_result_undef || is_prev_result_const)) {
  458          nir_block *const continue_block = find_continue_block(loop);
  459 
  460          b->cursor = nir_after_block(prev_block);
  461          nir_ssa_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);
  462 
  463          /* Make a copy of the original ALU instruction.  Replace the sources
  464           * of the new instruction that read a phi with an undef source from
  465           * prev_block with the non-undef source of that phi.
  466           *
  467           * Insert the new instruction at the end of the continue block.
  468           */
  469          b->cursor = nir_after_block_before_jump(continue_block);
  470 
  471          nir_ssa_def *const alu_copy =
  472             clone_alu_and_replace_src_defs(b, alu, continue_srcs);
  473 
  474          /* Make a new phi node that selects a value from prev_block and the
  475           * result of the new instruction from continue_block.
  476           */
  477          nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
  478          nir_phi_src *phi_src;
  479 
  480          phi_src = ralloc(phi, nir_phi_src);
  481          phi_src->pred = prev_block;
  482          phi_src->src = nir_src_for_ssa(prev_value);
  483          exec_list_push_tail(&phi->srcs, &phi_src->node);
  484 
  485          phi_src = ralloc(phi, nir_phi_src);
  486          phi_src->pred = continue_block;
  487          phi_src->src = nir_src_for_ssa(alu_copy);
  488          exec_list_push_tail(&phi->srcs, &phi_src->node);
  489 
  490          nir_ssa_dest_init(&phi->instr, &phi->dest,
  491                            alu_copy->num_components, alu_copy->bit_size, NULL);
  492 
  493          b->cursor = nir_after_phis(header_block);
  494          nir_builder_instr_insert(b, &phi->instr);
  495 
  496          /* Modify all readers of the original ALU instruction to read the
  497           * result of the phi.
  498           */
  499          nir_foreach_use_safe(use_src, &alu->dest.dest.ssa) {
  500             nir_instr_rewrite_src(use_src->parent_instr,
  501                                   use_src,
  502                                   nir_src_for_ssa(&phi->dest.ssa));
  503          }
  504 
  505          nir_foreach_if_use_safe(use_src, &alu->dest.dest.ssa) {
  506             nir_if_rewrite_condition(use_src->parent_if,
  507                                      nir_src_for_ssa(&phi->dest.ssa));
  508          }
  509 
  510          /* Since the original ALU instruction no longer has any readers, just
  511           * remove it.
  512           */
  513          nir_instr_remove_v(&alu->instr);
  514          ralloc_free(alu);
  515 
  516          progress = true;
  517       }
  518    }
  519 
  520    return progress;
  521 }
  522 
  523 /**
  524  * Get the SSA value from a phi node that corresponds to a specific block
  525  */
  526 static nir_ssa_def *
  527 ssa_for_phi_from_block(nir_phi_instr *phi, nir_block *block)
  528 {
  529    nir_foreach_phi_src(src, phi) {
  530       if (src->pred == block)
  531          return src->src.ssa;
  532    }
  533 
  534    assert(!"Block is not a predecessor of phi.");
  535    return NULL;
  536 }
  537 
  538 /**
  539  * Simplify a bcsel whose sources are all phi nodes from the loop header block
  540  *
  541  * bcsel instructions in a loop that meet the following criteria can be
  542  * converted to phi nodes:
  543  *
  544  * - The loop has no continue instructions other than the "natural" continue
  545  *   at the bottom of the loop.
  546  *
  547  * - All of the sources of the bcsel are phi nodes in the header block of the
  548  *   loop.
  549  *
  550  * - The phi node representing the condition of the bcsel instruction chooses
  551  *   only constant values.
  552  *
   553  * The constant value from the condition will select one of the other sources
  554  * when entered from outside the loop and the remaining source when entered
  555  * from the continue block.  Since each of these sources is also a phi node in
  556  * the header block, the value of the phi node can be "evaluated."  These
  557  * evaluated phi nodes provide the sources for a new phi node.  All users of
  558  * the bcsel result are updated to use the phi node result.
  559  *
  560  * The replacement transforms loops like:
  561  *
  562  *    vec1 32 ssa_7 = undefined
  563  *    vec1 32 ssa_8 = load_const (0x00000001)
  564  *    vec1 32 ssa_9 = load_const (0x000000c8)
  565  *    vec1 32 ssa_10 = load_const (0x00000000)
  566  *    // succs: block_1
  567  *    loop {
  568  *            block block_1:
  569  *            // preds: block_0 block_4
  570  *            vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
  571  *            vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
  572  *            vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
  573  *            vec1 32 ssa_14 = b32csel ssa_12, ssa_13, ssa_11
  574  *            vec1 32 ssa_16 = ige32 ssa_14, ssa_9
  575  *            ...
  576  *            vec1 32 ssa_15 = load_const (0xffffffff)
  577  *            ...
  578  *            vec1 32 ssa_25 = iadd ssa_14, ssa_8
  579  *            // succs: block_1
  580  *    }
  581  *
  582  * into:
  583  *
  584  *    vec1 32 ssa_7 = undefined
  585  *    vec1 32 ssa_8 = load_const (0x00000001)
  586  *    vec1 32 ssa_9 = load_const (0x000000c8)
  587  *    vec1 32 ssa_10 = load_const (0x00000000)
  588  *    // succs: block_1
  589  *    loop {
  590  *            block block_1:
  591  *            // preds: block_0 block_4
  592  *            vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
  593  *            vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
  594  *            vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
   595  *            vec1 32 ssa_26 = phi block_0: ssa_1, block_4: ssa_25
  596  *            vec1 32 ssa_16 = ige32 ssa_26, ssa_9
  597  *            ...
  598  *            vec1 32 ssa_15 = load_const (0xffffffff)
  599  *            ...
  600  *            vec1 32 ssa_25 = iadd ssa_26, ssa_8
  601  *            // succs: block_1
  602  *    }
  603  *
  604  * \note
   605  * It may be possible to modify this function to not require a phi node as the
   606  * source of the bcsel that is selected when entering from outside the loop.
   607  * The only restriction is that the source must be generated outside the loop
  608  * (since it will become the source of a phi node in the header block of the
  609  * loop).
  610  */
  611 static bool
  612 opt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop)
  613 {
  614    bool progress = false;
  615    nir_block *header_block = nir_loop_first_block(loop);
  616    nir_block *const prev_block =
  617       nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
  618 
  619    /* It would be insane if this were not true */
  620    assert(_mesa_set_search(header_block->predecessors, prev_block));
  621 
  622    /* The loop must have exactly one continue block which could be a block
  623     * ending in a continue instruction or the "natural" continue from the
  624     * last block in the loop back to the top.
  625     */
  626    if (header_block->predecessors->entries != 2)
  627       return false;
  628 
   629    /* We can move any bcsel that is guaranteed to execute on every iteration
   630     * of a loop.  For now this is accomplished by only taking bcsels from the
   631     * header_block.  In the future, this could be expanded to include any
  632     * bcsel that must come before any break.
  633     *
  634     * For more details, see
  635     * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/170#note_110305
  636     */
  637    nir_foreach_instr_safe(instr, header_block) {
  638       if (instr->type != nir_instr_type_alu)
  639          continue;
  640 
  641       nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
  642       if (bcsel->op != nir_op_bcsel &&
  643           bcsel->op != nir_op_b32csel &&
  644           bcsel->op != nir_op_fcsel)
  645          continue;
  646 
  647       bool match = true;
  648       for (unsigned i = 0; i < 3; i++) {
  649          /* FINISHME: The abs and negate cases could be handled by adding
  650           * move instructions at the bottom of the continue block and more
  651           * phi nodes in the header_block.
  652           */
  653          if (!bcsel->src[i].src.is_ssa ||
  654              bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi ||
  655              bcsel->src[i].src.ssa->parent_instr->block != header_block ||
  656              bcsel->src[i].negate || bcsel->src[i].abs) {
  657             match = false;
  658             break;
  659          }
  660       }
  661 
  662       if (!match)
  663          continue;
  664 
  665       nir_phi_instr *const cond_phi =
  666          nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr);
  667 
  668       uint32_t entry_val = 0, continue_val = 0;
  669       if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
  670                                                                   prev_block,
  671                                                                   &entry_val,
  672                                                                   &continue_val))
  673          continue;
  674 
  675       /* If they both execute or both don't execute, this is a job for
  676        * nir_dead_cf, not this pass.
  677        */
  678       if ((entry_val && continue_val) || (!entry_val && !continue_val))
  679          continue;
  680 
  681       const unsigned entry_src = entry_val ? 1 : 2;
  682       const unsigned continue_src = entry_val ? 2 : 1;
  683 
  684       /* Create a new phi node that selects the value for prev_block from
  685        * the bcsel source that is selected by entry_val and the value for
  686        * continue_block from the other bcsel source.  Both sources have
  687        * already been verified to be phi nodes.
  688        */
  689       nir_block *const continue_block = find_continue_block(loop);
  690       nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
  691       nir_phi_src *phi_src;
  692 
  693       phi_src = ralloc(phi, nir_phi_src);
  694       phi_src->pred = prev_block;
  695       phi_src->src =
  696          nir_src_for_ssa(ssa_for_phi_from_block(nir_instr_as_phi(bcsel->src[entry_src].src.ssa->parent_instr),
  697                                                 prev_block));
  698       exec_list_push_tail(&phi->srcs, &phi_src->node);
  699 
  700       phi_src = ralloc(phi, nir_phi_src);
  701       phi_src->pred = continue_block;
  702       phi_src->src =
  703          nir_src_for_ssa(ssa_for_phi_from_block(nir_instr_as_phi(bcsel->src[continue_src].src.ssa->parent_instr),
  704                                                 continue_block));
  705       exec_list_push_tail(&phi->srcs, &phi_src->node);
  706 
  707       nir_ssa_dest_init(&phi->instr,
  708                         &phi->dest,
  709                         nir_dest_num_components(bcsel->dest.dest),
  710                         nir_dest_bit_size(bcsel->dest.dest),
  711                         NULL);
  712 
  713       b->cursor = nir_after_phis(header_block);
  714       nir_builder_instr_insert(b, &phi->instr);
  715 
  716       /* Modify all readers of the bcsel instruction to read the result of
  717        * the phi.
  718        */
  719       nir_foreach_use_safe(use_src, &bcsel->dest.dest.ssa) {
  720          nir_instr_rewrite_src(use_src->parent_instr,
  721                                use_src,
  722                                nir_src_for_ssa(&phi->dest.ssa));
  723       }
  724 
  725       nir_foreach_if_use_safe(use_src, &bcsel->dest.dest.ssa) {
  726          nir_if_rewrite_condition(use_src->parent_if,
  727                                   nir_src_for_ssa(&phi->dest.ssa));
  728       }
  729 
  730       /* Since the original bcsel instruction no longer has any readers,
  731        * just remove it.
  732        */
  733       nir_instr_remove_v(&bcsel->instr);
  734       ralloc_free(bcsel);
  735 
  736       progress = true;
  737    }
  738 
  739    return progress;
  740 }
  741 
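       /**
        * Is the block empty?
        *
        * A block counts as empty when it contains no instructions and is the
        * last control-flow node in its list, i.e. nothing follows it.
        */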
  742 static bool
  743 is_block_empty(nir_block *block)
  744 {
  745    return nir_cf_node_is_last(&block->cf_node) &&
  746           exec_list_is_empty(&block->instr_list);
  747 }
  748 
  749 static bool
  750 nir_block_ends_in_continue(nir_block *block)
  751 {
  752    if (exec_list_is_empty(&block->instr_list))
  753       return false;
  754 
  755    nir_instr *instr = nir_block_last_instr(block);
  756    return instr->type == nir_instr_type_jump &&
  757       nir_instr_as_jump(instr)->type == nir_jump_continue;
  758 }
  759 
  760 /**
  761  * This optimization turns:
  762  *
  763  *     loop {
  764  *        ...
  765  *        if (cond) {
  766  *           do_work_1();
  767  *           continue;
  768  *        } else {
  769  *        }
  770  *        do_work_2();
  771  *     }
  772  *
  773  * into:
  774  *
  775  *     loop {
  776  *        ...
  777  *        if (cond) {
  778  *           do_work_1();
  779  *           continue;
  780  *        } else {
  781  *           do_work_2();
  782  *        }
  783  *     }
  784  *
  785  * The continue should then be removed by nir_opt_trivial_continues() and the
  786  * loop can potentially be unrolled.
  787  *
   788  * Note: Unless the function param aggressive_last_continue is true, do_work_2()
   789  * only ever consists of blocks and nested loops. We avoid nesting other
   790  * if-statements in the branch as this can result in increased register
   791  * pressure, and in the i965 driver it causes a large amount of spilling in
   792  * shader-db. For RADV, however, nesting these if-statements allows further
   793  * continues to be removed and provides a significant FPS boost in Doom,
   794  * which is why we opted for this special bool to enable more aggressive optimisations.
  795  * TODO: The GCM pass solves most of the spilling regressions in i965, if it
  796  * is ever enabled we should consider removing the aggressive_last_continue
  797  * param.
  798  */
  799 static bool
  800 opt_if_loop_last_continue(nir_loop *loop, bool aggressive_last_continue)
  801 {
  802    nir_if *nif;
  803    bool then_ends_in_continue = false;
  804    bool else_ends_in_continue = false;
  805 
  806    /* Scan the control flow of the loop from the last to the first node
  807     * looking for an if-statement we can optimise.
  808     */
  809    nir_block *last_block = nir_loop_last_block(loop);
  810    nir_cf_node *if_node = nir_cf_node_prev(&last_block->cf_node);
  811    while (if_node) {
  812       if (if_node->type == nir_cf_node_if) {
  813          nif = nir_cf_node_as_if(if_node);
  814          nir_block *then_block = nir_if_last_then_block(nif);
  815          nir_block *else_block = nir_if_last_else_block(nif);
  816 
  817          then_ends_in_continue = nir_block_ends_in_continue(then_block);
  818          else_ends_in_continue = nir_block_ends_in_continue(else_block);
  819 
  820          /* If both branches end in a jump do nothing, this should be handled
  821           * by nir_opt_dead_cf().
  822           */
  823          if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) &&
  824              (else_ends_in_continue || nir_block_ends_in_break(else_block)))
  825             return false;
  826 
   827          /* If a continue was found, or if we are not being aggressive, stop
   828           * scanning and attempt the optimisation. */
  829          if (then_ends_in_continue || else_ends_in_continue ||
  830              !aggressive_last_continue)
  831             break;
  832       }
  833 
  834       if_node = nir_cf_node_prev(if_node);
  835    }
  836 
  837    /* If we didn't find an if to optimise return */
  838    if (!then_ends_in_continue && !else_ends_in_continue)
  839       return false;
  840 
  841    /* If there is nothing after the if-statement we bail */
  842    if (&nif->cf_node == nir_cf_node_prev(&last_block->cf_node) &&
  843        exec_list_is_empty(&last_block->instr_list))
  844       return false;
  845 
  846    /* Move the last block of the loop inside the last if-statement */
  847    nir_cf_list tmp;
  848    nir_cf_extract(&tmp, nir_after_cf_node(if_node),
  849                         nir_after_block(last_block));
  850    if (then_ends_in_continue)
  851       nir_cf_reinsert(&tmp, nir_after_cf_list(&nif->else_list));
  852    else
  853       nir_cf_reinsert(&tmp, nir_after_cf_list(&nif->then_list));
  854 
  855    /* In order to avoid running nir_lower_regs_to_ssa_impl() every time an if
  856     * opt makes progress we leave nir_opt_trivial_continues() to remove the
  857     * continue now that the end of the loop has been simplified.
  858     */
  859 
  860    return true;
  861 }
  862 
   863 /* Walk all the phis in the block immediately following the if statement and
   864  * rewrite their predecessor blocks.
  865  */
  866 static void
  867 rewrite_phi_predecessor_blocks(nir_if *nif,
  868                                nir_block *old_then_block,
  869                                nir_block *old_else_block,
  870                                nir_block *new_then_block,
  871                                nir_block *new_else_block)
  872 {
  873    nir_block *after_if_block =
  874       nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
  875 
  876    nir_foreach_instr(instr, after_if_block) {
  877       if (instr->type != nir_instr_type_phi)
  878          continue;
  879 
  880       nir_phi_instr *phi = nir_instr_as_phi(instr);
  881 
  882       foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
  883          if (src->pred == old_then_block) {
  884             src->pred = new_then_block;
  885          } else if (src->pred == old_else_block) {
  886             src->pred = new_else_block;
  887          }
  888       }
  889    }
  890 }
  891 
  892 /**
  893  * This optimization turns:
  894  *
  895  *     if (cond) {
  896  *     } else {
  897  *         do_work();
  898  *     }
  899  *
  900  * into:
  901  *
  902  *     if (!cond) {
  903  *         do_work();
  904  *     } else {
  905  *     }
  906  */
  907 static bool
  908 opt_if_simplification(nir_builder *b, nir_if *nif)
  909 {
  910    /* Only simplify if the then block is empty and the else block is not. */
  911    if (!is_block_empty(nir_if_first_then_block(nif)) ||
  912        is_block_empty(nir_if_first_else_block(nif)))
  913       return false;
  914 
  915    /* Make sure the condition is a comparison operation. */
  916    nir_instr *src_instr = nif->condition.ssa->parent_instr;
  917    if (src_instr->type != nir_instr_type_alu)
  918       return false;
  919 
  920    nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
  921    if (!nir_alu_instr_is_comparison(alu_instr))
  922       return false;
  923 
  924    /* Insert the inverted instruction and rewrite the condition. */
  925    b->cursor = nir_after_instr(&alu_instr->instr);
  926 
  927    nir_ssa_def *new_condition =
  928       nir_inot(b, &alu_instr->dest.dest.ssa);
  929 
  930    nir_if_rewrite_condition(nif, nir_src_for_ssa(new_condition));
  931 
  932    /* Grab pointers to the last then/else blocks for fixing up the phis. */
  933    nir_block *then_block = nir_if_last_then_block(nif);
  934    nir_block *else_block = nir_if_last_else_block(nif);
  935 
  936    if (nir_block_ends_in_jump(else_block)) {
  937       /* Even though this if statement has a jump on one side, we may still have
  938        * phis afterwards.  Single-source phis can be produced by loop unrolling
  939        * or dead control-flow passes and are perfectly legal.  Run a quick phi
  940        * removal on the block after the if to clean up any such phis.
  941        */
  942       nir_block *const next_block =
  943          nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
  944       nir_opt_remove_phis_block(next_block);
  945    }
  946 
  947    rewrite_phi_predecessor_blocks(nif, then_block, else_block, else_block,
  948                                   then_block);
  949 
  950    /* Finally, move the else block to the then block. */
  951    nir_cf_list tmp;
  952    nir_cf_extract(&tmp, nir_before_cf_list(&nif->else_list),
  953                         nir_after_cf_list(&nif->else_list));
  954    nir_cf_reinsert(&tmp, nir_before_cf_list(&nif->then_list));
  955 
  956    return true;
  957 }
  958 
  959 /**
  960  * This optimization simplifies potential loop terminators which then allows
  961  * other passes such as opt_if_simplification() and loop unrolling to progress
  962  * further:
  963  *
  964  *     if (cond) {
  965  *        ... then block instructions ...
  966  *     } else {
  967  *         ...
  968  *        break;
  969  *     }
  970  *
  971  * into:
  972  *
  973  *     if (cond) {
  974  *     } else {
  975  *         ...
  976  *        break;
  977  *     }
  978  *     ... then block instructions ...
  979  */
  980 static bool
  981 opt_if_loop_terminator(nir_if *nif)
  982 {
  983    nir_block *break_blk = NULL;
  984    nir_block *continue_from_blk = NULL;
  985    bool continue_from_then = true;
  986 
  987    nir_block *last_then = nir_if_last_then_block(nif);
  988    nir_block *last_else = nir_if_last_else_block(nif);
  989 
  990    if (nir_block_ends_in_break(last_then)) {
  991       break_blk = last_then;
  992       continue_from_blk = last_else;
  993       continue_from_then = false;
  994    } else if (nir_block_ends_in_break(last_else)) {
  995       break_blk = last_else;
  996       continue_from_blk = last_then;
  997    }
  998 
  999    /* Continue if the if-statement contained no jumps at all */
 1000    if (!break_blk)
 1001       return false;
 1002 
 1003    /* If the continue from block is empty then return as there is nothing to
 1004     * move.
 1005     */
 1006    nir_block *first_continue_from_blk = continue_from_then ?
 1007       nir_if_first_then_block(nif) :
 1008       nir_if_first_else_block(nif);
 1009    if (is_block_empty(first_continue_from_blk))
 1010       return false;
 1011 
 1012    if (nir_block_ends_in_jump(continue_from_blk))
 1013       return false;
 1014 
 1015    /* Even though this if statement has a jump on one side, we may still have
 1016     * phis afterwards.  Single-source phis can be produced by loop unrolling
 1017     * or dead control-flow passes and are perfectly legal.  Run a quick phi
 1018     * removal on the block after the if to clean up any such phis.
 1019     */
 1020    nir_opt_remove_phis_block(nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)));
 1021 
 1022    /* Finally, move the continue from branch after the if-statement. */
 1023    nir_cf_list tmp;
 1024    nir_cf_extract(&tmp, nir_before_block(first_continue_from_blk),
 1025                         nir_after_block(continue_from_blk));
 1026    nir_cf_reinsert(&tmp, nir_after_cf_node(&nif->cf_node));
 1027 
 1028    return true;
 1029 }
 1030 
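       /**
        * Try to determine the value the condition of an if-statement has at the
        * given cursor.
        *
        * If the cursor is dominated by the first then-block the condition must
        * be true there; if it is dominated by the first else-block it must be
        * false.  Returns false when neither holds and the value is unknown.
        */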
 1031 static bool
 1032 evaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value)
 1033 {
 1034    nir_block *use_block = nir_cursor_current_block(cursor);
 1035    if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
 1036       *value = true;
 1037       return true;
 1038    } else if (nir_block_dominates(nir_if_first_else_block(nif), use_block)) {
 1039       *value = false;
 1040       return true;
 1041    } else {
 1042       return false;
 1043    }
 1044 }
 1045 
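       /**
        * Clone an ALU instruction, replacing each source with the corresponding
        * entry of src_defs.
        *
        * The negate, abs and swizzle modifiers of the original sources are kept.
        * The clone is inserted at the builder's current cursor and its
        * destination SSA def is returned.
        */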
 1046 static nir_ssa_def *
 1047 clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
 1048                                nir_ssa_def **src_defs)
 1049 {
 1050    nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
 1051    nalu->exact = alu->exact;
 1052 
 1053    nir_ssa_dest_init(&nalu->instr, &nalu->dest.dest,
 1054                      alu->dest.dest.ssa.num_components,
 1055                      alu->dest.dest.ssa.bit_size, alu->dest.dest.ssa.name);
 1056 
 1057    nalu->dest.saturate = alu->dest.saturate;
 1058    nalu->dest.write_mask = alu->dest.write_mask;
 1059 
 1060    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
 1061       assert(alu->src[i].src.is_ssa);
 1062       nalu->src[i].src = nir_src_for_ssa(src_defs[i]);
 1063       nalu->src[i].negate = alu->src[i].negate;
 1064       nalu->src[i].abs = alu->src[i].abs;
 1065       memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
 1066              sizeof(nalu->src[i].swizzle));
 1067    }
 1068 
 1069    nir_builder_instr_insert(b, &nalu->instr);
 1070 
  1071    return &nalu->dest.dest.ssa;
 1072 }
 1073 
 1074 /*
  1075  * This propagates the evaluation of an if condition down a chain of alu
  1076  * instructions. For example, by checking the uses of some of the following
  1077  * alu instructions we can eventually replace ssa_107 with NIR_TRUE.
 1078  *
 1079  *   loop {
 1080  *      block block_1:
 1081  *      vec1 32 ssa_85 = load_const (0x00000002)
 1082  *      vec1 32 ssa_86 = ieq ssa_48, ssa_85
 1083  *      vec1 32 ssa_87 = load_const (0x00000001)
 1084  *      vec1 32 ssa_88 = ieq ssa_48, ssa_87
 1085  *      vec1 32 ssa_89 = ior ssa_86, ssa_88
 1086  *      vec1 32 ssa_90 = ieq ssa_48, ssa_0
 1087  *      vec1 32 ssa_91 = ior ssa_89, ssa_90
 1088  *      if ssa_86 {
 1089  *         block block_2:
 1090  *             ...
 1091  *            break
 1092  *      } else {
 1093  *            block block_3:
 1094  *      }
 1095  *      block block_4:
 1096  *      if ssa_88 {
 1097  *            block block_5:
 1098  *             ...
 1099  *            break
 1100  *      } else {
 1101  *            block block_6:
 1102  *      }
 1103  *      block block_7:
 1104  *      if ssa_90 {
 1105  *            block block_8:
 1106  *             ...
 1107  *            break
 1108  *      } else {
 1109  *            block block_9:
 1110  *      }
 1111  *      block block_10:
 1112  *      vec1 32 ssa_107 = inot ssa_91
 1113  *      if ssa_107 {
 1114  *            block block_11:
 1115  *            break
 1116  *      } else {
 1117  *            block block_12:
 1118  *      }
 1119  *   }
 1120  */
 1121 static bool
 1122 propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
 1123                          nir_src *alu_use, nir_alu_instr *alu,
 1124                          bool is_if_condition)
 1125 {
 1126    bool bool_value;
 1127    b->cursor = nir_before_src(alu_use, is_if_condition);
 1128    if (!evaluate_if_condition(nif, b->cursor, &bool_value))
 1129       return false;
 1130 
 1131    nir_ssa_def *def[NIR_MAX_VEC_COMPONENTS] = {0};
 1132    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
 1133       if (alu->src[i].src.ssa == use_src->ssa) {
 1134          def[i] = nir_imm_bool(b, bool_value);
 1135       } else {
 1136          def[i] = alu->src[i].src.ssa;
 1137       }
 1138    }
 1139 
 1140    nir_ssa_def *nalu = clone_alu_and_replace_src_defs(b, alu, def);
 1141 
 1142    /* Rewrite use to use new alu instruction */
 1143    nir_src new_src = nir_src_for_ssa(nalu);
 1144 
 1145    if (is_if_condition)
 1146       nir_if_rewrite_condition(alu_use->parent_if, new_src);
 1147    else
 1148       nir_instr_rewrite_src(alu_use->parent_instr, alu_use, new_src);
 1149 
 1150    return true;
 1151 }
 1152 
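       /**
        * Can a known condition value be propagated through this use of an ALU
        * instruction?
        *
        * This is limited to the boolean-friendly opcodes ior, iand, inot and
        * b2i32, plus the condition source of a bcsel.
        */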
 1153 static bool
 1154 can_propagate_through_alu(nir_src *src)
 1155 {
 1156    if (src->parent_instr->type != nir_instr_type_alu)
 1157       return false;
 1158 
 1159    nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);
 1160    switch (alu->op) {
 1161       case nir_op_ior:
 1162       case nir_op_iand:
 1163       case nir_op_inot:
 1164       case nir_op_b2i32:
 1165          return true;
 1166       case nir_op_bcsel:
 1167          return src == &alu->src[0].src;
 1168       default:
 1169          return false;
 1170    }
 1171 }
 1172 
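       /**
        * Replace a use of the if condition with a constant boolean when the use
        * is dominated by one of the branches of the if.
        *
        * If the use feeds an ALU instruction accepted by
        * can_propagate_through_alu(), also try to fold the known value into the
        * users of that instruction via propagate_condition_eval().
        */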
 1173 static bool
 1174 evaluate_condition_use(nir_builder *b, nir_if *nif, nir_src *use_src,
 1175                        bool is_if_condition)
 1176 {
 1177    bool progress = false;
 1178 
 1179    b->cursor = nir_before_src(use_src, is_if_condition);
 1180 
 1181    bool bool_value;
 1182    if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
 1183       /* Rewrite use to use const */
 1184       nir_src imm_src = nir_src_for_ssa(nir_imm_bool(b, bool_value));
 1185       if (is_if_condition)
 1186          nir_if_rewrite_condition(use_src->parent_if, imm_src);
 1187       else
 1188          nir_instr_rewrite_src(use_src->parent_instr, use_src, imm_src);
 1189 
 1190       progress = true;
 1191    }
 1192 
 1193    if (!is_if_condition && can_propagate_through_alu(use_src)) {
 1194       nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr);
 1195 
 1196       nir_foreach_use_safe(alu_use, &alu->dest.dest.ssa) {
 1197          progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu,
 1198                                               false);
 1199       }
 1200 
 1201       nir_foreach_if_use_safe(alu_use, &alu->dest.dest.ssa) {
 1202          progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu,
 1203                                               true);
 1204       }
 1205    }
 1206 
 1207    return progress;
 1208 }
 1209 
 1210 static bool
 1211 opt_if_evaluate_condition_use(nir_builder *b, nir_if *nif)
 1212 {
 1213    bool progress = false;
 1214 
 1215    /* Evaluate any uses of the if condition inside the if branches */
 1216    assert(nif->condition.is_ssa);
 1217    nir_foreach_use_safe(use_src, nif->condition.ssa) {
 1218       progress |= evaluate_condition_use(b, nif, use_src, false);
 1219    }
 1220 
 1221    nir_foreach_if_use_safe(use_src, nif->condition.ssa) {
 1222       if (use_src->parent_if != nif)
 1223          progress |= evaluate_condition_use(b, nif, use_src, true);
 1224    }
 1225 
 1226    return progress;
 1227 }
 1228 
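       /* Move the contents of the selected branch of src_if to the end of the
        * selected branch of dest_if.
        */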
 1229 static void
 1230 simple_merge_if(nir_if *dest_if, nir_if *src_if, bool dest_if_then,
 1231                 bool src_if_then)
 1232 {
 1233    /* Now merge the if branch */
 1234    nir_block *dest_blk = dest_if_then ? nir_if_last_then_block(dest_if)
 1235                                       : nir_if_last_else_block(dest_if);
 1236 
 1237    struct exec_list *list = src_if_then ? &src_if->then_list
 1238                                         : &src_if->else_list;
 1239 
 1240    nir_cf_list if_cf_list;
 1241    nir_cf_extract(&if_cf_list, nir_before_cf_list(list),
 1242                   nir_after_cf_list(list));
 1243    nir_cf_reinsert(&if_cf_list, nir_after_block(dest_blk));
 1244 }
 1245 
 1246 static bool
 1247 opt_if_merge(nir_if *nif)
 1248 {
 1249    bool progress = false;
 1250 
 1251    nir_block *next_blk = nir_cf_node_cf_tree_next(&nif->cf_node);
 1252    if (next_blk && nif->condition.is_ssa) {
 1253       nir_if *next_if = nir_block_get_following_if(next_blk);
 1254       if (next_if && next_if->condition.is_ssa) {
 1255 
 1256          /* Here we merge two consecutive ifs that have the same
 1257           * condition e.g:
 1258           *
 1259           *   if ssa_12 {
 1260           *      ...
 1261           *   } else {
 1262           *      ...
 1263           *   }
 1264           *   if ssa_12 {
 1265           *      ...
 1266           *   } else {
 1267           *      ...
 1268           *   }
 1269           *
 1270           * Note: This only merges if-statements when the block between them
 1271           * is empty. The reason we don't try to merge ifs that just have phis
  1272           * between them is because this can result in increased register
 1273           * pressure. For example when merging if ladders created by indirect
 1274           * indexing.
 1275           */
 1276          if (nif->condition.ssa == next_if->condition.ssa &&
 1277              exec_list_is_empty(&next_blk->instr_list)) {
 1278 
 1279             /* This optimization isn't made to work in this case and
 1280              * opt_if_evaluate_condition_use will optimize it later.
 1281              */
 1282             if (nir_block_ends_in_jump(nir_if_last_then_block(nif)) ||
 1283                 nir_block_ends_in_jump(nir_if_last_else_block(nif)))
 1284                return false;
 1285 
 1286             simple_merge_if(nif, next_if, true, true);
 1287             simple_merge_if(nif, next_if, false, false);
 1288 
 1289             nir_block *new_then_block = nir_if_last_then_block(nif);
 1290             nir_block *new_else_block = nir_if_last_else_block(nif);
 1291 
 1292             nir_block *old_then_block = nir_if_last_then_block(next_if);
 1293             nir_block *old_else_block = nir_if_last_else_block(next_if);
 1294 
 1295             /* Rewrite the predecessor block for any phis following the second
 1296              * if-statement.
 1297              */
 1298             rewrite_phi_predecessor_blocks(next_if, old_then_block,
 1299                                            old_else_block,
 1300                                            new_then_block,
 1301                                            new_else_block);
 1302 
 1303             /* Move phis after merged if to avoid them being deleted when we
 1304              * remove the merged if-statement.
 1305              */
 1306             nir_block *after_next_if_block =
 1307                nir_cf_node_as_block(nir_cf_node_next(&next_if->cf_node));
 1308 
 1309             nir_foreach_instr_safe(instr, after_next_if_block) {
 1310                if (instr->type != nir_instr_type_phi)
 1311                   break;
 1312 
 1313                exec_node_remove(&instr->node);
 1314                exec_list_push_tail(&next_blk->instr_list, &instr->node);
 1315                instr->block = next_blk;
 1316             }
 1317 
 1318             nir_cf_node_remove(&next_if->cf_node);
 1319 
 1320             progress = true;
 1321          }
 1322       }
 1323    }
 1324 
 1325    return progress;
 1326 }
 1327 
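       /* Recursively apply opt_if_loop_terminator(), opt_if_merge() and
        * opt_if_simplification() to every if-statement, and
        * opt_simplify_bcsel_of_phi() and opt_if_loop_last_continue() to every
        * loop, in the given CF list.
        */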
 1328 static bool
 1329 opt_if_cf_list(nir_builder *b, struct exec_list *cf_list,
 1330                bool aggressive_last_continue)
 1331 {
 1332    bool progress = false;
 1333    foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
 1334       switch (cf_node->type) {
 1335       case nir_cf_node_block:
 1336          break;
 1337 
 1338       case nir_cf_node_if: {
 1339          nir_if *nif = nir_cf_node_as_if(cf_node);
 1340          progress |= opt_if_cf_list(b, &nif->then_list,
 1341                                     aggressive_last_continue);
 1342          progress |= opt_if_cf_list(b, &nif->else_list,
 1343                                     aggressive_last_continue);
 1344          progress |= opt_if_loop_terminator(nif);
 1345          progress |= opt_if_merge(nif);
 1346          progress |= opt_if_simplification(b, nif);
 1347          break;
 1348       }
 1349 
 1350       case nir_cf_node_loop: {
 1351          nir_loop *loop = nir_cf_node_as_loop(cf_node);
 1352          progress |= opt_if_cf_list(b, &loop->body,
 1353                                     aggressive_last_continue);
 1354          progress |= opt_simplify_bcsel_of_phi(b, loop);
 1355          progress |= opt_if_loop_last_continue(loop,
 1356                                                aggressive_last_continue);
 1357          break;
 1358       }
 1359 
 1360       case nir_cf_node_function:
 1361          unreachable("Invalid cf type");
 1362       }
 1363    }
 1364 
 1365    return progress;
 1366 }
 1367 
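       /* Recursively apply opt_peel_loop_initial_if() to every loop in the given
        * CF list.
        */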
 1368 static bool
 1369 opt_peel_loop_initial_if_cf_list(struct exec_list *cf_list)
 1370 {
 1371    bool progress = false;
 1372    foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
 1373       switch (cf_node->type) {
 1374       case nir_cf_node_block:
 1375          break;
 1376 
 1377       case nir_cf_node_if: {
 1378          nir_if *nif = nir_cf_node_as_if(cf_node);
 1379          progress |= opt_peel_loop_initial_if_cf_list(&nif->then_list);
 1380          progress |= opt_peel_loop_initial_if_cf_list(&nif->else_list);
 1381          break;
 1382       }
 1383 
 1384       case nir_cf_node_loop: {
 1385          nir_loop *loop = nir_cf_node_as_loop(cf_node);
 1386          progress |= opt_peel_loop_initial_if_cf_list(&loop->body);
 1387          progress |= opt_peel_loop_initial_if(loop);
 1388          break;
 1389       }
 1390 
 1391       case nir_cf_node_function:
 1392          unreachable("Invalid cf type");
 1393       }
 1394    }
 1395 
 1396    return progress;
 1397 }
 1398 
 1399 /**
 1400  * These optimisations depend on nir_metadata_block_index and therefore must
 1401  * not do anything to cause the metadata to become invalid.
 1402  */
 1403 static bool
 1404 opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list)
 1405 {
 1406    bool progress = false;
 1407    foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
 1408       switch (cf_node->type) {
 1409       case nir_cf_node_block:
 1410          break;
 1411 
 1412       case nir_cf_node_if: {
 1413          nir_if *nif = nir_cf_node_as_if(cf_node);
 1414          progress |= opt_if_safe_cf_list(b, &nif->then_list);
 1415          progress |= opt_if_safe_cf_list(b, &nif->else_list);
 1416          progress |= opt_if_evaluate_condition_use(b, nif);
 1417          break;
 1418       }
 1419 
 1420       case nir_cf_node_loop: {
 1421          nir_loop *loop = nir_cf_node_as_loop(cf_node);
 1422          progress |= opt_if_safe_cf_list(b, &loop->body);
 1423          progress |= opt_split_alu_of_phi(b, loop);
 1424          break;
 1425       }
 1426 
 1427       case nir_cf_node_function:
 1428          unreachable("Invalid cf type");
 1429       }
 1430    }
 1431 
 1432    return progress;
 1433 }
 1434 
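       /**
        * Run the if-statement optimizations in this file over every function in
        * the shader.  Returns true if any progress was made.
        *
        * A minimal usage sketch (hypothetical call site, not part of this file):
        * a driver would typically run this inside its optimization loop together
        * with the cleanup passes mentioned above, e.g.:
        *
        *    bool progress;
        *    do {
        *       progress = false;
        *       progress |= nir_opt_if(shader, false);
        *       progress |= nir_opt_dead_cf(shader);
        *       progress |= nir_opt_trivial_continues(shader);
        *    } while (progress);
        */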
 1435 bool
 1436 nir_opt_if(nir_shader *shader, bool aggressive_last_continue)
 1437 {
 1438    bool progress = false;
 1439 
 1440    nir_foreach_function(function, shader) {
 1441       if (function->impl == NULL)
 1442          continue;
 1443 
 1444       nir_builder b;
 1445       nir_builder_init(&b, function->impl);
 1446 
 1447       nir_metadata_require(function->impl, nir_metadata_block_index |
 1448                            nir_metadata_dominance);
 1449       progress = opt_if_safe_cf_list(&b, &function->impl->body);
 1450       nir_metadata_preserve(function->impl, nir_metadata_block_index |
 1451                             nir_metadata_dominance);
 1452 
 1453       bool preserve = true;
 1454 
 1455       if (opt_if_cf_list(&b, &function->impl->body, aggressive_last_continue)) {
 1456          preserve = false;
 1457          progress = true;
 1458       }
 1459 
 1460       if (opt_peel_loop_initial_if_cf_list(&function->impl->body)) {
 1461          preserve = false;
 1462          progress = true;
 1463 
 1464          /* If that made progress, we're no longer really in SSA form.  We
 1465           * need to convert registers back into SSA defs and clean up SSA defs
 1466           * that don't dominate their uses.
 1467           */
 1468          nir_lower_regs_to_ssa_impl(function->impl);
 1469       }
 1470 
 1471       if (preserve) {
 1472          nir_metadata_preserve(function->impl, nir_metadata_none);
 1473       } else {
 1474    #ifndef NDEBUG
 1475          function->impl->valid_metadata &= ~nir_metadata_not_properly_reset;
 1476    #endif
 1477       }
 1478    }
 1479 
 1480    return progress;
 1481 }