"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/compiler/glsl/loop_unroll.cpp" (16 Sep 2020, 19481 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "loop_unroll.cpp" see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright © 2010 Intel Corporation
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   21  * DEALINGS IN THE SOFTWARE.
   22  */
   23 
   24 #include "compiler/glsl_types.h"
   25 #include "loop_analysis.h"
   26 #include "ir_hierarchical_visitor.h"
   27 
   28 #include "main/mtypes.h"
   29 
   30 namespace {
   31 
   32 class loop_unroll_visitor : public ir_hierarchical_visitor {
   33 public:
   34    loop_unroll_visitor(loop_state *state,
   35                        const struct gl_shader_compiler_options *options)
   36    {
   37       this->state = state;
   38       this->progress = false;
   39       this->options = options;
   40    }
   41 
   42    virtual ir_visitor_status visit_leave(ir_loop *ir);
   43    void simple_unroll(ir_loop *ir, int iterations);
   44    void complex_unroll(ir_loop *ir, int iterations,
   45                        bool continue_from_then_branch,
   46                        bool limiting_term_first,
   47                        bool lt_continue_from_then_branch);
   48    void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest);
   49 
   50    loop_state *state;
   51 
   52    bool progress;
   53    const struct gl_shader_compiler_options *options;
   54 };
   55 
   56 } /* anonymous namespace */
   57 
   58 class loop_unroll_count : public ir_hierarchical_visitor {
   59 public:
   60    int nodes;
   61    bool unsupported_variable_indexing;
   62    bool array_indexed_by_induction_var_with_exact_iterations;
   63    /* If there are nested loops, the node count will be inaccurate. */
   64    bool nested_loop;
   65 
   66    loop_unroll_count(exec_list *list, loop_variable_state *ls,
   67                      const struct gl_shader_compiler_options *options)
   68       : ls(ls), options(options)
   69    {
   70       nodes = 0;
   71       nested_loop = false;
   72       unsupported_variable_indexing = false;
   73       array_indexed_by_induction_var_with_exact_iterations = false;
   74 
   75       run(list);
   76    }
   77 
   78    virtual ir_visitor_status visit_enter(ir_assignment *)
   79    {
   80       nodes++;
   81       return visit_continue;
   82    }
   83 
   84    virtual ir_visitor_status visit_enter(ir_expression *)
   85    {
   86       nodes++;
   87       return visit_continue;
   88    }
   89 
   90    virtual ir_visitor_status visit_enter(ir_loop *)
   91    {
   92       nested_loop = true;
   93       return visit_continue;
   94    }
   95 
   96    virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
   97    {
   98       /* Force unroll in case of dynamic indexing with sampler arrays
   99        * when EmitNoIndirectSampler is set.
  100        */
  101       if (options->EmitNoIndirectSampler) {
  102          if ((ir->array->type->is_array() &&
  103               ir->array->type->contains_sampler()) &&
  104              !ir->array_index->constant_expression_value(ralloc_parent(ir))) {
  105             unsupported_variable_indexing = true;
  106             return visit_continue;
  107          }
  108       }
  109 
  110       /* Check for arrays variably-indexed by a loop induction variable.
  111        * Unrolling the loop may convert that access into constant-indexing.
  112        *
  113        * Many drivers don't support particular kinds of variable indexing,
  114        * and have to resort to using lower_variable_index_to_cond_assign to
  115        * handle it.  This results in huge amounts of horrible code, so we'd
  116        * like to avoid that if possible.  Here, we just note that it will
  117        * happen.
  118        */
  119       if ((ir->array->type->is_array() || ir->array->type->is_matrix()) &&
  120           !ir->array_index->as_constant()) {
  121          ir_variable *array = ir->array->variable_referenced();
  122          loop_variable *lv = ls->get(ir->array_index->variable_referenced());
  123          if (array && lv && lv->is_induction_var()) {
  124             /* If an array is indexed by a loop induction variable, and the
  125              * array size is exactly the number of loop iterations, this is
  126              * probably a simple for-loop trying to access each element in
  127              * turn; the application may expect it to be unrolled.
  128              */
  129             if (int(array->type->length) == ls->limiting_terminator->iterations)
  130                array_indexed_by_induction_var_with_exact_iterations = true;
  131 
  132             switch (array->data.mode) {
  133             case ir_var_auto:
  134             case ir_var_temporary:
  135             case ir_var_const_in:
  136             case ir_var_function_in:
  137             case ir_var_function_out:
  138             case ir_var_function_inout:
  139                if (options->EmitNoIndirectTemp)
  140                   unsupported_variable_indexing = true;
  141                break;
  142             case ir_var_uniform:
  143             case ir_var_shader_storage:
  144                if (options->EmitNoIndirectUniform)
  145                   unsupported_variable_indexing = true;
  146                break;
  147             case ir_var_shader_in:
  148                if (options->EmitNoIndirectInput)
  149                   unsupported_variable_indexing = true;
  150                break;
  151             case ir_var_shader_out:
  152                if (options->EmitNoIndirectOutput)
  153                   unsupported_variable_indexing = true;
  154                break;
  155             }
  156          }
  157       }
  158       return visit_continue;
  159    }
  160 
  161 private:
  162    loop_variable_state *ls;
  163    const struct gl_shader_compiler_options *options;
  164 };
  165 
  166 
  167 /**
  168  * Unroll a loop which does not contain any jumps.  For example, if the input
  169  * is:
  170  *
  171  *     (loop (...) ...instrs...)
  172  *
  173  * And the iteration count is 3, the output will be:
  174  *
  175  *     ...instrs... ...instrs... ...instrs...
  176  */
  177 void
  178 loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations)
  179 {
  180    void *const mem_ctx = ralloc_parent(ir);
  181    loop_variable_state *const ls = this->state->get(ir);
  182 
  183    /* If there are no terminators, then the loop iteration count must be 1.
  184     * This is the 'do { } while (false);' case.
  185     */
  186    assert(!ls->terminators.is_empty() || iterations == 1);
  187 
  188    ir_instruction *first_ir =
  189       (ir_instruction *) ir->body_instructions.get_head();
  190 
  191    if (!first_ir) {
  192       /* The loop is empty remove it and return */
  193       ir->remove();
  194       return;
  195    }
  196 
  197    ir_if *limit_if = NULL;
  198    bool exit_branch_has_instructions = false;
  199    if (ls->limiting_terminator) {
  200       limit_if = ls->limiting_terminator->ir;
  201       ir_instruction *ir_if_last = (ir_instruction *)
  202          limit_if->then_instructions.get_tail();
  203 
  204       if (is_break(ir_if_last)) {
  205          if (ir_if_last != limit_if->then_instructions.get_head())
  206             exit_branch_has_instructions = true;
  207 
  208          splice_post_if_instructions(limit_if, &limit_if->else_instructions);
  209          ir_if_last->remove();
  210       } else {
  211          ir_if_last = (ir_instruction *)
  212             limit_if->else_instructions.get_tail();
  213          assert(is_break(ir_if_last));
  214 
  215          if (ir_if_last != limit_if->else_instructions.get_head())
  216             exit_branch_has_instructions = true;
  217 
  218          splice_post_if_instructions(limit_if, &limit_if->then_instructions);
  219          ir_if_last->remove();
  220       }
  221    }
  222 
  223    /* Because 'iterations' is the number of times we pass over the *entire*
  224     * loop body before hitting the first break, we need to bump the number of
  225     * iterations if the limiting terminator is not the first instruction in
  226     * the loop, or it the exit branch contains instructions. This ensures we
  227     * execute any instructions before the terminator or in its exit branch.
  228     */
  229    if (!ls->terminators.is_empty() &&
  230        (limit_if != first_ir->as_if() || exit_branch_has_instructions))
  231       iterations++;
  232 
  233    for (int i = 0; i < iterations; i++) {
  234       exec_list copy_list;
  235 
  236       copy_list.make_empty();
  237       clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
  238 
  239       ir->insert_before(&copy_list);
  240    }
  241 
  242    /* The loop has been replaced by the unrolled copies.  Remove the original
  243     * loop from the IR sequence.
  244     */
  245    ir->remove();
  246 
  247    this->progress = true;
  248 }
  249 
  250 
  251 /**
  252  * Unroll a loop whose last statement is an ir_if.  If \c
  253  * continue_from_then_branch is true, the loop is repeated only when the
  254  * "then" branch of the if is taken; otherwise it is repeated only when the
  255  * "else" branch of the if is taken.
  256  *
  257  * For example, if the input is:
  258  *
  259  *     (loop (...)
  260  *      ...body...
  261  *      (if (cond)
  262  *          (...then_instrs...)
  263  *        (...else_instrs...)))
  264  *
  265  * And the iteration count is 3, and \c continue_from_then_branch is true,
  266  * then the output will be:
  267  *
  268  *     ...body...
  269  *     (if (cond)
  270  *         (...then_instrs...
  271  *          ...body...
  272  *          (if (cond)
  273  *              (...then_instrs...
  274  *               ...body...
  275  *               (if (cond)
  276  *                   (...then_instrs...)
  277  *                 (...else_instrs...)))
  278  *            (...else_instrs...)))
  279  *       (...else_instrs))
  280  */
  281 void
  282 loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations,
  283                                     bool second_term_then_continue,
  284                                     bool extra_iteration_required,
  285                                     bool first_term_then_continue)
  286 {
  287    void *const mem_ctx = ralloc_parent(ir);
  288    ir_instruction *ir_to_replace = ir;
  289 
  290    /* Because 'iterations' is the number of times we pass over the *entire*
  291     * loop body before hitting the first break, we need to bump the number of
  292     * iterations if the limiting terminator is not the first instruction in
  293     * the loop, or it the exit branch contains instructions. This ensures we
  294     * execute any instructions before the terminator or in its exit branch.
  295     */
  296    if (extra_iteration_required)
  297       iterations++;
  298 
  299    for (int i = 0; i < iterations; i++) {
  300       exec_list copy_list;
  301 
  302       copy_list.make_empty();
  303       clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
  304 
  305       ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if();
  306       assert(ir_if != NULL);
  307 
  308       exec_list *const first_list = first_term_then_continue
  309          ? &ir_if->then_instructions : &ir_if->else_instructions;
  310       ir_if = ((ir_instruction *) first_list->get_tail())->as_if();
  311 
  312       ir_to_replace->insert_before(&copy_list);
  313       ir_to_replace->remove();
  314 
  315       /* placeholder that will be removed in the next iteration */
  316       ir_to_replace =
  317          new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue);
  318 
  319       exec_list *const second_term_continue_list = second_term_then_continue
  320          ? &ir_if->then_instructions : &ir_if->else_instructions;
  321 
  322       second_term_continue_list->push_tail(ir_to_replace);
  323    }
  324 
  325    ir_to_replace->remove();
  326 
  327    this->progress = true;
  328 }
  329 
  330 
  331 /**
  332  * Move all of the instructions which follow \c ir_if to the end of
  333  * \c splice_dest.
  334  *
  335  * For example, in the code snippet:
  336  *
  337  *     (if (cond)
  338  *         (...then_instructions...
  339  *          break)
  340  *       (...else_instructions...))
  341  *     ...post_if_instructions...
  342  *
  343  * If \c ir_if points to the "if" instruction, and \c splice_dest points to
  344  * (...else_instructions...), the code snippet is transformed into:
  345  *
  346  *     (if (cond)
  347  *         (...then_instructions...
  348  *          break)
  349  *       (...else_instructions...
  350  *        ...post_if_instructions...))
  351  */
  352 void
  353 loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if,
  354                                                  exec_list *splice_dest)
  355 {
  356    while (!ir_if->get_next()->is_tail_sentinel()) {
  357       ir_instruction *move_ir = (ir_instruction *) ir_if->get_next();
  358 
  359       move_ir->remove();
  360       splice_dest->push_tail(move_ir);
  361    }
  362 }
  363 
  364 static bool
  365 exit_branch_has_instructions(ir_if *term_if, bool lt_then_continue)
  366 {
  367    if (lt_then_continue) {
  368       if (term_if->else_instructions.get_head() ==
  369           term_if->else_instructions.get_tail())
  370          return false;
  371    } else {
  372       if (term_if->then_instructions.get_head() ==
  373           term_if->then_instructions.get_tail())
  374          return false;
  375    }
  376 
  377    return true;
  378 }
  379 
  380 ir_visitor_status
  381 loop_unroll_visitor::visit_leave(ir_loop *ir)
  382 {
  383    loop_variable_state *const ls = this->state->get(ir);
  384 
  385    /* If we've entered a loop that hasn't been analyzed, something really,
  386     * really bad has happened.
  387     */
  388    if (ls == NULL) {
  389       assert(ls != NULL);
  390       return visit_continue;
  391    }
  392 
  393    /* Limiting terminator may have iteration count of zero,
  394     * this is a valid case because the loop may break during
  395     * the first iteration.
  396     */
  397 
  398    /* Remove the conditional break statements associated with all terminators
  399     * that are associated with a fixed iteration count, except for the one
  400     * associated with the limiting terminator--that one needs to stay, since
  401     * it terminates the loop.  Exception: if the loop still has a normative
  402     * bound, then that terminates the loop, so we don't even need the limiting
  403     * terminator.
  404     */
  405    foreach_in_list_safe(loop_terminator, t, &ls->terminators) {
  406       if (t->iterations < 0)
  407          continue;
  408 
  409       exec_list *branch_instructions;
  410       if (t != ls->limiting_terminator) {
  411          ir_instruction *ir_if_last = (ir_instruction *)
  412             t->ir->then_instructions.get_tail();
  413          if (is_break(ir_if_last)) {
  414             branch_instructions = &t->ir->else_instructions;
  415          } else {
  416             branch_instructions = &t->ir->then_instructions;
  417             assert(is_break((ir_instruction *)
  418                             t->ir->else_instructions.get_tail()));
  419          }
  420 
  421          exec_list copy_list;
  422          copy_list.make_empty();
  423          clone_ir_list(ir, &copy_list, branch_instructions);
  424 
  425          t->ir->insert_before(&copy_list);
  426          t->ir->remove();
  427 
  428          assert(ls->num_loop_jumps > 0);
  429          ls->num_loop_jumps--;
  430 
  431          /* Also remove it from the terminator list */
  432          t->remove();
  433 
  434          this->progress = true;
  435       }
  436    }
  437 
  438    if (ls->limiting_terminator == NULL) {
  439       ir_instruction *last_ir =
  440          (ir_instruction *) ir->body_instructions.get_tail();
  441 
  442       /* If a loop has no induction variable and the last instruction is
  443        * a break, unroll the loop with a count of 1.  This is the classic
  444        *
  445        *    do {
  446        *        // ...
  447        *    } while (false)
  448        *
  449        * that is used to wrap multi-line macros.
  450        *
  451        * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to
  452        * be at least num_loop_jumps instructions in the loop.
  453        */
  454       if (ls->num_loop_jumps == 1 && is_break(last_ir)) {
  455          last_ir->remove();
  456 
  457          simple_unroll(ir, 1);
  458       }
  459 
  460       /* Don't try to unroll loops where the number of iterations is not known
  461        * at compile-time.
  462        */
  463       return visit_continue;
  464    }
  465 
  466    int iterations = ls->limiting_terminator->iterations;
  467 
  468    const int max_iterations = options->MaxUnrollIterations;
  469 
  470    /* Don't try to unroll loops that have zillions of iterations either.
  471     */
  472    if (iterations > max_iterations)
  473       return visit_continue;
  474 
  475    /* Don't try to unroll nested loops and loops with a huge body.
  476     */
  477    loop_unroll_count count(&ir->body_instructions, ls, options);
  478 
  479    bool loop_too_large =
  480       count.nested_loop || count.nodes * iterations > max_iterations * 5;
  481 
  482    if (loop_too_large && !count.unsupported_variable_indexing &&
  483        !count.array_indexed_by_induction_var_with_exact_iterations)
  484       return visit_continue;
  485 
  486    /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
  487     * We'll be removing the limiting terminator before we unroll.
  488     */
  489    assert(ls->num_loop_jumps > 0);
  490    unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;
  491 
  492    if (predicted_num_loop_jumps > 1)
  493       return visit_continue;
  494 
  495    if (predicted_num_loop_jumps == 0) {
  496       simple_unroll(ir, iterations);
  497       return visit_continue;
  498    }
  499 
  500    ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail();
  501    assert(last_ir != NULL);
  502 
  503    if (is_break(last_ir)) {
  504       /* If the only loop-jump is a break at the end of the loop, the loop
  505        * will execute exactly once.  Remove the break and use the simple
  506        * unroller with an iteration count of 1.
  507        */
  508       last_ir->remove();
  509 
  510       simple_unroll(ir, 1);
  511       return visit_continue;
  512    }
  513 
  514    /* Complex unrolling can only handle two terminators. One with an unknown
  515     * iteration count and one with a known iteration count. We have already
  516     * made sure we have a known iteration count above and removed any
  517     * unreachable terminators with a known count. Here we make sure there
  518     * isn't any additional unknown terminators, or any other jumps nested
  519     * inside futher ifs.
  520     */
  521    if (ls->num_loop_jumps != 2 || ls->terminators.length() != 2)
  522       return visit_continue;
  523 
  524    ir_instruction *first_ir =
  525       (ir_instruction *) ir->body_instructions.get_head();
  526 
  527    unsigned term_count = 0;
  528    bool first_term_then_continue = false;
  529    foreach_in_list(loop_terminator, t, &ls->terminators) {
  530       ir_if *ir_if = t->ir->as_if();
  531       assert(ir_if != NULL);
  532 
  533       ir_instruction *ir_if_last =
  534          (ir_instruction *) ir_if->then_instructions.get_tail();
  535 
  536       if (is_break(ir_if_last)) {
  537          splice_post_if_instructions(ir_if, &ir_if->else_instructions);
  538          ir_if_last->remove();
  539          if (term_count == 1) {
  540             bool ebi =
  541                exit_branch_has_instructions(ls->limiting_terminator->ir,
  542                                             first_term_then_continue);
  543             complex_unroll(ir, iterations, false,
  544                            first_ir->as_if() != ls->limiting_terminator->ir ||
  545                            ebi,
  546                            first_term_then_continue);
  547             return visit_continue;
  548          }
  549       } else {
  550          ir_if_last =
  551             (ir_instruction *) ir_if->else_instructions.get_tail();
  552 
  553          assert(is_break(ir_if_last));
  554          if (is_break(ir_if_last)) {
  555             splice_post_if_instructions(ir_if, &ir_if->then_instructions);
  556             ir_if_last->remove();
  557             if (term_count == 1) {
  558                bool ebi =
  559                   exit_branch_has_instructions(ls->limiting_terminator->ir,
  560                                                first_term_then_continue);
  561                complex_unroll(ir, iterations, true,
  562                               first_ir->as_if() != ls->limiting_terminator->ir ||
  563                               ebi,
  564                               first_term_then_continue);
  565                return visit_continue;
  566             } else {
  567                first_term_then_continue = true;
  568             }
  569          }
  570       }
  571 
  572       term_count++;
  573    }
  574 
  575    /* Did not find the break statement.  It must be in a complex if-nesting,
  576     * so don't try to unroll.
  577     */
  578    return visit_continue;
  579 }
  580 
  581 
  582 bool
  583 unroll_loops(exec_list *instructions, loop_state *ls,
  584              const struct gl_shader_compiler_options *options)
  585 {
  586    loop_unroll_visitor v(ls, options);
  587 
  588    v.run(instructions);
  589 
  590    return v.progress;
  591 }