"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/r600/sb/sb_if_conversion.cpp" (16 Sep 2020, 8571 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "sb_if_conversion.cpp" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * on the rights to use, copy, modify, merge, publish, distribute, sub
    8  * license, and/or sell copies of the Software, and to permit persons to whom
    9  * the Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
   18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
   19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
   22  *
   23  * Authors:
   24  *      Vadim Girlin
   25  */
   26 
   27 #define IFC_DEBUG 0
   28 
   29 #if IFC_DEBUG
   30 #define IFC_DUMP(q) do { q } while (0)
   31 #else
   32 #define IFC_DUMP(q)
   33 #endif
   34 
   35 #include "sb_shader.h"
   36 #include "sb_pass.h"
   37 
   38 namespace r600_sb {
   39 
   40 int if_conversion::run() {
   41 
   42     regions_vec &rv = sh.get_regions();
   43 
   44     unsigned converted = 0;
   45     for (regions_vec::reverse_iterator I = rv.rbegin(); I != rv.rend(); ) {
   46         region_node *r = *I;
   47         if (run_on(r)) {
   48             I = regions_vec::reverse_iterator(rv.erase((++I).base()));
   49             ++converted;
   50         } else
   51             ++I;
   52     }
   53     return 0;
   54 }
   55 
   56 void if_conversion::convert_kill_instructions(region_node *r,
   57                                               value *em, bool branch,
   58                                               container_node *c) {
   59     value *cnd = NULL;
   60 
   61     for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) {
   62         N = I + 1;
   63 
   64         if (!I->is_alu_inst())
   65             continue;
   66 
   67         alu_node *a = static_cast<alu_node*>(*I);
   68         unsigned flags = a->bc.op_ptr->flags;
   69 
   70         if (!(flags & AF_KILL))
   71             continue;
   72 
   73         // ignore predicated or non-const kill instructions
   74         if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const())
   75             continue;
   76 
   77         literal l0 = a->src[0]->literal_value;
   78         literal l1 = a->src[1]->literal_value;
   79 
   80         expr_handler::apply_alu_src_mod(a->bc, 0, l0);
   81         expr_handler::apply_alu_src_mod(a->bc, 1, l1);
   82 
   83         if (expr_handler::evaluate_condition(flags, l0, l1)) {
   84             // kill with constant 'true' condition, we'll convert it to the
   85             // conditional kill outside of the if-then-else block
   86 
   87             a->remove();
   88 
   89             if (!cnd) {
   90                 cnd = get_select_value_for_em(sh, em);
   91             } else {
   92                 // more than one kill with the same condition, just remove it
   93                 continue;
   94             }
   95 
   96             r->insert_before(a);
   97             a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT);
   98 
   99             a->src[0] = cnd;
  100             a->src[1] = sh.get_const_value(0);
  101             // clear modifiers
  102             memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
  103             memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
  104         } else {
  105             // kill with constant 'false' condition, this shouldn't happen
  106             // but remove it anyway
  107             a->remove();
  108         }
  109     }
  110 }
  111 
  112 bool if_conversion::check_and_convert(region_node *r) {
  113 
  114     depart_node *nd1 = static_cast<depart_node*>(r->first);
  115     if (!nd1->is_depart() || nd1->target != r)
  116         return false;
  117     if_node *nif = static_cast<if_node*>(nd1->first);
  118     if (!nif->is_if())
  119         return false;
  120     depart_node *nd2 = static_cast<depart_node*>(nif->first);
  121     if (!nd2->is_depart() || nd2->target != r)
  122         return false;
  123 
  124     value* &em = nif->cond;
  125 
  126     node_stats s;
  127 
  128     r->collect_stats(s);
  129 
  130     IFC_DUMP(
  131         sblog << "ifcvt: region " << r->region_id << " :\n";
  132         s.dump();
  133     );
  134 
  135     if (s.region_count || s.fetch_count || s.alu_kill_count ||
  136                        s.if_count != 1 || s.repeat_count || s.uses_ar)
  137         return false;
  138 
  139     unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count;
  140 
  141     // if_conversion allows to eliminate JUMP-ALU_POP_AFTER or
  142     // JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions
  143     // are eliminated. According to the docs, cost of CF instruction is
  144     // equal to ~40 ALU VLIW instructions (instruction groups),
  145     // so we have eliminated cost equal to ~120 groups in total.
  146     // Let's also assume that we have avg 3 ALU instructions per group,
  147     // This means that potential eliminated cost is about 360 single alu inst.
  148     // On the other hand, we are speculatively executing conditional code now,
  149     // so we are increasing the cost in some cases. In the worst case, we'll
  150     // have to execute real_alu_count additional alu instructions instead of
  151     // jumping over them. Let's assume for now that average added cost is
  152     //
  153     // (0.9 * real_alu_count)
  154     //
  155     // So we should perform if_conversion if
  156     //
  157     // (0.9 * real_alu_count) < 360, or
  158     //
  159     // real_alu_count < 400
  160     //
  161     // So if real_alu_count is more than 400, than we think that if_conversion
  162     // doesn't make sense.
  163 
  164     // FIXME: We can use more precise heuristic, taking into account sizes of
  165     // the branches and their probability instead of total size.
  166     // Another way to improve this is to consider the number of the groups
  167     // instead of the number of instructions (taking into account actual VLIW
  168     // packing).
  169     // (Currently we don't know anything about packing at this stage, but
  170     // probably we can make some more precise estimations anyway)
  171 
  172     if (real_alu_count > 400)
  173         return false;
  174 
  175     IFC_DUMP( sblog << "if_cvt: processing...\n"; );
  176 
  177     value *select = get_select_value_for_em(sh, em);
  178 
  179     if (!select)
  180         return false;
  181 
  182     for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) {
  183         node *n = *I;
  184 
  185         alu_node *ns = convert_phi(select, n);
  186 
  187         if (ns)
  188             r->insert_after(ns);
  189     }
  190 
  191     nd2->expand();
  192     nif->expand();
  193     nd1->expand();
  194     r->expand();
  195 
  196     return true;
  197 }
  198 
  199 bool if_conversion::run_on(region_node* r) {
  200 
  201     if (r->dep_count() != 2 || r->rep_count() != 1)
  202         return false;
  203 
  204     depart_node *nd1 = static_cast<depart_node*>(r->first);
  205     if (!nd1->is_depart())
  206         return false;
  207     if_node *nif = static_cast<if_node*>(nd1->first);
  208     if (!nif->is_if())
  209         return false;
  210     depart_node *nd2 = static_cast<depart_node*>(nif->first);
  211     if (!nd2->is_depart())
  212         return false;
  213 
  214     value* &em = nif->cond;
  215 
  216     convert_kill_instructions(r, em, true, nd2);
  217     convert_kill_instructions(r, em, false, nd1);
  218 
  219     if (check_and_convert(r))
  220         return true;
  221 
  222     if (nd2->empty() && nif->next) {
  223         // empty true branch, non-empty false branch
  224         // we'll invert it to get rid of 'else'
  225 
  226         assert(em && em->def);
  227 
  228         alu_node *predset = static_cast<alu_node*>(em->def);
  229 
  230         // create clone of PREDSET instruction with inverted condition.
  231         // PREDSET has 3 dst operands in our IR (value written to gpr,
  232         // predicate value and exec mask value), we'll split it such that
  233         // new PREDSET will define exec mask value only, and two others will
  234         // be defined in the old PREDSET (if they are not used then DCE will
  235         // simply remove old PREDSET).
  236 
  237         alu_node *newpredset = sh.clone(predset);
  238         predset->insert_after(newpredset);
  239 
  240         predset->dst[2] = NULL;
  241 
  242         newpredset->dst[0] = NULL;
  243         newpredset->dst[1] = NULL;
  244 
  245         em->def = newpredset;
  246 
  247         unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK;
  248         unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK;
  249         bool swapargs = false;
  250 
  251         cc = invert_setcc_condition(cc, swapargs);
  252 
  253         if (swapargs) {
  254             std::swap(newpredset->src[0], newpredset->src[1]);
  255             std::swap(newpredset->bc.src[0], newpredset->bc.src[1]);
  256         }
  257 
  258         unsigned newopcode = get_predsetcc_op(cc, cmptype);
  259         newpredset->bc.set_op(newopcode);
  260 
  261         // move the code from the 'false' branch ('else') to the 'true' branch
  262         nd2->move(nif->next, NULL);
  263 
  264         // swap phi operands
  265         for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E;
  266                 ++I) {
  267             node *p = *I;
  268             assert(p->src.size() == 2);
  269             std::swap(p->src[0], p->src[1]);
  270         }
  271     }
  272 
  273     return false;
  274 }
  275 
  276 alu_node* if_conversion::convert_phi(value* select, node* phi) {
  277     assert(phi->dst.size() == 1 || phi->src.size() == 2);
  278 
  279     value *d = phi->dst[0];
  280     value *v1 = phi->src[0];
  281     value *v2 = phi->src[1];
  282 
  283     assert(d);
  284 
  285     if (!d->is_any_gpr())
  286         return NULL;
  287 
  288     if (v1->is_undef()) {
  289         if (v2->is_undef()) {
  290             return NULL;
  291         } else {
  292             return sh.create_mov(d, v2);
  293         }
  294     } else if (v2->is_undef())
  295         return sh.create_mov(d, v1);
  296 
  297     alu_node* n = sh.create_alu();
  298 
  299     n->bc.set_op(ALU_OP3_CNDE_INT);
  300     n->dst.push_back(d);
  301     n->src.push_back(select);
  302     n->src.push_back(v1);
  303     n->src.push_back(v2);
  304 
  305     return n;
  306 }
  307 
  308 } // namespace r600_sb