"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp" (16 Sep 2020, 12631 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "sb_ra_coalesce.cpp" see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * on the rights to use, copy, modify, merge, publish, distribute, sub
    8  * license, and/or sell copies of the Software, and to permit persons to whom
    9  * the Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
   18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
   19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
   22  *
   23  * Authors:
   24  *      Vadim Girlin
   25  */
   26 
   27 #define RA_DEBUG 0
   28 
   29 #if RA_DEBUG
   30 #define RA_DUMP(q) do { q } while (0)
   31 #else
   32 #define RA_DUMP(q)
   33 #endif
   34 
   35 #include "sb_shader.h"
   36 #include "sb_pass.h"
   37 
   38 namespace r600_sb {
   39 
   40 int ra_coalesce::run() {
   41     return sh.coal.run();
   42 }
   43 
   44 void coalescer::add_edge(value* a, value* b, unsigned cost) {
   45     assert(a->is_sgpr() && b->is_sgpr());
   46     edges.insert(new ra_edge(a,b, cost));
   47 }
   48 
   49 void coalescer::create_chunk(value *v) {
   50 
   51     assert(v->is_sgpr());
   52 
   53     ra_chunk *c = new ra_chunk();
   54 
   55     c->values.push_back(v);
   56 
   57     if (v->is_chan_pinned())
   58         c->flags |= RCF_PIN_CHAN;
   59     if (v->is_reg_pinned()) {
   60         c->flags |= RCF_PIN_REG;
   61     }
   62 
   63     c->pin = v->pin_gpr;
   64 
   65     RA_DUMP(
   66         sblog << "create_chunk: ";
   67         dump_chunk(c);
   68     );
   69 
   70     all_chunks.push_back(c);
   71     v->chunk = c;
   72 
   73 }
   74 
   75 void coalescer::unify_chunks(ra_edge *e) {
   76     ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
   77 
   78     RA_DUMP(
   79         sblog << "unify_chunks: ";
   80         dump_chunk(c1);
   81         dump_chunk(c2);
   82     );
   83 
   84     if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
   85         c1->flags |= RCF_PIN_CHAN;
   86         c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
   87     }
   88 
   89     if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
   90         c1->flags |= RCF_PIN_REG;
   91         c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
   92     }
   93 
   94     c1->values.reserve(c1->values.size() + c2->values.size());
   95 
   96     for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
   97             ++I) {
   98         (*I)->chunk = c1;
   99         c1->values.push_back(*I);
  100     }
  101 
  102     chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
  103     assert(F != all_chunks.end());
  104 
  105     all_chunks.erase(F);
  106 
  107     c1->cost += c2->cost + e->cost;
  108     delete c2;
  109 }
  110 
  111 bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
  112     unsigned pin_flags = (c1->flags & c2->flags) &
  113             (RCF_PIN_CHAN | RCF_PIN_REG);
  114 
  115     if ((pin_flags & RCF_PIN_CHAN) &&
  116             c1->pin.chan() != c2->pin.chan())
  117         return true;
  118 
  119     if ((pin_flags & RCF_PIN_REG) &&
  120             c1->pin.sel() != c2->pin.sel())
  121         return true;
  122 
  123     for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
  124             ++I) {
  125         value *v1 = *I;
  126 
  127         for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
  128                 ++I) {
  129             value *v2 = *I;
  130 
  131             if (!v1->v_equal(v2) && v1->interferences.contains(v2))
  132                 return true;
  133         }
  134     }
  135     return false;
  136 }
  137 
  138 void coalescer::build_chunks() {
  139 
  140     for (edge_queue::iterator I = edges.begin(), E = edges.end();
  141             I != E; ++I) {
  142 
  143         ra_edge *e = *I;
  144 
  145         if (!e->a->chunk)
  146             create_chunk(e->a);
  147 
  148         if (!e->b->chunk)
  149             create_chunk(e->b);
  150 
  151         ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
  152 
  153         if (c1 == c2) {
  154             c1->cost += e->cost;
  155         } else if (!chunks_interference(c1, c2))
  156             unify_chunks(e);
  157     }
  158 }
  159 
  160 ra_constraint* coalescer::create_constraint(constraint_kind kind) {
  161     ra_constraint *c = new ra_constraint(kind);
  162     all_constraints.push_back(c);
  163     return c;
  164 }
  165 
  166 void coalescer::dump_edges() {
  167     sblog << "######## affinity edges\n";
  168 
  169     for (edge_queue::iterator I = edges.begin(), E = edges.end();
  170             I != E; ++I) {
  171         ra_edge* e = *I;
  172         sblog << "  ra_edge ";
  173         dump::dump_val(e->a);
  174         sblog << " <-> ";
  175         dump::dump_val(e->b);
  176         sblog << "   cost = " << e->cost << "\n";
  177     }
  178 }
  179 
  180 void coalescer::dump_chunks() {
  181     sblog << "######## chunks\n";
  182 
  183     for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
  184             I != E; ++I) {
  185         ra_chunk* c = *I;
  186         dump_chunk(c);
  187     }
  188 }
  189 
  190 
  191 void coalescer::dump_constraint_queue() {
  192     sblog << "######## constraints\n";
  193 
  194     for (constraint_queue::iterator I = constraints.begin(),
  195             E = constraints.end(); I != E; ++I) {
  196         ra_constraint* c = *I;
  197         dump_constraint(c);
  198     }
  199 }
  200 
  201 void coalescer::dump_chunk(ra_chunk* c) {
  202     sblog << "  ra_chunk cost = " << c->cost << "  :  ";
  203     dump::dump_vec(c->values);
  204 
  205     if (c->flags & RCF_PIN_REG)
  206         sblog << "   REG = " << c->pin.sel();
  207 
  208     if (c->flags & RCF_PIN_CHAN)
  209         sblog << "   CHAN = " << c->pin.chan();
  210 
  211     sblog << (c->flags & RCF_GLOBAL ? "  GLOBAL" : "");
  212 
  213     sblog << "\n";
  214 }
  215 
  216 void coalescer::dump_constraint(ra_constraint* c) {
  217     sblog << "  ra_constraint: ";
  218     switch (c->kind) {
  219         case CK_PACKED_BS: sblog << "PACKED_BS"; break;
  220         case CK_PHI: sblog << "PHI"; break;
  221         case CK_SAME_REG: sblog << "SAME_REG"; break;
  222         default: sblog << "UNKNOWN_KIND"; assert(0); break;
  223     }
  224 
  225     sblog << "  cost = " << c->cost << "  : ";
  226     dump::dump_vec(c->values);
  227 
  228     sblog << "\n";
  229 }
  230 
  231 void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
  232 
  233     for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
  234             ++I) {
  235         value *v = *I;
  236         s.add_set(v->interferences);
  237     }
  238     s.remove_vec(c->values);
  239 }
  240 
  241 void coalescer::build_chunk_queue() {
  242     for (chunk_vec::iterator I = all_chunks.begin(),
  243             E = all_chunks.end(); I != E; ++I) {
  244         ra_chunk *c = *I;
  245 
  246         if (!c->is_fixed())
  247             chunks.insert(c);
  248     }
  249 }
  250 
  251 void coalescer::build_constraint_queue() {
  252     for (constraint_vec::iterator I = all_constraints.begin(),
  253             E = all_constraints.end(); I != E; ++I) {
  254         ra_constraint *c = *I;
  255         unsigned cost = 0;
  256 
  257         if (c->values.empty() || !c->values.front()->is_sgpr())
  258             continue;
  259 
  260         if (c->kind != CK_SAME_REG)
  261             continue;
  262 
  263         for (vvec::iterator I = c->values.begin(), E = c->values.end();
  264                 I != E; ++I) {
  265             value *v = *I;
  266             if (!v->chunk)
  267                 create_chunk(v);
  268             else
  269                 cost += v->chunk->cost;
  270         }
  271         c->cost = cost;
  272         constraints.insert(c);
  273     }
  274 }
  275 
  276 void coalescer::color_chunks() {
  277 
  278     for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
  279             I != E; ++I) {
  280         ra_chunk *c = *I;
  281         if (c->is_fixed() || c->values.size() == 1)
  282             continue;
  283 
  284         sb_bitset rb;
  285         val_set interf;
  286 
  287         get_chunk_interferences(c, interf);
  288 
  289         RA_DUMP(
  290             sblog << "color_chunks: ";
  291             dump_chunk(c);
  292             sblog << "\n interferences: ";
  293             dump::dump_set(sh,interf);
  294             sblog << "\n";
  295         );
  296 
  297         init_reg_bitset(rb, interf);
  298 
  299         unsigned pass = c->is_reg_pinned() ? 0 : 1;
  300 
  301         unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
  302         unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;
  303 
  304         unsigned color = 0;
  305 
  306         while (pass < 2) {
  307 
  308             unsigned rs, re;
  309 
  310             if (pass == 0) {
  311                 rs = c->pin.sel();
  312                 re = rs + 1;
  313             } else {
  314                 rs = 0;
  315                 re = sh.num_nontemp_gpr();
  316             }
  317 
  318             for (unsigned reg = rs; reg < re; ++reg) {
  319                 for (unsigned chan = cs; chan < ce; ++chan) {
  320                     unsigned bit = sel_chan(reg, chan);
  321                     if (bit >= rb.size() || !rb.get(bit)) {
  322                         color = bit;
  323                         break;
  324                     }
  325                 }
  326                 if (color)
  327                     break;
  328             }
  329 
  330             if (color)
  331                 break;
  332 
  333             ++pass;
  334         }
  335 
  336         assert(color);
  337         color_chunk(c, color);
  338     }
  339 }
  340 
  341 void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
  342 
  343     for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
  344         value *v = *I;
  345 
  346         if (!v->is_any_gpr())
  347             continue;
  348 
  349         unsigned gpr = v->get_final_gpr();
  350         if (!gpr)
  351             continue;
  352 
  353         if (gpr) {
  354             if (gpr >= bs.size())
  355                 bs.resize(gpr + 64);
  356             bs.set(gpr, 1);
  357         }
  358     }
  359 }
  360 
  361 void coalescer::color_chunk(ra_chunk *c, sel_chan color) {
  362 
  363     vvec vv = c->values;
  364 
  365     for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
  366             ++I) {
  367         value *v = *I;
  368 
  369         if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
  370             detach_value(v);
  371             continue;
  372         }
  373 
  374         if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
  375             detach_value(v);
  376             continue;
  377         }
  378 
  379         v->gpr = color;
  380 
  381         if (v->constraint && v->constraint->kind == CK_PHI)
  382             v->fix();
  383 
  384 
  385         RA_DUMP(
  386             sblog << " assigned " << color << " to ";
  387             dump::dump_val(v);
  388             sblog << "\n";
  389         );
  390     }
  391 
  392     c->pin = color;
  393 
  394     if (c->is_reg_pinned()) {
  395         c->fix();
  396     }
  397 }
  398 
  399 coalescer::~coalescer() {
  400 
  401     // FIXME use pool allocator ??
  402 
  403     for (constraint_vec::iterator I = all_constraints.begin(),
  404             E = all_constraints.end(); I != E; ++I) {
  405         delete (*I);
  406     }
  407 
  408     for (chunk_vec::iterator I = all_chunks.begin(),
  409             E = all_chunks.end(); I != E; ++I) {
  410         delete (*I);
  411     }
  412 
  413     for (edge_queue::iterator I = edges.begin(), E = edges.end();
  414             I != E; ++I) {
  415         delete (*I);
  416     }
  417 }
  418 
  419 int coalescer::run() {
  420     int r;
  421 
  422     RA_DUMP( dump_edges(); );
  423 
  424     build_chunks();
  425     RA_DUMP( dump_chunks(); );
  426 
  427     build_constraint_queue();
  428     RA_DUMP( dump_constraint_queue(); );
  429 
  430     if ((r = color_constraints()))
  431         return r;
  432 
  433     build_chunk_queue();
  434     color_chunks();
  435 
  436     return 0;
  437 }
  438 
  439 void coalescer::color_phi_constraint(ra_constraint* c) {
  440 }
  441 
  442 ra_chunk* coalescer::detach_value(value *v) {
  443 
  444     vvec::iterator F = std::find(v->chunk->values.begin(),
  445                                  v->chunk->values.end(), v);
  446 
  447     assert(F != v->chunk->values.end());
  448     v->chunk->values.erase(F);
  449     create_chunk(v);
  450 
  451     if (v->is_reg_pinned()) {
  452         v->chunk->fix();
  453     }
  454 
  455     RA_DUMP(
  456         sblog << "           detached : ";
  457         dump_chunk(v->chunk);
  458     );
  459 
  460     return v->chunk;
  461 
  462 }
  463 
  464 int coalescer::color_reg_constraint(ra_constraint *c) {
  465     unsigned k, cnt = c->values.size();
  466     vvec & cv = c->values;
  467 
  468     ra_chunk *ch[4];
  469     unsigned swz[4] = {0, 1, 2, 3};
  470     val_set interf[4];
  471     sb_bitset rb[4];
  472 
  473     bool reg_pinned = false;
  474     unsigned pin_reg = ~0;
  475 
  476     unsigned chan_mask = 0;
  477 
  478     k = 0;
  479     for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
  480         value *v = *I;
  481 
  482         if (!v->chunk)
  483             create_chunk(v);
  484 
  485         ch[k] = v->chunk;
  486 
  487         if (v->chunk->is_chan_pinned()) {
  488             unsigned chan = 1 << v->chunk->pin.chan();
  489 
  490             if (chan & chan_mask) { // channel already in use
  491                 ch[k] = detach_value(v);
  492                 assert(!ch[k]->is_chan_pinned());
  493             } else {
  494                 chan_mask |= chan;
  495             }
  496         }
  497 
  498         if (v->chunk->is_reg_pinned()) {
  499             if (!reg_pinned) {
  500                 reg_pinned = true;
  501                 pin_reg = v->chunk->pin.sel();
  502             }
  503         }
  504 
  505         get_chunk_interferences(ch[k], interf[k]);
  506         init_reg_bitset(rb[k], interf[k]);
  507     }
  508 
  509     unsigned start_reg, end_reg;
  510 
  511     start_reg = 0;
  512     end_reg = sh.num_nontemp_gpr();
  513 
  514     unsigned min_reg = end_reg;
  515     unsigned min_swz[4];
  516     unsigned i, pass = reg_pinned ? 0 : 1;
  517 
  518     bool done = false;
  519 
  520     while (pass < 2) {
  521 
  522         unsigned rs, re;
  523 
  524         if (pass == 0) {
  525             re = pin_reg + 1;
  526             rs = pin_reg;
  527         } else {
  528             re = end_reg;
  529             rs = start_reg;
  530         }
  531 
  532         min_reg = re;
  533 
  534         // cycle on swizzle combinations
  535         do {
  536             for (i = 0; i < cnt; ++i) {
  537                 if (ch[i]->flags & RCF_PIN_CHAN)
  538                     if (ch[i]->pin.chan() != swz[i])
  539                         break;
  540             }
  541             if (i != cnt)
  542                 continue;
  543 
  544             // looking for minimal reg number such that the constrained chunks
  545             // may be colored with the current swizzle combination
  546             for (unsigned reg = rs; reg < min_reg; ++reg) {
  547                 for (i = 0; i < cnt; ++i) {
  548                     unsigned bit = sel_chan(reg, swz[i]);
  549                     if (bit < rb[i].size() && rb[i].get(bit))
  550                         break;
  551                 }
  552                 if (i == cnt) {
  553                     done = true;
  554                     min_reg = reg;
  555                     std::copy(swz, swz + 4, min_swz);
  556                     break;
  557                 }
  558             }
  559 
  560             if (pass == 0 && done)
  561                 break;
  562 
  563         } while (std::next_permutation(swz, swz + 4));
  564 
  565         if (!done && pass) {
  566             sblog << "sb: ra_coalesce - out of registers\n";
  567             return -1;
  568         }
  569 
  570         if (pass == 0 && done)
  571             break;
  572 
  573         ++pass;
  574     };
  575 
  576     assert(done);
  577 
  578     RA_DUMP(
  579     sblog << "min reg = " << min_reg << "   min_swz = "
  580             << min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
  581     );
  582 
  583     for (i = 0; i < cnt; ++i) {
  584         sel_chan color(min_reg, min_swz[i]);
  585         ra_chunk *cc = ch[i];
  586 
  587         if (cc->is_fixed()) {
  588             if (cc->pin != color)
  589                 cc = detach_value(cv[i]);
  590             else
  591                 continue;
  592         }
  593 
  594         color_chunk(cc, color);
  595         cc->fix();
  596         cc->set_prealloc();
  597     }
  598 
  599     return 0;
  600 }
  601 
  602 int coalescer::color_constraints() {
  603     int r;
  604 
  605     for (constraint_queue::iterator I = constraints.begin(),
  606             E = constraints.end(); I != E; ++I) {
  607 
  608         ra_constraint *c = *I;
  609 
  610         RA_DUMP(
  611             sblog << "color_constraints: ";
  612             dump_constraint(c);
  613         );
  614 
  615         if (c->kind == CK_SAME_REG) {
  616             if ((r = color_reg_constraint(c)))
  617                 return r;
  618         } else if (c->kind == CK_PHI)
  619             color_phi_constraint(c);
  620     }
  621     return 0;
  622 }
  623 
  624 } // namespace r600_sb