"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/r600/sb/sb_sched.cpp" (16 Sep 2020, 45885 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "sb_sched.cpp", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * on the rights to use, copy, modify, merge, publish, distribute, sub
    8  * license, and/or sell copies of the Software, and to permit persons to whom
    9  * the Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
   18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
   19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
   22  *
   23  * Authors:
   24  *      Vadim Girlin
   25  */
   26 
   // Compile-time switch for the tracing statements wrapped in PSC_DUMP():
   // set to a non-zero value to compile them in.
   #define PSC_DEBUG 0

   // PSC_DUMP(a) expands to the statement(s) `a` wrapped in do { } while (0)
   // when PSC_DEBUG is non-zero, and to nothing otherwise.
   #if PSC_DEBUG
   #define PSC_DUMP(a) do { a } while (0)
   #else
   #define PSC_DUMP(a)
   #endif
   34 
   35 #include "sb_bc.h"
   36 #include "sb_shader.h"
   37 #include "sb_pass.h"
   38 #include "sb_sched.h"
   39 #include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
   40 
   41 namespace r600_sb {
   42 
   43 rp_kcache_tracker::rp_kcache_tracker(shader &sh) : rp(), uc(),
   44         // FIXME: for now we'll use "two const pairs" limit for r600, same as
   45         // for other chips, otherwise additional check in alu_group_tracker is
   46         // required to make sure that all 4 consts in the group fit into 2
   47         // kcache sets
   48         sel_count(2) {}
   49 
   50 bool rp_kcache_tracker::try_reserve(sel_chan r) {
   51     unsigned sel = kc_sel(r);
   52 
   53     for (unsigned i = 0; i < sel_count; ++i) {
   54         if (rp[i] == 0) {
   55             rp[i] = sel;
   56             ++uc[i];
   57             return true;
   58         }
   59         if (rp[i] == sel) {
   60             ++uc[i];
   61             return true;
   62         }
   63     }
   64     return false;
   65 }
   66 
   67 bool rp_kcache_tracker::try_reserve(node* n) {
   68     bool need_unreserve = false;
   69     vvec::iterator I(n->src.begin()), E(n->src.end());
   70 
   71     for (; I != E; ++I) {
   72         value *v = *I;
   73         if (v->is_kcache()) {
   74             if (!try_reserve(v->select))
   75                 break;
   76             else
   77                 need_unreserve = true;
   78         }
   79     }
   80     if (I == E)
   81         return true;
   82 
   83     if (need_unreserve && I != n->src.begin()) {
   84         do {
   85             --I;
   86             value *v =*I;
   87             if (v->is_kcache())
   88                 unreserve(v->select);
   89         } while (I != n->src.begin());
   90     }
   91     return false;
   92 }
   93 
   94 inline
   95 void rp_kcache_tracker::unreserve(node* n) {
   96     vvec::iterator I(n->src.begin()), E(n->src.end());
   97     for (; I != E; ++I) {
   98         value *v = *I;
   99         if (v->is_kcache())
  100             unreserve(v->select);
  101     }
  102 }
  103 
  104 void rp_kcache_tracker::unreserve(sel_chan r) {
  105     unsigned sel = kc_sel(r);
  106 
  107     for (unsigned i = 0; i < sel_count; ++i)
  108         if (rp[i] == sel) {
  109             if (--uc[i] == 0)
  110                 rp[i] = 0;
  111             return;
  112         }
  113     assert(0);
  114     return;
  115 }
  116 
  117 bool literal_tracker::try_reserve(alu_node* n) {
  118     bool need_unreserve = false;
  119 
  120     vvec::iterator I(n->src.begin()), E(n->src.end());
  121 
  122     for (; I != E; ++I) {
  123         value *v = *I;
  124         if (v->is_literal()) {
  125             if (!try_reserve(v->literal_value))
  126                 break;
  127             else
  128                 need_unreserve = true;
  129         }
  130     }
  131     if (I == E)
  132         return true;
  133 
  134     if (need_unreserve && I != n->src.begin()) {
  135         do {
  136             --I;
  137             value *v =*I;
  138             if (v->is_literal())
  139                 unreserve(v->literal_value);
  140         } while (I != n->src.begin());
  141     }
  142     return false;
  143 }
  144 
  145 void literal_tracker::unreserve(alu_node* n) {
  146     unsigned nsrc = n->bc.op_ptr->src_count, i;
  147 
  148     for (i = 0; i < nsrc; ++i) {
  149         value *v = n->src[i];
  150         if (v->is_literal())
  151             unreserve(v->literal_value);
  152     }
  153 }
  154 
  155 bool literal_tracker::try_reserve(literal l) {
  156 
  157     PSC_DUMP( sblog << "literal reserve " << l.u << "  " << l.f << "\n"; );
  158 
  159     for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
  160         if (lt[i] == 0) {
  161             lt[i] = l;
  162             ++uc[i];
  163             PSC_DUMP( sblog << "  reserved new uc = " << uc[i] << "\n"; );
  164             return true;
  165         } else if (lt[i] == l) {
  166             ++uc[i];
  167             PSC_DUMP( sblog << "  reserved uc = " << uc[i] << "\n"; );
  168             return true;
  169         }
  170     }
  171     PSC_DUMP( sblog << "  failed to reserve literal\n"; );
  172     return false;
  173 }
  174 
  175 void literal_tracker::unreserve(literal l) {
  176 
  177     PSC_DUMP( sblog << "literal unreserve " << l.u << "  " << l.f << "\n"; );
  178 
  179     for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
  180         if (lt[i] == l) {
  181             if (--uc[i] == 0)
  182                 lt[i] = 0;
  183             return;
  184         }
  185     }
  186     assert(0);
  187     return;
  188 }
  189 
  190 static inline unsigned bs_cycle_vector(unsigned bs, unsigned src) {
  191     static const unsigned swz[VEC_NUM][3] = {
  192         {0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0}
  193     };
  194     assert(bs < VEC_NUM && src < 3);
  195     return swz[bs][src];
  196 }
  197 
  198 static inline unsigned bs_cycle_scalar(unsigned bs, unsigned src) {
  199     static const unsigned swz[SCL_NUM][3] = {
  200         {2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1}
  201     };
  202 
  203     if (bs >= SCL_NUM || src >= 3) {
  204         // this prevents gcc warning "array subscript is above array bounds"
  205         // AFAICS we should never hit this path
  206         abort();
  207     }
  208     return swz[bs][src];
  209 }
  210 
  211 static inline unsigned bs_cycle(bool trans, unsigned bs, unsigned src) {
  212     return trans ? bs_cycle_scalar(bs, src) : bs_cycle_vector(bs, src);
  213 }
  214 
  215 inline
  216 bool rp_gpr_tracker::try_reserve(unsigned cycle, unsigned sel, unsigned chan) {
  217     ++sel;
  218     if (rp[cycle][chan] == 0) {
  219         rp[cycle][chan] = sel;
  220         ++uc[cycle][chan];
  221         return true;
  222     } else if (rp[cycle][chan] == sel) {
  223         ++uc[cycle][chan];
  224         return true;
  225     }
  226     return false;
  227 }
  228 
  229 inline
  230 void rp_gpr_tracker::unreserve(alu_node* n) {
  231     unsigned nsrc = n->bc.op_ptr->src_count, i;
  232     unsigned trans = n->bc.slot == SLOT_TRANS;
  233     unsigned bs = n->bc.bank_swizzle;
  234     unsigned opt = !trans
  235             && n->bc.src[0].sel == n->bc.src[1].sel
  236             && n->bc.src[0].chan == n->bc.src[1].chan;
  237 
  238     for (i = 0; i < nsrc; ++i) {
  239         value *v = n->src[i];
  240         if (v->is_readonly() || v->is_undef())
  241             continue;
  242         if (i == 1 && opt)
  243             continue;
  244         unsigned cycle = bs_cycle(trans, bs, i);
  245         unreserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan);
  246     }
  247 }
  248 
  249 inline
  250 void rp_gpr_tracker::unreserve(unsigned cycle, unsigned sel, unsigned chan) {
  251     ++sel;
  252     assert(rp[cycle][chan] == sel && uc[cycle][chan]);
  253     if (--uc[cycle][chan] == 0)
  254         rp[cycle][chan] = 0;
  255 }
  256 
  257 inline
  258 bool rp_gpr_tracker::try_reserve(alu_node* n) {
  259     unsigned nsrc = n->bc.op_ptr->src_count, i;
  260     unsigned trans = n->bc.slot == SLOT_TRANS;
  261     unsigned bs = n->bc.bank_swizzle;
  262     unsigned opt = !trans && nsrc >= 2 &&
  263             n->src[0] == n->src[1];
  264 
  265     bool need_unreserve = false;
  266     unsigned const_count = 0, min_gpr_cycle = 3;
  267 
  268     for (i = 0; i < nsrc; ++i) {
  269         value *v = n->src[i];
  270         if (v->is_readonly() || v->is_undef()) {
  271             const_count++;
  272             if (trans && const_count == 3)
  273                 break;
  274         } else {
  275             if (i == 1 && opt)
  276                 continue;
  277 
  278             unsigned cycle = bs_cycle(trans, bs, i);
  279 
  280             if (trans && cycle < min_gpr_cycle)
  281                 min_gpr_cycle = cycle;
  282 
  283             if (const_count && cycle < const_count && trans)
  284                 break;
  285 
  286             if (!try_reserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan))
  287                 break;
  288             else
  289                 need_unreserve = true;
  290         }
  291     }
  292 
  293     if ((i == nsrc) && (min_gpr_cycle + 1 > const_count))
  294         return true;
  295 
  296     if (need_unreserve && i--) {
  297         do {
  298             value *v = n->src[i];
  299             if (!v->is_readonly() && !v->is_undef()) {
  300             if (i == 1 && opt)
  301                 continue;
  302             unreserve(bs_cycle(trans, bs, i), n->bc.src[i].sel,
  303                       n->bc.src[i].chan);
  304             }
  305         } while (i--);
  306     }
  307     return false;
  308 }
  309 
  310 alu_group_tracker::alu_group_tracker(shader &sh)
  311     : sh(sh), kc(sh),
  312       gpr(), lt(), slots(),
  313       max_slots(sh.get_ctx().is_cayman() ? 4 : 5),
  314       has_mova(), uses_ar(), has_predset(), has_kill(),
  315       updates_exec_mask(), consumes_lds_oqa(), produces_lds_oqa(), chan_count(), interp_param(), next_id() {
  316 
  317     available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
  318 }
  319 
  320 inline
  321 sel_chan alu_group_tracker::get_value_id(value* v) {
  322     unsigned &id = vmap[v];
  323     if (!id)
  324         id = ++next_id;
  325     return sel_chan(id, v->get_final_chan());
  326 }
  327 
  328 inline
  329 void alu_group_tracker::assign_slot(unsigned slot, alu_node* n) {
  330     update_flags(n);
  331     slots[slot] = n;
  332     available_slots &= ~(1 << slot);
  333 
  334     unsigned param = n->interp_param();
  335 
  336     if (param) {
  337         assert(!interp_param || interp_param == param);
  338         interp_param = param;
  339     }
  340 }
  341 
  342 
  343 void alu_group_tracker::discard_all_slots(container_node &removed_nodes) {
  344     PSC_DUMP( sblog << "agt::discard_all_slots\n"; );
  345     discard_slots(~available_slots & ((1 << max_slots) - 1), removed_nodes);
  346 }
  347 
  // Removes the instructions occupying the slots selected by `slot_mask` from
  // the group and appends them to `removed_nodes`.
  //
  // Packed operations are handled first: a packed op that overlaps the mask is
  // removed as a whole and all of its slots are freed. The remaining selected
  // slots are then freed one by one. Finally the reservations for whatever is
  // left in the group are rebuilt with reinit().
  void alu_group_tracker::discard_slots(unsigned slot_mask,
                                      container_node &removed_nodes) {

      PSC_DUMP(
          sblog << "discard_slots : packed_ops : "
              << (unsigned)packed_ops.size() << "\n";
      );

      // N always holds the iterator to continue with, so the current element
      // can be erased inside the loop body.
      for (node_vec::iterator N, I = packed_ops.begin();
              I != packed_ops.end(); I = N) {
          N = I; ++N;

          alu_packed_node *n = static_cast<alu_packed_node*>(*I);
          unsigned pslots = n->get_slot_mask();

          PSC_DUMP(
              sblog << "discard_slots : packed slot_mask : " << pslots << "\n";
          );

          if (pslots & slot_mask) {

              PSC_DUMP(
                  sblog << "discard_slots : discarding packed...\n";
              );

              removed_nodes.push_back(n);
              // These slots are dealt with here; keep them out of the per-slot
              // loop below.
              slot_mask &= ~pslots;
              N = packed_ops.erase(I);
              available_slots |= pslots;
              for (unsigned k = 0; k < max_slots; ++k) {
                  if (pslots & (1 << k))
                      slots[k] = NULL;
              }
          }
      }

      // Free the remaining selected slots individually.
      for (unsigned slot = 0; slot < max_slots; ++slot) {
          unsigned slot_bit = 1 << slot;

          if (slot_mask & slot_bit) {
              assert(!(available_slots & slot_bit));
              assert(slots[slot]);

              assert(!(slots[slot]->bc.slot_flags & AF_4SLOT));

              PSC_DUMP(
                  sblog << "discarding slot " << slot << " : ";
                  dump::dump_op(slots[slot]);
                  sblog << "\n";
              );

              removed_nodes.push_back(slots[slot]);
              slots[slot] = NULL;
              available_slots |= slot_bit;
          }
      }

      // If the instruction left in slot 4 has the AF_V slot flag and the slot
      // matching its destination channel is now empty, move it there.
      alu_node *t = slots[4];
      if (t && (t->bc.slot_flags & AF_V)) {
          unsigned chan = t->bc.dst_chan;
          if (!slots[chan]) {
              PSC_DUMP(
                  sblog << "moving ";
                  dump::dump_op(t);
                  sblog << " from trans slot to free slot " << chan << "\n";
              );

              slots[chan] = t;
              slots[4] = NULL;
              t->bc.slot = chan;
          }
      }

      // Rebuild all reservations from the instructions still in `slots`.
      reinit();
  }
  423 
  424 alu_group_node* alu_group_tracker::emit() {
  425 
  426     alu_group_node *g = sh.create_alu_group();
  427 
  428     lt.init_group_literals(g);
  429 
  430     for (unsigned i = 0; i < max_slots; ++i) {
  431         alu_node *n = slots[i];
  432         if (n) {
  433             g->push_back(n);
  434         }
  435     }
  436     return g;
  437 }
  438 
  // Tries to add ALU instruction `n` to the group being built, in the slot
  // given by n->bc.slot.
  //
  // Returns true and records the node (via assign_slot) when the slot is free,
  // none of the flag checks below rejects the node, and the literal, kcache
  // and GPR read-port reservations all succeed. Returns false otherwise.
  //
  // Side effects that are visible even when false is returned:
  // n->bc.src[i].sel/chan may have been overwritten with group-local value
  // ids, and n->bc.bank_swizzle may have been modified.
  //
  // NOTE(review): the early returns inside and after the source loop do not
  // undo the vmap/next_id updates made by get_value_id() - confirm this is
  // intended.
  bool alu_group_tracker::try_reserve(alu_node* n) {
      unsigned nsrc = n->bc.op_ptr->src_count;
      unsigned slot = n->bc.slot;
      // Slot index 4 is handled as the trans slot (presumably equal to
      // SLOT_TRANS, which is used further down - TODO confirm).
      bool trans = slot == 4;

      // Slot already taken.
      if (slots[slot])
          return false;

      unsigned flags = n->bc.op_ptr->flags;

      unsigned param = n->interp_param();

      // Only one interp param per group.
      if (param && interp_param && interp_param != param)
          return false;

      // Flag combinations that may not share a group.
      if ((flags & AF_KILL) && has_predset)
          return false;
      if ((flags & AF_ANY_PRED) && (has_kill || has_predset))
          return false;
      if ((flags & AF_MOVA) && (has_mova || uses_ar))
          return false;

      if (n->uses_ar() && has_mova)
          return false;

      // Nothing is accepted once consumes_lds_oqa is set for the group, and a
      // node for which consumes_lds_oq() is true is only accepted while all
      // slots are still free.
      if (consumes_lds_oqa)
          return false;
      if (n->consumes_lds_oq() && available_slots != (sh.get_ctx().has_trans ? 0x1F : 0x0F))
          return false;
      // Replace sel/chan of each GPR or relative source by its group-local id.
      for (unsigned i = 0; i < nsrc; ++i) {

          unsigned last_id = next_id;

          value *v = n->src[i];
          if (!v->is_any_gpr() && !v->is_rel())
              continue;
          sel_chan vid = get_value_id(n->src[i]);

          // A value that just received a new id is rejected when the counter
          // for its channel already reads 3.
          if (vid > last_id && chan_count[vid.chan()] == 3) {
              return false;
          }

          n->bc.src[i].sel = vid.sel();
          n->bc.src[i].chan = vid.chan();
      }

      if (!lt.try_reserve(n))
          return false;

      if (!kc.try_reserve(n)) {
          lt.unreserve(n);
          return false;
      }

      unsigned fbs = n->forced_bank_swizzle();

      n->bc.bank_swizzle = 0;

      // A forced bank swizzle on a non-trans slot is always VEC_210.
      if (!trans && fbs)
          n->bc.bank_swizzle = VEC_210;

      // Fast path 1: the initial swizzle fits next to the existing
      // reservations.
      if (gpr.try_reserve(n)) {
          assign_slot(slot, n);
          return true;
      }

      // Fast path 2: try every swizzle for the new node only, leaving the
      // other slots untouched.
      if (!fbs) {
          unsigned swz_num = trans ? SCL_NUM : VEC_NUM;
          for (unsigned bs = 0; bs < swz_num; ++bs) {
              n->bc.bank_swizzle = bs;
              if (gpr.try_reserve(n)) {
                  assign_slot(slot, n);
                  return true;
              }
          }
      }

      // Slow path: drop all GPR reservations and search for a combination of
      // bank swizzles over all slots of the group, including the new node.
      gpr.reset();

      slots[slot] = n;
      unsigned forced_swz_slots = 0;
      int first_slot = ~0, first_nf = ~0, last_slot = ~0;
      // Bank swizzles before the search, used to restore state on failure.
      unsigned save_bs[5];

      // Reserve the slots with a forced swizzle right away and reset the
      // others to swizzle 0; first_nf is the first slot without forced
      // swizzle.
      for (unsigned i = 0; i < max_slots; ++i) {
          alu_node *a = slots[i];
          if (a) {
              if (first_slot == ~0)
                  first_slot = i;
              last_slot = i;
              save_bs[i] = a->bc.bank_swizzle;
              if (a->forced_bank_swizzle()) {
                  assert(i != SLOT_TRANS);
                  forced_swz_slots |= (1 << i);
                  a->bc.bank_swizzle = VEC_210;
                  if (!gpr.try_reserve(a))
                      assert(!"internal reservation error");
              } else {
                  if (first_nf == ~0)
                      first_nf = i;

                  a->bc.bank_swizzle = 0;
              }
          }
      }

      // Every occupied slot has a forced swizzle and was reserved above.
      if (first_nf == ~0) {
          assign_slot(slot, n);
          return true;
      }

      assert(first_slot != ~0 && last_slot != ~0);

      // silence "array subscript is above array bounds" with gcc 4.8
      if (last_slot >= 5)
          abort();

      int i = first_nf;
      alu_node *a = slots[i];
      bool backtrack = false;

      // Backtracking search: reserve slot i with its current swizzle and move
      // on to the next occupied slot; when that fails (or when coming back
      // from a later slot), advance the swizzle of slot i; when its swizzles
      // are exhausted, reset it to 0 and step back to the previous occupied
      // slot, releasing that slot's reservation.
      while (1) {

          PSC_DUMP(
              sblog << " bs: trying s" << i << " bs:" << a->bc.bank_swizzle
                  << " bt:" << backtrack << "\n";
          );

          if (!backtrack && gpr.try_reserve(a)) {
              PSC_DUMP(
                  sblog << " bs: reserved s" << i << " bs:" << a->bc.bank_swizzle
                      << "\n";
              );

              // Advance to the next occupied slot; leaving the loop with
              // i == last_slot + 1 means every slot was reserved.
              while ((++i <= last_slot) && !slots[i]);
              if (i <= last_slot)
                  a = slots[i];
              else
                  break;
          } else {
              bool itrans = i == SLOT_TRANS;
              unsigned max_swz = itrans ? SCL_221 : VEC_210;

              if (a->bc.bank_swizzle < max_swz) {
                  ++a->bc.bank_swizzle;

                  PSC_DUMP(
                      sblog << " bs: inc s" << i << " bs:" << a->bc.bank_swizzle
                          << "\n";
                  );

              } else {

                  // Swizzles of this slot are exhausted: step back to the
                  // previous occupied slot at or after first_nf; stepping
                  // below first_nf ends the search.
                  a->bc.bank_swizzle = 0;
                  while ((--i >= first_nf) && !slots[i]);
                  if (i < first_nf)
                      break;
                  a = slots[i];
                  PSC_DUMP(
                      sblog << " bs: unreserve s" << i << " bs:" << a->bc.bank_swizzle
                          << "\n";
                  );
                  gpr.unreserve(a);
                  backtrack = true;

                  continue;
              }
          }
          backtrack = false;
      }

      // The search ran past the last occupied slot: success.
      if (i == last_slot + 1) {
          assign_slot(slot, n);
          return true;
      }

      // reservation failed, restore previous state
      slots[slot] = NULL;
      gpr.reset();
      for (unsigned i = 0; i < max_slots; ++i) {
          alu_node *a = slots[i];
          if (a) {
              a->bc.bank_swizzle = save_bs[i];
              bool b = gpr.try_reserve(a);
              assert(b);
          }
      }

      kc.unreserve(n);
      lt.unreserve(n);
      return false;
  }
  631 
  632 bool alu_group_tracker::try_reserve(alu_packed_node* p) {
  633     bool need_unreserve = false;
  634     node_iterator I(p->begin()), E(p->end());
  635 
  636     for (; I != E; ++I) {
  637         alu_node *n = static_cast<alu_node*>(*I);
  638         if (!try_reserve(n))
  639             break;
  640         else
  641             need_unreserve = true;
  642     }
  643 
  644     if (I == E)  {
  645         packed_ops.push_back(p);
  646         return true;
  647     }
  648 
  649     if (need_unreserve) {
  650         while (--I != E) {
  651             alu_node *n = static_cast<alu_node*>(*I);
  652             slots[n->bc.slot] = NULL;
  653         }
  654         reinit();
  655     }
  656     return false;
  657 }
  658 
  659 void alu_group_tracker::reinit() {
  660     alu_node * s[5];
  661     memcpy(s, slots, sizeof(slots));
  662 
  663     reset(true);
  664 
  665     for (int i = max_slots - 1; i >= 0; --i) {
  666         if (s[i] && !try_reserve(s[i])) {
  667             sblog << "alu_group_tracker: reinit error on slot " << i <<  "\n";
  668             for (unsigned i = 0; i < max_slots; ++i) {
  669                 sblog << "  slot " << i << " : ";
  670                 if (s[i])
  671                     dump::dump_op(s[i]);
  672 
  673                 sblog << "\n";
  674             }
  675             assert(!"alu_group_tracker: reinit error");
  676         }
  677     }
  678 }
  679 
  680 void alu_group_tracker::reset(bool keep_packed) {
  681     kc.reset();
  682     gpr.reset();
  683     lt.reset();
  684     memset(slots, 0, sizeof(slots));
  685     vmap.clear();
  686     next_id = 0;
  687     produces_lds_oqa = 0;
  688     consumes_lds_oqa = 0;
  689     has_mova = false;
  690     uses_ar = false;
  691     has_predset = false;
  692     has_kill = false;
  693     updates_exec_mask = false;
  694     available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
  695     interp_param = 0;
  696 
  697     chan_count[0] = 0;
  698     chan_count[1] = 0;
  699     chan_count[2] = 0;
  700     chan_count[3] = 0;
  701 
  702     if (!keep_packed)
  703         packed_ops.clear();
  704 }
  705 
  706 void alu_group_tracker::update_flags(alu_node* n) {
  707     unsigned flags = n->bc.op_ptr->flags;
  708     has_kill |= (flags & AF_KILL);
  709     has_mova |= (flags & AF_MOVA);
  710     has_predset |= (flags & AF_ANY_PRED);
  711     uses_ar |= n->uses_ar();
  712     consumes_lds_oqa |= n->consumes_lds_oq();
  713     produces_lds_oqa |= n->produces_lds_oq();
  714     if (flags & AF_ANY_PRED) {
  715         if (n->dst[2] != NULL)
  716             updates_exec_mask = true;
  717     }
  718 }
  719 
  720 int post_scheduler::run() {
  721     return run_on(sh.root) ? 0 : 1;
  722 }
  723 
  724 bool post_scheduler::run_on(container_node* n) {
  725     int r = true;
  726     for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
  727         if (I->is_container()) {
  728             if (I->subtype == NST_BB) {
  729                 bb_node* bb = static_cast<bb_node*>(*I);
  730                 r = schedule_bb(bb);
  731             } else {
  732                 r = run_on(static_cast<container_node*>(*I));
  733             }
  734             if (!r)
  735                 break;
  736         }
  737     }
  738     return r;
  739 }
  740 
  741 void post_scheduler::init_uc_val(container_node *c, value *v) {
  742     node *d = v->any_def();
  743     if (d && d->parent == c)
  744         ++ucm[d];
  745 }
  746 
  747 void post_scheduler::init_uc_vec(container_node *c, vvec &vv, bool src) {
  748     for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
  749         value *v = *I;
  750         if (!v || v->is_readonly())
  751             continue;
  752 
  753         if (v->is_rel()) {
  754             init_uc_val(c, v->rel);
  755             init_uc_vec(c, v->muse, true);
  756         } if (src) {
  757             init_uc_val(c, v);
  758         }
  759     }
  760 }
  761 
  762 unsigned post_scheduler::init_ucm(container_node *c, node *n) {
  763     init_uc_vec(c, n->src, true);
  764     init_uc_vec(c, n->dst, false);
  765 
  766     uc_map::iterator F = ucm.find(n);
  767     return F == ucm.end() ? 0 : F->second;
  768 }
  769 
  770 bool post_scheduler::schedule_bb(bb_node* bb) {
  771     PSC_DUMP(
  772         sblog << "scheduling BB " << bb->id << "\n";
  773         if (!pending.empty())
  774             dump::dump_op_list(&pending);
  775     );
  776 
  777     assert(pending.empty());
  778     assert(bb_pending.empty());
  779     assert(ready.empty());
  780 
  781     bb_pending.append_from(bb);
  782     cur_bb = bb;
  783 
  784     node *n;
  785 
  786     while ((n = bb_pending.back())) {
  787 
  788         PSC_DUMP(
  789             sblog << "post_sched_bb ";
  790             dump::dump_op(n);
  791             sblog << "\n";
  792         );
  793 
  794         // May require emitting ALU ops to load index registers
  795         if (n->is_fetch_clause()) {
  796             n->remove();
  797             process_fetch(static_cast<container_node *>(n));
  798             continue;
  799         }
  800 
  801         if (n->is_alu_clause()) {
  802             n->remove();
  803             bool r = process_alu(static_cast<container_node*>(n));
  804             if (r)
  805                 continue;
  806             return false;
  807         }
  808 
  809         n->remove();
  810         bb->push_front(n);
  811     }
  812 
  813     this->cur_bb = NULL;
  814     return true;
  815 }
  816 
  817 void post_scheduler::init_regmap() {
  818 
  819     regmap.clear();
  820 
  821     PSC_DUMP(
  822         sblog << "init_regmap: live: ";
  823         dump::dump_set(sh, live);
  824         sblog << "\n";
  825     );
  826 
  827     for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
  828         value *v = *I;
  829         assert(v);
  830         if (!v->is_sgpr() || !v->is_prealloc())
  831             continue;
  832 
  833         sel_chan r = v->gpr;
  834 
  835         PSC_DUMP(
  836             sblog << "init_regmap:  " << r << " <= ";
  837             dump::dump_val(v);
  838             sblog << "\n";
  839         );
  840 
  841         assert(r);
  842         regmap[r] = v;
  843     }
  844 }
  845 
  846 static alu_node *create_set_idx(shader &sh, unsigned ar_idx) {
  847     alu_node *a = sh.create_alu();
  848 
  849     assert(ar_idx == V_SQ_CF_INDEX_0 || ar_idx == V_SQ_CF_INDEX_1);
  850     if (ar_idx == V_SQ_CF_INDEX_0)
  851         a->bc.set_op(ALU_OP0_SET_CF_IDX0);
  852     else
  853         a->bc.set_op(ALU_OP0_SET_CF_IDX1);
  854     a->bc.slot = SLOT_X;
  855     a->dst.resize(1); // Dummy needed for recolor
  856 
  857     PSC_DUMP(
  858         sblog << "created IDX load: ";
  859         dump::dump_op(a);
  860         sblog << "\n";
  861     );
  862 
  863     return a;
  864 }
  865 
  866 void post_scheduler::load_index_register(value *v, unsigned ar_idx)
  867 {
  868     alu.reset();
  869 
  870     if (!sh.get_ctx().is_cayman()) {
  871         // Evergreen has to first load address register, then use CF_SET_IDX0/1
  872         alu_group_tracker &rt = alu.grp();
  873         alu_node *set_idx = create_set_idx(sh, ar_idx);
  874         if (!rt.try_reserve(set_idx)) {
  875             sblog << "can't emit SET_CF_IDX";
  876             dump::dump_op(set_idx);
  877             sblog << "\n";
  878         }
  879         process_group();
  880 
  881         if (!alu.check_clause_limits()) {
  882             // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
  883         }
  884         alu.emit_group();
  885     }
  886 
  887     alu_group_tracker &rt = alu.grp();
  888     alu_node *a = alu.create_ar_load(v, ar_idx == V_SQ_CF_INDEX_1 ? SEL_Z : SEL_Y);
  889 
  890     if (!rt.try_reserve(a)) {
  891         sblog << "can't emit AR load : ";
  892         dump::dump_op(a);
  893         sblog << "\n";
  894     }
  895 
  896     process_group();
  897 
  898     if (!alu.check_clause_limits()) {
  899         // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
  900     }
  901 
  902     alu.emit_group();
  903     alu.emit_clause(cur_bb);
  904 }
  905 
  906 void post_scheduler::process_fetch(container_node *c) {
  907     if (c->empty())
  908         return;
  909 
  910     for (node_iterator N, I = c->begin(), E = c->end(); I != E; I = N) {
  911         N = I;
  912         ++N;
  913 
  914         node *n = *I;
  915 
  916         fetch_node *f = static_cast<fetch_node*>(n);
  917 
  918         PSC_DUMP(
  919             sblog << "process_tex ";
  920             dump::dump_op(n);
  921             sblog << "  ";
  922         );
  923 
  924         // TODO: If same values used can avoid reloading index register
  925         if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ||
  926             f->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
  927             unsigned index_mode = f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ?
  928                 f->bc.sampler_index_mode : f->bc.resource_index_mode;
  929 
  930             // Currently require prior opt passes to use one TEX per indexed op
  931             assert(f->parent->count() == 1);
  932 
  933             value *v = f->src.back(); // Last src is index offset
  934             assert(v);
  935 
  936             cur_bb->push_front(c);
  937 
  938             load_index_register(v, index_mode);
  939             f->src.pop_back(); // Don't need index value any more
  940 
  941             return;
  942         }
  943     }
  944 
  945     cur_bb->push_front(c);
  946 }
  947 
  948 bool post_scheduler::process_alu(container_node *c) {
  949 
  950     if (c->empty())
  951         return true;
  952 
  953     ucm.clear();
  954     alu.reset();
  955 
  956     live = c->live_after;
  957 
  958     init_globals(c->live_after, true);
  959     init_globals(c->live_before, true);
  960 
  961     init_regmap();
  962 
  963     update_local_interferences();
  964 
  965     for (node_riterator N, I = c->rbegin(), E = c->rend(); I != E; I = N) {
  966         N = I;
  967         ++N;
  968 
  969         node *n = *I;
  970         unsigned uc = init_ucm(c, n);
  971 
  972         PSC_DUMP(
  973             sblog << "process_alu uc=" << uc << "  ";
  974             dump::dump_op(n);
  975             sblog << "  ";
  976         );
  977 
  978         if (uc) {
  979             n->remove();
  980 
  981             pending.push_back(n);
  982             PSC_DUMP( sblog << "pending\n"; );
  983         } else {
  984             release_op(n);
  985         }
  986     }
  987 
  988     return schedule_alu(c);
  989 }
  990 
  991 void post_scheduler::update_local_interferences() {
  992 
  993     PSC_DUMP(
  994         sblog << "update_local_interferences : ";
  995         dump::dump_set(sh, live);
  996         sblog << "\n";
  997     );
  998 
  999 
 1000     for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
 1001         value *v = *I;
 1002         if (v->is_prealloc())
 1003             continue;
 1004 
 1005         v->interferences.add_set(live);
 1006     }
 1007 }
 1008 
 1009 void post_scheduler::update_live_src_vec(vvec &vv, val_set *born, bool src) {
 1010     for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
 1011         value *v = *I;
 1012 
 1013         if (!v)
 1014             continue;
 1015 
 1016         if (src && v->is_any_gpr()) {
 1017             if (live.add_val(v)) {
 1018                 if (!v->is_prealloc()) {
 1019                     if (!cleared_interf.contains(v)) {
 1020                         PSC_DUMP(
 1021                             sblog << "clearing interferences for " << *v << "\n";
 1022                         );
 1023                         v->interferences.clear();
 1024                         cleared_interf.add_val(v);
 1025                     }
 1026                 }
 1027                 if (born)
 1028                     born->add_val(v);
 1029             }
 1030         } else if (v->is_rel()) {
 1031             if (!v->rel->is_any_gpr())
 1032                 live.add_val(v->rel);
 1033             update_live_src_vec(v->muse, born, true);
 1034         }
 1035     }
 1036 }
 1037 
 1038 void post_scheduler::update_live_dst_vec(vvec &vv) {
 1039     for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
 1040         value *v = *I;
 1041         if (!v)
 1042             continue;
 1043 
 1044         if (v->is_rel()) {
 1045             update_live_dst_vec(v->mdef);
 1046         } else if (v->is_any_gpr()) {
 1047             if (!live.remove_val(v)) {
 1048                 PSC_DUMP(
 1049                         sblog << "failed to remove ";
 1050                 dump::dump_val(v);
 1051                 sblog << " from live : ";
 1052                 dump::dump_set(sh, live);
 1053                 sblog << "\n";
 1054                 );
 1055             }
 1056         }
 1057     }
 1058 }
 1059 
 1060 void post_scheduler::update_live(node *n, val_set *born) {
 1061     update_live_dst_vec(n->dst);
 1062     update_live_src_vec(n->src, born, true);
 1063     update_live_src_vec(n->dst, born, false);
 1064 }
 1065 
 1066 void post_scheduler::process_group() {
 1067     alu_group_tracker &rt = alu.grp();
 1068 
 1069     val_set vals_born;
 1070 
 1071     recolor_locals();
 1072 
 1073     PSC_DUMP(
 1074         sblog << "process_group: live_before : ";
 1075         dump::dump_set(sh, live);
 1076         sblog << "\n";
 1077     );
 1078 
 1079     for (unsigned s = 0; s < ctx.num_slots; ++s) {
 1080         alu_node *n = rt.slot(s);
 1081         if (!n)
 1082             continue;
 1083 
 1084         update_live(n, &vals_born);
 1085     }
 1086 
 1087     PSC_DUMP(
 1088         sblog << "process_group: live_after : ";
 1089         dump::dump_set(sh, live);
 1090         sblog << "\n";
 1091     );
 1092 
 1093     update_local_interferences();
 1094 
 1095     for (unsigned i = 0; i < 5; ++i) {
 1096         node *n = rt.slot(i);
 1097         if (n && !n->is_mova()) {
 1098             release_src_values(n);
 1099         }
 1100     }
 1101 }
 1102 
 1103 void post_scheduler::init_globals(val_set &s, bool prealloc) {
 1104 
 1105     PSC_DUMP(
 1106         sblog << "init_globals: ";
 1107         dump::dump_set(sh, s);
 1108         sblog << "\n";
 1109     );
 1110 
 1111     for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
 1112         value *v = *I;
 1113         if (v->is_sgpr() && !v->is_global()) {
 1114             v->set_global();
 1115 
 1116             if (prealloc && v->is_fixed()) {
 1117                 v->set_prealloc();
 1118             }
 1119         }
 1120     }
 1121 }
 1122 
 1123 void post_scheduler::emit_index_registers() {
 1124     for (unsigned i = 0; i < 2; i++) {
 1125         if (alu.current_idx[i]) {
 1126             regmap = prev_regmap;
 1127             alu.discard_current_group();
 1128 
 1129             load_index_register(alu.current_idx[i], KC_INDEX_0 + i);
 1130             alu.current_idx[i] = NULL;
 1131         }
 1132     }
 1133 }
 1134 
 1135 void post_scheduler::emit_clause() {
 1136 
 1137     if (alu.current_ar) {
 1138         emit_load_ar();
 1139         process_group();
 1140         if (!alu.check_clause_limits()) {
 1141             // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
 1142         }
 1143         alu.emit_group();
 1144     }
 1145 
 1146     if (!alu.is_empty()) {
 1147         alu.emit_clause(cur_bb);
 1148     }
 1149 
 1150     emit_index_registers();
 1151 }
 1152 
 1153 bool post_scheduler::schedule_alu(container_node *c) {
 1154 
 1155     assert(!ready.empty() || !ready_copies.empty());
 1156 
 1157     /* This number is rather arbitrary, important is that the scheduler has
 1158      * more than one try to create an instruction group
 1159      */
 1160     int improving = 10;
 1161     int last_pending = pending.count();
 1162     while (improving > 0) {
 1163         prev_regmap = regmap;
 1164         if (!prepare_alu_group()) {
 1165 
 1166             int new_pending = pending.count();
 1167             if ((new_pending < last_pending) || (last_pending == 0))
 1168                 improving = 10;
 1169             else
 1170                 --improving;
 1171 
 1172             last_pending = new_pending;
 1173 
 1174             if (alu.current_idx[0] || alu.current_idx[1]) {
 1175                 regmap = prev_regmap;
 1176                 emit_clause();
 1177                 init_globals(live, false);
 1178 
 1179                 continue;
 1180             }
 1181 
 1182             if (alu.current_ar) {
 1183                 emit_load_ar();
 1184                 continue;
 1185             } else
 1186                 break;
 1187         }
 1188 
 1189         if (!alu.check_clause_limits()) {
 1190             regmap = prev_regmap;
 1191             emit_clause();
 1192             init_globals(live, false);
 1193 
 1194             continue;
 1195         }
 1196 
 1197         process_group();
 1198         alu.emit_group();
 1199     };
 1200 
 1201     if (!alu.is_empty()) {
 1202         emit_clause();
 1203     }
 1204 
 1205     if (!ready.empty()) {
 1206         sblog << "##post_scheduler: unscheduled ready instructions :";
 1207         dump::dump_op_list(&ready);
 1208         assert(!"unscheduled ready instructions");
 1209     }
 1210 
 1211     if (!pending.empty()) {
 1212         sblog << "##post_scheduler: unscheduled pending instructions :";
 1213         dump::dump_op_list(&pending);
 1214         assert(!"unscheduled pending instructions");
 1215     }
 1216     return improving;
 1217 }
 1218 
 1219 void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
 1220     unsigned chan = v->gpr.chan();
 1221 
 1222     for (val_set::iterator I = vs.begin(sh), E = vs.end(sh);
 1223             I != E; ++I) {
 1224         value *vi = *I;
 1225         sel_chan gpr = vi->get_final_gpr();
 1226 
 1227         if (vi->is_any_gpr() && gpr && vi != v &&
 1228                 (!v->chunk || v->chunk != vi->chunk) &&
 1229                 vi->is_fixed() && gpr.chan() == chan) {
 1230 
 1231             unsigned r = gpr.sel();
 1232 
 1233             PSC_DUMP(
 1234                 sblog << "\tadd_interferences: " << *vi << "\n";
 1235             );
 1236 
 1237             if (rb.size() <= r)
 1238                 rb.resize(r + 32);
 1239             rb.set(r);
 1240         }
 1241     }
 1242 }
 1243 
 1244 void post_scheduler::set_color_local_val(value *v, sel_chan color) {
 1245     v->gpr = color;
 1246 
 1247     PSC_DUMP(
 1248         sblog << "     recolored: ";
 1249         dump::dump_val(v);
 1250         sblog << "\n";
 1251     );
 1252 }
 1253 
 1254 void post_scheduler::set_color_local(value *v, sel_chan color) {
 1255     if (v->chunk) {
 1256         vvec &vv = v->chunk->values;
 1257         for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
 1258             value *v2 =*I;
 1259             set_color_local_val(v2, color);
 1260         }
 1261         v->chunk->fix();
 1262     } else {
 1263         set_color_local_val(v, color);
 1264         v->fix();
 1265     }
 1266 }
 1267 
 1268 bool post_scheduler::recolor_local(value *v) {
 1269 
 1270     sb_bitset rb;
 1271 
 1272     assert(v->is_sgpr());
 1273     assert(!v->is_prealloc());
 1274     assert(v->gpr);
 1275 
 1276     unsigned chan = v->gpr.chan();
 1277 
 1278     PSC_DUMP(
 1279         sblog << "recolor_local: ";
 1280         dump::dump_val(v);
 1281         sblog << "   interferences: ";
 1282         dump::dump_set(sh, v->interferences);
 1283         sblog << "\n";
 1284         if (v->chunk) {
 1285             sblog << "     in chunk: ";
 1286             coalescer::dump_chunk(v->chunk);
 1287             sblog << "\n";
 1288         }
 1289     );
 1290 
 1291     if (v->chunk) {
 1292         for (vvec::iterator I = v->chunk->values.begin(),
 1293                 E = v->chunk->values.end(); I != E; ++I) {
 1294             value *v2 = *I;
 1295 
 1296             PSC_DUMP( sblog << "   add_interferences for " << *v2 << " :\n"; );
 1297 
 1298             add_interferences(v, rb, v2->interferences);
 1299         }
 1300     } else {
 1301         add_interferences(v, rb, v->interferences);
 1302     }
 1303 
 1304     PSC_DUMP(
 1305         unsigned sz = rb.size();
 1306         sblog << "registers bits: " << sz;
 1307         for (unsigned r = 0; r < sz; ++r) {
 1308             if ((r & 7) == 0)
 1309                 sblog << "\n  " << r << "   ";
 1310             sblog << (rb.get(r) ? 1 : 0);
 1311         }
 1312     );
 1313 
 1314     bool no_temp_gprs = v->is_global();
 1315     unsigned rs, re, pass = no_temp_gprs ? 1 : 0;
 1316 
 1317     while (pass < 2) {
 1318 
 1319         if (pass == 0) {
 1320             rs = sh.first_temp_gpr();
 1321             re = MAX_GPR;
 1322         } else {
 1323             rs = 0;
 1324             re = sh.num_nontemp_gpr();
 1325         }
 1326 
 1327         for (unsigned reg = rs; reg < re; ++reg) {
 1328             if (reg >= rb.size() || !rb.get(reg)) {
 1329                 // color found
 1330                 set_color_local(v, sel_chan(reg, chan));
 1331                 return true;
 1332             }
 1333         }
 1334         ++pass;
 1335     }
 1336 
 1337     assert(!"recolor_local failed");
 1338     return true;
 1339 }
 1340 
 1341 void post_scheduler::emit_load_ar() {
 1342 
 1343     regmap = prev_regmap;
 1344     alu.discard_current_group();
 1345 
 1346     alu_group_tracker &rt = alu.grp();
 1347     alu_node *a = alu.create_ar_load(alu.current_ar, SEL_X);
 1348 
 1349     if (!rt.try_reserve(a)) {
 1350         sblog << "can't emit AR load : ";
 1351         dump::dump_op(a);
 1352         sblog << "\n";
 1353     }
 1354 
 1355     alu.current_ar = 0;
 1356 }
 1357 
 1358 bool post_scheduler::unmap_dst_val(value *d) {
 1359 
 1360     if (d == alu.current_ar) {
 1361         emit_load_ar();
 1362         return false;
 1363     }
 1364 
 1365     if (d->is_prealloc()) {
 1366         sel_chan gpr = d->get_final_gpr();
 1367         rv_map::iterator F = regmap.find(gpr);
 1368         value *c = NULL;
 1369         if (F != regmap.end())
 1370             c = F->second;
 1371 
 1372         if (c && c!=d && (!c->chunk || c->chunk != d->chunk)) {
 1373             PSC_DUMP(
 1374                 sblog << "dst value conflict : ";
 1375                 dump::dump_val(d);
 1376                 sblog << "   regmap contains ";
 1377                 dump::dump_val(c);
 1378                 sblog << "\n";
 1379             );
 1380             assert(!"scheduler error");
 1381             return false;
 1382         } else if (c) {
 1383             regmap.erase(F);
 1384         }
 1385     }
 1386     return true;
 1387 }
 1388 
 1389 bool post_scheduler::unmap_dst(alu_node *n) {
 1390     value *d = n->dst.empty() ? NULL : n->dst[0];
 1391 
 1392     if (!d)
 1393         return true;
 1394 
 1395     if (!d->is_rel()) {
 1396         if (d && d->is_any_reg()) {
 1397 
 1398             if (d->is_AR()) {
 1399                 if (alu.current_ar != d) {
 1400                     sblog << "loading wrong ar value\n";
 1401                     assert(0);
 1402                 } else {
 1403                     alu.current_ar = NULL;
 1404                 }
 1405 
 1406             } else if (d->is_any_gpr()) {
 1407                 if (!unmap_dst_val(d))
 1408                     return false;
 1409             }
 1410         }
 1411     } else {
 1412         for (vvec::iterator I = d->mdef.begin(), E = d->mdef.end();
 1413                 I != E; ++I) {
 1414             d = *I;
 1415             if (!d)
 1416                 continue;
 1417 
 1418             assert(d->is_any_gpr());
 1419 
 1420             if (!unmap_dst_val(d))
 1421                 return false;
 1422         }
 1423     }
 1424     return true;
 1425 }
 1426 
 1427 bool post_scheduler::map_src_val(value *v) {
 1428 
 1429     if (!v->is_prealloc())
 1430         return true;
 1431 
 1432     sel_chan gpr = v->get_final_gpr();
 1433     rv_map::iterator F = regmap.find(gpr);
 1434     value *c = NULL;
 1435     if (F != regmap.end()) {
 1436         c = F->second;
 1437         if (!v->v_equal(c)) {
 1438             PSC_DUMP(
 1439                 sblog << "can't map src value ";
 1440                 dump::dump_val(v);
 1441                 sblog << ", regmap contains ";
 1442                 dump::dump_val(c);
 1443                 sblog << "\n";
 1444             );
 1445             return false;
 1446         }
 1447     } else {
 1448         regmap.insert(std::make_pair(gpr, v));
 1449     }
 1450     return true;
 1451 }
 1452 
 1453 bool post_scheduler::map_src_vec(vvec &vv, bool src) {
 1454     if (src) {
 1455         // Handle possible UBO indexing
 1456         bool ubo_indexing[2] = { false, false };
 1457         for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
 1458             value *v = *I;
 1459             if (!v)
 1460                 continue;
 1461 
 1462             if (v->is_kcache()) {
 1463                 unsigned index_mode = v->select.kcache_index_mode();
 1464                 if (index_mode == KC_INDEX_0 || index_mode == KC_INDEX_1) {
 1465                     ubo_indexing[index_mode - KC_INDEX_0] = true;
 1466                 }
 1467             }
 1468         }
 1469 
 1470         // idx values stored at end of src vec, see bc_parser::prepare_alu_group
 1471         for (unsigned i = 2; i != 0; i--) {
 1472             if (ubo_indexing[i-1]) {
 1473                 // TODO: skip adding value to kcache reservation somehow, causes
 1474                 // unnecessary group breaks and cache line locks
 1475                 value *v = vv.back();
 1476                 if (alu.current_idx[i-1] && alu.current_idx[i-1] != v) {
 1477                     PSC_DUMP(
 1478                         sblog << "IDX" << i-1 << " already set to " <<
 1479                         *alu.current_idx[i-1] << ", trying to set " << *v << "\n";
 1480                     );
 1481                     return false;
 1482                 }
 1483 
 1484                 alu.current_idx[i-1] = v;
 1485                 PSC_DUMP(sblog << "IDX" << i-1 << " set to " << *v << "\n";);
 1486             }
 1487         }
 1488     }
 1489 
 1490     for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
 1491         value *v = *I;
 1492         if (!v)
 1493             continue;
 1494 
 1495         if ((!v->is_any_gpr() || !v->is_fixed()) && !v->is_rel())
 1496             continue;
 1497 
 1498         if (v->is_rel()) {
 1499             value *rel = v->rel;
 1500             assert(rel);
 1501 
 1502             if (!rel->is_const()) {
 1503                 if (!map_src_vec(v->muse, true))
 1504                     return false;
 1505 
 1506                 if (rel != alu.current_ar) {
 1507                     if (alu.current_ar) {
 1508                         PSC_DUMP(
 1509                             sblog << "  current_AR is " << *alu.current_ar
 1510                                 << "  trying to use " << *rel << "\n";
 1511                         );
 1512                         return false;
 1513                     }
 1514 
 1515                     alu.current_ar = rel;
 1516 
 1517                     PSC_DUMP(
 1518                         sblog << "  new current_AR assigned: " << *alu.current_ar
 1519                             << "\n";
 1520                     );
 1521                 }
 1522             }
 1523 
 1524         } else if (src) {
 1525             if (!map_src_val(v)) {
 1526                 return false;
 1527             }
 1528         }
 1529     }
 1530     return true;
 1531 }
 1532 
 1533 bool post_scheduler::map_src(alu_node *n) {
 1534     if (!map_src_vec(n->dst, false))
 1535         return false;
 1536 
 1537     if (!map_src_vec(n->src, true))
 1538         return false;
 1539 
 1540     return true;
 1541 }
 1542 
 1543 void post_scheduler::dump_regmap() {
 1544 
 1545     sblog << "# REGMAP :\n";
 1546 
 1547     for(rv_map::iterator I = regmap.begin(), E = regmap.end(); I != E; ++I) {
 1548         sblog << "  # " << I->first << " => " << *(I->second) << "\n";
 1549     }
 1550 
 1551     if (alu.current_ar)
 1552         sblog << "    current_AR: " << *alu.current_ar << "\n";
 1553     if (alu.current_pr)
 1554         sblog << "    current_PR: " << *alu.current_pr << "\n";
 1555     if (alu.current_idx[0])
 1556         sblog << "    current IDX0: " << *alu.current_idx[0] << "\n";
 1557     if (alu.current_idx[1])
 1558         sblog << "    current IDX1: " << *alu.current_idx[1] << "\n";
 1559 }
 1560 
 1561 void post_scheduler::recolor_locals() {
 1562     alu_group_tracker &rt = alu.grp();
 1563 
 1564     for (unsigned s = 0; s < ctx.num_slots; ++s) {
 1565         alu_node *n = rt.slot(s);
 1566         if (n) {
 1567             value *d = n->dst[0];
 1568             if (d && d->is_sgpr() && !d->is_prealloc()) {
 1569                 recolor_local(d);
 1570             }
 1571         }
 1572     }
 1573 }
 1574 
 1575 // returns true if there are interferences
 1576 bool post_scheduler::check_interferences() {
 1577 
 1578     alu_group_tracker &rt = alu.grp();
 1579 
 1580     unsigned interf_slots;
 1581 
 1582     bool discarded = false;
 1583 
 1584     PSC_DUMP(
 1585             sblog << "check_interferences: before: \n";
 1586     dump_regmap();
 1587     );
 1588 
 1589     do {
 1590 
 1591         interf_slots = 0;
 1592 
 1593         for (unsigned s = 0; s < ctx.num_slots; ++s) {
 1594             alu_node *n = rt.slot(s);
 1595             if (n) {
 1596                 if (!unmap_dst(n)) {
 1597                     return true;
 1598                 }
 1599             }
 1600         }
 1601 
 1602         for (unsigned s = 0; s < ctx.num_slots; ++s) {
 1603             alu_node *n = rt.slot(s);
 1604             if (n) {
 1605                 if (!map_src(n)) {
 1606                     interf_slots |= (1 << s);
 1607                 }
 1608             }
 1609         }
 1610 
 1611         PSC_DUMP(
 1612                 for (unsigned i = 0; i < 5; ++i) {
 1613                     if (interf_slots & (1 << i)) {
 1614                         sblog << "!!!!!! interf slot: " << i << "  : ";
 1615                         dump::dump_op(rt.slot(i));
 1616                         sblog << "\n";
 1617                     }
 1618                 }
 1619         );
 1620 
 1621         if (!interf_slots)
 1622             break;
 1623 
 1624         PSC_DUMP( sblog << "ci: discarding slots " << interf_slots << "\n"; );
 1625 
 1626         rt.discard_slots(interf_slots, alu.conflict_nodes);
 1627         regmap = prev_regmap;
 1628         discarded = true;
 1629 
 1630     } while(1);
 1631 
 1632     PSC_DUMP(
 1633         sblog << "check_interferences: after: \n";
 1634         dump_regmap();
 1635     );
 1636 
 1637     return discarded;
 1638 }
 1639 
 1640 // add instruction(s) (alu_node or contents of alu_packed_node) to current group
 1641 // returns the number of added instructions on success
 1642 unsigned post_scheduler::try_add_instruction(node *n) {
 1643 
 1644     alu_group_tracker &rt = alu.grp();
 1645 
 1646     unsigned avail_slots = rt.avail_slots();
 1647 
 1648     // Cannot schedule in same clause as instructions using this index value
 1649     if (!n->dst.empty() && n->dst[0] &&
 1650         (n->dst[0] == alu.current_idx[0] || n->dst[0] == alu.current_idx[1])) {
 1651         PSC_DUMP(sblog << "   CF_IDX source: " << *n->dst[0] << "\n";);
 1652         return 0;
 1653     }
 1654 
 1655     if (n->is_alu_packed()) {
 1656         alu_packed_node *p = static_cast<alu_packed_node*>(n);
 1657         unsigned slots = p->get_slot_mask();
 1658         unsigned cnt = __builtin_popcount(slots);
 1659 
 1660         if ((slots & avail_slots) != slots) {
 1661             PSC_DUMP( sblog << "   no slots \n"; );
 1662             return 0;
 1663         }
 1664 
 1665         p->update_packed_items(ctx);
 1666 
 1667         if (!rt.try_reserve(p)) {
 1668             PSC_DUMP( sblog << "   reservation failed \n"; );
 1669             return 0;
 1670         }
 1671 
 1672         p->remove();
 1673         return cnt;
 1674 
 1675     } else {
 1676         alu_node *a = static_cast<alu_node*>(n);
 1677         value *d = a->dst.empty() ? NULL : a->dst[0];
 1678 
 1679         if (d && d->is_special_reg()) {
 1680             assert((a->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() || d->is_lds_oq() || d->is_lds_access() || d->is_scratch());
 1681             d = NULL;
 1682         }
 1683 
 1684         unsigned allowed_slots = ctx.alu_slots_mask(a->bc.op_ptr);
 1685         unsigned slot;
 1686 
 1687         allowed_slots &= avail_slots;
 1688 
 1689         if (!allowed_slots)
 1690             return 0;
 1691 
 1692         if (d) {
 1693             slot = d->get_final_chan();
 1694             a->bc.dst_chan = slot;
 1695             allowed_slots &= (1 << slot) | 0x10;
 1696         } else {
 1697             if (a->bc.op_ptr->flags & AF_MOVA) {
 1698                 if (a->bc.slot_flags & AF_V)
 1699                     allowed_slots &= (1 << SLOT_X);
 1700                 else
 1701                     allowed_slots &= (1 << SLOT_TRANS);
 1702             }
 1703         }
 1704 
 1705         // FIXME workaround for some problems with MULADD in trans slot on r700,
 1706         // (is it really needed on r600?)
 1707         if ((a->bc.op == ALU_OP3_MULADD || a->bc.op == ALU_OP3_MULADD_IEEE) &&
 1708                 !ctx.is_egcm()) {
 1709             allowed_slots &= 0x0F;
 1710         }
 1711 
 1712         if (!allowed_slots) {
 1713             PSC_DUMP( sblog << "   no suitable slots\n"; );
 1714             return 0;
 1715         }
 1716 
 1717         slot = __builtin_ctz(allowed_slots);
 1718         a->bc.slot = slot;
 1719 
 1720         PSC_DUMP( sblog << "slot: " << slot << "\n"; );
 1721 
 1722         if (!rt.try_reserve(a)) {
 1723             PSC_DUMP( sblog << "   reservation failed\n"; );
 1724             return 0;
 1725         }
 1726 
 1727         a->remove();
 1728         return 1;
 1729     }
 1730 }
 1731 
 1732 bool post_scheduler::check_copy(node *n) {
 1733     if (!n->is_copy_mov())
 1734         return false;
 1735 
 1736     value *s = n->src[0];
 1737     value *d = n->dst[0];
 1738 
 1739     if (!s->is_sgpr() || !d->is_sgpr())
 1740         return false;
 1741 
 1742     if (!s->is_prealloc()) {
 1743         recolor_local(s);
 1744 
 1745         if (!s->chunk || s->chunk != d->chunk)
 1746             return false;
 1747     }
 1748 
 1749     if (s->gpr == d->gpr) {
 1750 
 1751         PSC_DUMP(
 1752             sblog << "check_copy: ";
 1753             dump::dump_op(n);
 1754             sblog << "\n";
 1755         );
 1756 
 1757         rv_map::iterator F = regmap.find(d->gpr);
 1758         bool gpr_free = (F == regmap.end());
 1759 
 1760         if (d->is_prealloc()) {
 1761             if (gpr_free) {
 1762                 PSC_DUMP( sblog << "    copy not ready...\n";);
 1763                 return true;
 1764             }
 1765 
 1766             value *rv = F->second;
 1767             if (rv != d && (!rv->chunk || rv->chunk != d->chunk)) {
 1768                 PSC_DUMP( sblog << "    copy not ready(2)...\n";);
 1769                 return true;
 1770             }
 1771 
 1772             unmap_dst(static_cast<alu_node*>(n));
 1773         }
 1774 
 1775         if (s->is_prealloc() && !map_src_val(s))
 1776             return true;
 1777 
 1778         update_live(n, NULL);
 1779 
 1780         release_src_values(n);
 1781         n->remove();
 1782         PSC_DUMP( sblog << "    copy coalesced...\n";);
 1783         return true;
 1784     }
 1785     return false;
 1786 }
 1787 
 1788 void post_scheduler::dump_group(alu_group_tracker &rt) {
 1789     for (unsigned i = 0; i < 5; ++i) {
 1790         node *n = rt.slot(i);
 1791         if (n) {
 1792             sblog << "slot " << i << " : ";
 1793             dump::dump_op(n);
 1794             sblog << "\n";
 1795         }
 1796     }
 1797 }
 1798 
 1799 void post_scheduler::process_ready_copies() {
 1800 
 1801     node *last;
 1802 
 1803     do {
 1804         last = ready_copies.back();
 1805 
 1806         for (node_iterator N, I = ready_copies.begin(), E = ready_copies.end();
 1807                 I != E; I = N) {
 1808             N = I; ++N;
 1809 
 1810             node *n = *I;
 1811 
 1812             if (!check_copy(n)) {
 1813                 n->remove();
 1814                 ready.push_back(n);
 1815             }
 1816         }
 1817     } while (last != ready_copies.back());
 1818 
 1819     update_local_interferences();
 1820 }
 1821 
 1822 
 1823 bool post_scheduler::prepare_alu_group() {
 1824 
 1825     alu_group_tracker &rt = alu.grp();
 1826 
 1827     unsigned i1 = 0;
 1828 
 1829     PSC_DUMP(
 1830         sblog << "prepare_alu_group: starting...\n";
 1831         dump_group(rt);
 1832     );
 1833 
 1834     ready.append_from(&alu.conflict_nodes);
 1835 
 1836     // FIXME rework this loop
 1837 
 1838     do {
 1839 
 1840         process_ready_copies();
 1841 
 1842         ++i1;
 1843 
 1844         for (node_iterator N, I = ready.begin(), E = ready.end(); I != E;
 1845                 I = N) {
 1846             N = I; ++N;
 1847             node *n = *I;
 1848 
 1849             PSC_DUMP(
 1850                 sblog << "p_a_g: ";
 1851                 dump::dump_op(n);
 1852                 sblog << "\n";
 1853             );
 1854 
 1855 
 1856             unsigned cnt = try_add_instruction(n);
 1857 
 1858             if (!cnt)
 1859                 continue;
 1860 
 1861             PSC_DUMP(
 1862                 sblog << "current group:\n";
 1863                 dump_group(rt);
 1864             );
 1865 
 1866             if (rt.inst_count() == ctx.num_slots) {
 1867                 PSC_DUMP( sblog << " all slots used\n"; );
 1868                 break;
 1869             }
 1870         }
 1871 
 1872         if (!check_interferences())
 1873             break;
 1874 
 1875         // don't try to add more instructions to the group with mova if this
 1876         // can lead to breaking clause slot count limit - we don't want mova to
 1877         // end up in the end of the new clause instead of beginning of the
 1878         // current clause.
 1879         if (rt.has_ar_load() && alu.total_slots() > 121)
 1880             break;
 1881 
 1882         if (rt.inst_count() && i1 > 50)
 1883             break;
 1884 
 1885         regmap = prev_regmap;
 1886 
 1887     } while (1);
 1888 
 1889     PSC_DUMP(
 1890         sblog << " prepare_alu_group done, " << rt.inst_count()
 1891               << " slot(s) \n";
 1892 
 1893         sblog << "$$$$$$$$PAG i1=" << i1
 1894                 << "  ready " << ready.count()
 1895                 << "  pending " << pending.count()
 1896                 << "  conflicting " << alu.conflict_nodes.count()
 1897                 <<"\n";
 1898 
 1899     );
 1900 
 1901     return rt.inst_count();
 1902 }
 1903 
 1904 void post_scheduler::release_src_values(node* n) {
 1905     release_src_vec(n->src, true);
 1906     release_src_vec(n->dst, false);
 1907 }
 1908 
 1909 void post_scheduler::release_op(node *n) {
 1910     PSC_DUMP(
 1911         sblog << "release_op ";
 1912         dump::dump_op(n);
 1913         sblog << "\n";
 1914     );
 1915 
 1916     n->remove();
 1917 
 1918     if (n->is_copy_mov()) {
 1919         ready_copies.push_back(n);
 1920     } else if (n->is_mova() || n->is_pred_set()) {
 1921         ready.push_front(n);
 1922     } else {
 1923         ready.push_back(n);
 1924     }
 1925 }
 1926 
 1927 void post_scheduler::release_src_val(value *v) {
 1928     node *d = v->any_def();
 1929     if (d) {
 1930         if (!--ucm[d])
 1931             release_op(d);
 1932     }
 1933 }
 1934 
 1935 void post_scheduler::release_src_vec(vvec& vv, bool src) {
 1936 
 1937     for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
 1938         value *v = *I;
 1939         if (!v || v->is_readonly())
 1940             continue;
 1941 
 1942         if (v->is_rel()) {
 1943             release_src_val(v->rel);
 1944             release_src_vec(v->muse, true);
 1945 
 1946         } else if (src) {
 1947             release_src_val(v);
 1948         }
 1949     }
 1950 }
 1951 
 1952 void literal_tracker::reset() {
 1953     memset(lt, 0, sizeof(lt));
 1954     memset(uc, 0, sizeof(uc));
 1955 }
 1956 
 1957 void rp_gpr_tracker::reset() {
 1958     memset(rp, 0, sizeof(rp));
 1959     memset(uc, 0, sizeof(uc));
 1960 }
 1961 
 1962 void rp_kcache_tracker::reset() {
 1963     memset(rp, 0, sizeof(rp));
 1964     memset(uc, 0, sizeof(uc));
 1965 }
 1966 
 1967 void alu_kcache_tracker::reset() {
 1968     memset(kc, 0, sizeof(kc));
 1969     lines.clear();
 1970 }
 1971 
 1972 void alu_clause_tracker::reset() {
 1973     group = 0;
 1974     slot_count = 0;
 1975     outstanding_lds_oqa_reads = 0;
 1976     grp0.reset();
 1977     grp1.reset();
 1978 }
 1979 
 1980 alu_clause_tracker::alu_clause_tracker(shader &sh)
 1981     : sh(sh), kt(sh.get_ctx().hw_class), slot_count(),
 1982       grp0(sh), grp1(sh),
 1983       group(), clause(),
 1984       push_exec_mask(), outstanding_lds_oqa_reads(),
 1985       current_ar(), current_pr(), current_idx() {}
 1986 
 1987 void alu_clause_tracker::emit_group() {
 1988 
 1989     assert(grp().inst_count());
 1990 
 1991     alu_group_node *g = grp().emit();
 1992 
 1993     if (grp().has_update_exec_mask()) {
 1994         assert(!push_exec_mask);
 1995         push_exec_mask = true;
 1996     }
 1997 
 1998     assert(g);
 1999 
 2000     if (!clause) {
 2001         clause = sh.create_clause(NST_ALU_CLAUSE);
 2002     }
 2003 
 2004     clause->push_front(g);
 2005 
 2006     outstanding_lds_oqa_reads += grp().get_consumes_lds_oqa();
 2007     outstanding_lds_oqa_reads -= grp().get_produces_lds_oqa();
 2008     slot_count += grp().slot_count();
 2009 
 2010     new_group();
 2011 
 2012     PSC_DUMP( sblog << "   #### group emitted\n"; );
 2013 }
 2014 
 2015 void alu_clause_tracker::emit_clause(container_node *c) {
 2016     assert(clause);
 2017 
 2018     kt.init_clause(clause->bc);
 2019 
 2020     assert(!outstanding_lds_oqa_reads);
 2021     assert(!current_ar);
 2022     assert(!current_pr);
 2023 
 2024     if (push_exec_mask)
 2025         clause->bc.set_op(CF_OP_ALU_PUSH_BEFORE);
 2026 
 2027     c->push_front(clause);
 2028 
 2029     clause = NULL;
 2030     push_exec_mask = false;
 2031     slot_count = 0;
 2032     kt.reset();
 2033 
 2034     PSC_DUMP( sblog << "######### ALU clause emitted\n"; );
 2035 }
 2036 
 2037 bool alu_clause_tracker::check_clause_limits() {
 2038 
 2039     alu_group_tracker &gt = grp();
 2040 
 2041     unsigned slots = gt.slot_count();
 2042 
 2043     // reserving slots to load AR and PR values
 2044     unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0);
 2045     // ...and index registers
 2046     reserve_slots += (current_idx[0] != NULL) + (current_idx[1] != NULL);
 2047 
 2048     if (gt.get_consumes_lds_oqa() && !outstanding_lds_oqa_reads)
 2049         reserve_slots += 60;
 2050 
 2051     if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots)
 2052         return false;
 2053 
 2054     if (!kt.try_reserve(gt))
 2055         return false;
 2056 
 2057     return true;
 2058 }
 2059 
 2060 void alu_clause_tracker::new_group() {
 2061     group = !group;
 2062     grp().reset();
 2063 }
 2064 
 2065 bool alu_clause_tracker::is_empty() {
 2066     return clause == NULL;
 2067 }
 2068 
 2069 void literal_tracker::init_group_literals(alu_group_node* g) {
 2070 
 2071     g->literals.clear();
 2072     for (unsigned i = 0; i < 4; ++i) {
 2073         if (!lt[i])
 2074             break;
 2075 
 2076         g->literals.push_back(lt[i]);
 2077 
 2078         PSC_DUMP(
 2079             sblog << "literal emitted: " << lt[i].f;
 2080             sblog.print_zw_hex(lt[i].u, 8);
 2081             sblog << "   " << lt[i].i << "\n";
 2082         );
 2083     }
 2084 }
 2085 
 2086 bool alu_kcache_tracker::try_reserve(alu_group_tracker& gt) {
 2087     rp_kcache_tracker &kt = gt.kcache();
 2088 
 2089     if (!kt.num_sels())
 2090         return true;
 2091 
 2092     sb_set<unsigned> group_lines;
 2093 
 2094     unsigned nl = kt.get_lines(group_lines);
 2095     assert(nl);
 2096 
 2097     sb_set<unsigned> clause_lines(lines);
 2098     lines.add_set(group_lines);
 2099 
 2100     if (clause_lines.size() == lines.size())
 2101         return true;
 2102 
 2103     if (update_kc())
 2104         return true;
 2105 
 2106     lines = clause_lines;
 2107 
 2108     return false;
 2109 }
 2110 
 2111 unsigned rp_kcache_tracker::get_lines(kc_lines& lines) {
 2112     unsigned cnt = 0;
 2113 
 2114     for (unsigned i = 0; i < sel_count; ++i) {
 2115         unsigned line = rp[i] & 0x1fffffffu;
 2116         unsigned index_mode = rp[i] >> 29;
 2117 
 2118         if (!line)
 2119             return cnt;
 2120 
 2121         --line;
 2122         line = (sel_count == 2) ? line >> 5 : line >> 6;
 2123         line |= index_mode << 29;
 2124 
 2125         if (lines.insert(line).second)
 2126             ++cnt;
 2127     }
 2128     return cnt;
 2129 }
 2130 
 2131 bool alu_kcache_tracker::update_kc() {
 2132     unsigned c = 0;
 2133 
 2134     bc_kcache old_kc[4];
 2135     memcpy(old_kc, kc, sizeof(kc));
 2136 
 2137     for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) {
 2138         unsigned index_mode = *I >> 29;
 2139         unsigned line = *I & 0x1fffffffu;
 2140         unsigned bank = line >> 8;
 2141 
 2142         assert(index_mode <= KC_INDEX_INVALID);
 2143         line &= 0xFF;
 2144 
 2145         if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line) &&
 2146             kc[c-1].index_mode == index_mode)
 2147         {
 2148             kc[c-1].mode = KC_LOCK_2;
 2149         } else {
 2150             if (c == max_kcs) {
 2151                 memcpy(kc, old_kc, sizeof(kc));
 2152                 return false;
 2153             }
 2154 
 2155             kc[c].mode = KC_LOCK_1;
 2156 
 2157             kc[c].bank = bank;
 2158             kc[c].addr = line;
 2159             kc[c].index_mode = index_mode;
 2160             ++c;
 2161         }
 2162     }
 2163     return true;
 2164 }
 2165 
 2166 alu_node* alu_clause_tracker::create_ar_load(value *v, chan_select ar_channel) {
 2167     alu_node *a = sh.create_alu();
 2168 
 2169     if (sh.get_ctx().uses_mova_gpr) {
 2170         a->bc.set_op(ALU_OP1_MOVA_GPR_INT);
 2171         a->bc.slot = SLOT_TRANS;
 2172     } else {
 2173         a->bc.set_op(ALU_OP1_MOVA_INT);
 2174         a->bc.slot = SLOT_X;
 2175     }
 2176     a->bc.dst_chan = ar_channel;
 2177     if (ar_channel != SEL_X && sh.get_ctx().is_cayman()) {
 2178         a->bc.dst_gpr = ar_channel == SEL_Y ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
 2179     }
 2180 
 2181     a->dst.resize(1);
 2182     a->src.push_back(v);
 2183 
 2184     PSC_DUMP(
 2185         sblog << "created AR load: ";
 2186         dump::dump_op(a);
 2187         sblog << "\n";
 2188     );
 2189 
 2190     return a;
 2191 }
 2192 
 2193 void alu_clause_tracker::discard_current_group() {
 2194     PSC_DUMP( sblog << "act::discard_current_group\n"; );
 2195     grp().discard_all_slots(conflict_nodes);
 2196 }
 2197 
 2198 void rp_gpr_tracker::dump() {
 2199     sblog << "=== gpr_tracker dump:\n";
 2200     for (int c = 0; c < 3; ++c) {
 2201         sblog << "cycle " << c << "      ";
 2202         for (int h = 0; h < 4; ++h) {
 2203             sblog << rp[c][h] << ":" << uc[c][h] << "   ";
 2204         }
 2205         sblog << "\n";
 2206     }
 2207 }
 2208 
 2209 } // namespace r600_sb