"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/r600/sb/sb_pass.h" (16 Sep 2020, 17305 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "sb_pass.h", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * on the rights to use, copy, modify, merge, publish, distribute, sub
    8  * license, and/or sell copies of the Software, and to permit persons to whom
    9  * the Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
   18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
   19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
   22  *
   23  * Authors:
   24  *      Vadim Girlin
   25  */
   26 
   27 #ifndef SB_PASS_H_
   28 #define SB_PASS_H_
   29 
   30 #include <stack>
   31 
   32 namespace r600_sb {
   33 
   34 class pass {
          // Common base class of the passes declared in this header. It stores a
          // reference to a shader and to an sb_context.
          // NOTE(review): ctx is presumably the context of that same shader; the
          // constructor is defined elsewhere - confirm there.
   35 protected:
   36     sb_context &ctx;
   37     shader &sh;
   38 
   39 public:
   40     pass(shader &s);
   41 
          // Entry point of a pass; re-declared by the derived passes below. The
          // meaning of the int result is not visible in this header.
   42     virtual int run();
   43 
          // Virtual, so a derived pass can be destroyed through a pass pointer.
   44     virtual ~pass() {}
   45 };
   46 
   47 class vpass : public pass {
          // Pass written in visitor style: it declares one visit() overload per
          // node class plus the init()/done()/run()/run_on() hooks. How run()
          // drives them is defined in the implementation file, not in this header.
   48 
   49 public:
   50 
   51     vpass(shader &s) : pass(s) {}
   52 
   53     virtual int init();
   54     virtual int done();
   55 
   56     virtual int run();
   57     virtual void run_on(container_node &n);
   58 
          // One overload per node type.
          // NOTE(review): 'enter' presumably distinguishes entering a node from
          // leaving it, and the bool result presumably controls whether the
          // children are visited - confirm in the traversal code.
   59     virtual bool visit(node &n, bool enter);
   60     virtual bool visit(container_node &n, bool enter);
   61     virtual bool visit(alu_group_node &n, bool enter);
   62     virtual bool visit(cf_node &n, bool enter);
   63     virtual bool visit(alu_node &n, bool enter);
   64     virtual bool visit(alu_packed_node &n, bool enter);
   65     virtual bool visit(fetch_node &n, bool enter);
   66     virtual bool visit(region_node &n, bool enter);
   67     virtual bool visit(repeat_node &n, bool enter);
   68     virtual bool visit(depart_node &n, bool enter);
   69     virtual bool visit(if_node &n, bool enter);
   70     virtual bool visit(bb_node &n, bool enter);
   71 
   72 };
   73 
   74 class rev_vpass : public vpass {
          // vpass variant that replaces only run_on(); every other hook is
          // inherited unchanged.
          // NOTE(review): judging by the name, run_on() walks the children in
          // reverse order - confirm in the implementation.
   75 
   76 public:
   77     rev_vpass(shader &s) : vpass(s) {}
   78 
   79     virtual void run_on(container_node &n);
   80 };
   81 
   82 
   83 // =================== PASSES
   84 
   85 class bytecode;
   86 
   87 class bc_dump : public vpass {
          // vpass that overrides visit() for cf, alu and fetch nodes and offers
          // matching dump() helpers.
          // NOTE(review): judging by the names it prints the bytecode of those
          // nodes - confirm in the implementation.
          //
          // The using-declaration keeps the vpass::visit overloads that are not
          // overridden here from being hidden by the ones declared below.
   88     using vpass::visit;
   89 
          // NOTE(review): presumably the bytecode buffer and its size in dwords.
   90     uint32_t *bc_data;
   91     unsigned ndw;
   92 
   93     unsigned id;
   94 
   95     unsigned new_group, group_index;
   96 
   97 public:
   98 
          // Defined elsewhere; 'bc' may be omitted (defaults to NULL).
   99     bc_dump(shader &s, bytecode *bc = NULL);
  100 
          // Takes the buffer pointer and dword count directly; id, new_group and
          // group_index are value-initialized, i.e. start at zero.
  101     bc_dump(shader &s, uint32_t *bc_ptr, unsigned ndw) :
  102         vpass(s), bc_data(bc_ptr), ndw(ndw), id(), new_group(), group_index() {}
  103 
  104     virtual int init();
  105     virtual int done();
  106 
  107     virtual bool visit(cf_node &n, bool enter);
  108     virtual bool visit(alu_node &n, bool enter);
  109     virtual bool visit(fetch_node &n, bool enter);
  110 
          // 'count' defaults to 2 when the caller does not pass it.
  111     void dump_dw(unsigned dw_id, unsigned count = 2);
  112 
  113     void dump(cf_node& n);
  114     void dump(alu_node& n);
  115     void dump(fetch_node& n);
  116 };
  117 
  118 
  119 class dce_cleanup : public vpass {
          // vpass with its own run() and visit() overrides plus helpers that clean
          // up destination values (cleanup_dst / cleanup_dst_vec).
          // NOTE(review): the name suggests this is the clean-up stage of dead
          // code elimination - confirm in the implementation.
          //
          // The using-declaration keeps the vpass::visit overloads that are not
          // overridden here visible.
  120     using vpass::visit;
  121 
          // True when DF_REMOVE_UNUSED is set in the shader's dce_flags (see the
          // constructor).
  122     bool remove_unused;
  123 
  124 public:
  125 
          // Reads the flag from the shader once, at construction time, and starts
          // with nodes_changed cleared.
  126     dce_cleanup(shader &s) : vpass(s),
  127         remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {}
  128 
  129     virtual int run();
  130 
  131     virtual bool visit(node &n, bool enter);
  132     virtual bool visit(alu_group_node &n, bool enter);
  133     virtual bool visit(cf_node &n, bool enter);
  134     virtual bool visit(alu_node &n, bool enter);
  135     virtual bool visit(alu_packed_node &n, bool enter);
  136     virtual bool visit(fetch_node &n, bool enter);
  137     virtual bool visit(region_node &n, bool enter);
  138     virtual bool visit(container_node &n, bool enter);
  139 
  140 private:
  141 
  142     void cleanup_dst(node &n);
  143     bool cleanup_dst_vec(vvec &vv);
  144 
  145     // Did we alter/remove nodes during a single pass?
  146     bool nodes_changed;
  147 };
  148 
  149 
  150 class def_use : public pass {
          // Plain pass (no visitor hooks) with helpers that process defs, uses and
          // phi nodes.
          // NOTE(review): presumably builds def/use information for the shader's
          // values - confirm in the implementation.
  151 
  152 public:
  153 
  154     def_use(shader &sh) : pass(sh) {}
  155 
  156     virtual int run();
          // NOTE(review): 'defs' presumably selects processing of definitions
          // versus uses - confirm in the implementation.
  157     void run_on(node *n, bool defs);
  158 
  159 private:
  160 
  161     void process_uses(node *n);
  162     void process_defs(node *n, vvec &vv, bool arr_def);
  163     void process_phi(container_node *c, bool defs, bool uses);
  164 };
  165 
  166 
  167 
  168 class dump : public vpass {
          // vpass that overrides visit() for every node class and additionally
          // exposes static dump_* helpers which can be called without a dump
          // object.
          // NOTE(review): presumably prints a textual form of the shader tree for
          // debugging - confirm in the implementation.
          //
          // The using-declaration keeps all vpass::visit overloads visible
          // alongside the overrides declared here.
  169     using vpass::visit;
  170 
          // Starts at 0 (see the constructor). NOTE(review): presumably the
          // current nesting depth consumed by indent().
  171     int level;
  172 
  173 public:
  174 
  175     dump(shader &s) : vpass(s), level(0) {}
  176 
  177     virtual bool visit(node &n, bool enter);
  178     virtual bool visit(container_node &n, bool enter);
  179     virtual bool visit(alu_group_node &n, bool enter);
  180     virtual bool visit(cf_node &n, bool enter);
  181     virtual bool visit(alu_node &n, bool enter);
  182     virtual bool visit(alu_packed_node &n, bool enter);
  183     virtual bool visit(fetch_node &n, bool enter);
  184     virtual bool visit(region_node &n, bool enter);
  185     virtual bool visit(repeat_node &n, bool enter);
  186     virtual bool visit(depart_node &n, bool enter);
  187     virtual bool visit(if_node &n, bool enter);
  188     virtual bool visit(bb_node &n, bool enter);
  189 
  190 
          // Static helpers: usable from other code without constructing a dump
          // pass. Note the two dump_op overloads (node& + name, and node*).
  191     static void dump_op(node &n, const char *name);
  192     static void dump_vec(const vvec & vv);
  193     static void dump_set(shader &sh, val_set & v);
  194 
  195     static void dump_rels(vvec & vv);
  196 
  197     static void dump_val(value *v);
  198     static void dump_op(node *n);
  199 
  200     static void dump_op_list(container_node *c);
  201     static void dump_queue(sched_queue &q);
  202 
  203     static void dump_alu(alu_node *n);
  204 
  205 private:
  206 
  207     void indent();
  208 
  209     void dump_common(node &n);
  210     void dump_flags(node &n);
  211 
  212     void dump_live_values(container_node &n, bool before);
  213 };
  214 
  215 
  216 // Global Code Motion
  217 
  218 class gcm : public pass {
          // Scheduling pass. It keeps several ready queues, per-node bookkeeping
          // maps and a live-value set.
          // NOTE(review): the td_* and bu_* helpers are presumably the top-down
          // (sched_early) and bottom-up (sched_late) phases - confirm in the
          // implementation.
  219 
          // One queue per scheduling-queue kind (SQ_NUM entries each).
  220     sched_queue bu_ready[SQ_NUM];
  221     sched_queue bu_ready_next[SQ_NUM];
  222     sched_queue bu_ready_early[SQ_NUM];
  223     sched_queue ready;
  224     sched_queue ready_above;
  225 
  226     unsigned outstanding_lds_oq;
  227     container_node pending;
  228 
          // Per-node record of two basic blocks, both null until assigned.
          // NOTE(review): presumably the topmost and bottommost blocks the node
          // may be placed in - confirm in the implementation.
  229     struct op_info {
  230         bb_node* top_bb;
  231         bb_node* bottom_bb;
  232         op_info() : top_bb(), bottom_bb() {}
  233     };
  234 
  235     typedef std::map<node*, op_info> op_info_map;
  236 
          // Maps a node to an unsigned counter (see init_def_count /
          // init_use_count below).
  237     typedef std::map<node*, unsigned> nuc_map;
  238 
  239     op_info_map op_map;
  240     nuc_map uses;
  241 
  242     typedef std::vector<nuc_map> nuc_stack;
  243 
          // Stack of counter maps; constructed with one entry. ucs_level starts
          // at 0. NOTE(review): presumably ucs_level indexes the current entry
          // and is moved by push_uc_stack()/pop_uc_stack() - confirm.
  244     nuc_stack nuc_stk;
  245     unsigned ucs_level;
  246 
  247     bb_node * bu_bb;
  248 
  249     vvec pending_defs;
  250 
  251     node_list pending_nodes;
  252 
  253     unsigned cur_sq;
  254 
  255     // for register pressure tracking in bottom-up pass
  256     val_set live;
  257     int live_count;
  258 
          // NOTE(review): presumably the live_count limit used by the register
          // pressure tracking above - confirm where it is read.
  259     static const int rp_threshold = 100;
  260 
  261     bool pending_exec_mask_update;
  262 
  263 public:
  264 
          // Value-initializes the scalar members (zero / null / false) and gives
          // nuc_stk a single entry. ready_above and pending are not listed in
          // the initializer list and are therefore default-constructed.
  265     gcm(shader &sh) : pass(sh),
  266         bu_ready(), bu_ready_next(), bu_ready_early(),
  267         ready(), outstanding_lds_oq(),
  268         op_map(), uses(), nuc_stk(1), ucs_level(),
  269         bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
  270         live(), live_count(), pending_exec_mask_update() {}
  271 
  272     virtual int run();
  273 
  274 private:
  275 
  276     void collect_instructions(container_node *c, bool early_pass);
  277 
  278     void sched_early(container_node *n);
  279     void td_sched_bb(bb_node *bb);
  280     bool td_is_ready(node *n);
  281     void td_release_uses(vvec &v);
  282     void td_release_val(value *v);
  283     void td_schedule(bb_node *bb, node *n);
  284 
  285     void sched_late(container_node *n);
  286     void bu_sched_bb(bb_node *bb);
  287     void bu_release_defs(vvec &v, bool src);
  288     void bu_release_phi_defs(container_node *p, unsigned op);
  289     bool bu_is_ready(node *n);
  290     void bu_release_val(value *v);
  291     void bu_release_op(node * n);
  292     void bu_find_best_bb(node *n, op_info &oi);
  293     void bu_schedule(container_node *bb, node *n);
  294 
  295     void push_uc_stack();
  296     void pop_uc_stack();
  297 
  298     void init_def_count(nuc_map &m, container_node &s);
  299     void init_use_count(nuc_map &m, container_node &s);
  300     unsigned get_uc_vec(vvec &vv);
  301     unsigned get_dc_vec(vvec &vv, bool src);
  302 
  303     void add_ready(node *n);
  304 
  305     void dump_uc_stack();
  306 
  307     unsigned real_alu_count(sched_queue &q, unsigned max);
  308 
  309     // check if we have not less than threshold ready alu instructions
  310     bool check_alu_ready_count(unsigned threshold);
  311 };
  312 
  313 
  314 class gvn : public vpass {
          // vpass that overrides visit() for a subset of node classes and funnels
          // the work through process_op()/process_src().
          // NOTE(review): the name suggests global value numbering - confirm in
          // the implementation.
          //
          // The using-declaration keeps the vpass::visit overloads that are not
          // overridden here visible.
  315     using vpass::visit;
  316 
  317 public:
  318 
  319     gvn(shader &sh) : vpass(sh) {}
  320 
  321     virtual bool visit(node &n, bool enter);
  322     virtual bool visit(cf_node &n, bool enter);
  323     virtual bool visit(alu_node &n, bool enter);
  324     virtual bool visit(alu_packed_node &n, bool enter);
  325     virtual bool visit(fetch_node &n, bool enter);
  326     virtual bool visit(region_node &n, bool enter);
  327 
  328 private:
  329 
          // 'rewrite' defaults to true when omitted.
  330     void process_op(node &n, bool rewrite = true);
  331 
  332     // returns true if the value was rewritten
          // 'v' is a reference to the pointer, so the callee can replace it.
  333     bool process_src(value* &v, bool rewrite);
  334 
  335 
  336     void process_alu_src_constants(node &n, value* &v);
  337 };
  338 
  339 
  340 class if_conversion : public pass {
          // Plain pass working on region nodes; all members are public.
          // NOTE(review): presumably converts suitable if-regions into
          // straight-line code that selects between values - confirm in the
          // implementation.
  341 
  342 public:
  343 
  344     if_conversion(shader &sh) : pass(sh) {}
  345 
  346     virtual int run();
  347 
          // NOTE(review): the meaning of the bool results below (converted /
          // changed?) is not visible in this header.
  348     bool run_on(region_node *r);
  349 
  350     void convert_kill_instructions(region_node *r, value *em, bool branch,
  351                                    container_node *c);
  352 
  353     bool check_and_convert(region_node *r);
  354 
          // Returns an alu_node built from a phi node and a select value; who
          // owns or inserts the returned node is not visible in this header.
  355     alu_node* convert_phi(value *select, node *phi);
  356 
  357 };
  358 
  359 
  360 class liveness : public rev_vpass {
          // rev_vpass that maintains a value set ('live') and a change flag while
          // visiting nodes.
          // NOTE(review): presumably computes live values and interferences for
          // register allocation - confirm in the implementation.
          //
          // The using-declaration keeps the base visit overloads that are not
          // overridden here visible.
  361     using vpass::visit;
  362 
  363     val_set live;
          // Starts false (see the constructor).
  364     bool live_changed;
  365 
  366 public:
  367 
  368     liveness(shader &s) : rev_vpass(s), live_changed(false) {}
  369 
  370     virtual int init();
  371 
  372     virtual bool visit(node &n, bool enter);
  373     virtual bool visit(bb_node &n, bool enter);
  374     virtual bool visit(container_node &n, bool enter);
  375     virtual bool visit(alu_group_node &n, bool enter);
  376     virtual bool visit(cf_node &n, bool enter);
  377     virtual bool visit(alu_node &n, bool enter);
  378     virtual bool visit(alu_packed_node &n, bool enter);
  379     virtual bool visit(fetch_node &n, bool enter);
  380     virtual bool visit(region_node &n, bool enter);
  381     virtual bool visit(repeat_node &n, bool enter);
  382     virtual bool visit(depart_node &n, bool enter);
  383     virtual bool visit(if_node &n, bool enter);
  384 
  385 private:
  386 
  387     void update_interferences();
  388     void process_op(node &n);
  389 
          // NOTE(review): the bool results of the helpers below are not
          // documented here; presumably they report whether the live set
          // changed - confirm in the implementation.
  390     bool remove_val(value *v);
  391     bool remove_vec(vvec &v);
  392     bool process_outs(node& n);
  393     void process_ins(node& n);
  394 
  395     void process_phi_outs(container_node *phi);
  396     void process_phi_branch(container_node *phi, unsigned id);
  397 
  398     bool process_maydef(value *v);
  399 
  400     bool add_vec(vvec &vv, bool src);
  401 
  402     void update_src_vec(vvec &vv, bool src);
  403 };
  404 
  405 
  406 struct bool_op_info {
          // Plain aggregate filled in by peephole::get_bool_op_info(). It has no
          // constructor, so the members are indeterminate unless the object is
          // value-initialized by its creator.
          // NOTE(review): the exact meaning of invert / int_cvt is defined by the
          // peephole implementation - confirm there.
  407     bool invert;
  408     unsigned int_cvt;
  409 
  410     alu_node *n;
  411 };
  412 
  413 class peephole : public pass {
          // Plain pass with a set of optimize_* helpers that each take a single
          // alu_node; all members are public.
          // NOTE(review): presumably performs local (peephole) rewrites of ALU
          // instructions - confirm in the implementation.
  414 
  415 public:
  416 
  417     peephole(shader &sh) : pass(sh) {}
  418 
  419     virtual int run();
  420 
  421     void run_on(container_node *c);
  422 
  423     void optimize_cc_op(alu_node *a);
  424 
  425     void optimize_cc_op2(alu_node *a);
  426     void optimize_CNDcc_op(alu_node *a);
  427 
          // Fills 'bop' for value 'b'. NOTE(review): the bool result presumably
          // says whether 'bop' is valid - confirm in the implementation.
  428     bool get_bool_op_info(value *b, bool_op_info& bop);
          // 'a' is a reference to the pointer, so the callee can replace it.
  429     bool get_bool_flt_to_int_source(alu_node* &a);
  430     void convert_float_setcc(alu_node *f2i, alu_node *s);
  431 };
  432 
  433 
  434 class psi_ops : public rev_vpass {
          // rev_vpass that overrides visit() for generic nodes and alu nodes and
          // offers helpers named try_inline / try_reduce / eliminate / unpredicate.
          // NOTE(review): presumably simplifies or removes psi operations -
          // confirm in the implementation.
          //
          // The using-declaration keeps the inherited visit overloads that are
          // not overridden here visible.
  435     using rev_vpass::visit;
  436 
  437 public:
  438 
  439     psi_ops(shader &s) : rev_vpass(s) {}
  440 
  441     virtual bool visit(node &n, bool enter);
  442     virtual bool visit(alu_node &n, bool enter);
  443 
          // NOTE(review): the bool results presumably report whether the node
          // was changed - confirm in the implementation.
  444     bool try_inline(node &n);
  445     bool try_reduce(node &n);
  446     bool eliminate(node &n);
  447 
  448     void unpredicate(node *n);
  449 };
  450 
  451 
  452 // check correctness of the generated code, e.g.:
  453 // - expected source operand value is the last value written to its gpr,
  454 // - all arguments of phi node should be allocated to the same gpr,
  455 // TODO other tests
  456 class ra_checker : public pass {
          // Checking pass that keeps a stack of register -> value maps and
          // reports problems through error()/dump_error()/dump_all_errors().
  457 
          // Maps a register channel (sel_chan) to a value pointer.
  458     typedef std::map<sel_chan, value *> reg_value_map;
  459 
  460     typedef std::vector<reg_value_map> regmap_stack;
  461 
          // rm_stk_level indexes the current map inside rm_stack (see rmap()).
  462     regmap_stack rm_stack;
  463     unsigned rm_stk_level;
  464 
  465     value* prev_dst[5];
  466 
  467 public:
  468 
          // rm_stk_level starts at 0 and prev_dst is value-initialized (all null).
          // rm_stack itself starts empty.
  469     ra_checker(shader &sh) : pass(sh), rm_stk_level(0), prev_dst() {}
  470 
  471     virtual int run();
  472 
  473     void run_on(container_node *c);
  474 
  475     void dump_error(const error_info &e);
  476     void dump_all_errors();
  477 
  478 private:
  479 
          // Current map. Unchecked indexing: rm_stack must already hold an entry
          // at rm_stk_level. NOTE(review): presumably run()/push_stack() size
          // the stack before this is used - confirm in the implementation.
  480     reg_value_map& rmap() { return rm_stack[rm_stk_level]; }
  481 
  482     void push_stack();
  483     void pop_stack();
  484 
  485     // when going out of the alu clause, values in the clause temporary gprs,
  486     // AR, predicate values, PS/PV are destroyed
  487     void kill_alu_only_regs();
          // 'msg' is taken by value, so every call copies the string.
  488     void error(node *n, unsigned id, std::string msg);
  489 
  490     void check_phi_src(container_node *p, unsigned id);
  491     void process_phi_dst(container_node *p);
  492     void check_alu_group(alu_group_node *g);
  493     void process_op_dst(node *n);
  494     void check_op_src(node *n);
  495     void check_src_vec(node *n, unsigned id, vvec &vv, bool src);
  496     void check_value_gpr(node *n, unsigned id, value *v);
  497 };
  498 
  499 // =======================================
  500 
  501 
  502 class ra_coalesce : public pass {
          // Plain pass that only overrides run(); it adds no state of its own.
          // NOTE(review): the name suggests the coalescing step of register
          // allocation - confirm in the implementation.
  503 
  504 public:
  505 
  506     ra_coalesce(shader &sh) : pass(sh) {}
  507 
  508     virtual int run();
  509 };
  510 
  511 
  512 
  513 // =======================================
  514 
  515 class ra_init : public pass {
          // Pass that assigns register colors to values (see color() /
          // assign_color()) and tracks recently used channels in prev_chans.
          // ra_tune controls how many recent channels are avoided, as explained
          // in the constructor comment.
  516 
  517 public:
  518 
          // prev_chans starts at zero; ra_tune is assigned in the body. Inside
          // the constructor 'sh' names the parameter, which shadows the inherited
          // pass::sh member.
  519     ra_init(shader &sh) : pass(sh), prev_chans() {
  520 
  521         // The parameter below affects register channels distribution.
  522         // For cayman (VLIW-4) we're trying to distribute the channels
  523         // uniformly, this means significantly better alu slots utilization
  524         // at the expense of higher gpr usage. Hopefully this will improve
  525         // performance, though it has to be proven with real benchmarks yet.
  526         // For VLIW-5 this method could also slightly improve slots
  527         // utilization, but increased register pressure seems more significant
  528         // and overall performance effect is negative according to some
  529         // benchmarks, so it's not used currently. Basically, VLIW-5 doesn't
  530         // really need it because trans slot (unrestricted by register write
  531         // channel) allows to consume most deviations from uniform channel
  532         // distribution.
  533         // Value 3 means that for new allocation we'll use channel that differs
  534         // from 3 last used channels. 0 for VLIW-5 effectively turns this off.
  535 
  536         ra_tune = sh.get_ctx().is_cayman() ? 3 : 0;
  537     }
  538 
  539     virtual int run();
  540 
  541 private:
  542 
          // NOTE(review): presumably a packed history of the last used channels,
          // updated by add_prev_chan() - confirm the encoding there.
  543     unsigned prev_chans;
          // 3 on cayman, 0 otherwise (set in the constructor).
  544     unsigned ra_tune;
  545 
  546     void add_prev_chan(unsigned chan);
  547     unsigned get_preferable_chan_mask();
  548 
  549     void ra_node(container_node *c);
  550     void process_op(node *n);
  551 
  552     void color(value *v);
  553 
  554     void color_bs_constraint(ra_constraint *c);
  555 
  556     void assign_color(value *v, sel_chan c);
  557     void alloc_arrays();
  558 };
  559 
  560 // =======================================
  561 
  562 class ra_split : public pass {
          // Plain pass made of split_* helpers for containers, single ops, packed
          // ALU nodes, vector instructions and phi sources/destinations; all
          // members are public.
          // NOTE(review): presumably splits live ranges ahead of register
          // allocation - confirm in the implementation.
  563 
  564 public:
  565 
  566     ra_split(shader &sh) : pass(sh) {}
  567 
  568     virtual int run();
  569 
  570     void split(container_node *n);
  571     void split_op(node *n);
  572     void split_alu_packed(alu_packed_node *n);
  573     void split_vector_inst(node *n);
  574 
  575     void split_packed_ins(alu_packed_node *n);
  576 
          // Compiled out: the declaration below is disabled by '#if 0'.
  577 #if 0
  578     void split_pinned_outs(node *n);
  579 #endif
  580 
          // NOTE(review): presumably v1/v2 receive the two halves of the split of
          // 'vv' - confirm which side is which in the implementation.
  581     void split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz);
  582 
  583     void split_phi_src(container_node *loc, container_node *c, unsigned id,
  584                        bool loop);
  585     void split_phi_dst(node *loc, container_node *c, bool loop);
  586     void init_phi_constraints(container_node *c);
  587 };
  588 
  589 
  590 
  591 class ssa_prepare : public vpass {
          // vpass that keeps a stack of value sets ('stk') indexed by 'level' while
          // visiting cf, alu, fetch, region, repeat and depart nodes.
          // NOTE(review): presumably collects the values defined in each region so
          // that phi nodes can be created (create_phi_nodes) - confirm in the
          // implementation.
          //
          // The using-declaration keeps the vpass::visit overloads that are not
          // overridden here visible.
  592     using vpass::visit;
  593 
  594     typedef std::vector<val_set> vd_stk;
  595     vd_stk stk;
  596 
          // Index of the current set inside stk; starts at 0.
  597     unsigned level;
  598 
  599 public:
  600     ssa_prepare(shader &s) : vpass(s), level(0) {}
  601 
  602     virtual bool visit(cf_node &n, bool enter);
  603     virtual bool visit(alu_node &n, bool enter);
  604     virtual bool visit(fetch_node &n, bool enter);
  605     virtual bool visit(region_node &n, bool enter);
  606     virtual bool visit(repeat_node &n, bool enter);
  607     virtual bool visit(depart_node &n, bool enter);
  608 
  609 private:
  610 
          // Opens a new level. The vector is grown only when it is too short;
          // otherwise the slot left over from an earlier push is reused and
          // must be cleared first.
  611     void push_stk() {
  612         ++level;
  613         if (level + 1 > stk.size())
  614             stk.resize(level+1);
  615         else
  616             stk[level].clear();
  617     }
          // Closes the current level and merges its set into the enclosing one.
          // Must not be called at level 0 (asserted). The popped slot is left in
          // place for reuse by the next push_stk().
  618     void pop_stk() {
  619         assert(level);
  620         --level;
  621         stk[level].add_set(stk[level + 1]);
  622     }
  623 
  624     void add_defs(node &n);
  625 
          // Unchecked access to the set of the current level. stk starts empty,
          // so this is only valid once stk holds an entry at 'level', e.g. after
          // push_stk().
  626     val_set & cur_set() { return stk[level]; }
  627 
  628     container_node* create_phi_nodes(int count);
  629 };
  630 
  631 class ssa_rename : public vpass {
          // vpass that keeps per-value index maps and three stacks of such maps
          // while visiting nodes, and renames source and destination values
          // through the rename_* helpers.
          // NOTE(review): presumably the renaming step of SSA construction -
          // confirm in the implementation.
          //
          // The using-declaration keeps the vpass::visit overloads that are not
          // overridden here visible.
  632     using vpass::visit;
  633 
          // Maps a value to an unsigned index (see get_index / set_index /
          // new_index).
  634     typedef sb_map<value*, unsigned> def_map;
  635 
  636     def_map def_count;
  637     def_map lds_oq_count;
  638     def_map lds_rw_count;
          // One stack per counter family above. NOTE(review): presumably pushed
          // and popped together by push()/pop() - confirm in the implementation.
  639     std::stack<def_map> rename_stack;
  640     std::stack<def_map> rename_lds_oq_stack;
  641     std::stack<def_map> rename_lds_rw_stack;
  642 
  643     typedef std::map<uint32_t, value*> val_map;
  644     val_map values;
  645 
  646 public:
  647 
  648     ssa_rename(shader &s) : vpass(s) {}
  649 
  650     virtual int init();
  651 
  652     virtual bool visit(container_node &n, bool enter);
  653     virtual bool visit(node &n, bool enter);
  654     virtual bool visit(alu_group_node &n, bool enter);
  655     virtual bool visit(cf_node &n, bool enter);
  656     virtual bool visit(alu_node &n, bool enter);
  657     virtual bool visit(alu_packed_node &n, bool enter);
  658     virtual bool visit(fetch_node &n, bool enter);
  659     virtual bool visit(region_node &n, bool enter);
  660     virtual bool visit(repeat_node &n, bool enter);
  661     virtual bool visit(depart_node &n, bool enter);
  662     virtual bool visit(if_node &n, bool enter);
  663 
  664 private:
  665 
  666     void push(node *phi);
  667     void pop();
  668 
          // Index accessors working on whichever def_map the caller passes in.
  669     unsigned get_index(def_map& m, value* v);
  670     void set_index(def_map& m, value* v, unsigned index);
  671     unsigned new_index(def_map& m, value* v);
  672 
          // Both return a value pointer. NOTE(review): presumably the renamed
          // version of 'v' - confirm in the implementation.
  673     value* rename_use(node *n, value* v);
  674     value* rename_def(node *def, value* v);
  675 
  676     void rename_src_vec(node *n, vvec &vv, bool src);
  677     void rename_dst_vec(node *def, vvec &vv, bool set_def);
  678 
  679     void rename_src(node *n);
  680     void rename_dst(node *n);
  681 
  682     void rename_phi_args(container_node *phi, unsigned op, bool def);
  683 
  684     void rename_virt(node *n);
  685     void rename_virt_val(node *n, value *v);
  686 };
  687 
  688 class bc_finalizer : public pass {
          // Plain pass made of finalize_* helpers for loops, ifs, ALU groups,
          // fetches and CF nodes, plus counters updated by update_ngpr() /
          // update_nstack().
          // NOTE(review): presumably the last step before bytecode generation -
          // confirm in the implementation.
  689 
          // One slot per export type, plus the last CF node; all start null.
  690     cf_node *last_export[EXP_TYPE_COUNT];
  691     cf_node *last_cf;
  692 
          // Both start at zero. NOTE(review): presumably the number of GPRs and
          // stack entries the shader needs - confirm in the implementation.
  693     unsigned ngpr;
  694     unsigned nstack;
  695 
  696 public:
  697 
          // Value-initializes every member: null pointers and zero counters.
  698     bc_finalizer(shader &sh) : pass(sh), last_export(), last_cf(), ngpr(),
  699         nstack() {}
  700 
  701     virtual int run();
  702 
  703     void finalize_loop(region_node *r);
  704     void finalize_if(region_node *r);
  705 
  706     void run_on(container_node *c);
  707 
  708     void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
          // Note the differing prev_node types: node* here, alu_group_node* in
          // finalize_alu_src().
  709     void finalize_alu_group(alu_group_node *g, node *prev_node);
  710     bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node);
  711 
  712     void emit_set_grad(fetch_node* f);
  713     void finalize_fetch(fetch_node *f);
  714 
  715     void finalize_cf(cf_node *c);
  716 
  717     sel_chan translate_kcache(cf_node *alu, value *v);
  718 
  719     void update_ngpr(unsigned gpr);
          // 'add' defaults to 0 when omitted.
  720     void update_nstack(region_node *r, unsigned add = 0);
  721 
          // 'loops' and 'ifs' are out-parameters (non-const references); 'add'
          // defaults to 0.
  722     unsigned get_stack_depth(node *n, unsigned &loops, unsigned &ifs,
  723                              unsigned add = 0);
  724 
  725     void cf_peephole();
  726 
  727 private:
  728     void copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start);
  729     void emit_set_texture_offsets(fetch_node &f);
  730 };
  731 
  732 
  733 } // namespace r600_sb
  734 
  735 #endif /* SB_PASS_H_ */