"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/r600/sb/sb_bc.h" (16 Sep 2020, 19491 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "sb_bc.h", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * on the rights to use, copy, modify, merge, publish, distribute, sub
    8  * license, and/or sell copies of the Software, and to permit persons to whom
    9  * the Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
   18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
   19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
   22  *
   23  * Authors:
   24  *      Vadim Girlin
   25  */
   26 
   27 #ifndef SB_BC_H_
   28 #define SB_BC_H_
   29 
#include <stdint.h>
#include "r600_isa.h"

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>
#include <stack>
   37 
   38 struct r600_bytecode;
   39 struct r600_shader;
   40 
   41 namespace r600_sb {
   42 
   43 class hw_encoding_format;
   44 class node;
   45 class alu_node;
   46 class cf_node;
   47 class fetch_node;
   48 class alu_group_node;
   49 class region_node;
   50 class shader;
   51 class value;
   52 
   53 class sb_ostream {
   54 public:
   55     sb_ostream() {}
   56 
   57     virtual void write(const char *s) = 0;
   58 
   59     sb_ostream& operator <<(const char *s) {
   60         write(s);
   61         return *this;
   62     }
   63 
   64     sb_ostream& operator <<(const std::string& s) {
   65         return *this << s.c_str();
   66     }
   67 
   68     sb_ostream& operator <<(void *p) {
   69         char b[32];
   70         sprintf(b, "%p", p);
   71         return *this << b;
   72     }
   73 
   74     sb_ostream& operator <<(char c) {
   75         char b[2];
   76         sprintf(b, "%c", c);
   77         return *this << b;
   78     }
   79 
   80     sb_ostream& operator <<(int n) {
   81         char b[32];
   82         sprintf(b, "%d", n);
   83         return *this << b;
   84     }
   85 
   86     sb_ostream& operator <<(unsigned n) {
   87         char b[32];
   88         sprintf(b, "%u", n);
   89         return *this << b;
   90     }
   91 
   92     sb_ostream& operator <<(double d) {
   93         char b[32];
   94         snprintf(b, 32, "%g", d);
   95         return *this << b;
   96     }
   97 
   98     // print as field of specified width, right aligned
   99     void print_w(int n, int width) {
  100         char b[256],f[8];
  101         sprintf(f, "%%%dd", width);
  102         snprintf(b, 256, f, n);
  103         write(b);
  104     }
  105 
  106     // print as field of specified width, left aligned
  107     void print_wl(int n, int width) {
  108         char b[256],f[8];
  109         sprintf(f, "%%-%dd", width);
  110         snprintf(b, 256, f, n);
  111         write(b);
  112     }
  113 
  114     // print as field of specified width, left aligned
  115     void print_wl(const std::string &s, int width) {
  116         write(s.c_str());
  117         int l = s.length();
  118         while (l++ < width) {
  119             write(" ");
  120         }
  121     }
  122 
  123     // print int as field of specified width, right aligned, zero-padded
  124     void print_zw(int n, int width) {
  125         char b[256],f[8];
  126         sprintf(f, "%%0%dd", width);
  127         snprintf(b, 256, f, n);
  128         write(b);
  129     }
  130 
  131     // print int as field of specified width, right aligned, zero-padded, hex
  132     void print_zw_hex(int n, int width) {
  133         char b[256],f[8];
  134         sprintf(f, "%%0%dx", width);
  135         snprintf(b, 256, f, n);
  136         write(b);
  137     }
  138 };
  139 
  140 class sb_ostringstream : public sb_ostream {
  141     std::string data;
  142 public:
  143     sb_ostringstream() : data() {}
  144 
  145     virtual void write(const char *s) {
  146         data += s;
  147     }
  148 
  149     void clear() { data.clear(); }
  150 
  151     const char* c_str() { return data.c_str(); }
  152     std::string& str() { return data; }
  153 };
  154 
  155 class sb_log : public sb_ostream {
  156     FILE *o;
  157 public:
  158     sb_log() : o(stderr) {}
  159 
  160     virtual void write(const char *s) {
  161         fputs(s, o);
  162     }
  163 };
  164 
// Shared sb_log instance (sb_log writes to stderr). Only declared here; the
// definition lives outside this header.
extern sb_log sblog;
  166 
  167 enum shader_target
  168 {
  169     TARGET_UNKNOWN,
  170     TARGET_VS,
  171     TARGET_ES,
  172     TARGET_PS,
  173     TARGET_GS,
  174     TARGET_GS_COPY,
  175     TARGET_COMPUTE,
  176     TARGET_FETCH,
  177     TARGET_HS,
  178     TARGET_LS,
  179 
  180     TARGET_NUM
  181 };
  182 
  183 enum sb_hw_class_bits
  184 {
  185     HB_R6   = (1<<0),
  186     HB_R7   = (1<<1),
  187     HB_EG   = (1<<2),
  188     HB_CM   = (1<<3),
  189 
  190     HB_R6R7 = (HB_R6 | HB_R7),
  191     HB_EGCM = (HB_EG | HB_CM),
  192     HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG),
  193     HB_R7EGCM = (HB_R7 | HB_EG | HB_CM),
  194 
  195     HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM)
  196 };
  197 
  198 enum sb_hw_chip
  199 {
  200     HW_CHIP_UNKNOWN,
  201     HW_CHIP_R600,
  202     HW_CHIP_RV610,
  203     HW_CHIP_RV630,
  204     HW_CHIP_RV670,
  205     HW_CHIP_RV620,
  206     HW_CHIP_RV635,
  207     HW_CHIP_RS780,
  208     HW_CHIP_RS880,
  209     HW_CHIP_RV770,
  210     HW_CHIP_RV730,
  211     HW_CHIP_RV710,
  212     HW_CHIP_RV740,
  213     HW_CHIP_CEDAR,
  214     HW_CHIP_REDWOOD,
  215     HW_CHIP_JUNIPER,
  216     HW_CHIP_CYPRESS,
  217     HW_CHIP_HEMLOCK,
  218     HW_CHIP_PALM,
  219     HW_CHIP_SUMO,
  220     HW_CHIP_SUMO2,
  221     HW_CHIP_BARTS,
  222     HW_CHIP_TURKS,
  223     HW_CHIP_CAICOS,
  224     HW_CHIP_CAYMAN,
  225     HW_CHIP_ARUBA
  226 };
  227 
  228 enum sb_hw_class
  229 {
  230     HW_CLASS_UNKNOWN,
  231     HW_CLASS_R600,
  232     HW_CLASS_R700,
  233     HW_CLASS_EVERGREEN,
  234     HW_CLASS_CAYMAN
  235 };
  236 
  237 enum alu_slots {
  238     SLOT_X = 0,
  239     SLOT_Y = 1,
  240     SLOT_Z = 2,
  241     SLOT_W = 3,
  242     SLOT_TRANS = 4
  243 };
  244 
  245 enum misc_consts {
  246     MAX_ALU_LITERALS = 4,
  247     MAX_ALU_SLOTS = 128,
  248     MAX_GPR = 128,
  249     MAX_CHAN = 4
  250 
  251 };
  252 
  253 enum alu_src_sel {
  254 
  255     ALU_SRC_LDS_OQ_A = 219,
  256     ALU_SRC_LDS_OQ_B = 220,
  257     ALU_SRC_LDS_OQ_A_POP = 221,
  258     ALU_SRC_LDS_OQ_B_POP = 222,
  259     ALU_SRC_LDS_DIRECT_A = 223,
  260     ALU_SRC_LDS_DIRECT_B = 224,
  261     ALU_SRC_TIME_HI = 227,
  262     ALU_SRC_TIME_LO = 228,
  263     ALU_SRC_MASK_HI = 229,
  264     ALU_SRC_MASK_LO = 230,
  265     ALU_SRC_HW_WAVE_ID = 231,
  266     ALU_SRC_SIMD_ID = 232,
  267     ALU_SRC_SE_ID = 233,
  268     ALU_SRC_HW_THREADGRP_ID = 234,
  269     ALU_SRC_WAVE_ID_IN_GRP = 235,
  270     ALU_SRC_NUM_THREADGRP_WAVES = 236,
  271     ALU_SRC_HW_ALU_ODD = 237,
  272     ALU_SRC_LOOP_IDX = 238,
  273     ALU_SRC_PARAM_BASE_ADDR = 240,
  274     ALU_SRC_NEW_PRIM_MASK = 241,
  275     ALU_SRC_PRIM_MASK_HI = 242,
  276     ALU_SRC_PRIM_MASK_LO = 243,
  277     ALU_SRC_1_DBL_L = 244,
  278     ALU_SRC_1_DBL_M = 245,
  279     ALU_SRC_0_5_DBL_L = 246,
  280     ALU_SRC_0_5_DBL_M = 247,
  281     ALU_SRC_0 = 248,
  282     ALU_SRC_1 = 249,
  283     ALU_SRC_1_INT = 250,
  284     ALU_SRC_M_1_INT = 251,
  285     ALU_SRC_0_5 = 252,
  286     ALU_SRC_LITERAL = 253,
  287     ALU_SRC_PV = 254,
  288     ALU_SRC_PS = 255,
  289 
  290     ALU_SRC_PARAM_OFFSET = 448
  291 };
  292 
  293 enum alu_predicate_select
  294 {
  295     PRED_SEL_OFF    = 0,
  296 //  RESERVED        = 1,
  297     PRED_SEL_0      = 2,
  298     PRED_SEL_1      = 3
  299 };
  300 
  301 
  302 enum alu_omod {
  303     OMOD_OFF  = 0,
  304     OMOD_M2   = 1,
  305     OMOD_M4   = 2,
  306     OMOD_D2   = 3
  307 };
  308 
  309 enum alu_index_mode {
  310     INDEX_AR_X        = 0,
  311     INDEX_AR_Y_R600   = 1,
  312     INDEX_AR_Z_R600   = 2,
  313     INDEX_AR_W_R600   = 3,
  314 
  315     INDEX_LOOP        = 4,
  316     INDEX_GLOBAL      = 5,
  317     INDEX_GLOBAL_AR_X = 6
  318 };
  319 
  320 enum alu_cayman_mova_dst {
  321     CM_MOVADST_AR_X,
  322     CM_MOVADST_PC,
  323     CM_MOVADST_IDX0,
  324     CM_MOVADST_IDX1,
  325     CM_MOVADST_CG0,     // clause-global byte 0
  326     CM_MOVADST_CG1,
  327     CM_MOVADST_CG2,
  328     CM_MOVADST_CG3
  329 };
  330 
  331 enum alu_cayman_exec_mask_op {
  332     CM_EMO_DEACTIVATE,
  333     CM_EMO_BREAK,
  334     CM_EMO_CONTINUE,
  335     CM_EMO_KILL
  336 };
  337 
  338 
  339 enum cf_exp_type {
  340     EXP_PIXEL,
  341     EXP_POS,
  342     EXP_PARAM,
  343 
  344     EXP_TYPE_COUNT
  345 };
  346 
  347 enum cf_mem_type {
  348     MEM_WRITE,
  349     MEM_WRITE_IND,
  350     MEM_WRITE_ACK,
  351     MEM_WRITE_IND_ACK
  352 };
  353 
  354 
  355 enum alu_kcache_mode {
  356     KC_LOCK_NONE,
  357     KC_LOCK_1,
  358     KC_LOCK_2,
  359     KC_LOCK_LOOP
  360 };
  361 
  362 enum alu_kcache_index_mode {
  363     KC_INDEX_NONE,
  364     KC_INDEX_0,
  365     KC_INDEX_1,
  366     KC_INDEX_INVALID
  367 };
  368 
  369 enum chan_select {
  370     SEL_X   = 0,
  371     SEL_Y   = 1,
  372     SEL_Z   = 2,
  373     SEL_W   = 3,
  374     SEL_0   = 4,
  375     SEL_1   = 5,
  376 //  RESERVED = 6,
  377     SEL_MASK = 7
  378 };
  379 
  380 enum bank_swizzle {
  381     VEC_012 = 0,
  382     VEC_021 = 1,
  383     VEC_120 = 2,
  384     VEC_102 = 3,
  385     VEC_201 = 4,
  386     VEC_210 = 5,
  387 
  388     VEC_NUM = 6,
  389 
  390     SCL_210 = 0,
  391     SCL_122 = 1,
  392     SCL_212 = 2,
  393     SCL_221 = 3,
  394 
  395     SCL_NUM = 4
  396 
  397 };
  398 
  399 enum sched_queue_id {
  400     SQ_CF,
  401     SQ_ALU,
  402     SQ_TEX,
  403     SQ_VTX,
  404     SQ_GDS,
  405 
  406     SQ_NUM
  407 };
  408 
  409 struct literal {
  410     union {
  411         int32_t i;
  412         uint32_t u;
  413         float f;
  414     };
  415 
  416     literal(int32_t i = 0) : i(i) {}
  417     literal(uint32_t u) : u(u) {}
  418     literal(float f) : f(f) {}
  419     literal(double f) : f(f) {}
  420     operator uint32_t() const { return u; }
  421     bool operator ==(literal l) { return u == l.u; }
  422     bool operator ==(int v_int) { return i == v_int; }
  423     bool operator ==(unsigned v_uns) { return u == v_uns; }
  424 };
  425 
  426 struct bc_kcache {
  427     unsigned mode;
  428     unsigned bank;
  429     unsigned addr;
  430     unsigned index_mode;
  431 } ;
  432 
  433 // TODO optimize bc structures
  434 
  435 struct bc_cf {
  436 
  437     bc_kcache kc[4];
  438 
  439     unsigned id;
  440 
  441 
  442     const cf_op_info * op_ptr;
  443     unsigned op;
  444 
  445     unsigned addr:32;
  446 
  447     unsigned alt_const:1;
  448     unsigned uses_waterfall:1;
  449 
  450     unsigned barrier:1;
  451     unsigned count:7;
  452     unsigned pop_count:3;
  453     unsigned call_count:6;
  454     unsigned whole_quad_mode:1;
  455     unsigned valid_pixel_mode:1;
  456 
  457     unsigned jumptable_sel:3;
  458     unsigned cf_const:5;
  459     unsigned cond:2;
  460     unsigned end_of_program:1;
  461 
  462     unsigned array_base:13;
  463     unsigned elem_size:2;
  464     unsigned index_gpr:7;
  465     unsigned rw_gpr:7;
  466     unsigned rw_rel:1;
  467     unsigned type:2;
  468 
  469     unsigned burst_count:4;
  470     unsigned mark:1;
  471     unsigned sel[4];
  472 
  473     unsigned array_size:12;
  474     unsigned comp_mask:4;
  475 
  476     unsigned rat_id:4;
  477     unsigned rat_inst:6;
  478     unsigned rat_index_mode:2;
  479 
  480     void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
  481 
  482     bool is_alu_extended() {
  483         assert(op_ptr->flags & CF_ALU);
  484         return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
  485             kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
  486             kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
  487     }
  488 
  489 };
  490 
  491 struct bc_alu_src {
  492     unsigned sel:9;
  493     unsigned chan:2;
  494     unsigned neg:1;
  495     unsigned abs:1;
  496     unsigned rel:1;
  497     literal value;
  498 };
  499 
  500 struct bc_alu {
  501     const alu_op_info * op_ptr;
  502     unsigned op;
  503 
  504     bc_alu_src src[3];
  505 
  506     unsigned dst_gpr:7;
  507     unsigned dst_chan:2;
  508     unsigned dst_rel:1;
  509     unsigned clamp:1;
  510     unsigned omod:2;
  511     unsigned bank_swizzle:3;
  512 
  513     unsigned index_mode:3;
  514     unsigned last:1;
  515     unsigned pred_sel:2;
  516 
  517     unsigned fog_merge:1;
  518     unsigned write_mask:1;
  519     unsigned update_exec_mask:1;
  520     unsigned update_pred:1;
  521 
  522     unsigned slot:3;
  523 
  524     unsigned lds_idx_offset:6;
  525 
  526     alu_op_flags slot_flags;
  527 
  528     void set_op(unsigned op) {
  529         this->op = op;
  530         op_ptr = r600_isa_alu(op);
  531     }
  532 };
  533 
  534 struct bc_fetch {
  535     const fetch_op_info * op_ptr;
  536     unsigned op;
  537 
  538     unsigned bc_frac_mode:1;
  539     unsigned fetch_whole_quad:1;
  540     unsigned resource_id:8;
  541 
  542     unsigned src_gpr:7;
  543     unsigned src_rel:1;
  544     unsigned src_rel_global:1; /* for GDS ops */
  545     unsigned src_sel[4];
  546 
  547     unsigned dst_gpr:7;
  548     unsigned dst_rel:1;
  549     unsigned dst_rel_global:1; /* for GDS ops */
  550     unsigned dst_sel[4];
  551 
  552     unsigned alt_const:1;
  553 
  554     unsigned inst_mod:2;
  555     unsigned resource_index_mode:2;
  556     unsigned sampler_index_mode:2;
  557 
  558     unsigned coord_type[4];
  559     unsigned lod_bias:7;
  560 
  561     unsigned offset[3];
  562 
  563     unsigned sampler_id:5;
  564 
  565 
  566     unsigned fetch_type:2;
  567     unsigned mega_fetch_count:6;
  568     unsigned coalesced_read:1;
  569     unsigned structured_read:2;
  570     unsigned lds_req:1;
  571 
  572     unsigned data_format:6;
  573     unsigned format_comp_all:1;
  574     unsigned num_format_all:2;
  575     unsigned semantic_id:8;
  576     unsigned srf_mode_all:1;
  577     unsigned use_const_fields:1;
  578 
  579     unsigned const_buf_no_stride:1;
  580     unsigned endian_swap:2;
  581     unsigned mega_fetch:1;
  582 
  583     unsigned src2_gpr:7; /* for GDS */
  584     unsigned alloc_consume:1;
  585     unsigned uav_id:4;
  586     unsigned uav_index_mode:2;
  587     unsigned bcast_first_req:1;
  588 
  589     /* for MEM ops */
  590     unsigned elem_size:2;
  591     unsigned uncached:1;
  592     unsigned indexed:1;
  593     unsigned burst_count:4;
  594     unsigned array_base:13;
  595     unsigned array_size:12;
  596 
  597     void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
  598 };
  599 
  600 struct shader_stats {
  601     unsigned    ndw;
  602     unsigned    ngpr;
  603     unsigned    nstack;
  604 
  605     unsigned    cf; // clause instructions not included
  606     unsigned    alu;
  607     unsigned    alu_clauses;
  608     unsigned    fetch_clauses;
  609     unsigned    fetch;
  610     unsigned    alu_groups;
  611 
  612     unsigned    shaders;        // number of shaders (for accumulated stats)
  613 
  614     shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
  615             fetch_clauses(), fetch(), alu_groups(), shaders() {}
  616 
  617     void collect(node *n);
  618     void accumulate(shader_stats &s);
  619     void dump();
  620     void dump_diff(shader_stats &s);
  621 };
  622 
  623 class sb_context {
  624 
  625 public:
  626 
  627     shader_stats src_stats, opt_stats;
  628 
  629     r600_isa *isa;
  630 
  631     sb_hw_chip hw_chip;
  632     sb_hw_class hw_class;
  633 
  634     unsigned alu_temp_gprs;
  635     unsigned max_fetch;
  636     bool has_trans;
  637     unsigned vtx_src_num;
  638     unsigned num_slots;
  639     bool uses_mova_gpr;
  640 
  641     bool r6xx_gpr_index_workaround;
  642 
  643     bool stack_workaround_8xx;
  644     bool stack_workaround_9xx;
  645 
  646     unsigned wavefront_size;
  647     unsigned stack_entry_size;
  648 
  649     static unsigned dump_pass;
  650     static unsigned dump_stat;
  651 
  652     static unsigned dry_run;
  653     static unsigned no_fallback;
  654     static unsigned safe_math;
  655 
  656     static unsigned dskip_start;
  657     static unsigned dskip_end;
  658     static unsigned dskip_mode;
  659 
  660     sb_context() : src_stats(), opt_stats(), isa(0),
  661             hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
  662 
  663     int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
  664 
  665     bool is_r600() {return hw_class == HW_CLASS_R600;}
  666     bool is_r700() {return hw_class == HW_CLASS_R700;}
  667     bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;}
  668     bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
  669     bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
  670 
  671     bool needs_8xx_stack_workaround() {
  672         if (!is_evergreen())
  673             return false;
  674 
  675         switch (hw_chip) {
  676         case HW_CHIP_HEMLOCK:
  677         case HW_CHIP_CYPRESS:
  678         case HW_CHIP_JUNIPER:
  679             return false;
  680         default:
  681             return true;
  682         }
  683     }
  684 
  685     bool needs_9xx_stack_workaround() {
  686         return is_cayman();
  687     }
  688 
  689     sb_hw_class_bits hw_class_bit() {
  690         switch (hw_class) {
  691         case HW_CLASS_R600:return HB_R6;
  692         case HW_CLASS_R700:return HB_R7;
  693         case HW_CLASS_EVERGREEN:return HB_EG;
  694         case HW_CLASS_CAYMAN:return HB_CM;
  695         default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
  696 
  697         }
  698     }
  699 
  700     unsigned cf_opcode(unsigned op) {
  701         return r600_isa_cf_opcode(isa->hw_class, op);
  702     }
  703 
  704     unsigned alu_opcode(unsigned op) {
  705         return r600_isa_alu_opcode(isa->hw_class, op);
  706     }
  707 
  708     unsigned alu_slots(unsigned op) {
  709         return r600_isa_alu_slots(isa->hw_class, op);
  710     }
  711 
  712     unsigned alu_slots(const alu_op_info * op_ptr) {
  713         return op_ptr->slots[isa->hw_class];
  714     }
  715 
  716     unsigned alu_slots_mask(const alu_op_info * op_ptr) {
  717         unsigned mask = 0;
  718         unsigned slot_flags = alu_slots(op_ptr);
  719         if (slot_flags & AF_V)
  720             mask = 0x0F;
  721         if (!is_cayman() && (slot_flags & AF_S))
  722             mask |= 0x10;
  723         /* Force LDS_IDX ops into SLOT_X */
  724         if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11))
  725             mask = 0x01;
  726         return mask;
  727     }
  728 
  729     unsigned fetch_opcode(unsigned op) {
  730         return r600_isa_fetch_opcode(isa->hw_class, op);
  731     }
  732 
  733     bool is_kcache_sel(unsigned sel) {
  734         return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
  735     }
  736 
  737     bool is_lds_oq(unsigned sel) {
  738         return (sel >= 0xdb && sel <= 0xde);
  739     }
  740 
  741     const char * get_hw_class_name();
  742     const char * get_hw_chip_name();
  743 
  744 };
  745 
  746 #define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
  747 #define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
  748 
  749 class bc_decoder {
  750 
  751     sb_context &ctx;
  752 
  753     uint32_t* dw;
  754     unsigned ndw;
  755 
  756 public:
  757 
  758     bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
  759         : ctx(sctx), dw(data), ndw(size) {}
  760 
  761     int decode_cf(unsigned &i, bc_cf &bc);
  762     int decode_alu(unsigned &i, bc_alu &bc);
  763     int decode_fetch(unsigned &i, bc_fetch &bc);
  764 
  765 private:
  766     int decode_cf_alu(unsigned &i, bc_cf &bc);
  767     int decode_cf_exp(unsigned &i, bc_cf &bc);
  768     int decode_cf_mem(unsigned &i, bc_cf &bc);
  769 
  770     int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
  771     int decode_fetch_gds(unsigned &i, bc_fetch &bc);
  772     int decode_fetch_mem(unsigned &i, bc_fetch &bc);
  773 };
  774 
  775 // bytecode format definition
  776 
  777 class hw_encoding_format {
  778     const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
  779     hw_encoding_format();
  780 protected:
  781     uint32_t value;
  782 public:
  783     hw_encoding_format(sb_hw_class_bits hw)
  784         : hw_target(hw), value(0) {}
  785     hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
  786         : hw_target(hw), value(v) {}
  787     uint32_t get_value(sb_hw_class_bits hw) const {
  788         assert((hw & hw_target) == hw);
  789         return value;
  790     }
  791 };
  792 
  793 #define BC_FORMAT_BEGIN_HW(fmt, hwset) \
  794 class fmt##_##hwset : public hw_encoding_format {\
  795     typedef fmt##_##hwset thistype; \
  796 public: \
  797     fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
  798     fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};
  799 
  800 #define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)
  801 
  802 #define BC_FORMAT_END(fmt) };
  803 
  804 // bytecode format field definition
  805 
  806 #define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
  807     thistype & name(unsigned v) { \
  808         value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
  809         return *this; \
  810     } \
  811     unsigned get_##name() const { \
  812         return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
  813     }
  814 
  815 #define BC_RSRVD(fmt, last_bit, first_bit)
  816 
  817 // CLAMP macro defined elsewhere interferes with bytecode field name
  818 #undef CLAMP
  819 #include "sb_bc_fmt_def.inc"
  820 
  821 #undef BC_FORMAT_BEGIN
  822 #undef BC_FORMAT_END
  823 #undef BC_FIELD
  824 #undef BC_RSRVD
  825 
  826 class bc_parser {
  827     sb_context & ctx;
  828 
  829     bc_decoder *dec;
  830 
  831     r600_bytecode *bc;
  832     r600_shader *pshader;
  833 
  834     uint32_t *dw;
  835     unsigned bc_ndw;
  836 
  837     unsigned max_cf;
  838 
  839     shader *sh;
  840 
  841     int error;
  842 
  843     alu_node *slots[2][5];
  844     unsigned cgroup;
  845 
  846     typedef std::vector<cf_node*> id_cf_map;
  847     id_cf_map cf_map;
  848 
  849     typedef std::stack<region_node*> region_stack;
  850     region_stack loop_stack;
  851 
  852     bool gpr_reladdr;
  853 
  854     // Note: currently relies on input emitting SET_CF in same basic block as uses
  855     value *cf_index_value[2];
  856     alu_node *mova;
  857 public:
  858 
  859     bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
  860         ctx(sctx), dec(), bc(bc), pshader(pshader),
  861         dw(), bc_ndw(), max_cf(),
  862         sh(), error(), slots(), cgroup(),
  863         cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }
  864 
  865     int decode();
  866     int prepare();
  867 
  868     shader* get_shader() { assert(!error); return sh; }
  869 
  870 private:
  871 
  872     int decode_shader();
  873 
  874     int parse_decls();
  875 
  876     int decode_cf(unsigned &i, bool &eop);
  877 
  878     int decode_alu_clause(cf_node *cf);
  879     int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
  880 
  881     int decode_fetch_clause(cf_node *cf);
  882 
  883     int prepare_ir();
  884     int prepare_alu_clause(cf_node *cf);
  885     int prepare_alu_group(cf_node* cf, alu_group_node *g);
  886     int prepare_fetch_clause(cf_node *cf);
  887 
  888     int prepare_loop(cf_node *c);
  889     int prepare_if(cf_node *c);
  890 
  891     void save_set_cf_index(value *val, unsigned idx);
  892     value *get_cf_index_value(unsigned idx);
  893     void save_mova(alu_node *mova);
  894     alu_node *get_mova();
  895 };
  896 
  897 
  898 
  899 
  900 class bytecode {
  901     typedef std::vector<uint32_t> bc_vector;
  902     sb_hw_class_bits hw_class_bit;
  903 
  904     bc_vector bc;
  905 
  906     unsigned pos;
  907 
  908 public:
  909 
  910     bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
  911         : hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
  912 
  913     unsigned ndw() { return bc.size(); }
  914 
  915     void write_data(uint32_t* dst) {
  916         std::copy(bc.begin(), bc.end(), dst);
  917     }
  918 
  919     void align(unsigned a) {
  920         unsigned size = bc.size();
  921         size = (size + a - 1) & ~(a-1);
  922         bc.resize(size);
  923     }
  924 
  925     void set_size(unsigned sz) {
  926         assert(sz >= bc.size());
  927         bc.resize(sz);
  928     }
  929 
  930     void seek(unsigned p) {
  931         if (p != pos) {
  932             if (p > bc.size()) {
  933                 bc.resize(p);
  934             }
  935             pos = p;
  936         }
  937     }
  938 
  939     unsigned get_pos() { return pos; }
  940     uint32_t *data() { return &bc[0]; }
  941 
  942     bytecode & operator <<(uint32_t v) {
  943         if (pos == ndw()) {
  944             bc.push_back(v);
  945         } else
  946             bc.at(pos) = v;
  947         ++pos;
  948         return *this;
  949     }
  950 
  951     bytecode & operator <<(const hw_encoding_format &e) {
  952         *this << e.get_value(hw_class_bit);
  953         return *this;
  954     }
  955 
  956     bytecode & operator <<(const bytecode &b) {
  957         bc.insert(bc.end(), b.bc.begin(), b.bc.end());
  958         return *this;
  959     }
  960 
  961     uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
  962 };
  963 
  964 
  965 class bc_builder {
  966     shader &sh;
  967     sb_context &ctx;
  968     bytecode bb;
  969     int error;
  970 
  971 public:
  972 
  973     bc_builder(shader &s);
  974     int build();
  975     bytecode& get_bytecode() { assert(!error); return bb; }
  976 
  977 private:
  978 
  979     int build_cf(cf_node *n);
  980 
  981     int build_cf_alu(cf_node *n);
  982     int build_cf_mem(cf_node *n);
  983     int build_cf_exp(cf_node *n);
  984 
  985     int build_alu_clause(cf_node *n);
  986     int build_alu_group(alu_group_node *n);
  987     int build_alu(alu_node *n);
  988 
  989     int build_fetch_clause(cf_node *n);
  990     int build_fetch_tex(fetch_node *n);
  991     int build_fetch_vtx(fetch_node *n);
  992     int build_fetch_gds(fetch_node *n);
  993     int build_fetch_mem(fetch_node* n);
  994 };
  995 
  996 } // namespace r600_sb
  997 
  998 #endif /* SB_BC_H_ */