"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/amd/compiler/aco_print_ir.cpp" (16 Sep 2020, 28464 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "aco_print_ir.cpp", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 #include "aco_ir.h"
    2 #include "aco_builder.h"
    3 
    4 #include "sid.h"
    5 #include "ac_shader_util.h"
    6 
    7 namespace aco {
    8 
    /* Printable names of the reduction operations. Each entry is placed at the
     * index of its enumerator through an array designator, and
     * print_instr_format_specific() reads reduce_ops[reduce->reduce_op] when it
     * prints a PSEUDO_REDUCTION instruction.
     * NOTE(review): an enumerator without an entry here would produce a null or
     * out-of-range name - confirm the table covers the whole enum. */
    9 static const char *reduce_ops[] = {
   10    [iadd32] = "iadd32",
   11    [iadd64] = "iadd64",
   12    [imul32] = "imul32",
   13    [imul64] = "imul64",
   14    [fadd32] = "fadd32",
   15    [fadd64] = "fadd64",
   16    [fmul32] = "fmul32",
   17    [fmul64] = "fmul64",
   18    [imin32] = "imin32",
   19    [imin64] = "imin64",
   20    [imax32] = "imax32",
   21    [imax64] = "imax64",
   22    [umin32] = "umin32",
   23    [umin64] = "umin64",
   24    [umax32] = "umax32",
   25    [umax64] = "umax64",
   26    [fmin32] = "fmin32",
   27    [fmin64] = "fmin64",
   28    [fmax32] = "fmax32",
   29    [fmax64] = "fmax64",
   30    [iand32] = "iand32",
   31    [iand64] = "iand64",
   32    [ior32] = "ior32",
   33    [ior64] = "ior64",
   34    [ixor32] = "ixor32",
   35    [ixor64] = "ixor64",
   36    [gfx10_wave64_bpermute] = "gfx10_wave64_bpermute",
   37 };
   38 
   39 static void print_reg_class(const RegClass rc, FILE *output)
   40 {
   41    switch (rc) {
   42       case RegClass::s1: fprintf(output, " s1: "); return;
   43       case RegClass::s2: fprintf(output, " s2: "); return;
   44       case RegClass::s3: fprintf(output, " s3: "); return;
   45       case RegClass::s4: fprintf(output, " s4: "); return;
   46       case RegClass::s6: fprintf(output, " s6: "); return;
   47       case RegClass::s8: fprintf(output, " s8: "); return;
   48       case RegClass::s16: fprintf(output, "s16: "); return;
   49       case RegClass::v1: fprintf(output, " v1: "); return;
   50       case RegClass::v2: fprintf(output, " v2: "); return;
   51       case RegClass::v3: fprintf(output, " v3: "); return;
   52       case RegClass::v4: fprintf(output, " v4: "); return;
   53       case RegClass::v5: fprintf(output, " v5: "); return;
   54       case RegClass::v6: fprintf(output, " v6: "); return;
   55       case RegClass::v7: fprintf(output, " v7: "); return;
   56       case RegClass::v8: fprintf(output, " v8: "); return;
   57       case RegClass::v1b: fprintf(output, " v1b: "); return;
   58       case RegClass::v2b: fprintf(output, " v2b: "); return;
   59       case RegClass::v3b: fprintf(output, " v3b: "); return;
   60       case RegClass::v4b: fprintf(output, " v4b: "); return;
   61       case RegClass::v6b: fprintf(output, " v6b: "); return;
   62       case RegClass::v8b: fprintf(output, " v8b: "); return;
   63       case RegClass::v1_linear: fprintf(output, " v1: "); return;
   64       case RegClass::v2_linear: fprintf(output, " v2: "); return;
   65    }
   66 }
   67 
   68 void print_physReg(PhysReg reg, unsigned bytes, FILE *output)
   69 {
   70    if (reg == 124) {
   71       fprintf(output, ":m0");
   72    } else if (reg == 106) {
   73       fprintf(output, ":vcc");
   74    } else if (reg == 253) {
   75       fprintf(output, ":scc");
   76    } else if (reg == 126) {
   77       fprintf(output, ":exec");
   78    } else {
   79       bool is_vgpr = reg / 256;
   80       unsigned r = reg % 256;
   81       unsigned size = DIV_ROUND_UP(bytes, 4);
   82       fprintf(output, ":%c[%d", is_vgpr ? 'v' : 's', r);
   83       if (size > 1)
   84          fprintf(output, "-%d]", r + size -1);
   85       else
   86          fprintf(output, "]");
   87       if (reg.byte() || bytes % 4)
   88          fprintf(output, "[%d:%d]", reg.byte()*8, (reg.byte()+bytes) * 8);
   89    }
   90 }
   91 
   /* Writes the value of an inline constant given its operand encoding:
    *   128..192 -> integers 0..64
    *   193..208 -> integers -1..-16
    *   240..248 -> 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0, 1/(2*PI)
    * Any other encoding prints nothing. */
   static void print_constant(uint8_t reg, FILE *output)
   {
      if (reg >= 128 && reg <= 192) {
         fprintf(output, "%d", reg - 128);
         return;
      }
      if (reg > 192 && reg <= 208) {
         fprintf(output, "%d", 192 - reg);
         return;
      }

      static const char *const float_text[] = {
         "0.5", "-0.5", "1.0", "-1.0", "2.0", "-2.0", "4.0", "-4.0", "1/(2*PI)",
      };
      if (reg >= 240 && reg <= 248)
         fputs(float_text[reg - 240], output);
   }
  132 
  133 static void print_operand(const Operand *operand, FILE *output)
  134 {
  135    if (operand->isLiteral()) {
  136       fprintf(output, "0x%x", operand->constantValue());
  137    } else if (operand->isConstant()) {
  138       print_constant(operand->physReg().reg(), output);
  139    } else if (operand->isUndefined()) {
  140       print_reg_class(operand->regClass(), output);
  141       fprintf(output, "undef");
  142    } else {
  143       if (operand->isLateKill())
  144          fprintf(output, "(latekill)");
  145 
  146       fprintf(output, "%%%d", operand->tempId());
  147 
  148       if (operand->isFixed())
  149          print_physReg(operand->physReg(), operand->bytes(), output);
  150    }
  151 }
  152 
  153 static void print_definition(const Definition *definition, FILE *output)
  154 {
  155    print_reg_class(definition->regClass(), output);
  156    fprintf(output, "%%%d", definition->tempId());
  157 
  158    if (definition->isFixed())
  159       print_physReg(definition->physReg(), definition->bytes(), output);
  160 }
  161 
  162 static void print_barrier_reorder(bool can_reorder, barrier_interaction barrier, FILE *output)
  163 {
  164    if (can_reorder)
  165       fprintf(output, " reorder");
  166 
  167    if (barrier & barrier_buffer)
  168       fprintf(output, " buffer");
  169    if (barrier & barrier_image)
  170       fprintf(output, " image");
  171    if (barrier & barrier_atomic)
  172       fprintf(output, " atomic");
  173    if (barrier & barrier_shared)
  174       fprintf(output, " shared");
  175    if (barrier & barrier_gs_data)
  176       fprintf(output, " gs_data");
  177    if (barrier & barrier_gs_sendmsg)
  178       fprintf(output, " gs_sendmsg");
  179 }
  180 
  181 static void print_instr_format_specific(const Instruction *instr, FILE *output)
  182 {
  183    switch (instr->format) {
  184    case Format::SOPK: {
  185       const SOPK_instruction* sopk = static_cast<const SOPK_instruction*>(instr);
  186       fprintf(output, " imm:%d", sopk->imm & 0x8000 ? (sopk->imm - 65536) : sopk->imm);
  187       break;
  188    }
  189    case Format::SOPP: {
  190       const SOPP_instruction* sopp = static_cast<const SOPP_instruction*>(instr);
  191       uint16_t imm = sopp->imm;
  192       switch (instr->opcode) {
  193       case aco_opcode::s_waitcnt: {
  194          /* we usually should check the chip class for vmcnt/lgkm, but
  195           * insert_waitcnt() should fill it in regardless. */
  196          unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);
  197          if (vmcnt != 63) fprintf(output, " vmcnt(%d)", vmcnt);
  198          if (((imm >> 4) & 0x7) < 0x7) fprintf(output, " expcnt(%d)", (imm >> 4) & 0x7);
  199          if (((imm >> 8) & 0x3F) < 0x3F) fprintf(output, " lgkmcnt(%d)", (imm >> 8) & 0x3F);
  200          break;
  201       }
  202       case aco_opcode::s_endpgm:
  203       case aco_opcode::s_endpgm_saved:
  204       case aco_opcode::s_endpgm_ordered_ps_done:
  205       case aco_opcode::s_wakeup:
  206       case aco_opcode::s_barrier:
  207       case aco_opcode::s_icache_inv:
  208       case aco_opcode::s_ttracedata:
  209       case aco_opcode::s_set_gpr_idx_off: {
  210          break;
  211       }
  212       case aco_opcode::s_sendmsg: {
  213          unsigned id = imm & sendmsg_id_mask;
  214          switch (id) {
  215          case sendmsg_none:
  216             fprintf(output, " sendmsg(MSG_NONE)");
  217             break;
  218          case _sendmsg_gs:
  219             fprintf(output, " sendmsg(gs%s%s, %u)",
  220                     imm & 0x10 ? ", cut" : "", imm & 0x20 ? ", emit" : "", imm >> 8);
  221             break;
  222          case _sendmsg_gs_done:
  223             fprintf(output, " sendmsg(gs_done%s%s, %u)",
  224                     imm & 0x10 ? ", cut" : "", imm & 0x20 ? ", emit" : "", imm >> 8);
  225             break;
  226          case sendmsg_save_wave:
  227             fprintf(output, " sendmsg(save_wave)");
  228             break;
  229          case sendmsg_stall_wave_gen:
  230             fprintf(output, " sendmsg(stall_wave_gen)");
  231             break;
  232          case sendmsg_halt_waves:
  233             fprintf(output, " sendmsg(halt_waves)");
  234             break;
  235          case sendmsg_ordered_ps_done:
  236             fprintf(output, " sendmsg(ordered_ps_done)");
  237             break;
  238          case sendmsg_early_prim_dealloc:
  239             fprintf(output, " sendmsg(early_prim_dealloc)");
  240             break;
  241          case sendmsg_gs_alloc_req:
  242             fprintf(output, " sendmsg(gs_alloc_req)");
  243             break;
  244          }
  245          break;
  246       }
  247       default: {
  248          if (imm)
  249             fprintf(output, " imm:%u", imm);
  250          break;
  251       }
  252       }
  253       if (sopp->block != -1)
  254          fprintf(output, " block:BB%d", sopp->block);
  255       break;
  256    }
  257    case Format::SMEM: {
  258       const SMEM_instruction* smem = static_cast<const SMEM_instruction*>(instr);
  259       if (smem->glc)
  260          fprintf(output, " glc");
  261       if (smem->dlc)
  262          fprintf(output, " dlc");
  263       if (smem->nv)
  264          fprintf(output, " nv");
  265       print_barrier_reorder(smem->can_reorder, smem->barrier, output);
  266       break;
  267    }
  268    case Format::VINTRP: {
  269       const Interp_instruction* vintrp = static_cast<const Interp_instruction*>(instr);
  270       fprintf(output, " attr%d.%c", vintrp->attribute, "xyzw"[vintrp->component]);
  271       break;
  272    }
  273    case Format::DS: {
  274       const DS_instruction* ds = static_cast<const DS_instruction*>(instr);
  275       if (ds->offset0)
  276          fprintf(output, " offset0:%u", ds->offset0);
  277       if (ds->offset1)
  278          fprintf(output, " offset1:%u", ds->offset1);
  279       if (ds->gds)
  280          fprintf(output, " gds");
  281       break;
  282    }
  283    case Format::MUBUF: {
  284       const MUBUF_instruction* mubuf = static_cast<const MUBUF_instruction*>(instr);
  285       if (mubuf->offset)
  286          fprintf(output, " offset:%u", mubuf->offset);
  287       if (mubuf->offen)
  288          fprintf(output, " offen");
  289       if (mubuf->idxen)
  290          fprintf(output, " idxen");
  291       if (mubuf->addr64)
  292          fprintf(output, " addr64");
  293       if (mubuf->glc)
  294          fprintf(output, " glc");
  295       if (mubuf->dlc)
  296          fprintf(output, " dlc");
  297       if (mubuf->slc)
  298          fprintf(output, " slc");
  299       if (mubuf->tfe)
  300          fprintf(output, " tfe");
  301       if (mubuf->lds)
  302          fprintf(output, " lds");
  303       if (mubuf->disable_wqm)
  304          fprintf(output, " disable_wqm");
  305       print_barrier_reorder(mubuf->can_reorder, mubuf->barrier, output);
  306       break;
  307    }
  308    case Format::MIMG: {
  309       const MIMG_instruction* mimg = static_cast<const MIMG_instruction*>(instr);
  310       unsigned identity_dmask = !instr->definitions.empty() ?
  311                                 (1 << instr->definitions[0].size()) - 1 :
  312                                 0xf;
  313       if ((mimg->dmask & identity_dmask) != identity_dmask)
  314          fprintf(output, " dmask:%s%s%s%s",
  315                  mimg->dmask & 0x1 ? "x" : "",
  316                  mimg->dmask & 0x2 ? "y" : "",
  317                  mimg->dmask & 0x4 ? "z" : "",
  318                  mimg->dmask & 0x8 ? "w" : "");
  319       switch (mimg->dim) {
  320       case ac_image_1d:
  321          fprintf(output, " 1d");
  322          break;
  323       case ac_image_2d:
  324          fprintf(output, " 2d");
  325          break;
  326       case ac_image_3d:
  327          fprintf(output, " 3d");
  328          break;
  329       case ac_image_cube:
  330          fprintf(output, " cube");
  331          break;
  332       case ac_image_1darray:
  333          fprintf(output, " 1darray");
  334          break;
  335       case ac_image_2darray:
  336          fprintf(output, " 2darray");
  337          break;
  338       case ac_image_2dmsaa:
  339          fprintf(output, " 2dmsaa");
  340          break;
  341       case ac_image_2darraymsaa:
  342          fprintf(output, " 2darraymsaa");
  343          break;
  344       }
  345       if (mimg->unrm)
  346          fprintf(output, " unrm");
  347       if (mimg->glc)
  348          fprintf(output, " glc");
  349       if (mimg->dlc)
  350          fprintf(output, " dlc");
  351       if (mimg->slc)
  352          fprintf(output, " slc");
  353       if (mimg->tfe)
  354          fprintf(output, " tfe");
  355       if (mimg->da)
  356          fprintf(output, " da");
  357       if (mimg->lwe)
  358          fprintf(output, " lwe");
  359       if (mimg->r128 || mimg->a16)
  360          fprintf(output, " r128/a16");
  361       if (mimg->d16)
  362          fprintf(output, " d16");
  363       if (mimg->disable_wqm)
  364          fprintf(output, " disable_wqm");
  365       print_barrier_reorder(mimg->can_reorder, mimg->barrier, output);
  366       break;
  367    }
  368    case Format::EXP: {
  369       const Export_instruction* exp = static_cast<const Export_instruction*>(instr);
  370       unsigned identity_mask = exp->compressed ? 0x5 : 0xf;
  371       if ((exp->enabled_mask & identity_mask) != identity_mask)
  372          fprintf(output, " en:%c%c%c%c",
  373                  exp->enabled_mask & 0x1 ? 'r' : '*',
  374                  exp->enabled_mask & 0x2 ? 'g' : '*',
  375                  exp->enabled_mask & 0x4 ? 'b' : '*',
  376                  exp->enabled_mask & 0x8 ? 'a' : '*');
  377       if (exp->compressed)
  378          fprintf(output, " compr");
  379       if (exp->done)
  380          fprintf(output, " done");
  381       if (exp->valid_mask)
  382          fprintf(output, " vm");
  383 
  384       if (exp->dest <= V_008DFC_SQ_EXP_MRT + 7)
  385          fprintf(output, " mrt%d", exp->dest - V_008DFC_SQ_EXP_MRT);
  386       else if (exp->dest == V_008DFC_SQ_EXP_MRTZ)
  387          fprintf(output, " mrtz");
  388       else if (exp->dest == V_008DFC_SQ_EXP_NULL)
  389          fprintf(output, " null");
  390       else if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= V_008DFC_SQ_EXP_POS + 3)
  391          fprintf(output, " pos%d", exp->dest - V_008DFC_SQ_EXP_POS);
  392       else if (exp->dest >= V_008DFC_SQ_EXP_PARAM && exp->dest <= V_008DFC_SQ_EXP_PARAM + 31)
  393          fprintf(output, " param%d", exp->dest - V_008DFC_SQ_EXP_PARAM);
  394       break;
  395    }
  396    case Format::PSEUDO_BRANCH: {
  397       const Pseudo_branch_instruction* branch = static_cast<const Pseudo_branch_instruction*>(instr);
  398       /* Note: BB0 cannot be a branch target */
  399       if (branch->target[0] != 0)
  400          fprintf(output, " BB%d", branch->target[0]);
  401       if (branch->target[1] != 0)
  402          fprintf(output, ", BB%d", branch->target[1]);
  403       break;
  404    }
  405    case Format::PSEUDO_REDUCTION: {
  406       const Pseudo_reduction_instruction* reduce = static_cast<const Pseudo_reduction_instruction*>(instr);
  407       fprintf(output, " op:%s", reduce_ops[reduce->reduce_op]);
  408       if (reduce->cluster_size)
  409          fprintf(output, " cluster_size:%u", reduce->cluster_size);
  410       break;
  411    }
  412    case Format::FLAT:
  413    case Format::GLOBAL:
  414    case Format::SCRATCH: {
  415       const FLAT_instruction* flat = static_cast<const FLAT_instruction*>(instr);
  416       if (flat->offset)
  417          fprintf(output, " offset:%u", flat->offset);
  418       if (flat->glc)
  419          fprintf(output, " glc");
  420       if (flat->dlc)
  421          fprintf(output, " dlc");
  422       if (flat->slc)
  423          fprintf(output, " slc");
  424       if (flat->lds)
  425          fprintf(output, " lds");
  426       if (flat->nv)
  427          fprintf(output, " nv");
  428       if (flat->disable_wqm)
  429          fprintf(output, " disable_wqm");
  430       print_barrier_reorder(flat->can_reorder, flat->barrier, output);
  431       break;
  432    }
  433    case Format::MTBUF: {
  434       const MTBUF_instruction* mtbuf = static_cast<const MTBUF_instruction*>(instr);
  435       fprintf(output, " dfmt:");
  436       switch (mtbuf->dfmt) {
  437       case V_008F0C_BUF_DATA_FORMAT_8: fprintf(output, "8"); break;
  438       case V_008F0C_BUF_DATA_FORMAT_16: fprintf(output, "16"); break;
  439       case V_008F0C_BUF_DATA_FORMAT_8_8: fprintf(output, "8_8"); break;
  440       case V_008F0C_BUF_DATA_FORMAT_32: fprintf(output, "32"); break;
  441       case V_008F0C_BUF_DATA_FORMAT_16_16: fprintf(output, "16_16"); break;
  442       case V_008F0C_BUF_DATA_FORMAT_10_11_11: fprintf(output, "10_11_11"); break;
  443       case V_008F0C_BUF_DATA_FORMAT_11_11_10: fprintf(output, "11_11_10"); break;
  444       case V_008F0C_BUF_DATA_FORMAT_10_10_10_2: fprintf(output, "10_10_10_2"); break;
  445       case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: fprintf(output, "2_10_10_10"); break;
  446       case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: fprintf(output, "8_8_8_8"); break;
  447       case V_008F0C_BUF_DATA_FORMAT_32_32: fprintf(output, "32_32"); break;
  448       case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: fprintf(output, "16_16_16_16"); break;
  449       case V_008F0C_BUF_DATA_FORMAT_32_32_32: fprintf(output, "32_32_32"); break;
  450       case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: fprintf(output, "32_32_32_32"); break;
  451       case V_008F0C_BUF_DATA_FORMAT_RESERVED_15: fprintf(output, "reserved15"); break;
  452       }
  453       fprintf(output, " nfmt:");
  454       switch (mtbuf->nfmt) {
  455       case V_008F0C_BUF_NUM_FORMAT_UNORM: fprintf(output, "unorm"); break;
  456       case V_008F0C_BUF_NUM_FORMAT_SNORM: fprintf(output, "snorm"); break;
  457       case V_008F0C_BUF_NUM_FORMAT_USCALED: fprintf(output, "uscaled"); break;
  458       case V_008F0C_BUF_NUM_FORMAT_SSCALED: fprintf(output, "sscaled"); break;
  459       case V_008F0C_BUF_NUM_FORMAT_UINT: fprintf(output, "uint"); break;
  460       case V_008F0C_BUF_NUM_FORMAT_SINT: fprintf(output, "sint"); break;
  461       case V_008F0C_BUF_NUM_FORMAT_SNORM_OGL: fprintf(output, "snorm"); break;
  462       case V_008F0C_BUF_NUM_FORMAT_FLOAT: fprintf(output, "float"); break;
  463       }
  464       if (mtbuf->offset)
  465          fprintf(output, " offset:%u", mtbuf->offset);
  466       if (mtbuf->offen)
  467          fprintf(output, " offen");
  468       if (mtbuf->idxen)
  469          fprintf(output, " idxen");
  470       if (mtbuf->glc)
  471          fprintf(output, " glc");
  472       if (mtbuf->dlc)
  473          fprintf(output, " dlc");
  474       if (mtbuf->slc)
  475          fprintf(output, " slc");
  476       if (mtbuf->tfe)
  477          fprintf(output, " tfe");
  478       if (mtbuf->disable_wqm)
  479          fprintf(output, " disable_wqm");
  480       print_barrier_reorder(mtbuf->can_reorder, mtbuf->barrier, output);
  481       break;
  482    }
  483    case Format::VOP3P: {
  484       if (static_cast<const VOP3P_instruction*>(instr)->clamp)
  485          fprintf(output, " clamp");
  486       break;
  487    }
  488    default: {
  489       break;
  490    }
  491    }
  492    if (instr->isVOP3()) {
  493       const VOP3A_instruction* vop3 = static_cast<const VOP3A_instruction*>(instr);
  494       switch (vop3->omod) {
  495       case 1:
  496          fprintf(output, " *2");
  497          break;
  498       case 2:
  499          fprintf(output, " *4");
  500          break;
  501       case 3:
  502          fprintf(output, " *0.5");
  503          break;
  504       }
  505       if (vop3->clamp)
  506          fprintf(output, " clamp");
  507       if (vop3->opsel & (1 << 3))
  508          fprintf(output, " opsel_hi");
  509    } else if (instr->isDPP()) {
  510       const DPP_instruction* dpp = static_cast<const DPP_instruction*>(instr);
  511       if (dpp->dpp_ctrl <= 0xff) {
  512          fprintf(output, " quad_perm:[%d,%d,%d,%d]",
  513                  dpp->dpp_ctrl & 0x3, (dpp->dpp_ctrl >> 2) & 0x3,
  514                  (dpp->dpp_ctrl >> 4) & 0x3, (dpp->dpp_ctrl >> 6) & 0x3);
  515       } else if (dpp->dpp_ctrl >= 0x101 && dpp->dpp_ctrl <= 0x10f) {
  516          fprintf(output, " row_shl:%d", dpp->dpp_ctrl & 0xf);
  517       } else if (dpp->dpp_ctrl >= 0x111 && dpp->dpp_ctrl <= 0x11f) {
  518          fprintf(output, " row_shr:%d", dpp->dpp_ctrl & 0xf);
  519       } else if (dpp->dpp_ctrl >= 0x121 && dpp->dpp_ctrl <= 0x12f) {
  520          fprintf(output, " row_ror:%d", dpp->dpp_ctrl & 0xf);
  521       } else if (dpp->dpp_ctrl == dpp_wf_sl1) {
  522          fprintf(output, " wave_shl:1");
  523       } else if (dpp->dpp_ctrl == dpp_wf_rl1) {
  524          fprintf(output, " wave_rol:1");
  525       } else if (dpp->dpp_ctrl == dpp_wf_sr1) {
  526          fprintf(output, " wave_shr:1");
  527       } else if (dpp->dpp_ctrl == dpp_wf_rr1) {
  528          fprintf(output, " wave_ror:1");
  529       } else if (dpp->dpp_ctrl == dpp_row_mirror) {
  530          fprintf(output, " row_mirror");
  531       } else if (dpp->dpp_ctrl == dpp_row_half_mirror) {
  532          fprintf(output, " row_half_mirror");
  533       } else if (dpp->dpp_ctrl == dpp_row_bcast15) {
  534          fprintf(output, " row_bcast:15");
  535       } else if (dpp->dpp_ctrl == dpp_row_bcast31) {
  536          fprintf(output, " row_bcast:31");
  537       } else {
  538          fprintf(output, " dpp_ctrl:0x%.3x", dpp->dpp_ctrl);
  539       }
  540       if (dpp->row_mask != 0xf)
  541          fprintf(output, " row_mask:0x%.1x", dpp->row_mask);
  542       if (dpp->bank_mask != 0xf)
  543          fprintf(output, " bank_mask:0x%.1x", dpp->bank_mask);
  544       if (dpp->bound_ctrl)
  545          fprintf(output, " bound_ctrl:1");
  546    } else if ((int)instr->format & (int)Format::SDWA) {
  547       const SDWA_instruction* sdwa = static_cast<const SDWA_instruction*>(instr);
  548       switch (sdwa->omod) {
  549       case 1:
  550          fprintf(output, " *2");
  551          break;
  552       case 2:
  553          fprintf(output, " *4");
  554          break;
  555       case 3:
  556          fprintf(output, " *0.5");
  557          break;
  558       }
  559       if (sdwa->clamp)
  560          fprintf(output, " clamp");
  561       switch (sdwa->dst_sel & sdwa_asuint) {
  562       case sdwa_udword:
  563          break;
  564       case sdwa_ubyte0:
  565       case sdwa_ubyte1:
  566       case sdwa_ubyte2:
  567       case sdwa_ubyte3:
  568          fprintf(output, " dst_sel:%sbyte%u", sdwa->dst_sel & sdwa_sext ? "s" : "u",
  569                  sdwa->dst_sel & sdwa_bytenum);
  570          break;
  571       case sdwa_uword0:
  572       case sdwa_uword1:
  573          fprintf(output, " dst_sel:%sword%u", sdwa->dst_sel & sdwa_sext ? "s" : "u",
  574                  sdwa->dst_sel & sdwa_wordnum);
  575          break;
  576       }
  577       if (sdwa->dst_preserve)
  578          fprintf(output, " dst_preserve");
  579    }
  580 }
  581 
  582 void aco_print_instr(const Instruction *instr, FILE *output)
  583 {
  584    if (!instr->definitions.empty()) {
  585       for (unsigned i = 0; i < instr->definitions.size(); ++i) {
  586          print_definition(&instr->definitions[i], output);
  587          if (i + 1 != instr->definitions.size())
  588             fprintf(output, ", ");
  589       }
  590       fprintf(output, " = ");
  591    }
  592    fprintf(output, "%s", instr_info.name[(int)instr->opcode]);
  593    if (instr->operands.size()) {
  594       bool abs[instr->operands.size()];
  595       bool neg[instr->operands.size()];
  596       bool opsel[instr->operands.size()];
  597       uint8_t sel[instr->operands.size()];
  598       if ((int)instr->format & (int)Format::VOP3A) {
  599          const VOP3A_instruction* vop3 = static_cast<const VOP3A_instruction*>(instr);
  600          for (unsigned i = 0; i < instr->operands.size(); ++i) {
  601             abs[i] = vop3->abs[i];
  602             neg[i] = vop3->neg[i];
  603             opsel[i] = vop3->opsel & (1 << i);
  604             sel[i] = sdwa_udword;
  605          }
  606       } else if (instr->isDPP()) {
  607          const DPP_instruction* dpp = static_cast<const DPP_instruction*>(instr);
  608          for (unsigned i = 0; i < instr->operands.size(); ++i) {
  609             abs[i] = i < 2 ? dpp->abs[i] : false;
  610             neg[i] = i < 2 ? dpp->neg[i] : false;
  611             opsel[i] = false;
  612             sel[i] = sdwa_udword;
  613          }
  614       } else if (instr->isSDWA()) {
  615          const SDWA_instruction* sdwa = static_cast<const SDWA_instruction*>(instr);
  616          for (unsigned i = 0; i < instr->operands.size(); ++i) {
  617             abs[i] = i < 2 ? sdwa->abs[i] : false;
  618             neg[i] = i < 2 ? sdwa->neg[i] : false;
  619             opsel[i] = false;
  620             sel[i] = i < 2 ? sdwa->sel[i] : sdwa_udword;
  621          }
  622       } else {
  623          for (unsigned i = 0; i < instr->operands.size(); ++i) {
  624             abs[i] = false;
  625             neg[i] = false;
  626             opsel[i] = false;
  627             sel[i] = sdwa_udword;
  628          }
  629       }
  630       for (unsigned i = 0; i < instr->operands.size(); ++i) {
  631          if (i)
  632             fprintf(output, ", ");
  633          else
  634             fprintf(output, " ");
  635 
  636          if (neg[i])
  637             fprintf(output, "-");
  638          if (abs[i])
  639             fprintf(output, "|");
  640          if (opsel[i])
  641             fprintf(output, "hi(");
  642          else if (sel[i] & sdwa_sext)
  643             fprintf(output, "sext(");
  644          print_operand(&instr->operands[i], output);
  645          if (opsel[i] || (sel[i] & sdwa_sext))
  646             fprintf(output, ")");
  647          if (!(sel[i] & sdwa_isra)) {
  648             if (sel[i] & sdwa_udword) {
  649                /* print nothing */
  650             } else if (sel[i] & sdwa_isword) {
  651                unsigned index = sel[i] & sdwa_wordnum;
  652                fprintf(output, "[%u:%u]", index * 16, index * 16 + 15);
  653             } else {
  654                unsigned index = sel[i] & sdwa_bytenum;
  655                fprintf(output, "[%u:%u]", index * 8, index * 8 + 7);
  656             }
  657          }
  658          if (abs[i])
  659             fprintf(output, "|");
  660 
  661          if (instr->format == Format::VOP3P) {
  662             const VOP3P_instruction* vop3 = static_cast<const VOP3P_instruction*>(instr);
  663             if ((vop3->opsel_lo & (1 << i)) || !(vop3->opsel_hi & (1 << i))) {
  664                fprintf(output, ".%c%c",
  665                        vop3->opsel_lo & (1 << i) ? 'y' : 'x',
  666                        vop3->opsel_hi & (1 << i) ? 'y' : 'x');
  667             }
  668             if (vop3->neg_lo[i] && vop3->neg_hi[i])
  669                fprintf(output, "*[-1,-1]");
  670             else if (vop3->neg_lo[i])
  671                fprintf(output, "*[-1,1]");
  672             else if (vop3->neg_hi[i])
  673                fprintf(output, "*[1,-1]");
  674          }
  675       }
  676    }
  677    print_instr_format_specific(instr, output);
  678 }
  679 
  680 static void print_block_kind(uint16_t kind, FILE *output)
  681 {
  682    if (kind & block_kind_uniform)
  683       fprintf(output, "uniform, ");
  684    if (kind & block_kind_top_level)
  685       fprintf(output, "top-level, ");
  686    if (kind & block_kind_loop_preheader)
  687       fprintf(output, "loop-preheader, ");
  688    if (kind & block_kind_loop_header)
  689       fprintf(output, "loop-header, ");
  690    if (kind & block_kind_loop_exit)
  691       fprintf(output, "loop-exit, ");
  692    if (kind & block_kind_continue)
  693       fprintf(output, "continue, ");
  694    if (kind & block_kind_break)
  695       fprintf(output, "break, ");
  696    if (kind & block_kind_continue_or_break)
  697       fprintf(output, "continue_or_break, ");
  698    if (kind & block_kind_discard)
  699       fprintf(output, "discard, ");
  700    if (kind & block_kind_branch)
  701       fprintf(output, "branch, ");
  702    if (kind & block_kind_merge)
  703       fprintf(output, "merge, ");
  704    if (kind & block_kind_invert)
  705       fprintf(output, "invert, ");
  706    if (kind & block_kind_uses_discard_if)
  707       fprintf(output, "discard_if, ");
  708    if (kind & block_kind_needs_lowering)
  709       fprintf(output, "needs_lowering, ");
  710    if (kind & block_kind_uses_demote)
  711       fprintf(output, "uses_demote, ");
  712    if (kind & block_kind_export_end)
  713       fprintf(output, "export_end, ");
  714 }
  715 
  716 static void print_stage(Stage stage, FILE *output)
  717 {
  718    fprintf(output, "ACO shader stage: ");
  719 
  720    if (stage == compute_cs)
  721       fprintf(output, "compute_cs");
  722    else if (stage == fragment_fs)
  723       fprintf(output, "fragment_fs");
  724    else if (stage == gs_copy_vs)
  725       fprintf(output, "gs_copy_vs");
  726    else if (stage == vertex_ls)
  727       fprintf(output, "vertex_ls");
  728    else if (stage == vertex_es)
  729       fprintf(output, "vertex_es");
  730    else if (stage == vertex_vs)
  731       fprintf(output, "vertex_vs");
  732    else if (stage == tess_control_hs)
  733       fprintf(output, "tess_control_hs");
  734    else if (stage == vertex_tess_control_hs)
  735       fprintf(output, "vertex_tess_control_hs");
  736    else if (stage == tess_eval_es)
  737       fprintf(output, "tess_eval_es");
  738    else if (stage == tess_eval_vs)
  739       fprintf(output, "tess_eval_vs");
  740    else if (stage == geometry_gs)
  741       fprintf(output, "geometry_gs");
  742    else if (stage == vertex_geometry_gs)
  743       fprintf(output, "vertex_geometry_gs");
  744    else if (stage == tess_eval_geometry_gs)
  745       fprintf(output, "tess_eval_geometry_gs");
  746    else if (stage == ngg_vertex_gs)
  747       fprintf(output, "ngg_vertex_gs");
  748    else if (stage == ngg_tess_eval_gs)
  749       fprintf(output, "ngg_tess_eval_gs");
  750    else if (stage == ngg_vertex_geometry_gs)
  751       fprintf(output, "ngg_vertex_geometry_gs");
  752    else if (stage == ngg_tess_eval_geometry_gs)
  753       fprintf(output, "ngg_tess_eval_geometry_gs");
  754    else
  755       fprintf(output, "unknown");
  756 
  757    fprintf(output, "\n");
  758 }
  759 
  760 void aco_print_block(const Block* block, FILE *output)
  761 {
  762    fprintf(output, "BB%d\n", block->index);
  763    fprintf(output, "/* logical preds: ");
  764    for (unsigned pred : block->logical_preds)
  765       fprintf(output, "BB%d, ", pred);
  766    fprintf(output, "/ linear preds: ");
  767    for (unsigned pred : block->linear_preds)
  768       fprintf(output, "BB%d, ", pred);
  769    fprintf(output, "/ kind: ");
  770    print_block_kind(block->kind, output);
  771    fprintf(output, "*/\n");
  772    for (auto const& instr : block->instructions) {
  773       fprintf(output, "\t");
  774       aco_print_instr(instr.get(), output);
  775       fprintf(output, "\n");
  776    }
  777 }
  778 
  779 void aco_print_program(const Program *program, FILE *output)
  780 {
  781    print_stage(program->stage, output);
  782 
  783    for (Block const& block : program->blocks)
  784       aco_print_block(&block, output);
  785 
  786    if (program->constant_data.size()) {
  787       fprintf(output, "\n/* constant data */\n");
  788       for (unsigned i = 0; i < program->constant_data.size(); i += 32) {
  789          fprintf(output, "[%06d] ", i);
  790          unsigned line_size = std::min<size_t>(program->constant_data.size() - i, 32);
  791          for (unsigned j = 0; j < line_size; j += 4) {
  792             unsigned size = std::min<size_t>(program->constant_data.size() - (i + j), 4);
  793             uint32_t v = 0;
  794             memcpy(&v, &program->constant_data[i + j], size);
  795             fprintf(output, " %08x", v);
  796          }
  797          fprintf(output, "\n");
  798       }
  799    }
  800 
  801    fprintf(output, "\n");
  802 }
  803 
  804 }