"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/freedreno/ir3/disasm-a3xx.c" (16 Sep 2020, 35877 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "disasm-a3xx.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.2.0-rc2_vs_20.2.0-rc3.

    1 /*
    2  * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   21  * SOFTWARE.
   22  */
   23 
   24 #include <stdio.h>
   25 #include <stdlib.h>
   26 #include <stdint.h>
   27 #include <stdbool.h>
   28 #include <string.h>
   29 #include <assert.h>
   30 
   31 #include <util/u_debug.h>
   32 
   33 #include "instr-a3xx.h"
   34 
   35 /* bitmask of debug flags */
   36 enum debug_t {
   37     PRINT_RAW      = 0x1,    /* dump raw hexdump */
   38     PRINT_VERBOSE  = 0x2,
   39     EXPAND_REPEAT  = 0x4,
   40 };
   41 
   42 static enum debug_t debug;
   43 
   44 #define printf debug_printf
   45 
   46 static const char *levels[] = {
   47         "",
   48         "\t",
   49         "\t\t",
   50         "\t\t\t",
   51         "\t\t\t\t",
   52         "\t\t\t\t\t",
   53         "\t\t\t\t\t\t",
   54         "\t\t\t\t\t\t\t",
   55         "\t\t\t\t\t\t\t\t",
   56         "\t\t\t\t\t\t\t\t\t",
   57         "x",
   58         "x",
   59         "x",
   60         "x",
   61         "x",
   62         "x",
   63 };
   64 
   65 static const char *component = "xyzw";
   66 
   67 static const char *type[] = {
   68         [TYPE_F16] = "f16",
   69         [TYPE_F32] = "f32",
   70         [TYPE_U16] = "u16",
   71         [TYPE_U32] = "u32",
   72         [TYPE_S16] = "s16",
   73         [TYPE_S32] = "s32",
   74         [TYPE_U8]  = "u8",
   75         [TYPE_S8]  = "s8",
   76 };
   77 
   78 struct disasm_ctx {
   79     FILE *out;
   80     int level;
   81     unsigned gpu_id;
   82 
   83     /* current instruction repeat flag: */
   84     unsigned repeat;
   85     /* current instruction repeat indx/offset (for --expand): */
   86     unsigned repeatidx;
   87 
   88     unsigned instructions;
   89 };
   90 
   91 static const char *float_imms[] = {
   92     "0.0",
   93     "0.5",
   94     "1.0",
   95     "2.0",
   96     "e",
   97     "pi",
   98     "1/pi",
   99     "1/log2(e)",
  100     "log2(e)",
  101     "1/log2(10)",
  102     "log2(10)",
  103     "4.0",
  104 };
  105 
  106 static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full,
  107         bool is_float, bool r,
  108         bool c, bool im, bool neg, bool abs, bool addr_rel)
  109 {
  110     const char type = c ? 'c' : 'r';
  111 
  112     // XXX I prefer - and || for neg/abs, but preserving format used
  113     // by libllvm-a3xx for easy diffing..
  114 
  115     if (abs && neg)
  116         fprintf(ctx->out, "(absneg)");
  117     else if (neg)
  118         fprintf(ctx->out, "(neg)");
  119     else if (abs)
  120         fprintf(ctx->out, "(abs)");
  121 
  122     if (r)
  123         fprintf(ctx->out, "(r)");
  124 
  125     if (im) {
  126         if (is_float && full && reg.iim_val < ARRAY_SIZE(float_imms)) {
  127             fprintf(ctx->out, "(%s)", float_imms[reg.iim_val]);
  128         } else {
  129             fprintf(ctx->out, "%d", reg.iim_val);
  130         }
  131     } else if (addr_rel) {
  132         /* I would just use %+d but trying to make it diff'able with
  133          * libllvm-a3xx...
  134          */
  135         if (reg.iim_val < 0)
  136             fprintf(ctx->out, "%s%c<a0.x - %d>", full ? "" : "h", type, -reg.iim_val);
  137         else if (reg.iim_val > 0)
  138             fprintf(ctx->out, "%s%c<a0.x + %d>", full ? "" : "h", type, reg.iim_val);
  139         else
  140             fprintf(ctx->out, "%s%c<a0.x>", full ? "" : "h", type);
  141     } else if ((reg.num == REG_A0) && !c) {
  142         /* This matches libllvm output, the second (scalar) address register
  143          * seems to be called a1.x instead of a0.y.
  144          */
  145         fprintf(ctx->out, "a%d.x", reg.comp);
  146     } else if ((reg.num == REG_P0) && !c) {
  147         fprintf(ctx->out, "p0.%c", component[reg.comp]);
  148     } else {
  149         fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
  150     }
  151 }
  152 
  153 static unsigned regidx(reg_t reg)
  154 {
  155     return (4 * reg.num) + reg.comp;
  156 }
  157 
  158 static reg_t idxreg(unsigned idx)
  159 {
  160     return (reg_t){
  161         .comp = idx & 0x3,
  162         .num  = idx >> 2,
  163     };
  164 }
  165 
  166 static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel)
  167 {
  168     reg = idxreg(regidx(reg) + ctx->repeatidx);
  169     print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel);
  170 }
  171 
  172 /* TODO switch to using reginfo struct everywhere, since more readable
  173  * than passing a bunch of bools to print_reg_src
  174  */
  175 
  176 struct reginfo {
  177     reg_t reg;
  178     bool full;
  179     bool r;
  180     bool c;
  181     bool f; /* src reg is interpreted as float, used for printing immediates */
  182     bool im;
  183     bool neg;
  184     bool abs;
  185     bool addr_rel;
  186 };
  187 
  188 static void print_src(struct disasm_ctx *ctx, struct reginfo *info)
  189 {
  190     reg_t reg = info->reg;
  191 
  192     if (info->r)
  193         reg = idxreg(regidx(info->reg) + ctx->repeatidx);
  194 
  195     print_reg(ctx, reg, info->full, info->f, info->r, info->c, info->im,
  196             info->neg, info->abs, info->addr_rel);
  197 }
  198 
  199 //static void print_dst(struct disasm_ctx *ctx, struct reginfo *info)
  200 //{
  201 //  print_reg_dst(ctx, info->reg, info->full, info->addr_rel);
  202 //}
  203 
  204 static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr)
  205 {
  206     instr_cat0_t *cat0 = &instr->cat0;
  207 
  208     switch (cat0->opc) {
  209     case OPC_KILL:
  210     case OPC_IF:
  211         fprintf(ctx->out, " %sp0.%c", cat0->inv ? "!" : "",
  212                 component[cat0->comp]);
  213         break;
  214     case OPC_BR:
  215         fprintf(ctx->out, " %sp0.%c, #%d", cat0->inv ? "!" : "",
  216                 component[cat0->comp], cat0->a3xx.immed);
  217         break;
  218     case OPC_JUMP:
  219     case OPC_CALL:
  220         fprintf(ctx->out, " #%d", cat0->a3xx.immed);
  221         break;
  222     }
  223 
  224     if ((debug & PRINT_VERBOSE) && (cat0->dummy2|cat0->dummy3|cat0->dummy4))
  225         fprintf(ctx->out, "\t{0: %x,%x,%x}", cat0->dummy2, cat0->dummy3, cat0->dummy4);
  226 }
  227 
  228 static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr)
  229 {
  230     instr_cat1_t *cat1 = &instr->cat1;
  231 
  232     if (cat1->ul)
  233         fprintf(ctx->out, "(ul)");
  234 
  235     if (cat1->src_type == cat1->dst_type) {
  236         if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) {
  237             /* special case (nmemonic?): */
  238             fprintf(ctx->out, "mova");
  239         } else {
  240             fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
  241         }
  242     } else {
  243         fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
  244     }
  245 
  246     fprintf(ctx->out, " ");
  247 
  248     if (cat1->even)
  249         fprintf(ctx->out, "(even)");
  250 
  251     if (cat1->pos_inf)
  252         fprintf(ctx->out, "(pos_infinity)");
  253 
  254     print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32,
  255             cat1->dst_rel);
  256 
  257     fprintf(ctx->out, ", ");
  258 
  259     /* ugg, have to special case this.. vs print_reg().. */
  260     if (cat1->src_im) {
  261         if (type_float(cat1->src_type))
  262             fprintf(ctx->out, "(%f)", cat1->fim_val);
  263         else if (type_uint(cat1->src_type))
  264             fprintf(ctx->out, "0x%08x", cat1->uim_val);
  265         else
  266             fprintf(ctx->out, "%d", cat1->iim_val);
  267     } else if (cat1->src_rel && !cat1->src_c) {
  268         /* I would just use %+d but trying to make it diff'able with
  269          * libllvm-a3xx...
  270          */
  271         char type = cat1->src_rel_c ? 'c' : 'r';
  272         const char *full = (type_size(cat1->src_type) == 32) ? "" : "h";
  273         if (cat1->off < 0)
  274             fprintf(ctx->out, "%s%c<a0.x - %d>", full, type, -cat1->off);
  275         else if (cat1->off > 0)
  276             fprintf(ctx->out, "%s%c<a0.x + %d>", full, type, cat1->off);
  277         else
  278             fprintf(ctx->out, "%s%c<a0.x>", full, type);
  279     } else {
  280         struct reginfo src = {
  281             .reg = (reg_t)cat1->src,
  282             .full = type_size(cat1->src_type) == 32,
  283             .r = cat1->src_r,
  284             .c = cat1->src_c,
  285             .im = cat1->src_im,
  286         };
  287         print_src(ctx, &src);
  288     }
  289 
  290     if ((debug & PRINT_VERBOSE) && (cat1->must_be_0))
  291         fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0);
  292 }
  293 
  294 static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr)
  295 {
  296     instr_cat2_t *cat2 = &instr->cat2;
  297     int opc = _OPC(2, cat2->opc);
  298     static const char *cond[] = {
  299             "lt",
  300             "le",
  301             "gt",
  302             "ge",
  303             "eq",
  304             "ne",
  305             "?6?",
  306     };
  307 
  308     switch (opc) {
  309     case OPC_CMPS_F:
  310     case OPC_CMPS_U:
  311     case OPC_CMPS_S:
  312     case OPC_CMPV_F:
  313     case OPC_CMPV_U:
  314     case OPC_CMPV_S:
  315         fprintf(ctx->out, ".%s", cond[cat2->cond]);
  316         break;
  317     }
  318 
  319     fprintf(ctx->out, " ");
  320     if (cat2->ei)
  321         fprintf(ctx->out, "(ei)");
  322     print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false);
  323     fprintf(ctx->out, ", ");
  324 
  325     struct reginfo src1 = {
  326         .full = cat2->full,
  327         .r = cat2->repeat ? cat2->src1_r : 0,
  328         .f = is_cat2_float(opc),
  329         .im = cat2->src1_im,
  330         .abs = cat2->src1_abs,
  331         .neg = cat2->src1_neg,
  332     };
  333 
  334     if (cat2->c1.src1_c) {
  335         src1.reg = (reg_t)(cat2->c1.src1);
  336         src1.c = true;
  337     } else if (cat2->rel1.src1_rel) {
  338         src1.reg = (reg_t)(cat2->rel1.src1);
  339         src1.c = cat2->rel1.src1_c;
  340         src1.addr_rel = true;
  341     } else {
  342         src1.reg = (reg_t)(cat2->src1);
  343     }
  344     print_src(ctx, &src1);
  345 
  346     struct reginfo src2 = {
  347         .r = cat2->repeat ? cat2->src2_r : 0,
  348         .full = cat2->full,
  349         .f = is_cat2_float(opc),
  350         .abs = cat2->src2_abs,
  351         .neg = cat2->src2_neg,
  352         .im = cat2->src2_im,
  353     };
  354     switch (opc) {
  355     case OPC_ABSNEG_F:
  356     case OPC_ABSNEG_S:
  357     case OPC_CLZ_B:
  358     case OPC_CLZ_S:
  359     case OPC_SIGN_F:
  360     case OPC_FLOOR_F:
  361     case OPC_CEIL_F:
  362     case OPC_RNDNE_F:
  363     case OPC_RNDAZ_F:
  364     case OPC_TRUNC_F:
  365     case OPC_NOT_B:
  366     case OPC_BFREV_B:
  367     case OPC_SETRM:
  368     case OPC_CBITS_B:
  369         /* these only have one src reg */
  370         break;
  371     default:
  372         fprintf(ctx->out, ", ");
  373         if (cat2->c2.src2_c) {
  374             src2.reg = (reg_t)(cat2->c2.src2);
  375             src2.c = true;
  376         } else if (cat2->rel2.src2_rel) {
  377             src2.reg = (reg_t)(cat2->rel2.src2);
  378             src2.c = cat2->rel2.src2_c;
  379             src2.addr_rel = true;
  380         } else {
  381             src2.reg = (reg_t)(cat2->src2);
  382         }
  383         print_src(ctx, &src2);
  384         break;
  385     }
  386 }
  387 
  388 static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr)
  389 {
  390     instr_cat3_t *cat3 = &instr->cat3;
  391     bool full = instr_cat3_full(cat3);
  392 
  393     fprintf(ctx->out, " ");
  394     print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false);
  395     fprintf(ctx->out, ", ");
  396 
  397     struct reginfo src1 = {
  398         .r = cat3->repeat ? cat3->src1_r : 0,
  399         .full = full,
  400         .neg = cat3->src1_neg,
  401     };
  402     if (cat3->c1.src1_c) {
  403         src1.reg = (reg_t)(cat3->c1.src1);
  404         src1.c = true;
  405     } else if (cat3->rel1.src1_rel) {
  406         src1.reg = (reg_t)(cat3->rel1.src1);
  407         src1.c = cat3->rel1.src1_c;
  408         src1.addr_rel = true;
  409     } else {
  410         src1.reg = (reg_t)(cat3->src1);
  411     }
  412     print_src(ctx, &src1);
  413 
  414     fprintf(ctx->out, ", ");
  415     struct reginfo src2 = {
  416         .reg = (reg_t)cat3->src2,
  417         .full = full,
  418         .r = cat3->repeat ? cat3->src2_r : 0,
  419         .c = cat3->src2_c,
  420         .neg = cat3->src2_neg,
  421     };
  422     print_src(ctx, &src2);
  423 
  424     fprintf(ctx->out, ", ");
  425     struct reginfo src3 = {
  426         .r = cat3->src3_r,
  427         .full = full,
  428         .neg = cat3->src3_neg,
  429     };
  430     if (cat3->c2.src3_c) {
  431         src3.reg = (reg_t)(cat3->c2.src3);
  432         src3.c = true;
  433     } else if (cat3->rel2.src3_rel) {
  434         src3.reg = (reg_t)(cat3->rel2.src3);
  435         src3.c = cat3->rel2.src3_c;
  436         src3.addr_rel = true;
  437     } else {
  438         src3.reg = (reg_t)(cat3->src3);
  439     }
  440     print_src(ctx, &src3);
  441 }
  442 
  443 static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr)
  444 {
  445     instr_cat4_t *cat4 = &instr->cat4;
  446 
  447     fprintf(ctx->out, " ");
  448     print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false);
  449     fprintf(ctx->out, ", ");
  450 
  451     struct reginfo src = {
  452         .r = cat4->src_r,
  453         .im = cat4->src_im,
  454         .full = cat4->full,
  455         .neg = cat4->src_neg,
  456         .abs = cat4->src_abs,
  457     };
  458     if (cat4->c.src_c) {
  459         src.reg = (reg_t)(cat4->c.src);
  460         src.c = true;
  461     } else if (cat4->rel.src_rel) {
  462         src.reg = (reg_t)(cat4->rel.src);
  463         src.c = cat4->rel.src_c;
  464         src.addr_rel = true;
  465     } else {
  466         src.reg = (reg_t)(cat4->src);
  467     }
  468     print_src(ctx, &src);
  469 
  470     if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2))
  471         fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2);
  472 }
  473 
  474 static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr)
  475 {
  476     static const struct {
  477         bool src1, src2, samp, tex;
  478     } info[0x1f] = {
  479             [opc_op(OPC_ISAM)]     = { true,  false, true,  true,  },
  480             [opc_op(OPC_ISAML)]    = { true,  true,  true,  true,  },
  481             [opc_op(OPC_ISAMM)]    = { true,  false, true,  true,  },
  482             [opc_op(OPC_SAM)]      = { true,  false, true,  true,  },
  483             [opc_op(OPC_SAMB)]     = { true,  true,  true,  true,  },
  484             [opc_op(OPC_SAML)]     = { true,  true,  true,  true,  },
  485             [opc_op(OPC_SAMGQ)]    = { true,  false, true,  true,  },
  486             [opc_op(OPC_GETLOD)]   = { true,  false, true,  true,  },
  487             [opc_op(OPC_CONV)]     = { true,  true,  true,  true,  },
  488             [opc_op(OPC_CONVM)]    = { true,  true,  true,  true,  },
  489             [opc_op(OPC_GETSIZE)]  = { true,  false, false, true,  },
  490             [opc_op(OPC_GETBUF)]   = { false, false, false, true,  },
  491             [opc_op(OPC_GETPOS)]   = { true,  false, false, true,  },
  492             [opc_op(OPC_GETINFO)]  = { false, false, false, true,  },
  493             [opc_op(OPC_DSX)]      = { true,  false, false, false, },
  494             [opc_op(OPC_DSY)]      = { true,  false, false, false, },
  495             [opc_op(OPC_GATHER4R)] = { true,  false, true,  true,  },
  496             [opc_op(OPC_GATHER4G)] = { true,  false, true,  true,  },
  497             [opc_op(OPC_GATHER4B)] = { true,  false, true,  true,  },
  498             [opc_op(OPC_GATHER4A)] = { true,  false, true,  true,  },
  499             [opc_op(OPC_SAMGP0)]   = { true,  false, true,  true,  },
  500             [opc_op(OPC_SAMGP1)]   = { true,  false, true,  true,  },
  501             [opc_op(OPC_SAMGP2)]   = { true,  false, true,  true,  },
  502             [opc_op(OPC_SAMGP3)]   = { true,  false, true,  true,  },
  503             [opc_op(OPC_DSXPP_1)]  = { true,  false, false, false, },
  504             [opc_op(OPC_DSYPP_1)]  = { true,  false, false, false, },
  505             [opc_op(OPC_RGETPOS)]  = { true,  false, false, false, },
  506             [opc_op(OPC_RGETINFO)] = { false, false, false, false, },
  507     };
  508 
  509     static const struct {
  510         bool indirect;
  511         bool bindless;
  512         bool use_a1;
  513         bool uniform;
  514     } desc_features[8] = {
  515         [CAT5_NONUNIFORM] = { .indirect = true, },
  516         [CAT5_UNIFORM] = { .indirect = true, .uniform = true, },
  517         [CAT5_BINDLESS_IMM] = { .bindless = true, },
  518         [CAT5_BINDLESS_UNIFORM] = {
  519             .bindless = true,
  520             .indirect = true,
  521             .uniform = true,
  522         },
  523         [CAT5_BINDLESS_NONUNIFORM] = {
  524             .bindless = true,
  525             .indirect = true,
  526         },
  527         [CAT5_BINDLESS_A1_IMM] = {
  528             .bindless = true,
  529             .use_a1 = true,
  530         },
  531         [CAT5_BINDLESS_A1_UNIFORM] = {
  532             .bindless = true,
  533             .indirect = true,
  534             .uniform = true,
  535             .use_a1 = true,
  536         },
  537         [CAT5_BINDLESS_A1_NONUNIFORM] = {
  538             .bindless = true,
  539             .indirect = true,
  540             .use_a1 = true,
  541         },
  542     };
  543 
  544     instr_cat5_t *cat5 = &instr->cat5;
  545     int i;
  546 
  547     bool desc_indirect =
  548         cat5->is_s2en_bindless &&
  549         desc_features[cat5->s2en_bindless.desc_mode].indirect;
  550     bool bindless =
  551         cat5->is_s2en_bindless &&
  552         desc_features[cat5->s2en_bindless.desc_mode].bindless;
  553     bool use_a1 =
  554         cat5->is_s2en_bindless &&
  555         desc_features[cat5->s2en_bindless.desc_mode].use_a1;
  556     bool uniform =
  557         cat5->is_s2en_bindless &&
  558         desc_features[cat5->s2en_bindless.desc_mode].uniform;
  559 
  560     if (cat5->is_3d)   fprintf(ctx->out, ".3d");
  561     if (cat5->is_a)    fprintf(ctx->out, ".a");
  562     if (cat5->is_o)    fprintf(ctx->out, ".o");
  563     if (cat5->is_p)    fprintf(ctx->out, ".p");
  564     if (cat5->is_s)    fprintf(ctx->out, ".s");
  565     if (desc_indirect) fprintf(ctx->out, ".s2en");
  566     if (uniform)       fprintf(ctx->out, ".uniform");
  567 
  568     if (bindless) {
  569         unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo;
  570         fprintf(ctx->out, ".base%d", base);
  571     }
  572 
  573     fprintf(ctx->out, " ");
  574 
  575     switch (_OPC(5, cat5->opc)) {
  576     case OPC_DSXPP_1:
  577     case OPC_DSYPP_1:
  578         break;
  579     default:
  580         fprintf(ctx->out, "(%s)", type[cat5->type]);
  581         break;
  582     }
  583 
  584     fprintf(ctx->out, "(");
  585     for (i = 0; i < 4; i++)
  586         if (cat5->wrmask & (1 << i))
  587             fprintf(ctx->out, "%c", "xyzw"[i]);
  588     fprintf(ctx->out, ")");
  589 
  590     print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false);
  591 
  592     if (info[cat5->opc].src1) {
  593         fprintf(ctx->out, ", ");
  594         struct reginfo src = { .reg = (reg_t)(cat5->src1), .full = cat5->full };
  595         print_src(ctx, &src);
  596     }
  597 
  598     if (cat5->is_o || info[cat5->opc].src2) {
  599         fprintf(ctx->out, ", ");
  600         struct reginfo src = { .reg = (reg_t)(cat5->src2), .full = cat5->full };
  601         print_src(ctx, &src);
  602     }
  603     if (cat5->is_s2en_bindless) {
  604         if (!desc_indirect) {
  605             if (info[cat5->opc].samp) {
  606                 if (use_a1)
  607                     fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3);
  608                 else
  609                     fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf);
  610             }
  611 
  612             if (info[cat5->opc].tex && !use_a1) {
  613                 fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4);
  614             }
  615         }
  616     } else {
  617         if (info[cat5->opc].samp)
  618             fprintf(ctx->out, ", s#%d", cat5->norm.samp);
  619         if (info[cat5->opc].tex)
  620             fprintf(ctx->out, ", t#%d", cat5->norm.tex);
  621     }
  622 
  623     if (desc_indirect) {
  624         fprintf(ctx->out, ", ");
  625         struct reginfo src = { .reg = (reg_t)(cat5->s2en_bindless.src3), .full = bindless };
  626         print_src(ctx, &src);
  627     }
  628 
  629     if (use_a1)
  630         fprintf(ctx->out, ", a1.x");
  631 
  632     if (debug & PRINT_VERBOSE) {
  633         if (cat5->is_s2en_bindless) {
  634             if ((debug & PRINT_VERBOSE) && cat5->s2en_bindless.dummy1)
  635                 fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1);
  636         } else {
  637             if ((debug & PRINT_VERBOSE) && cat5->norm.dummy1)
  638                 fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1);
  639         }
  640     }
  641 }
  642 
  643 static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr)
  644 {
  645     instr_cat6_t *cat6 = &instr->cat6;
  646     char sd = 0, ss = 0;  /* dst/src address space */
  647     bool nodst = false;
  648     struct reginfo dst, src1, src2;
  649     int src1off = 0, dstoff = 0;
  650 
  651     memset(&dst, 0, sizeof(dst));
  652     memset(&src1, 0, sizeof(src1));
  653     memset(&src2, 0, sizeof(src2));
  654 
  655     switch (_OPC(6, cat6->opc)) {
  656     case OPC_RESINFO:
  657     case OPC_RESFMT:
  658         dst.full  = type_size(cat6->type) == 32;
  659         src1.full = type_size(cat6->type) == 32;
  660         src2.full = type_size(cat6->type) == 32;
  661         break;
  662     case OPC_L2G:
  663     case OPC_G2L:
  664         dst.full = true;
  665         src1.full = true;
  666         src2.full = true;
  667         break;
  668     case OPC_STG:
  669     case OPC_STL:
  670     case OPC_STP:
  671     case OPC_STLW:
  672     case OPC_STIB:
  673         dst.full  = type_size(cat6->type) == 32;
  674         src1.full = type_size(cat6->type) == 32;
  675         src2.full = type_size(cat6->type) == 32;
  676         break;
  677     default:
  678         dst.full  = type_size(cat6->type) == 32;
  679         src1.full = true;
  680         src2.full = true;
  681         break;
  682     }
  683 
  684     switch (_OPC(6, cat6->opc)) {
  685     case OPC_PREFETCH:
  686         break;
  687     case OPC_RESINFO:
  688         fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
  689         break;
  690     case OPC_LDGB:
  691         fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
  692         fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
  693         fprintf(ctx->out, ".%s", type[cat6->type]);
  694         fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
  695         break;
  696     case OPC_STGB:
  697     case OPC_STIB:
  698         fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped");
  699         fprintf(ctx->out, ".%dd", cat6->stgb.d + 1);
  700         fprintf(ctx->out, ".%s", type[cat6->type]);
  701         fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1);
  702         break;
  703     case OPC_ATOMIC_ADD:
  704     case OPC_ATOMIC_SUB:
  705     case OPC_ATOMIC_XCHG:
  706     case OPC_ATOMIC_INC:
  707     case OPC_ATOMIC_DEC:
  708     case OPC_ATOMIC_CMPXCHG:
  709     case OPC_ATOMIC_MIN:
  710     case OPC_ATOMIC_MAX:
  711     case OPC_ATOMIC_AND:
  712     case OPC_ATOMIC_OR:
  713     case OPC_ATOMIC_XOR:
  714         ss = cat6->g ? 'g' : 'l';
  715         fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped");
  716         fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1);
  717         fprintf(ctx->out, ".%s", type[cat6->type]);
  718         fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1);
  719         fprintf(ctx->out, ".%c", ss);
  720         break;
  721     default:
  722         dst.im = cat6->g && !cat6->dst_off;
  723         fprintf(ctx->out, ".%s", type[cat6->type]);
  724         break;
  725     }
  726     fprintf(ctx->out, " ");
  727 
  728     switch (_OPC(6, cat6->opc)) {
  729     case OPC_STG:
  730         sd = 'g';
  731         break;
  732     case OPC_STP:
  733         sd = 'p';
  734         break;
  735     case OPC_STL:
  736     case OPC_STLW:
  737         sd = 'l';
  738         break;
  739 
  740     case OPC_LDG:
  741     case OPC_LDC:
  742         ss = 'g';
  743         break;
  744     case OPC_LDP:
  745         ss = 'p';
  746         break;
  747     case OPC_LDL:
  748     case OPC_LDLW:
  749     case OPC_LDLV:
  750         ss = 'l';
  751         break;
  752 
  753     case OPC_L2G:
  754         ss = 'l';
  755         sd = 'g';
  756         break;
  757 
  758     case OPC_G2L:
  759         ss = 'g';
  760         sd = 'l';
  761         break;
  762 
  763     case OPC_PREFETCH:
  764         ss = 'g';
  765         nodst = true;
  766         break;
  767     }
  768 
  769     if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) {
  770         struct reginfo src3;
  771 
  772         memset(&src3, 0, sizeof(src3));
  773 
  774         src1.reg = (reg_t)(cat6->stgb.src1);
  775         src2.reg = (reg_t)(cat6->stgb.src2);
  776         src2.im  = cat6->stgb.src2_im;
  777         src3.reg = (reg_t)(cat6->stgb.src3);
  778         src3.im  = cat6->stgb.src3_im;
  779         src3.full = true;
  780 
  781         fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo);
  782         print_src(ctx, &src1);
  783         fprintf(ctx->out, ", ");
  784         print_src(ctx, &src2);
  785         fprintf(ctx->out, ", ");
  786         print_src(ctx, &src3);
  787 
  788         if (debug & PRINT_VERBOSE)
  789             fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3);
  790 
  791         return;
  792     }
  793 
  794     if (is_atomic(_OPC(6, cat6->opc))) {
  795 
  796         src1.reg = (reg_t)(cat6->ldgb.src1);
  797         src1.im  = cat6->ldgb.src1_im;
  798         src2.reg = (reg_t)(cat6->ldgb.src2);
  799         src2.im  = cat6->ldgb.src2_im;
  800         dst.reg  = (reg_t)(cat6->ldgb.dst);
  801 
  802         print_src(ctx, &dst);
  803         fprintf(ctx->out, ", ");
  804         if (ss == 'g') {
  805             struct reginfo src3;
  806             memset(&src3, 0, sizeof(src3));
  807 
  808             src3.reg = (reg_t)(cat6->ldgb.src3);
  809             src3.full = true;
  810 
  811             /* For images, the ".typed" variant is used and src2 is
  812              * the ivecN coordinates, ie ivec2 for 2d.
  813              *
  814              * For SSBOs, the ".untyped" variant is used and src2 is
  815              * a simple dword offset..  src3 appears to be
  816              * uvec2(offset * 4, 0).  Not sure the point of that.
  817              */
  818 
  819             fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
  820             print_src(ctx, &src1);  /* value */
  821             fprintf(ctx->out, ", ");
  822             print_src(ctx, &src2);  /* offset/coords */
  823             fprintf(ctx->out, ", ");
  824             print_src(ctx, &src3);  /* 64b byte offset.. */
  825 
  826             if (debug & PRINT_VERBOSE) {
  827                 fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0,
  828                         cat6->ldgb.pad3, cat6->ldgb.mustbe0);
  829             }
  830         } else { /* ss == 'l' */
  831             fprintf(ctx->out, "l[");
  832             print_src(ctx, &src1);  /* simple byte offset */
  833             fprintf(ctx->out, "], ");
  834             print_src(ctx, &src2);  /* value */
  835 
  836             if (debug & PRINT_VERBOSE) {
  837                 fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)",
  838                         cat6->ldgb.src3, cat6->ldgb.pad0,
  839                         cat6->ldgb.pad3, cat6->ldgb.mustbe0);
  840             }
  841         }
  842 
  843         return;
  844     } else if (_OPC(6, cat6->opc) == OPC_RESINFO) {
  845         dst.reg  = (reg_t)(cat6->ldgb.dst);
  846 
  847         print_src(ctx, &dst);
  848         fprintf(ctx->out, ", ");
  849         fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo);
  850 
  851         return;
  852     } else if (_OPC(6, cat6->opc) == OPC_LDGB) {
  853 
  854         src1.reg = (reg_t)(cat6->ldgb.src1);
  855         src1.im  = cat6->ldgb.src1_im;
  856         src2.reg = (reg_t)(cat6->ldgb.src2);
  857         src2.im  = cat6->ldgb.src2_im;
  858         dst.reg  = (reg_t)(cat6->ldgb.dst);
  859 
  860         print_src(ctx, &dst);
  861         fprintf(ctx->out, ", ");
  862         fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo);
  863         print_src(ctx, &src1);
  864         fprintf(ctx->out, ", ");
  865         print_src(ctx, &src2);
  866 
  867         if (debug & PRINT_VERBOSE)
  868             fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0);
  869 
  870         return;
  871     } else if (_OPC(6, cat6->opc) == OPC_LDG && cat6->a.src1_im && cat6->a.src2_im) {
  872         struct reginfo src3;
  873 
  874         memset(&src3, 0, sizeof(src3));
  875         src1.reg = (reg_t)(cat6->a.src1);
  876         src2.reg = (reg_t)(cat6->a.src2);
  877         src2.im  = cat6->a.src2_im;
  878         src3.reg = (reg_t)(cat6->a.off);
  879         src3.full = true;
  880         dst.reg  = (reg_t)(cat6->d.dst);
  881 
  882         print_src(ctx, &dst);
  883         fprintf(ctx->out, ", g[");
  884         print_src(ctx, &src1);
  885         fprintf(ctx->out, "+");
  886         print_src(ctx, &src3);
  887         fprintf(ctx->out, "], ");
  888         print_src(ctx, &src2);
  889 
  890         return;
  891     }
  892     if (cat6->dst_off) {
  893         dst.reg = (reg_t)(cat6->c.dst);
  894         dstoff  = cat6->c.off;
  895     } else {
  896         dst.reg = (reg_t)(cat6->d.dst);
  897     }
  898 
  899     if (cat6->src_off) {
  900         src1.reg = (reg_t)(cat6->a.src1);
  901         src1.im  = cat6->a.src1_im;
  902         src2.reg = (reg_t)(cat6->a.src2);
  903         src2.im  = cat6->a.src2_im;
  904         src1off  = cat6->a.off;
  905     } else {
  906         src1.reg = (reg_t)(cat6->b.src1);
  907         src1.im  = cat6->b.src1_im;
  908         src2.reg = (reg_t)(cat6->b.src2);
  909         src2.im  = cat6->b.src2_im;
  910     }
  911 
  912     if (!nodst) {
  913         if (sd)
  914             fprintf(ctx->out, "%c[", sd);
  915         /* note: dst might actually be a src (ie. address to store to) */
  916         print_src(ctx, &dst);
  917         if (cat6->dst_off && cat6->g) {
  918             struct reginfo dstoff_reg = {0};
  919             dstoff_reg.reg = (reg_t) cat6->c.off;
  920             dstoff_reg.full  = true;
  921             fprintf(ctx->out, "+");
  922             print_src(ctx, &dstoff_reg);
  923         } else if (dstoff)
  924             fprintf(ctx->out, "%+d", dstoff);
  925         if (sd)
  926             fprintf(ctx->out, "]");
  927         fprintf(ctx->out, ", ");
  928     }
  929 
  930     if (ss)
  931         fprintf(ctx->out, "%c[", ss);
  932 
  933     /* can have a larger than normal immed, so hack: */
  934     if (src1.im) {
  935         fprintf(ctx->out, "%u", src1.reg.dummy13);
  936     } else {
  937         print_src(ctx, &src1);
  938     }
  939 
  940     if (cat6->src_off && cat6->g)
  941         print_src(ctx, &src2);
  942     else if (src1off)
  943         fprintf(ctx->out, "%+d", src1off);
  944     if (ss)
  945         fprintf(ctx->out, "]");
  946 
  947     switch (_OPC(6, cat6->opc)) {
  948     case OPC_RESINFO:
  949     case OPC_RESFMT:
  950         break;
  951     default:
  952         fprintf(ctx->out, ", ");
  953         print_src(ctx, &src2);
  954         break;
  955     }
  956 }
  957 
  958 static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr)
  959 {
  960     instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
  961     struct reginfo src1, src2, ssbo;
  962     bool uses_type = _OPC(6, cat6->opc) != OPC_LDC;
  963 
  964     static const struct {
  965         bool indirect;
  966         bool bindless;
  967         bool uniform;
  968     } desc_features[8] = {
  969         [CAT6_IMM] = { false },
  970         [CAT6_BINDLESS_IMM] = { .bindless = true, },
  971         [CAT6_BINDLESS_UNIFORM] = {
  972             .bindless = true,
  973             .indirect = true,
  974             .uniform = true,
  975         },
  976         [CAT6_BINDLESS_NONUNIFORM] = {
  977             .bindless = true,
  978             .indirect = true,
  979         },
  980     };
  981 
  982     bool indirect_ssbo = desc_features[cat6->desc_mode].indirect;
  983     bool bindless = desc_features[cat6->desc_mode].bindless;
  984     bool uniform = desc_features[cat6->desc_mode].uniform;
  985     bool type_full = cat6->type != TYPE_U16;
  986 
  987 
  988     memset(&src1, 0, sizeof(src1));
  989     memset(&src2, 0, sizeof(src2));
  990     memset(&ssbo, 0, sizeof(ssbo));
  991 
  992     if (uses_type) {
  993         fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped");
  994         fprintf(ctx->out, ".%dd", cat6->d + 1);
  995         fprintf(ctx->out, ".%s", type[cat6->type]);
  996     } else {
  997         fprintf(ctx->out, ".offset%d", cat6->d);
  998     }
  999     fprintf(ctx->out, ".%u", cat6->type_size + 1);
 1000 
 1001     if (bindless)
 1002         fprintf(ctx->out, ".base%d", cat6->base);
 1003     if (uniform)
 1004         fprintf(ctx->out, ".uniform");
 1005     fprintf(ctx->out, " ");
 1006 
 1007     src2.reg = (reg_t)(cat6->src2);
 1008     src2.full = type_full;
 1009     print_src(ctx, &src2);
 1010     fprintf(ctx->out, ", ");
 1011 
 1012     src1.reg = (reg_t)(cat6->src1);
 1013     src1.full = true; // XXX
 1014     print_src(ctx, &src1);
 1015     fprintf(ctx->out, ", ");
 1016     ssbo.reg = (reg_t)(cat6->ssbo);
 1017     ssbo.im = !indirect_ssbo;
 1018     ssbo.full = true;
 1019     print_src(ctx, &ssbo);
 1020 
 1021     if (debug & PRINT_VERBOSE) {
 1022         fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)",
 1023                 cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5);
 1024     }
 1025 }
 1026 
 1027 static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr)
 1028 {
 1029     if (!is_cat6_legacy(instr, ctx->gpu_id)) {
 1030         print_instr_cat6_a6xx(ctx, instr);
 1031         if (debug & PRINT_VERBOSE)
 1032             fprintf(ctx->out, " NEW");
 1033     } else {
 1034         print_instr_cat6_a3xx(ctx, instr);
 1035         if (debug & PRINT_VERBOSE)
 1036             fprintf(ctx->out, " LEGACY");
 1037     }
 1038 }
 1039 static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr)
 1040 {
 1041     instr_cat7_t *cat7 = &instr->cat7;
 1042 
 1043     if (cat7->g)
 1044         fprintf(ctx->out, ".g");
 1045     if (cat7->l)
 1046         fprintf(ctx->out, ".l");
 1047 
 1048     if (_OPC(7, cat7->opc) == OPC_FENCE) {
 1049         if (cat7->r)
 1050             fprintf(ctx->out, ".r");
 1051         if (cat7->w)
 1052             fprintf(ctx->out, ".w");
 1053     }
 1054 }
 1055 
 1056 /* size of largest OPC field of all the instruction categories: */
 1057 #define NOPC_BITS 6
 1058 
 1059 static const struct opc_info {
 1060     uint16_t cat;
 1061     uint16_t opc;
 1062     const char *name;
 1063     void (*print)(struct disasm_ctx *ctx, instr_t *instr);
 1064 } opcs[1 << (3+NOPC_BITS)] = {
 1065 #define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat }
 1066     /* category 0: */
 1067     OPC(0, OPC_NOP,          nop),
 1068     OPC(0, OPC_BR,           br),
 1069     OPC(0, OPC_JUMP,         jump),
 1070     OPC(0, OPC_CALL,         call),
 1071     OPC(0, OPC_RET,          ret),
 1072     OPC(0, OPC_KILL,         kill),
 1073     OPC(0, OPC_END,          end),
 1074     OPC(0, OPC_EMIT,         emit),
 1075     OPC(0, OPC_CUT,          cut),
 1076     OPC(0, OPC_CHMASK,       chmask),
 1077     OPC(0, OPC_CHSH,         chsh),
 1078     OPC(0, OPC_FLOW_REV,     flow_rev),
 1079     OPC(0, OPC_IF,           if),
 1080     OPC(0, OPC_ELSE,         else),
 1081     OPC(0, OPC_ENDIF,        endif),
 1082 
 1083     /* category 1: */
 1084     OPC(1, OPC_MOV, ),
 1085 
 1086     /* category 2: */
 1087     OPC(2, OPC_ADD_F,        add.f),
 1088     OPC(2, OPC_MIN_F,        min.f),
 1089     OPC(2, OPC_MAX_F,        max.f),
 1090     OPC(2, OPC_MUL_F,        mul.f),
 1091     OPC(2, OPC_SIGN_F,       sign.f),
 1092     OPC(2, OPC_CMPS_F,       cmps.f),
 1093     OPC(2, OPC_ABSNEG_F,     absneg.f),
 1094     OPC(2, OPC_CMPV_F,       cmpv.f),
 1095     OPC(2, OPC_FLOOR_F,      floor.f),
 1096     OPC(2, OPC_CEIL_F,       ceil.f),
 1097     OPC(2, OPC_RNDNE_F,      rndne.f),
 1098     OPC(2, OPC_RNDAZ_F,      rndaz.f),
 1099     OPC(2, OPC_TRUNC_F,      trunc.f),
 1100     OPC(2, OPC_ADD_U,        add.u),
 1101     OPC(2, OPC_ADD_S,        add.s),
 1102     OPC(2, OPC_SUB_U,        sub.u),
 1103     OPC(2, OPC_SUB_S,        sub.s),
 1104     OPC(2, OPC_CMPS_U,       cmps.u),
 1105     OPC(2, OPC_CMPS_S,       cmps.s),
 1106     OPC(2, OPC_MIN_U,        min.u),
 1107     OPC(2, OPC_MIN_S,        min.s),
 1108     OPC(2, OPC_MAX_U,        max.u),
 1109     OPC(2, OPC_MAX_S,        max.s),
 1110     OPC(2, OPC_ABSNEG_S,     absneg.s),
 1111     OPC(2, OPC_AND_B,        and.b),
 1112     OPC(2, OPC_OR_B,         or.b),
 1113     OPC(2, OPC_NOT_B,        not.b),
 1114     OPC(2, OPC_XOR_B,        xor.b),
 1115     OPC(2, OPC_CMPV_U,       cmpv.u),
 1116     OPC(2, OPC_CMPV_S,       cmpv.s),
 1117     OPC(2, OPC_MUL_U24,      mul.u24),
 1118     OPC(2, OPC_MUL_S24,      mul.s24),
 1119     OPC(2, OPC_MULL_U,       mull.u),
 1120     OPC(2, OPC_BFREV_B,      bfrev.b),
 1121     OPC(2, OPC_CLZ_S,        clz.s),
 1122     OPC(2, OPC_CLZ_B,        clz.b),
 1123     OPC(2, OPC_SHL_B,        shl.b),
 1124     OPC(2, OPC_SHR_B,        shr.b),
 1125     OPC(2, OPC_ASHR_B,       ashr.b),
 1126     OPC(2, OPC_BARY_F,       bary.f),
 1127     OPC(2, OPC_MGEN_B,       mgen.b),
 1128     OPC(2, OPC_GETBIT_B,     getbit.b),
 1129     OPC(2, OPC_SETRM,        setrm),
 1130     OPC(2, OPC_CBITS_B,      cbits.b),
 1131     OPC(2, OPC_SHB,          shb),
 1132     OPC(2, OPC_MSAD,         msad),
 1133 
 1134     /* category 3: */
 1135     OPC(3, OPC_MAD_U16,      mad.u16),
 1136     OPC(3, OPC_MADSH_U16,    madsh.u16),
 1137     OPC(3, OPC_MAD_S16,      mad.s16),
 1138     OPC(3, OPC_MADSH_M16,    madsh.m16),
 1139     OPC(3, OPC_MAD_U24,      mad.u24),
 1140     OPC(3, OPC_MAD_S24,      mad.s24),
 1141     OPC(3, OPC_MAD_F16,      mad.f16),
 1142     OPC(3, OPC_MAD_F32,      mad.f32),
 1143     OPC(3, OPC_SEL_B16,      sel.b16),
 1144     OPC(3, OPC_SEL_B32,      sel.b32),
 1145     OPC(3, OPC_SEL_S16,      sel.s16),
 1146     OPC(3, OPC_SEL_S32,      sel.s32),
 1147     OPC(3, OPC_SEL_F16,      sel.f16),
 1148     OPC(3, OPC_SEL_F32,      sel.f32),
 1149     OPC(3, OPC_SAD_S16,      sad.s16),
 1150     OPC(3, OPC_SAD_S32,      sad.s32),
 1151 
 1152     /* category 4: */
 1153     OPC(4, OPC_RCP,          rcp),
 1154     OPC(4, OPC_RSQ,          rsq),
 1155     OPC(4, OPC_LOG2,         log2),
 1156     OPC(4, OPC_EXP2,         exp2),
 1157     OPC(4, OPC_SIN,          sin),
 1158     OPC(4, OPC_COS,          cos),
 1159     OPC(4, OPC_SQRT,         sqrt),
 1160     OPC(4, OPC_HRSQ,         hrsq),
 1161     OPC(4, OPC_HLOG2,        hlog2),
 1162     OPC(4, OPC_HEXP2,        hexp2),
 1163 
 1164     /* category 5: */
 1165     OPC(5, OPC_ISAM,         isam),
 1166     OPC(5, OPC_ISAML,        isaml),
 1167     OPC(5, OPC_ISAMM,        isamm),
 1168     OPC(5, OPC_SAM,          sam),
 1169     OPC(5, OPC_SAMB,         samb),
 1170     OPC(5, OPC_SAML,         saml),
 1171     OPC(5, OPC_SAMGQ,        samgq),
 1172     OPC(5, OPC_GETLOD,       getlod),
 1173     OPC(5, OPC_CONV,         conv),
 1174     OPC(5, OPC_CONVM,        convm),
 1175     OPC(5, OPC_GETSIZE,      getsize),
 1176     OPC(5, OPC_GETBUF,       getbuf),
 1177     OPC(5, OPC_GETPOS,       getpos),
 1178     OPC(5, OPC_GETINFO,      getinfo),
 1179     OPC(5, OPC_DSX,          dsx),
 1180     OPC(5, OPC_DSY,          dsy),
 1181     OPC(5, OPC_GATHER4R,     gather4r),
 1182     OPC(5, OPC_GATHER4G,     gather4g),
 1183     OPC(5, OPC_GATHER4B,     gather4b),
 1184     OPC(5, OPC_GATHER4A,     gather4a),
 1185     OPC(5, OPC_SAMGP0,       samgp0),
 1186     OPC(5, OPC_SAMGP1,       samgp1),
 1187     OPC(5, OPC_SAMGP2,       samgp2),
 1188     OPC(5, OPC_SAMGP3,       samgp3),
 1189     OPC(5, OPC_DSXPP_1,      dsxpp.1),
 1190     OPC(5, OPC_DSYPP_1,      dsypp.1),
 1191     OPC(5, OPC_RGETPOS,      rgetpos),
 1192     OPC(5, OPC_RGETINFO,     rgetinfo),
 1193 
 1194 
 1195     /* category 6: */
 1196     OPC(6, OPC_LDG,          ldg),
 1197     OPC(6, OPC_LDL,          ldl),
 1198     OPC(6, OPC_LDP,          ldp),
 1199     OPC(6, OPC_STG,          stg),
 1200     OPC(6, OPC_STL,          stl),
 1201     OPC(6, OPC_STP,          stp),
 1202     OPC(6, OPC_LDIB,         ldib),
 1203     OPC(6, OPC_G2L,          g2l),
 1204     OPC(6, OPC_L2G,          l2g),
 1205     OPC(6, OPC_PREFETCH,     prefetch),
 1206     OPC(6, OPC_LDLW,         ldlw),
 1207     OPC(6, OPC_STLW,         stlw),
 1208     OPC(6, OPC_RESFMT,       resfmt),
 1209     OPC(6, OPC_RESINFO,      resinfo),
 1210     OPC(6, OPC_ATOMIC_ADD,     atomic.add),
 1211     OPC(6, OPC_ATOMIC_SUB,     atomic.sub),
 1212     OPC(6, OPC_ATOMIC_XCHG,    atomic.xchg),
 1213     OPC(6, OPC_ATOMIC_INC,     atomic.inc),
 1214     OPC(6, OPC_ATOMIC_DEC,     atomic.dec),
 1215     OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
 1216     OPC(6, OPC_ATOMIC_MIN,     atomic.min),
 1217     OPC(6, OPC_ATOMIC_MAX,     atomic.max),
 1218     OPC(6, OPC_ATOMIC_AND,     atomic.and),
 1219     OPC(6, OPC_ATOMIC_OR,      atomic.or),
 1220     OPC(6, OPC_ATOMIC_XOR,     atomic.xor),
 1221     OPC(6, OPC_LDGB,         ldgb),
 1222     OPC(6, OPC_STGB,         stgb),
 1223     OPC(6, OPC_STIB,         stib),
 1224     OPC(6, OPC_LDC,          ldc),
 1225     OPC(6, OPC_LDLV,         ldlv),
 1226 
 1227     OPC(7, OPC_BAR,          bar),
 1228     OPC(7, OPC_FENCE,        fence),
 1229 
 1230 #undef OPC
 1231 };
 1232 
 1233 #define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))
 1234 
 1235 // XXX hack.. probably should move this table somewhere common:
 1236 #include "ir3.h"
 1237 const char *ir3_instr_name(struct ir3_instruction *instr)
 1238 {
 1239     if (opc_cat(instr->opc) == -1) return "??meta??";
 1240     return opcs[instr->opc].name;
 1241 }
 1242 
 1243 static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr)
 1244 {
 1245     const char *name = GETINFO(instr)->name;
 1246     uint32_t opc = instr_opc(instr, ctx->gpu_id);
 1247 
 1248     if (name) {
 1249         fprintf(ctx->out, "%s", name);
 1250         GETINFO(instr)->print(ctx, instr);
 1251     } else {
 1252         fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc);
 1253 
 1254         switch (instr->opc_cat) {
 1255         case 0: print_instr_cat0(ctx, instr); break;
 1256         case 1: print_instr_cat1(ctx, instr); break;
 1257         case 2: print_instr_cat2(ctx, instr); break;
 1258         case 3: print_instr_cat3(ctx, instr); break;
 1259         case 4: print_instr_cat4(ctx, instr); break;
 1260         case 5: print_instr_cat5(ctx, instr); break;
 1261         case 6: print_instr_cat6(ctx, instr); break;
 1262         case 7: print_instr_cat7(ctx, instr); break;
 1263         }
 1264     }
 1265 }
 1266 
 1267 static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n)
 1268 {
 1269     instr_t *instr = (instr_t *)dwords;
 1270     uint32_t opc = instr_opc(instr, ctx->gpu_id);
 1271     unsigned nop = 0;
 1272     unsigned cycles = ctx->instructions;
 1273 
 1274     if (debug & PRINT_VERBOSE) {
 1275         fprintf(ctx->out, "%s%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
 1276                 n, cycles++, dwords[1], dwords[0]);
 1277     }
 1278 
 1279     /* NOTE: order flags are printed is a bit fugly.. but for now I
 1280      * try to match the order in llvm-a3xx disassembler for easy
 1281      * diff'ing..
 1282      */
 1283 
 1284     ctx->repeat = instr_repeat(instr);
 1285     ctx->instructions += 1 + ctx->repeat;
 1286 
 1287     if (instr->sync) {
 1288         fprintf(ctx->out, "(sy)");
 1289     }
 1290     if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) {
 1291         fprintf(ctx->out, "(ss)");
 1292     }
 1293     if (instr->jmp_tgt)
 1294         fprintf(ctx->out, "(jp)");
 1295     if (instr_sat(instr))
 1296         fprintf(ctx->out, "(sat)");
 1297     if (ctx->repeat)
 1298         fprintf(ctx->out, "(rpt%d)", ctx->repeat);
 1299     else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r))
 1300         nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r;
 1301     else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r))
 1302         nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r;
 1303     ctx->instructions += nop;
 1304     if (nop)
 1305         fprintf(ctx->out, "(nop%d) ", nop);
 1306 
 1307     if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4)))
 1308         fprintf(ctx->out, "(ul)");
 1309 
 1310     print_single_instr(ctx, instr);
 1311     fprintf(ctx->out, "\n");
 1312 
 1313     if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) {
 1314         int i;
 1315         for (i = 0; i < nop; i++) {
 1316             if (debug & PRINT_VERBOSE) {
 1317                 fprintf(ctx->out, "%s%04d:%04d[                   ] ",
 1318                         levels[ctx->level], n, cycles++);
 1319             }
 1320             fprintf(ctx->out, "nop\n");
 1321         }
 1322         for (i = 0; i < ctx->repeat; i++) {
 1323             ctx->repeatidx = i + 1;
 1324             if (debug & PRINT_VERBOSE) {
 1325                 fprintf(ctx->out, "%s%04d:%04d[                   ] ",
 1326                         levels[ctx->level], n, cycles++);
 1327             }
 1328             print_single_instr(ctx, instr);
 1329             fprintf(ctx->out, "\n");
 1330         }
 1331         ctx->repeatidx = 0;
 1332     }
 1333 
 1334     return (instr->opc_cat == 0) && (opc == OPC_END);
 1335 }
 1336 
 1337 int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id)
 1338 {
 1339     struct disasm_ctx ctx;
 1340     int i;
 1341     int nop_count = 0;
 1342 
 1343     assert((sizedwords % 2) == 0);
 1344 
 1345     memset(&ctx, 0, sizeof(ctx));
 1346     ctx.out = out;
 1347     ctx.level = level;
 1348     ctx.gpu_id = gpu_id;
 1349 
 1350     for (i = 0; i < sizedwords; i += 2) {
 1351         print_instr(&ctx, &dwords[i], i/2);
 1352         if (dwords[i] == 0 && dwords[i + 1] == 0)
 1353             nop_count++;
 1354         else
 1355             nop_count = 0;
 1356         if (nop_count > 3)
 1357             break;
 1358     }
 1359 
 1360     return 0;
 1361 }