"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/broadcom/qpu/qpu_instr.c" (16 Sep 2020, 28531 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "qpu_instr.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright © 2016 Broadcom
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  */
   23 
   24 #include <stdlib.h>
   25 #include "util/macros.h"
   26 #include "broadcom/common/v3d_device_info.h"
   27 #include "qpu_instr.h"
   28 
   29 const char *
   30 v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr)
   31 {
   32         static const char *waddr_magic[] = {
   33                 [V3D_QPU_WADDR_R0] = "r0",
   34                 [V3D_QPU_WADDR_R1] = "r1",
   35                 [V3D_QPU_WADDR_R2] = "r2",
   36                 [V3D_QPU_WADDR_R3] = "r3",
   37                 [V3D_QPU_WADDR_R4] = "r4",
   38                 [V3D_QPU_WADDR_R5] = "r5",
   39                 [V3D_QPU_WADDR_NOP] = "-",
   40                 [V3D_QPU_WADDR_TLB] = "tlb",
   41                 [V3D_QPU_WADDR_TLBU] = "tlbu",
   42                 [V3D_QPU_WADDR_TMU] = "tmu",
   43                 [V3D_QPU_WADDR_TMUL] = "tmul",
   44                 [V3D_QPU_WADDR_TMUD] = "tmud",
   45                 [V3D_QPU_WADDR_TMUA] = "tmua",
   46                 [V3D_QPU_WADDR_TMUAU] = "tmuau",
   47                 [V3D_QPU_WADDR_VPM] = "vpm",
   48                 [V3D_QPU_WADDR_VPMU] = "vpmu",
   49                 [V3D_QPU_WADDR_SYNC] = "sync",
   50                 [V3D_QPU_WADDR_SYNCU] = "syncu",
   51                 [V3D_QPU_WADDR_SYNCB] = "syncb",
   52                 [V3D_QPU_WADDR_RECIP] = "recip",
   53                 [V3D_QPU_WADDR_RSQRT] = "rsqrt",
   54                 [V3D_QPU_WADDR_EXP] = "exp",
   55                 [V3D_QPU_WADDR_LOG] = "log",
   56                 [V3D_QPU_WADDR_SIN] = "sin",
   57                 [V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
   58                 [V3D_QPU_WADDR_TMUC] = "tmuc",
   59                 [V3D_QPU_WADDR_TMUS] = "tmus",
   60                 [V3D_QPU_WADDR_TMUT] = "tmut",
   61                 [V3D_QPU_WADDR_TMUR] = "tmur",
   62                 [V3D_QPU_WADDR_TMUI] = "tmui",
   63                 [V3D_QPU_WADDR_TMUB] = "tmub",
   64                 [V3D_QPU_WADDR_TMUDREF] = "tmudref",
   65                 [V3D_QPU_WADDR_TMUOFF] = "tmuoff",
   66                 [V3D_QPU_WADDR_TMUSCM] = "tmuscm",
   67                 [V3D_QPU_WADDR_TMUSF] = "tmusf",
   68                 [V3D_QPU_WADDR_TMUSLOD] = "tmuslod",
   69                 [V3D_QPU_WADDR_TMUHS] = "tmuhs",
   70                 [V3D_QPU_WADDR_TMUHSCM] = "tmuscm",
   71                 [V3D_QPU_WADDR_TMUHSF] = "tmuhsf",
   72                 [V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",
   73                 [V3D_QPU_WADDR_R5REP] = "r5rep",
   74         };
   75 
   76         return waddr_magic[waddr];
   77 }
   78 
   79 const char *
   80 v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
   81 {
   82         static const char *op_names[] = {
   83                 [V3D_QPU_A_FADD] = "fadd",
   84                 [V3D_QPU_A_FADDNF] = "faddnf",
   85                 [V3D_QPU_A_VFPACK] = "vfpack",
   86                 [V3D_QPU_A_ADD] = "add",
   87                 [V3D_QPU_A_SUB] = "sub",
   88                 [V3D_QPU_A_FSUB] = "fsub",
   89                 [V3D_QPU_A_MIN] = "min",
   90                 [V3D_QPU_A_MAX] = "max",
   91                 [V3D_QPU_A_UMIN] = "umin",
   92                 [V3D_QPU_A_UMAX] = "umax",
   93                 [V3D_QPU_A_SHL] = "shl",
   94                 [V3D_QPU_A_SHR] = "shr",
   95                 [V3D_QPU_A_ASR] = "asr",
   96                 [V3D_QPU_A_ROR] = "ror",
   97                 [V3D_QPU_A_FMIN] = "fmin",
   98                 [V3D_QPU_A_FMAX] = "fmax",
   99                 [V3D_QPU_A_VFMIN] = "vfmin",
  100                 [V3D_QPU_A_AND] = "and",
  101                 [V3D_QPU_A_OR] = "or",
  102                 [V3D_QPU_A_XOR] = "xor",
  103                 [V3D_QPU_A_VADD] = "vadd",
  104                 [V3D_QPU_A_VSUB] = "vsub",
  105                 [V3D_QPU_A_NOT] = "not",
  106                 [V3D_QPU_A_NEG] = "neg",
  107                 [V3D_QPU_A_FLAPUSH] = "flapush",
  108                 [V3D_QPU_A_FLBPUSH] = "flbpush",
  109                 [V3D_QPU_A_FLPOP] = "flpop",
  110                 [V3D_QPU_A_RECIP] = "recip",
  111                 [V3D_QPU_A_SETMSF] = "setmsf",
  112                 [V3D_QPU_A_SETREVF] = "setrevf",
  113                 [V3D_QPU_A_NOP] = "nop",
  114                 [V3D_QPU_A_TIDX] = "tidx",
  115                 [V3D_QPU_A_EIDX] = "eidx",
  116                 [V3D_QPU_A_LR] = "lr",
  117                 [V3D_QPU_A_VFLA] = "vfla",
  118                 [V3D_QPU_A_VFLNA] = "vflna",
  119                 [V3D_QPU_A_VFLB] = "vflb",
  120                 [V3D_QPU_A_VFLNB] = "vflnb",
  121                 [V3D_QPU_A_FXCD] = "fxcd",
  122                 [V3D_QPU_A_XCD] = "xcd",
  123                 [V3D_QPU_A_FYCD] = "fycd",
  124                 [V3D_QPU_A_YCD] = "ycd",
  125                 [V3D_QPU_A_MSF] = "msf",
  126                 [V3D_QPU_A_REVF] = "revf",
  127                 [V3D_QPU_A_VDWWT] = "vdwwt",
  128                 [V3D_QPU_A_IID] = "iid",
  129                 [V3D_QPU_A_SAMPID] = "sampid",
  130                 [V3D_QPU_A_BARRIERID] = "barrierid",
  131                 [V3D_QPU_A_TMUWT] = "tmuwt",
  132                 [V3D_QPU_A_VPMSETUP] = "vpmsetup",
  133                 [V3D_QPU_A_VPMWT] = "vpmwt",
  134                 [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
  135                 [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
  136                 [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
  137                 [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
  138                 [V3D_QPU_A_LDVPMP] = "ldvpmp",
  139                 [V3D_QPU_A_RSQRT] = "rsqrt",
  140                 [V3D_QPU_A_EXP] = "exp",
  141                 [V3D_QPU_A_LOG] = "log",
  142                 [V3D_QPU_A_SIN] = "sin",
  143                 [V3D_QPU_A_RSQRT2] = "rsqrt2",
  144                 [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
  145                 [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
  146                 [V3D_QPU_A_FCMP] = "fcmp",
  147                 [V3D_QPU_A_VFMAX] = "vfmax",
  148                 [V3D_QPU_A_FROUND] = "fround",
  149                 [V3D_QPU_A_FTOIN] = "ftoin",
  150                 [V3D_QPU_A_FTRUNC] = "ftrunc",
  151                 [V3D_QPU_A_FTOIZ] = "ftoiz",
  152                 [V3D_QPU_A_FFLOOR] = "ffloor",
  153                 [V3D_QPU_A_FTOUZ] = "ftouz",
  154                 [V3D_QPU_A_FCEIL] = "fceil",
  155                 [V3D_QPU_A_FTOC] = "ftoc",
  156                 [V3D_QPU_A_FDX] = "fdx",
  157                 [V3D_QPU_A_FDY] = "fdy",
  158                 [V3D_QPU_A_STVPMV] = "stvpmv",
  159                 [V3D_QPU_A_STVPMD] = "stvpmd",
  160                 [V3D_QPU_A_STVPMP] = "stvpmp",
  161                 [V3D_QPU_A_ITOF] = "itof",
  162                 [V3D_QPU_A_CLZ] = "clz",
  163                 [V3D_QPU_A_UTOF] = "utof",
  164         };
  165 
  166         if (op >= ARRAY_SIZE(op_names))
  167                 return NULL;
  168 
  169         return op_names[op];
  170 }
  171 
  172 const char *
  173 v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
  174 {
  175         static const char *op_names[] = {
  176                 [V3D_QPU_M_ADD] = "add",
  177                 [V3D_QPU_M_SUB] = "sub",
  178                 [V3D_QPU_M_UMUL24] = "umul24",
  179                 [V3D_QPU_M_VFMUL] = "vfmul",
  180                 [V3D_QPU_M_SMUL24] = "smul24",
  181                 [V3D_QPU_M_MULTOP] = "multop",
  182                 [V3D_QPU_M_FMOV] = "fmov",
  183                 [V3D_QPU_M_MOV] = "mov",
  184                 [V3D_QPU_M_NOP] = "nop",
  185                 [V3D_QPU_M_FMUL] = "fmul",
  186         };
  187 
  188         if (op >= ARRAY_SIZE(op_names))
  189                 return NULL;
  190 
  191         return op_names[op];
  192 }
  193 
  194 const char *
  195 v3d_qpu_cond_name(enum v3d_qpu_cond cond)
  196 {
  197         switch (cond) {
  198         case V3D_QPU_COND_NONE:
  199                 return "";
  200         case V3D_QPU_COND_IFA:
  201                 return ".ifa";
  202         case V3D_QPU_COND_IFB:
  203                 return ".ifb";
  204         case V3D_QPU_COND_IFNA:
  205                 return ".ifna";
  206         case V3D_QPU_COND_IFNB:
  207                 return ".ifnb";
  208         default:
  209                 unreachable("bad cond value");
  210         }
  211 }
  212 
  213 const char *
  214 v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
  215 {
  216         switch (cond) {
  217         case V3D_QPU_BRANCH_COND_ALWAYS:
  218                 return "";
  219         case V3D_QPU_BRANCH_COND_A0:
  220                 return ".a0";
  221         case V3D_QPU_BRANCH_COND_NA0:
  222                 return ".na0";
  223         case V3D_QPU_BRANCH_COND_ALLA:
  224                 return ".alla";
  225         case V3D_QPU_BRANCH_COND_ANYNA:
  226                 return ".anyna";
  227         case V3D_QPU_BRANCH_COND_ANYA:
  228                 return ".anya";
  229         case V3D_QPU_BRANCH_COND_ALLNA:
  230                 return ".allna";
  231         default:
  232                 unreachable("bad branch cond value");
  233         }
  234 }
  235 
  236 const char *
  237 v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
  238 {
  239         switch (msfign) {
  240         case V3D_QPU_MSFIGN_NONE:
  241                 return "";
  242         case V3D_QPU_MSFIGN_P:
  243                 return "p";
  244         case V3D_QPU_MSFIGN_Q:
  245                 return "q";
  246         default:
  247                 unreachable("bad branch cond value");
  248         }
  249 }
  250 
  251 const char *
  252 v3d_qpu_pf_name(enum v3d_qpu_pf pf)
  253 {
  254         switch (pf) {
  255         case V3D_QPU_PF_NONE:
  256                 return "";
  257         case V3D_QPU_PF_PUSHZ:
  258                 return ".pushz";
  259         case V3D_QPU_PF_PUSHN:
  260                 return ".pushn";
  261         case V3D_QPU_PF_PUSHC:
  262                 return ".pushc";
  263         default:
  264                 unreachable("bad pf value");
  265         }
  266 }
  267 
  268 const char *
  269 v3d_qpu_uf_name(enum v3d_qpu_uf uf)
  270 {
  271         switch (uf) {
  272         case V3D_QPU_UF_NONE:
  273                 return "";
  274         case V3D_QPU_UF_ANDZ:
  275                 return ".andz";
  276         case V3D_QPU_UF_ANDNZ:
  277                 return ".andnz";
  278         case V3D_QPU_UF_NORZ:
  279                 return ".norz";
  280         case V3D_QPU_UF_NORNZ:
  281                 return ".nornz";
  282         case V3D_QPU_UF_ANDN:
  283                 return ".andn";
  284         case V3D_QPU_UF_ANDNN:
  285                 return ".andnn";
  286         case V3D_QPU_UF_NORN:
  287                 return ".norn";
  288         case V3D_QPU_UF_NORNN:
  289                 return ".nornn";
  290         case V3D_QPU_UF_ANDC:
  291                 return ".andc";
  292         case V3D_QPU_UF_ANDNC:
  293                 return ".andnc";
  294         case V3D_QPU_UF_NORC:
  295                 return ".norc";
  296         case V3D_QPU_UF_NORNC:
  297                 return ".nornc";
  298         default:
  299                 unreachable("bad pf value");
  300         }
  301 }
  302 
  303 const char *
  304 v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
  305 {
  306         switch (pack) {
  307         case V3D_QPU_PACK_NONE:
  308                 return "";
  309         case V3D_QPU_PACK_L:
  310                 return ".l";
  311         case V3D_QPU_PACK_H:
  312                 return ".h";
  313         default:
  314                 unreachable("bad pack value");
  315         }
  316 }
  317 
  318 const char *
  319 v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
  320 {
  321         switch (unpack) {
  322         case V3D_QPU_UNPACK_NONE:
  323                 return "";
  324         case V3D_QPU_UNPACK_L:
  325                 return ".l";
  326         case V3D_QPU_UNPACK_H:
  327                 return ".h";
  328         case V3D_QPU_UNPACK_ABS:
  329                 return ".abs";
  330         case V3D_QPU_UNPACK_REPLICATE_32F_16:
  331                 return ".ff";
  332         case V3D_QPU_UNPACK_REPLICATE_L_16:
  333                 return ".ll";
  334         case V3D_QPU_UNPACK_REPLICATE_H_16:
  335                 return ".hh";
  336         case V3D_QPU_UNPACK_SWAP_16:
  337                 return ".swp";
  338         default:
  339                 unreachable("bad unpack value");
  340         }
  341 }
  342 
  343 #define D   1
  344 #define A   2
  345 #define B   4
  346 static const uint8_t add_op_args[] = {
  347         [V3D_QPU_A_FADD] = D | A | B,
  348         [V3D_QPU_A_FADDNF] = D | A | B,
  349         [V3D_QPU_A_VFPACK] = D | A | B,
  350         [V3D_QPU_A_ADD] = D | A | B,
  351         [V3D_QPU_A_VFPACK] = D | A | B,
  352         [V3D_QPU_A_SUB] = D | A | B,
  353         [V3D_QPU_A_VFPACK] = D | A | B,
  354         [V3D_QPU_A_FSUB] = D | A | B,
  355         [V3D_QPU_A_MIN] = D | A | B,
  356         [V3D_QPU_A_MAX] = D | A | B,
  357         [V3D_QPU_A_UMIN] = D | A | B,
  358         [V3D_QPU_A_UMAX] = D | A | B,
  359         [V3D_QPU_A_SHL] = D | A | B,
  360         [V3D_QPU_A_SHR] = D | A | B,
  361         [V3D_QPU_A_ASR] = D | A | B,
  362         [V3D_QPU_A_ROR] = D | A | B,
  363         [V3D_QPU_A_FMIN] = D | A | B,
  364         [V3D_QPU_A_FMAX] = D | A | B,
  365         [V3D_QPU_A_VFMIN] = D | A | B,
  366 
  367         [V3D_QPU_A_AND] = D | A | B,
  368         [V3D_QPU_A_OR] = D | A | B,
  369         [V3D_QPU_A_XOR] = D | A | B,
  370 
  371         [V3D_QPU_A_VADD] = D | A | B,
  372         [V3D_QPU_A_VSUB] = D | A | B,
  373         [V3D_QPU_A_NOT] = D | A,
  374         [V3D_QPU_A_NEG] = D | A,
  375         [V3D_QPU_A_FLAPUSH] = D | A,
  376         [V3D_QPU_A_FLBPUSH] = D | A,
  377         [V3D_QPU_A_FLPOP] = D | A,
  378         [V3D_QPU_A_RECIP] = D | A,
  379         [V3D_QPU_A_SETMSF] = D | A,
  380         [V3D_QPU_A_SETREVF] = D | A,
  381         [V3D_QPU_A_NOP] = 0,
  382         [V3D_QPU_A_TIDX] = D,
  383         [V3D_QPU_A_EIDX] = D,
  384         [V3D_QPU_A_LR] = D,
  385         [V3D_QPU_A_VFLA] = D,
  386         [V3D_QPU_A_VFLNA] = D,
  387         [V3D_QPU_A_VFLB] = D,
  388         [V3D_QPU_A_VFLNB] = D,
  389 
  390         [V3D_QPU_A_FXCD] = D,
  391         [V3D_QPU_A_XCD] = D,
  392         [V3D_QPU_A_FYCD] = D,
  393         [V3D_QPU_A_YCD] = D,
  394 
  395         [V3D_QPU_A_MSF] = D,
  396         [V3D_QPU_A_REVF] = D,
  397         [V3D_QPU_A_VDWWT] = D,
  398         [V3D_QPU_A_IID] = D,
  399         [V3D_QPU_A_SAMPID] = D,
  400         [V3D_QPU_A_BARRIERID] = D,
  401         [V3D_QPU_A_TMUWT] = D,
  402         [V3D_QPU_A_VPMWT] = D,
  403 
  404         [V3D_QPU_A_VPMSETUP] = D | A,
  405 
  406         [V3D_QPU_A_LDVPMV_IN] = D | A,
  407         [V3D_QPU_A_LDVPMV_OUT] = D | A,
  408         [V3D_QPU_A_LDVPMD_IN] = D | A,
  409         [V3D_QPU_A_LDVPMD_OUT] = D | A,
  410         [V3D_QPU_A_LDVPMP] = D | A,
  411         [V3D_QPU_A_RSQRT] = D | A,
  412         [V3D_QPU_A_EXP] = D | A,
  413         [V3D_QPU_A_LOG] = D | A,
  414         [V3D_QPU_A_SIN] = D | A,
  415         [V3D_QPU_A_RSQRT2] = D | A,
  416         [V3D_QPU_A_LDVPMG_IN] = D | A | B,
  417         [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
  418 
  419         /* FIXME: MOVABSNEG */
  420 
  421         [V3D_QPU_A_FCMP] = D | A | B,
  422         [V3D_QPU_A_VFMAX] = D | A | B,
  423 
  424         [V3D_QPU_A_FROUND] = D | A,
  425         [V3D_QPU_A_FTOIN] = D | A,
  426         [V3D_QPU_A_FTRUNC] = D | A,
  427         [V3D_QPU_A_FTOIZ] = D | A,
  428         [V3D_QPU_A_FFLOOR] = D | A,
  429         [V3D_QPU_A_FTOUZ] = D | A,
  430         [V3D_QPU_A_FCEIL] = D | A,
  431         [V3D_QPU_A_FTOC] = D | A,
  432 
  433         [V3D_QPU_A_FDX] = D | A,
  434         [V3D_QPU_A_FDY] = D | A,
  435 
  436         [V3D_QPU_A_STVPMV] = A | B,
  437         [V3D_QPU_A_STVPMD] = A | B,
  438         [V3D_QPU_A_STVPMP] = A | B,
  439 
  440         [V3D_QPU_A_ITOF] = D | A,
  441         [V3D_QPU_A_CLZ] = D | A,
  442         [V3D_QPU_A_UTOF] = D | A,
  443 };
  444 
  445 static const uint8_t mul_op_args[] = {
  446         [V3D_QPU_M_ADD] = D | A | B,
  447         [V3D_QPU_M_SUB] = D | A | B,
  448         [V3D_QPU_M_UMUL24] = D | A | B,
  449         [V3D_QPU_M_VFMUL] = D | A | B,
  450         [V3D_QPU_M_SMUL24] = D | A | B,
  451         [V3D_QPU_M_MULTOP] = D | A | B,
  452         [V3D_QPU_M_FMOV] = D | A,
  453         [V3D_QPU_M_NOP] = 0,
  454         [V3D_QPU_M_MOV] = D | A,
  455         [V3D_QPU_M_FMUL] = D | A | B,
  456 };
  457 
  458 bool
  459 v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
  460 {
  461         assert(op < ARRAY_SIZE(add_op_args));
  462 
  463         return add_op_args[op] & D;
  464 }
  465 
  466 bool
  467 v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
  468 {
  469         assert(op < ARRAY_SIZE(mul_op_args));
  470 
  471         return mul_op_args[op] & D;
  472 }
  473 
  474 int
  475 v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
  476 {
  477         assert(op < ARRAY_SIZE(add_op_args));
  478 
  479         uint8_t args = add_op_args[op];
  480         if (args & B)
  481                 return 2;
  482         else if (args & A)
  483                 return 1;
  484         else
  485                 return 0;
  486 }
  487 
  488 int
  489 v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
  490 {
  491         assert(op < ARRAY_SIZE(mul_op_args));
  492 
  493         uint8_t args = mul_op_args[op];
  494         if (args & B)
  495                 return 2;
  496         else if (args & A)
  497                 return 1;
  498         else
  499                 return 0;
  500 }
  501 
  502 enum v3d_qpu_cond
  503 v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
  504 {
  505         switch (cond) {
  506         case V3D_QPU_COND_IFA:
  507                 return V3D_QPU_COND_IFNA;
  508         case V3D_QPU_COND_IFNA:
  509                 return V3D_QPU_COND_IFA;
  510         case V3D_QPU_COND_IFB:
  511                 return V3D_QPU_COND_IFNB;
  512         case V3D_QPU_COND_IFNB:
  513                 return V3D_QPU_COND_IFB;
  514         default:
  515                 unreachable("Non-invertible cond");
  516         }
  517 }
  518 
  519 bool
  520 v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
  521 {
  522         switch (waddr) {
  523         case V3D_QPU_WADDR_RECIP:
  524         case V3D_QPU_WADDR_RSQRT:
  525         case V3D_QPU_WADDR_EXP:
  526         case V3D_QPU_WADDR_LOG:
  527         case V3D_QPU_WADDR_SIN:
  528         case V3D_QPU_WADDR_RSQRT2:
  529                 return true;
  530         default:
  531                 return false;
  532         }
  533 }
  534 
  535 bool
  536 v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr)
  537 {
  538         /* XXX: WADDR_TMU changed to UNIFA on 4.x */
  539         return ((waddr >= V3D_QPU_WADDR_TMU &&
  540                  waddr <= V3D_QPU_WADDR_TMUAU) ||
  541                 (waddr >= V3D_QPU_WADDR_TMUC &&
  542                  waddr <= V3D_QPU_WADDR_TMUHSLOD));
  543 }
  544 
  545 bool
  546 v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
  547 {
  548         return (inst->sig.ldtmu ||
  549                 (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
  550                  inst->alu.add.op == V3D_QPU_A_TMUWT));
  551 }
  552 
  553 bool
  554 v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
  555 {
  556         return (waddr == V3D_QPU_WADDR_TLB ||
  557                 waddr == V3D_QPU_WADDR_TLBU);
  558 }
  559 
  560 bool
  561 v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
  562 {
  563         return (waddr == V3D_QPU_WADDR_VPM ||
  564                 waddr == V3D_QPU_WADDR_VPMU);
  565 }
  566 
  567 bool
  568 v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
  569 {
  570         return (waddr == V3D_QPU_WADDR_SYNC ||
  571                 waddr == V3D_QPU_WADDR_SYNCB ||
  572                 waddr == V3D_QPU_WADDR_SYNCU);
  573 }
  574 
  575 bool
  576 v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)
  577 {
  578         switch (waddr) {
  579         case V3D_QPU_WADDR_VPMU:
  580         case V3D_QPU_WADDR_TLBU:
  581         case V3D_QPU_WADDR_TMUAU:
  582         case V3D_QPU_WADDR_SYNCU:
  583                 return true;
  584         default:
  585                 return false;
  586         }
  587 }
  588 
  589 static bool
  590 v3d_qpu_add_op_reads_vpm(enum  v3d_qpu_add_op op)
  591 {
  592         switch (op) {
  593         case V3D_QPU_A_VPMSETUP:
  594         case V3D_QPU_A_VPMWT:
  595         case V3D_QPU_A_LDVPMV_IN:
  596         case V3D_QPU_A_LDVPMV_OUT:
  597         case V3D_QPU_A_LDVPMD_IN:
  598         case V3D_QPU_A_LDVPMD_OUT:
  599         case V3D_QPU_A_LDVPMP:
  600         case V3D_QPU_A_LDVPMG_IN:
  601         case V3D_QPU_A_LDVPMG_OUT:
  602                 return true;
  603         default:
  604                 return false;
  605         }
  606 }
  607 
  608 static bool
  609 v3d_qpu_add_op_writes_vpm(enum  v3d_qpu_add_op op)
  610 {
  611         switch (op) {
  612         case V3D_QPU_A_VPMSETUP:
  613         case V3D_QPU_A_VPMWT:
  614         case V3D_QPU_A_STVPMV:
  615         case V3D_QPU_A_STVPMD:
  616         case V3D_QPU_A_STVPMP:
  617                 return true;
  618         default:
  619                 return false;
  620         }
  621 }
  622 
  623 bool
  624 v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
  625 {
  626         if (inst->sig.ldtlb ||
  627             inst->sig.ldtlbu)
  628                 return true;
  629 
  630         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
  631                 if (inst->alu.add.magic_write &&
  632                     v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {
  633                         return true;
  634                 }
  635 
  636                 if (inst->alu.mul.magic_write &&
  637                     v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {
  638                         return true;
  639                 }
  640         }
  641 
  642         return false;
  643 }
  644 
  645 bool
  646 v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
  647 {
  648         if (v3d_qpu_instr_is_sfu(inst))
  649                 return true;
  650 
  651         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
  652                 if (inst->alu.add.magic_write &&
  653                     v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
  654                         return true;
  655                 }
  656 
  657                 if (inst->alu.mul.magic_write &&
  658                     v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
  659                         return true;
  660                 }
  661         }
  662 
  663         return false;
  664 }
  665 
  666 bool
  667 v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
  668 {
  669         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
  670                 switch (inst->alu.add.op) {
  671                 case V3D_QPU_A_RECIP:
  672                 case V3D_QPU_A_RSQRT:
  673                 case V3D_QPU_A_EXP:
  674                 case V3D_QPU_A_LOG:
  675                 case V3D_QPU_A_SIN:
  676                 case V3D_QPU_A_RSQRT2:
  677                         return true;
  678                 default:
  679                         return false;
  680                 }
  681         }
  682         return false;
  683 }
  684 
  685 bool
  686 v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst)
  687 {
  688         return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
  689                 ((inst->alu.add.magic_write &&
  690                   v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr)) ||
  691                  (inst->alu.mul.magic_write &&
  692                   v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr))));
  693 }
  694 
  695 bool
  696 v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst)
  697 {
  698         return v3d_qpu_writes_tmu(inst) &&
  699                (!inst->alu.add.magic_write ||
  700                 inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
  701                (!inst->alu.mul.magic_write ||
  702                 inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);
  703 }
  704 
  705 bool
  706 v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)
  707 {
  708         if (inst->sig.ldvpm)
  709                 return true;
  710 
  711         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
  712                 if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))
  713                         return true;
  714         }
  715 
  716         return false;
  717 }
  718 
  719 bool
  720 v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
  721 {
  722         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
  723                 if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))
  724                         return true;
  725 
  726                 if (inst->alu.add.magic_write &&
  727                     v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
  728                         return true;
  729                 }
  730 
  731                 if (inst->alu.mul.magic_write &&
  732                     v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
  733                         return true;
  734                 }
  735         }
  736 
  737         return false;
  738 }
  739 
  740 bool
  741 v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
  742 {
  743         return v3d_qpu_reads_vpm(inst) || v3d_qpu_writes_vpm(inst);
  744 }
  745 
  746 bool
  747 v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
  748                   const struct v3d_qpu_instr *inst)
  749 {
  750         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
  751                 if (inst->alu.add.magic_write &&
  752                     inst->alu.add.waddr == V3D_QPU_WADDR_R3) {
  753                         return true;
  754                 }
  755 
  756                 if (inst->alu.mul.magic_write &&
  757                     inst->alu.mul.waddr == V3D_QPU_WADDR_R3) {
  758                         return true;
  759                 }
  760         }
  761 
  762         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
  763             inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R3) {
  764                 return true;
  765         }
  766 
  767         return inst->sig.ldvary || inst->sig.ldvpm;
  768 }
  769 
  770 bool
  771 v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
  772                   const struct v3d_qpu_instr *inst)
  773 {
  774         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
  775                 if (inst->alu.add.magic_write &&
  776                     (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
  777                      v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
  778                         return true;
  779                 }
  780 
  781                 if (inst->alu.mul.magic_write &&
  782                     (inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
  783                      v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
  784                         return true;
  785                 }
  786         }
  787 
  788         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
  789                 if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
  790                         return true;
  791         } else if (inst->sig.ldtmu) {
  792                 return true;
  793         }
  794 
  795         return false;
  796 }
  797 
  798 bool
  799 v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
  800                   const struct v3d_qpu_instr *inst)
  801 {
  802         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
  803                 if (inst->alu.add.magic_write &&
  804                     inst->alu.add.waddr == V3D_QPU_WADDR_R5) {
  805                         return true;
  806                 }
  807 
  808                 if (inst->alu.mul.magic_write &&
  809                     inst->alu.mul.waddr == V3D_QPU_WADDR_R5) {
  810                         return true;
  811                 }
  812         }
  813 
  814         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
  815             inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R5) {
  816                 return true;
  817         }
  818 
  819         return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
  820 }
  821 
  822 bool
  823 v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
  824 {
  825         int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
  826         int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
  827 
  828         return ((add_nsrc > 0 && inst->alu.add.a == mux) ||
  829                 (add_nsrc > 1 && inst->alu.add.b == mux) ||
  830                 (mul_nsrc > 0 && inst->alu.mul.a == mux) ||
  831                 (mul_nsrc > 1 && inst->alu.mul.b == mux));
  832 }
  833 
  834 bool
  835 v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
  836                            const struct v3d_qpu_sig *sig)
  837 {
  838         if (devinfo->ver < 41)
  839                 return false;
  840 
  841         return (sig->ldunifrf ||
  842                 sig->ldunifarf ||
  843                 sig->ldvary ||
  844                 sig->ldtmu ||
  845                 sig->ldtlb ||
  846                 sig->ldtlbu);
  847 }
  848 
  849 bool
  850 v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
  851 {
  852         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
  853                 return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
  854         } else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
  855                 if (inst->flags.ac != V3D_QPU_COND_NONE ||
  856                     inst->flags.mc != V3D_QPU_COND_NONE ||
  857                     inst->flags.auf != V3D_QPU_UF_NONE ||
  858                     inst->flags.muf != V3D_QPU_UF_NONE)
  859                         return true;
  860 
  861                 switch (inst->alu.add.op) {
  862                 case V3D_QPU_A_VFLA:
  863                 case V3D_QPU_A_VFLNA:
  864                 case V3D_QPU_A_VFLB:
  865                 case V3D_QPU_A_VFLNB:
  866                 case V3D_QPU_A_FLAPUSH:
  867                 case V3D_QPU_A_FLBPUSH:
  868                         return true;
  869                 default:
  870                         break;
  871                 }
  872         }
  873 
  874         return false;
  875 }
  876 
  877 bool
  878 v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
  879 {
  880         if (inst->flags.apf != V3D_QPU_PF_NONE ||
  881             inst->flags.mpf != V3D_QPU_PF_NONE ||
  882             inst->flags.auf != V3D_QPU_UF_NONE ||
  883             inst->flags.muf != V3D_QPU_UF_NONE) {
  884                 return true;
  885         }
  886 
  887         return false;
  888 }
  889 
  890 bool
  891 v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
  892 {
  893         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
  894                 return false;
  895 
  896         switch (inst->alu.add.op) {
  897         case V3D_QPU_A_FADD:
  898         case V3D_QPU_A_FADDNF:
  899         case V3D_QPU_A_FSUB:
  900         case V3D_QPU_A_FMIN:
  901         case V3D_QPU_A_FMAX:
  902         case V3D_QPU_A_FCMP:
  903         case V3D_QPU_A_FROUND:
  904         case V3D_QPU_A_FTRUNC:
  905         case V3D_QPU_A_FFLOOR:
  906         case V3D_QPU_A_FCEIL:
  907         case V3D_QPU_A_FDX:
  908         case V3D_QPU_A_FDY:
  909         case V3D_QPU_A_FTOIN:
  910         case V3D_QPU_A_FTOIZ:
  911         case V3D_QPU_A_FTOUZ:
  912         case V3D_QPU_A_FTOC:
  913         case V3D_QPU_A_VFPACK:
  914                 return true;
  915                 break;
  916         default:
  917                 break;
  918         }
  919 
  920         switch (inst->alu.mul.op) {
  921         case V3D_QPU_M_FMOV:
  922         case V3D_QPU_M_FMUL:
  923                 return true;
  924                 break;
  925         default:
  926                 break;
  927         }
  928 
  929         return false;
  930 }
  931 bool
  932 v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
  933 {
  934         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
  935                 return false;
  936 
  937         switch (inst->alu.add.op) {
  938         case V3D_QPU_A_VFMIN:
  939         case V3D_QPU_A_VFMAX:
  940                 return true;
  941                 break;
  942         default:
  943                 break;
  944         }
  945 
  946         switch (inst->alu.mul.op) {
  947         case V3D_QPU_M_VFMUL:
  948                 return true;
  949                 break;
  950         default:
  951                 break;
  952         }
  953 
  954         return false;
  955 }