"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/freedreno/a2xx/ir2_cp.c" (16 Sep 2020, 5893 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ir2_cp.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   21  * SOFTWARE.
   22  *
   23  * Authors:
   24  *    Jonathan Marek <jonathan@marek.ca>
   25  */
   26 
   27 #include "ir2_private.h"
   28 
   29 static bool is_mov(struct ir2_instr *instr)
   30 {
   31     return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
   32         instr->src_count == 1;
   33 }
   34 
   35 static void src_combine(struct ir2_src *src, struct ir2_src b)
   36 {
   37     src->num = b.num;
   38     src->type = b.type;
   39     src->swizzle = swiz_merge(b.swizzle, src->swizzle);
   40     if (!src->abs) /* if we have abs we don't care about previous negate */
   41         src->negate ^= b.negate;
   42     src->abs |= b.abs;
   43 }
   44 
   45 /* cp_src: replace src regs when they refer to a mov instruction
   46  * example:
   47  *  ALU:      MAXv    R7 = C7, C7
   48  *  ALU:      MULADDv R7 = R7, R10, R0.xxxx
   49  * becomes:
   50  *  ALU:      MULADDv R7 = C7, R10, R0.xxxx
   51  */
   52 void cp_src(struct ir2_context *ctx)
   53 {
   54     struct ir2_instr *p;
   55 
   56     ir2_foreach_instr(instr, ctx) {
   57         ir2_foreach_src(src, instr) {
   58             /* loop to replace recursively */
   59             do {
   60                 if (src->type != IR2_SRC_SSA)
   61                     break;
   62 
   63                 p = &ctx->instr[src->num];
   64                 /* don't work across blocks to avoid possible issues */
   65                 if (p->block_idx != instr->block_idx)
   66                     break;
   67 
   68                 if (!is_mov(p))
   69                     break;
   70 
   71                 if (p->alu.saturate)
   72                     break;
   73 
   74                 /* cant apply abs to const src, const src only for alu */
   75                 if (p->src[0].type == IR2_SRC_CONST &&
   76                     (src->abs || instr->type != IR2_ALU))
   77                     break;
   78 
   79                 src_combine(src, p->src[0]);
   80             } while (1);
   81         }
   82     }
   83 }
   84 
   85 /* cp_export: replace mov to export when possible
   86  * in the cp_src pass we bypass any mov instructions related
   87  * to the src registers, but for exports for need something different
   88  * example:
   89  *  ALU:      MAXv    R3.x___ = C9.x???, C9.x???
   90  *  ALU:      MAXv    R3._y__ = R0.?x??, C8.?x??
   91  *  ALU:      MAXv    export0 = R3.yyyx, R3.yyyx
   92  * becomes:
   93  *  ALU:      MAXv    export0.___w = C9.???x, C9.???x
   94  *  ALU:      MAXv    export0.xyz_ = R0.xxx?, C8.xxx?
   95  *
   96  */
   97 void cp_export(struct ir2_context *ctx)
   98 {
   99     struct ir2_instr *c[4], *ins[4];
  100     struct ir2_src *src;
  101     struct ir2_reg *reg;
  102     unsigned ncomp;
  103 
  104     ir2_foreach_instr(instr, ctx) {
  105         if (!is_export(instr)) /* TODO */
  106             continue;
  107 
  108         if (!is_mov(instr))
  109             continue;
  110 
  111         src = &instr->src[0];
  112 
  113         if (src->negate || src->abs) /* TODO handle these cases */
  114             continue;
  115 
  116         if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
  117             continue;
  118 
  119         reg = get_reg_src(ctx, src);
  120         ncomp = dst_ncomp(instr);
  121 
  122         unsigned reswiz[4] = {};
  123         unsigned num_instr = 0;
  124 
  125         /* fill array c with pointers to instrs that write each component */
  126         if (src->type == IR2_SRC_SSA) {
  127             struct ir2_instr *instr = &ctx->instr[src->num];
  128 
  129             if (instr->type != IR2_ALU)
  130                 continue;
  131 
  132             for (int i = 0; i < ncomp; i++)
  133                 c[i] = instr;
  134 
  135             ins[num_instr++] = instr;
  136             reswiz[0] = src->swizzle;
  137         } else {
  138             bool ok = true;
  139             unsigned write_mask = 0;
  140 
  141             ir2_foreach_instr(instr, ctx) {
  142                 if (instr->is_ssa || instr->reg != reg)
  143                     continue;
  144 
  145                 /* set by non-ALU */
  146                 if (instr->type != IR2_ALU) {
  147                     ok = false;
  148                     break;
  149                 }
  150 
  151                 /* component written more than once */
  152                 if (write_mask & instr->alu.write_mask) {
  153                     ok = false;
  154                     break;
  155                 }
  156 
  157                 write_mask |= instr->alu.write_mask;
  158 
  159                 /* src pointers for components */
  160                 for (int i = 0, j = 0; i < 4; i++) {
  161                     unsigned k = swiz_get(src->swizzle, i);
  162                     if (instr->alu.write_mask & 1 << k) {
  163                         c[i] = instr;
  164 
  165                         /* reswiz = compressed src->swizzle */
  166                         unsigned x = 0;
  167                         for (int i = 0; i < k; i++)
  168                             x += !!(instr->alu.write_mask & 1 << i);
  169 
  170                         assert(src->swizzle || x == j);
  171                         reswiz[num_instr] |= swiz_set(x, j++);
  172                     }
  173                 }
  174                 ins[num_instr++] = instr;
  175             }
  176             if (!ok)
  177                 continue;
  178         }
  179 
  180         bool redirect = true;
  181 
  182         /* must all be in same block */
  183         for (int i = 0; i < ncomp; i++)
  184             redirect &= (c[i]->block_idx == instr->block_idx);
  185 
  186         /* no other instr using the value */
  187         ir2_foreach_instr(p, ctx) {
  188             if (p == instr)
  189                 continue;
  190             ir2_foreach_src(src, p)
  191                 redirect &= reg != get_reg_src(ctx, src);
  192         }
  193 
  194         if (!redirect)
  195             continue;
  196 
  197         /* redirect the instructions writing to the register */
  198         for (int i = 0; i < num_instr; i++) {
  199             struct ir2_instr *p = ins[i];
  200 
  201             p->alu.export = instr->alu.export;
  202             p->alu.write_mask = 0;
  203             p->is_ssa = true;
  204             p->ssa.ncomp = 0;
  205             memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
  206             p->alu.saturate |= instr->alu.saturate;
  207 
  208             switch (p->alu.vector_opc) {
  209             case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
  210             case DOT2ADDv:
  211             case DOT3v:
  212             case DOT4v:
  213             case CUBEv:
  214                 continue;
  215             default:
  216                 break;
  217             }
  218             ir2_foreach_src(s, p)
  219                 swiz_merge_p(&s->swizzle, reswiz[i]);
  220         }
  221 
  222         for (int i = 0; i < ncomp; i++) {
  223             c[i]->alu.write_mask |= (1 << i);
  224             c[i]->ssa.ncomp++;
  225         }
  226         instr->type = IR2_NONE;
  227         instr->need_emit = false;
  228     }
  229 }