"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/vc4/vc4_nir_lower_blend.c" (16 Sep 2020, 26692 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "vc4_nir_lower_blend.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright © 2015 Broadcom
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  */
   23 
   24 /**
   25  * Implements most of the fixed function fragment pipeline in shader code.
   26  *
   27  * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
   28  * or color mask.  Instead, you read the current contents of the destination
   29  * from the tile buffer after having waited for the scoreboard (which is
   30  * handled by vc4_qpu_emit.c), then do math using your output color and that
   31  * destination value, and update the output color appropriately.
   32  *
   33  * Once this pass is done, the color write will either have one component (for
   34  * single sample) with packed argb8888, or 4 components with the per-sample
   35  * argb8888 result.
   36  */
   37 
   38 /**
   39  * Lowers fixed-function blending to a load of the destination color and a
   40  * series of ALU operations before the store of the output.
   41  */
   42 #include "util/format/u_format.h"
   43 #include "vc4_qir.h"
   44 #include "compiler/nir/nir_builder.h"
   45 #include "compiler/nir/nir_format_convert.h"
   46 #include "vc4_context.h"
   47 
   48 static bool
   49 blend_depends_on_dst_color(struct vc4_compile *c)
   50 {
   51         return (c->fs_key->blend.blend_enable ||
   52                 c->fs_key->blend.colormask != 0xf ||
   53                 c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
   54 }
   55 
   56 /** Emits a load of the previous fragment color from the tile buffer. */
   57 static nir_ssa_def *
   58 vc4_nir_get_dst_color(nir_builder *b, int sample)
   59 {
   60         nir_intrinsic_instr *load =
   61                 nir_intrinsic_instr_create(b->shader,
   62                                            nir_intrinsic_load_input);
   63         load->num_components = 1;
   64         nir_intrinsic_set_base(load, VC4_NIR_TLB_COLOR_READ_INPUT + sample);
   65         load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   66         nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
   67         nir_builder_instr_insert(b, &load->instr);
   68         return &load->dest.ssa;
   69 }
   70 
   71 static nir_ssa_def *
   72 vc4_blend_channel_f(nir_builder *b,
   73                     nir_ssa_def **src,
   74                     nir_ssa_def **dst,
   75                     unsigned factor,
   76                     int channel)
   77 {
   78         switch(factor) {
   79         case PIPE_BLENDFACTOR_ONE:
   80                 return nir_imm_float(b, 1.0);
   81         case PIPE_BLENDFACTOR_SRC_COLOR:
   82                 return src[channel];
   83         case PIPE_BLENDFACTOR_SRC_ALPHA:
   84                 return src[3];
   85         case PIPE_BLENDFACTOR_DST_ALPHA:
   86                 return dst[3];
   87         case PIPE_BLENDFACTOR_DST_COLOR:
   88                 return dst[channel];
   89         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
   90                 if (channel != 3) {
   91                         return nir_fmin(b,
   92                                         src[3],
   93                                         nir_fsub(b,
   94                                                  nir_imm_float(b, 1.0),
   95                                                  dst[3]));
   96                 } else {
   97                         return nir_imm_float(b, 1.0);
   98                 }
   99         case PIPE_BLENDFACTOR_CONST_COLOR:
  100                 return nir_load_system_value(b,
  101                                              nir_intrinsic_load_blend_const_color_r_float +
  102                                              channel,
  103                                              0, 32);
  104         case PIPE_BLENDFACTOR_CONST_ALPHA:
  105                 return nir_load_blend_const_color_a_float(b);
  106         case PIPE_BLENDFACTOR_ZERO:
  107                 return nir_imm_float(b, 0.0);
  108         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  109                 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
  110         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  111                 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
  112         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  113                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
  114         case PIPE_BLENDFACTOR_INV_DST_COLOR:
  115                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
  116         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  117                 return nir_fsub(b, nir_imm_float(b, 1.0),
  118                                 nir_load_system_value(b,
  119                                                       nir_intrinsic_load_blend_const_color_r_float +
  120                                                       channel,
  121                                                       0, 32));
  122         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  123                 return nir_fsub(b, nir_imm_float(b, 1.0),
  124                                 nir_load_blend_const_color_a_float(b));
  125 
  126         default:
  127         case PIPE_BLENDFACTOR_SRC1_COLOR:
  128         case PIPE_BLENDFACTOR_SRC1_ALPHA:
  129         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
  130         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
  131                 /* Unsupported. */
  132                 fprintf(stderr, "Unknown blend factor %d\n", factor);
  133                 return nir_imm_float(b, 1.0);
  134         }
  135 }
  136 
  137 static nir_ssa_def *
  138 vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
  139                         int chan)
  140 {
  141         unsigned chan_mask = 0xff << (chan * 8);
  142         return nir_ior(b,
  143                        nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
  144                        nir_iand(b, src1, nir_imm_int(b, chan_mask)));
  145 }
  146 
  147 static nir_ssa_def *
  148 vc4_blend_channel_i(nir_builder *b,
  149                     nir_ssa_def *src,
  150                     nir_ssa_def *dst,
  151                     nir_ssa_def *src_a,
  152                     nir_ssa_def *dst_a,
  153                     unsigned factor,
  154                     int a_chan)
  155 {
  156         switch (factor) {
  157         case PIPE_BLENDFACTOR_ONE:
  158                 return nir_imm_int(b, ~0);
  159         case PIPE_BLENDFACTOR_SRC_COLOR:
  160                 return src;
  161         case PIPE_BLENDFACTOR_SRC_ALPHA:
  162                 return src_a;
  163         case PIPE_BLENDFACTOR_DST_ALPHA:
  164                 return dst_a;
  165         case PIPE_BLENDFACTOR_DST_COLOR:
  166                 return dst;
  167         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  168                 return vc4_nir_set_packed_chan(b,
  169                                                nir_umin_4x8(b,
  170                                                             src_a,
  171                                                             nir_inot(b, dst_a)),
  172                                                nir_imm_int(b, ~0),
  173                                                a_chan);
  174         case PIPE_BLENDFACTOR_CONST_COLOR:
  175                 return nir_load_blend_const_color_rgba8888_unorm(b);
  176         case PIPE_BLENDFACTOR_CONST_ALPHA:
  177                 return nir_load_blend_const_color_aaaa8888_unorm(b);
  178         case PIPE_BLENDFACTOR_ZERO:
  179                 return nir_imm_int(b, 0);
  180         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  181                 return nir_inot(b, src);
  182         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  183                 return nir_inot(b, src_a);
  184         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  185                 return nir_inot(b, dst_a);
  186         case PIPE_BLENDFACTOR_INV_DST_COLOR:
  187                 return nir_inot(b, dst);
  188         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  189                 return nir_inot(b,
  190                                 nir_load_blend_const_color_rgba8888_unorm(b));
  191         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  192                 return nir_inot(b,
  193                                 nir_load_blend_const_color_aaaa8888_unorm(b));
  194 
  195         default:
  196         case PIPE_BLENDFACTOR_SRC1_COLOR:
  197         case PIPE_BLENDFACTOR_SRC1_ALPHA:
  198         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
  199         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
  200                 /* Unsupported. */
  201                 fprintf(stderr, "Unknown blend factor %d\n", factor);
  202                 return nir_imm_int(b, ~0);
  203         }
  204 }
  205 
  206 static nir_ssa_def *
  207 vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
  208                  unsigned func)
  209 {
  210         switch (func) {
  211         case PIPE_BLEND_ADD:
  212                 return nir_fadd(b, src, dst);
  213         case PIPE_BLEND_SUBTRACT:
  214                 return nir_fsub(b, src, dst);
  215         case PIPE_BLEND_REVERSE_SUBTRACT:
  216                 return nir_fsub(b, dst, src);
  217         case PIPE_BLEND_MIN:
  218                 return nir_fmin(b, src, dst);
  219         case PIPE_BLEND_MAX:
  220                 return nir_fmax(b, src, dst);
  221 
  222         default:
  223                 /* Unsupported. */
  224                 fprintf(stderr, "Unknown blend func %d\n", func);
  225                 return src;
  226 
  227         }
  228 }
  229 
  230 static nir_ssa_def *
  231 vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
  232                  unsigned func)
  233 {
  234         switch (func) {
  235         case PIPE_BLEND_ADD:
  236                 return nir_usadd_4x8(b, src, dst);
  237         case PIPE_BLEND_SUBTRACT:
  238                 return nir_ussub_4x8(b, src, dst);
  239         case PIPE_BLEND_REVERSE_SUBTRACT:
  240                 return nir_ussub_4x8(b, dst, src);
  241         case PIPE_BLEND_MIN:
  242                 return nir_umin_4x8(b, src, dst);
  243         case PIPE_BLEND_MAX:
  244                 return nir_umax_4x8(b, src, dst);
  245 
  246         default:
  247                 /* Unsupported. */
  248                 fprintf(stderr, "Unknown blend func %d\n", func);
  249                 return src;
  250 
  251         }
  252 }
  253 
  254 static void
  255 vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
  256                   nir_ssa_def **src_color, nir_ssa_def **dst_color)
  257 {
  258         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
  259 
  260         if (!blend->blend_enable) {
  261                 for (int i = 0; i < 4; i++)
  262                         result[i] = src_color[i];
  263                 return;
  264         }
  265 
  266         /* Clamp the src color to [0, 1].  Dest is already clamped. */
  267         for (int i = 0; i < 4; i++)
  268                 src_color[i] = nir_fsat(b, src_color[i]);
  269 
  270         nir_ssa_def *src_blend[4], *dst_blend[4];
  271         for (int i = 0; i < 4; i++) {
  272                 int src_factor = ((i != 3) ? blend->rgb_src_factor :
  273                                   blend->alpha_src_factor);
  274                 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
  275                                   blend->alpha_dst_factor);
  276                 src_blend[i] = nir_fmul(b, src_color[i],
  277                                         vc4_blend_channel_f(b,
  278                                                             src_color, dst_color,
  279                                                             src_factor, i));
  280                 dst_blend[i] = nir_fmul(b, dst_color[i],
  281                                         vc4_blend_channel_f(b,
  282                                                             src_color, dst_color,
  283                                                             dst_factor, i));
  284         }
  285 
  286         for (int i = 0; i < 4; i++) {
  287                 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
  288                                              ((i != 3) ? blend->rgb_func :
  289                                               blend->alpha_func));
  290         }
  291 }
  292 
  293 static nir_ssa_def *
  294 vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
  295 {
  296         nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
  297         return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
  298 }
  299 
  300 static nir_ssa_def *
  301 vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
  302                   nir_ssa_def *src_color, nir_ssa_def *dst_color,
  303                   nir_ssa_def *src_float_a)
  304 {
  305         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
  306 
  307         if (!blend->blend_enable)
  308                 return src_color;
  309 
  310         enum pipe_format color_format = c->fs_key->color_format;
  311         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
  312         nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
  313         nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
  314         nir_ssa_def *dst_a;
  315         int alpha_chan;
  316         for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
  317                 if (format_swiz[alpha_chan] == 3)
  318                         break;
  319         }
  320         if (alpha_chan != 4) {
  321                 nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
  322                 dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
  323                                                               shift), imm_0xff));
  324         } else {
  325                 dst_a = nir_imm_int(b, ~0);
  326         }
  327 
  328         nir_ssa_def *src_factor = vc4_blend_channel_i(b,
  329                                                       src_color, dst_color,
  330                                                       src_a, dst_a,
  331                                                       blend->rgb_src_factor,
  332                                                       alpha_chan);
  333         nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
  334                                                       src_color, dst_color,
  335                                                       src_a, dst_a,
  336                                                       blend->rgb_dst_factor,
  337                                                       alpha_chan);
  338 
  339         if (alpha_chan != 4 &&
  340             blend->alpha_src_factor != blend->rgb_src_factor) {
  341                 nir_ssa_def *src_alpha_factor =
  342                         vc4_blend_channel_i(b,
  343                                             src_color, dst_color,
  344                                             src_a, dst_a,
  345                                             blend->alpha_src_factor,
  346                                             alpha_chan);
  347                 src_factor = vc4_nir_set_packed_chan(b, src_factor,
  348                                                      src_alpha_factor,
  349                                                      alpha_chan);
  350         }
  351         if (alpha_chan != 4 &&
  352             blend->alpha_dst_factor != blend->rgb_dst_factor) {
  353                 nir_ssa_def *dst_alpha_factor =
  354                         vc4_blend_channel_i(b,
  355                                             src_color, dst_color,
  356                                             src_a, dst_a,
  357                                             blend->alpha_dst_factor,
  358                                             alpha_chan);
  359                 dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
  360                                                      dst_alpha_factor,
  361                                                      alpha_chan);
  362         }
  363         nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
  364         nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
  365 
  366         nir_ssa_def *result =
  367                 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
  368         if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
  369                 nir_ssa_def *result_a = vc4_blend_func_i(b,
  370                                                          src_blend,
  371                                                          dst_blend,
  372                                                          blend->alpha_func);
  373                 result = vc4_nir_set_packed_chan(b, result, result_a,
  374                                                  alpha_chan);
  375         }
  376         return result;
  377 }
  378 
  379 static nir_ssa_def *
  380 vc4_logicop(nir_builder *b, int logicop_func,
  381             nir_ssa_def *src, nir_ssa_def *dst)
  382 {
  383         switch (logicop_func) {
  384         case PIPE_LOGICOP_CLEAR:
  385                 return nir_imm_int(b, 0);
  386         case PIPE_LOGICOP_NOR:
  387                 return nir_inot(b, nir_ior(b, src, dst));
  388         case PIPE_LOGICOP_AND_INVERTED:
  389                 return nir_iand(b, nir_inot(b, src), dst);
  390         case PIPE_LOGICOP_COPY_INVERTED:
  391                 return nir_inot(b, src);
  392         case PIPE_LOGICOP_AND_REVERSE:
  393                 return nir_iand(b, src, nir_inot(b, dst));
  394         case PIPE_LOGICOP_INVERT:
  395                 return nir_inot(b, dst);
  396         case PIPE_LOGICOP_XOR:
  397                 return nir_ixor(b, src, dst);
  398         case PIPE_LOGICOP_NAND:
  399                 return nir_inot(b, nir_iand(b, src, dst));
  400         case PIPE_LOGICOP_AND:
  401                 return nir_iand(b, src, dst);
  402         case PIPE_LOGICOP_EQUIV:
  403                 return nir_inot(b, nir_ixor(b, src, dst));
  404         case PIPE_LOGICOP_NOOP:
  405                 return dst;
  406         case PIPE_LOGICOP_OR_INVERTED:
  407                 return nir_ior(b, nir_inot(b, src), dst);
  408         case PIPE_LOGICOP_OR_REVERSE:
  409                 return nir_ior(b, src, nir_inot(b, dst));
  410         case PIPE_LOGICOP_OR:
  411                 return nir_ior(b, src, dst);
  412         case PIPE_LOGICOP_SET:
  413                 return nir_imm_int(b, ~0);
  414         default:
  415                 fprintf(stderr, "Unknown logic op %d\n", logicop_func);
  416                 /* FALLTHROUGH */
  417         case PIPE_LOGICOP_COPY:
  418                 return src;
  419         }
  420 }
  421 
  422 static nir_ssa_def *
  423 vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
  424                          nir_ssa_def **colors)
  425 {
  426         enum pipe_format color_format = c->fs_key->color_format;
  427         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
  428 
  429         nir_ssa_def *swizzled[4];
  430         for (int i = 0; i < 4; i++) {
  431                 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
  432                                                            format_swiz[i]);
  433         }
  434 
  435         return nir_pack_unorm_4x8(b,
  436                                   nir_vec4(b,
  437                                            swizzled[0], swizzled[1],
  438                                            swizzled[2], swizzled[3]));
  439 
  440 }
  441 
  442 static nir_ssa_def *
  443 vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
  444                        int sample)
  445 {
  446         enum pipe_format color_format = c->fs_key->color_format;
  447         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
  448         bool srgb = util_format_is_srgb(color_format);
  449 
  450         /* Pull out the float src/dst color components. */
  451         nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
  452         nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
  453         nir_ssa_def *src_color[4], *unpacked_dst_color[4];
  454         for (unsigned i = 0; i < 4; i++) {
  455                 src_color[i] = nir_channel(b, src, i);
  456                 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
  457         }
  458 
  459         if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
  460                 src_color[3] = nir_imm_float(b, 1.0);
  461 
  462         nir_ssa_def *packed_color;
  463         if (srgb) {
  464                 /* Unswizzle the destination color. */
  465                 nir_ssa_def *dst_color[4];
  466                 for (unsigned i = 0; i < 4; i++) {
  467                         dst_color[i] = vc4_nir_get_swizzled_channel(b,
  468                                                                     unpacked_dst_color,
  469                                                                     format_swiz[i]);
  470                 }
  471 
  472                 /* Turn dst color to linear. */
  473                 for (int i = 0; i < 3; i++)
  474                         dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);
  475 
  476                 nir_ssa_def *blend_color[4];
  477                 vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
  478 
  479                 /* sRGB encode the output color */
  480                 for (int i = 0; i < 3; i++)
  481                         blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);
  482 
  483                 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
  484         } else {
  485                 nir_ssa_def *packed_src_color =
  486                         vc4_nir_swizzle_and_pack(c, b, src_color);
  487 
  488                 packed_color =
  489                         vc4_do_blending_i(c, b,
  490                                           packed_src_color, packed_dst_color,
  491                                           src_color[3]);
  492         }
  493 
  494         packed_color = vc4_logicop(b, c->fs_key->logicop_func,
  495                                    packed_color, packed_dst_color);
  496 
  497         /* If the bit isn't set in the color mask, then just return the
  498          * original dst color, instead.
  499          */
  500         uint32_t colormask = 0xffffffff;
  501         for (int i = 0; i < 4; i++) {
  502                 if (format_swiz[i] < 4 &&
  503                     !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
  504                         colormask &= ~(0xff << (i * 8));
  505                 }
  506         }
  507 
  508         return nir_ior(b,
  509                        nir_iand(b, packed_color,
  510                                 nir_imm_int(b, colormask)),
  511                        nir_iand(b, packed_dst_color,
  512                                 nir_imm_int(b, ~colormask)));
  513 }
  514 
  515 static void
  516 vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
  517                           nir_ssa_def *val)
  518 {
  519         nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
  520                                                         glsl_uint_type(),
  521                                                         "sample_mask");
  522         sample_mask->data.driver_location = c->s->num_outputs++;
  523         sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
  524 
  525         nir_intrinsic_instr *intr =
  526                 nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
  527         intr->num_components = 1;
  528         nir_intrinsic_set_base(intr, sample_mask->data.driver_location);
  529 
  530         intr->src[0] = nir_src_for_ssa(val);
  531         intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
  532         nir_builder_instr_insert(b, &intr->instr);
  533 }
  534 
  535 static void
  536 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
  537                           nir_intrinsic_instr *intr)
  538 {
  539         nir_ssa_def *frag_color = intr->src[0].ssa;
  540 
  541         if (c->fs_key->sample_alpha_to_coverage) {
  542                 nir_ssa_def *a = nir_channel(b, frag_color, 3);
  543 
  544                 /* XXX: We should do a nice dither based on the fragment
  545                  * coordinate, instead.
  546                  */
  547                 nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
  548                 nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
  549                 nir_ssa_def *bitmask = nir_isub(b,
  550                                                 nir_ishl(b,
  551                                                          nir_imm_int(b, 1),
  552                                                          num_bits),
  553                                                 nir_imm_int(b, 1));
  554                 vc4_nir_store_sample_mask(c, b, bitmask);
  555         }
  556 
  557         /* The TLB color read returns each sample in turn, so if our blending
  558          * depends on the destination color, we're going to have to run the
  559          * blending function separately for each destination sample value, and
  560          * then output the per-sample color using TLB_COLOR_MS.
  561          */
  562         nir_ssa_def *blend_output;
  563         if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
  564                 c->msaa_per_sample_output = true;
  565 
  566                 nir_ssa_def *samples[4];
  567                 for (int i = 0; i < VC4_MAX_SAMPLES; i++)
  568                         samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
  569                 blend_output = nir_vec4(b,
  570                                         samples[0], samples[1],
  571                                         samples[2], samples[3]);
  572         } else {
  573                 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
  574         }
  575 
  576         nir_instr_rewrite_src(&intr->instr, &intr->src[0],
  577                               nir_src_for_ssa(blend_output));
  578         intr->num_components = blend_output->num_components;
  579 }
  580 
  581 static bool
  582 vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
  583 {
  584         nir_foreach_instr_safe(instr, block) {
  585                 if (instr->type != nir_instr_type_intrinsic)
  586                         continue;
  587                 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
  588                 if (intr->intrinsic != nir_intrinsic_store_output)
  589                         continue;
  590 
  591                 nir_variable *output_var = NULL;
  592                 nir_foreach_variable(var, &c->s->outputs) {
  593                         if (var->data.driver_location ==
  594                             nir_intrinsic_base(intr)) {
  595                                 output_var = var;
  596                                 break;
  597                         }
  598                 }
  599                 assert(output_var);
  600 
  601                 if (output_var->data.location != FRAG_RESULT_COLOR &&
  602                     output_var->data.location != FRAG_RESULT_DATA0) {
  603                         continue;
  604                 }
  605 
  606                 nir_function_impl *impl =
  607                         nir_cf_node_get_function(&block->cf_node);
  608                 nir_builder b;
  609                 nir_builder_init(&b, impl);
  610                 b.cursor = nir_before_instr(&intr->instr);
  611                 vc4_nir_lower_blend_instr(c, &b, intr);
  612         }
  613         return true;
  614 }
  615 
  616 void
  617 vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
  618 {
  619         nir_foreach_function(function, s) {
  620                 if (function->impl) {
  621                         nir_foreach_block(block, function->impl) {
  622                                 vc4_nir_lower_blend_block(block, c);
  623                         }
  624 
  625                         nir_metadata_preserve(function->impl,
  626                                               nir_metadata_block_index |
  627                                               nir_metadata_dominance);
  628                 }
  629         }
  630 
  631         /* If we didn't do alpha-to-coverage on the output color, we still
  632          * need to pass glSampleMask() through.
  633          */
  634         if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
  635                 nir_function_impl *impl = nir_shader_get_entrypoint(s);
  636                 nir_builder b;
  637                 nir_builder_init(&b, impl);
  638                 b.cursor = nir_after_block(nir_impl_last_block(impl));
  639 
  640                 vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
  641         }
  642 }