"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/softpipe/sp_quad_blend.c" (16 Sep 2020, 43222 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "sp_quad_blend.c", see the Fossies "Dox" file reference documentation.

    1 /**************************************************************************
    2  * 
    3  * Copyright 2007 VMware, Inc.
    4  * All Rights Reserved.
    5  * 
    6  * Permission is hereby granted, free of charge, to any person obtaining a
    7  * copy of this software and associated documentation files (the
    8  * "Software"), to deal in the Software without restriction, including
    9  * without limitation the rights to use, copy, modify, merge, publish,
   10  * distribute, sub license, and/or sell copies of the Software, and to
   11  * permit persons to whom the Software is furnished to do so, subject to
   12  * the following conditions:
   13  * 
   14  * The above copyright notice and this permission notice (including the
   15  * next paragraph) shall be included in all copies or substantial portions
   16  * of the Software.
   17  * 
   18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
   21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
   22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   25  * 
   26  **************************************************************************/
   27 
   28 /**
   29  * quad blending
   30  * \author Brian Paul
   31  */
   32 
   33 #include "pipe/p_defines.h"
   34 #include "util/u_math.h"
   35 #include "util/u_memory.h"
   36 #include "util/format/u_format.h"
   37 #include "util/u_dual_blend.h"
   38 #include "sp_context.h"
   39 #include "sp_state.h"
   40 #include "sp_quad.h"
   41 #include "sp_tile_cache.h"
   42 #include "sp_quad_pipe.h"
   43 
   44 
   /**
    * Base format class of a color buffer.
    * One value per color buffer is kept in blend_quad_stage::base_format.
    * The numeric values are the ones the compiler would assign implicitly.
    */
   enum format
   {
      RGBA            = 0,
      RGB             = 1,
      LUMINANCE       = 2,
      LUMINANCE_ALPHA = 3,
      INTENSITY       = 4
   };
   53 
   54 
   55 /** Subclass of quad_stage */
   56 struct blend_quad_stage
   57 {
   58    struct quad_stage base;
   59    boolean clamp[PIPE_MAX_COLOR_BUFS];  /**< clamp colors to [0,1]? */
   60    enum format base_format[PIPE_MAX_COLOR_BUFS];
   61    enum util_format_type format_type[PIPE_MAX_COLOR_BUFS];
   62 };
   63 
   64 
   /**
    * Cast wrapper: view a generic quad_stage pointer as the blend stage
    * that embeds it.  This is a plain pointer cast; nothing checks that
    * \p stage really belongs to a struct blend_quad_stage.
    */
   static inline struct blend_quad_stage *
   blend_quad_stage(struct quad_stage *stage)
   {
      struct blend_quad_stage *blend_stage = (struct blend_quad_stage *) stage;

      return blend_stage;
   }
   71 
   72 
   /**
    * Copy the four elements of SRC to DST.
    * Each argument is expanded four times, so it must be free of side
    * effects.  Arguments are parenthesized so that expressions such as
    * "base + 4" can be passed safely.
    */
   #define VEC4_COPY(DST, SRC) \
   do { \
       (DST)[0] = (SRC)[0]; \
       (DST)[1] = (SRC)[1]; \
       (DST)[2] = (SRC)[2]; \
       (DST)[3] = (SRC)[3]; \
   } while(0)
   80 
   /**
    * Set all four elements of DST to the scalar SRC.
    * SRC is expanded (and therefore evaluated) four times, so it must be
    * free of side effects.  Arguments are parenthesized so that arbitrary
    * expressions can be passed safely.
    */
   #define VEC4_SCALAR(DST, SRC) \
   do { \
       (DST)[0] = (SRC); \
       (DST)[1] = (SRC); \
       (DST)[2] = (SRC); \
       (DST)[3] = (SRC); \
   } while(0)
   88 
   /**
    * Element-wise R = A + B for four-element vectors.
    * Each argument is expanded four times, so it must be free of side
    * effects.  Arguments are parenthesized so that expressions such as
    * "base + 4" can be passed safely.
    */
   #define VEC4_ADD(R, A, B) \
   do { \
      (R)[0] = (A)[0] + (B)[0]; \
      (R)[1] = (A)[1] + (B)[1]; \
      (R)[2] = (A)[2] + (B)[2]; \
      (R)[3] = (A)[3] + (B)[3]; \
   } while (0)
   96 
   /**
    * Element-wise R = A - B for four-element vectors.
    * Each argument is expanded four times, so it must be free of side
    * effects.  Arguments are parenthesized so that expressions such as
    * "base + 4" can be passed safely.
    */
   #define VEC4_SUB(R, A, B) \
   do { \
      (R)[0] = (A)[0] - (B)[0]; \
      (R)[1] = (A)[1] - (B)[1]; \
      (R)[2] = (A)[2] - (B)[2]; \
      (R)[3] = (A)[3] - (B)[3]; \
   } while (0)
  104 
  /**
   * Add and limit result to ceiling of 1.0.
   * Only the upper bound is enforced; results below 0.0 are kept as-is.
   * Each argument is expanded several times, so it must be free of side
   * effects.  Arguments are parenthesized so that expressions such as
   * "base + 4" can be passed safely.
   */
  #define VEC4_ADD_SAT(R, A, B) \
  do { \
     (R)[0] = (A)[0] + (B)[0];  if ((R)[0] > 1.0f) (R)[0] = 1.0f; \
     (R)[1] = (A)[1] + (B)[1];  if ((R)[1] > 1.0f) (R)[1] = 1.0f; \
     (R)[2] = (A)[2] + (B)[2];  if ((R)[2] > 1.0f) (R)[2] = 1.0f; \
     (R)[3] = (A)[3] + (B)[3];  if ((R)[3] > 1.0f) (R)[3] = 1.0f; \
  } while (0)
  113 
  /**
   * Subtract and limit result to floor of 0.0.
   * Only the lower bound is enforced; results above 1.0 are kept as-is.
   * Each argument is expanded several times, so it must be free of side
   * effects.  Arguments are parenthesized so that expressions such as
   * "base + 4" can be passed safely.
   */
  #define VEC4_SUB_SAT(R, A, B) \
  do { \
     (R)[0] = (A)[0] - (B)[0];  if ((R)[0] < 0.0f) (R)[0] = 0.0f; \
     (R)[1] = (A)[1] - (B)[1];  if ((R)[1] < 0.0f) (R)[1] = 0.0f; \
     (R)[2] = (A)[2] - (B)[2];  if ((R)[2] < 0.0f) (R)[2] = 0.0f; \
     (R)[3] = (A)[3] - (B)[3];  if ((R)[3] < 0.0f) (R)[3] = 0.0f; \
  } while (0)
  122 
  /**
   * Element-wise R = A * B for four-element vectors.
   * Each argument is expanded four times, so it must be free of side
   * effects.  Arguments are parenthesized so that expressions such as
   * "base + 4" can be passed safely.
   */
  #define VEC4_MUL(R, A, B) \
  do { \
     (R)[0] = (A)[0] * (B)[0]; \
     (R)[1] = (A)[1] * (B)[1]; \
     (R)[2] = (A)[2] * (B)[2]; \
     (R)[3] = (A)[3] * (B)[3]; \
  } while (0)
  130 
  /**
   * Element-wise R = min(A, B) for four-element vectors.
   * When the comparison A < B is false (including when either value is
   * NaN) the element of B is selected.
   * Each argument is expanded several times, so it must be free of side
   * effects.  Arguments are parenthesized so that expressions such as
   * "base + 4" can be passed safely.
   */
  #define VEC4_MIN(R, A, B) \
  do { \
     (R)[0] = ((A)[0] < (B)[0]) ? (A)[0] : (B)[0]; \
     (R)[1] = ((A)[1] < (B)[1]) ? (A)[1] : (B)[1]; \
     (R)[2] = ((A)[2] < (B)[2]) ? (A)[2] : (B)[2]; \
     (R)[3] = ((A)[3] < (B)[3]) ? (A)[3] : (B)[3]; \
  } while (0)
  138 
  /**
   * Element-wise R = max(A, B) for four-element vectors.
   * When the comparison A > B is false (including when either value is
   * NaN) the element of B is selected.
   * Each argument is expanded several times, so it must be free of side
   * effects.  Arguments are parenthesized so that expressions such as
   * "base + 4" can be passed safely.
   */
  #define VEC4_MAX(R, A, B) \
  do { \
     (R)[0] = ((A)[0] > (B)[0]) ? (A)[0] : (B)[0]; \
     (R)[1] = ((A)[1] > (B)[1]) ? (A)[1] : (B)[1]; \
     (R)[2] = ((A)[2] > (B)[2]) ? (A)[2] : (B)[2]; \
     (R)[3] = ((A)[3] > (B)[3]) ? (A)[3] : (B)[3]; \
  } while (0)
  146 
  147 
  148 
  149 static void
  150 logicop_quad(struct quad_stage *qs, 
  151              float (*quadColor)[4],
  152              float (*dest)[4])
  153 {
  154    struct softpipe_context *softpipe = qs->softpipe;
  155    ubyte src[4][4], dst[4][4], res[4][4];
  156    uint *src4 = (uint *) src;
  157    uint *dst4 = (uint *) dst;
  158    uint *res4 = (uint *) res;
  159    uint j;
  160 
  161 
  162    /* convert to ubyte */
  163    for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
  164       dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
  165       dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
  166       dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
  167       dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
  168 
  169       src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
  170       src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
  171       src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
  172       src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
  173    }
  174 
  175    switch (softpipe->blend->logicop_func) {
  176    case PIPE_LOGICOP_CLEAR:
  177       for (j = 0; j < 4; j++)
  178          res4[j] = 0;
  179       break;
  180    case PIPE_LOGICOP_NOR:
  181       for (j = 0; j < 4; j++)
  182          res4[j] = ~(src4[j] | dst4[j]);
  183       break;
  184    case PIPE_LOGICOP_AND_INVERTED:
  185       for (j = 0; j < 4; j++)
  186          res4[j] = ~src4[j] & dst4[j];
  187       break;
  188    case PIPE_LOGICOP_COPY_INVERTED:
  189       for (j = 0; j < 4; j++)
  190          res4[j] = ~src4[j];
  191       break;
  192    case PIPE_LOGICOP_AND_REVERSE:
  193       for (j = 0; j < 4; j++)
  194          res4[j] = src4[j] & ~dst4[j];
  195       break;
  196    case PIPE_LOGICOP_INVERT:
  197       for (j = 0; j < 4; j++)
  198          res4[j] = ~dst4[j];
  199       break;
  200    case PIPE_LOGICOP_XOR:
  201       for (j = 0; j < 4; j++)
  202          res4[j] = dst4[j] ^ src4[j];
  203       break;
  204    case PIPE_LOGICOP_NAND:
  205       for (j = 0; j < 4; j++)
  206          res4[j] = ~(src4[j] & dst4[j]);
  207       break;
  208    case PIPE_LOGICOP_AND:
  209       for (j = 0; j < 4; j++)
  210          res4[j] = src4[j] & dst4[j];
  211       break;
  212    case PIPE_LOGICOP_EQUIV:
  213       for (j = 0; j < 4; j++)
  214          res4[j] = ~(src4[j] ^ dst4[j]);
  215       break;
  216    case PIPE_LOGICOP_NOOP:
  217       for (j = 0; j < 4; j++)
  218          res4[j] = dst4[j];
  219       break;
  220    case PIPE_LOGICOP_OR_INVERTED:
  221       for (j = 0; j < 4; j++)
  222          res4[j] = ~src4[j] | dst4[j];
  223       break;
  224    case PIPE_LOGICOP_COPY:
  225       for (j = 0; j < 4; j++)
  226          res4[j] = src4[j];
  227       break;
  228    case PIPE_LOGICOP_OR_REVERSE:
  229       for (j = 0; j < 4; j++)
  230          res4[j] = src4[j] | ~dst4[j];
  231       break;
  232    case PIPE_LOGICOP_OR:
  233       for (j = 0; j < 4; j++)
  234          res4[j] = src4[j] | dst4[j];
  235       break;
  236    case PIPE_LOGICOP_SET:
  237       for (j = 0; j < 4; j++)
  238          res4[j] = ~0;
  239       break;
  240    default:
  241       assert(0 && "invalid logicop mode");
  242    }
  243 
  244    for (j = 0; j < 4; j++) {
  245       quadColor[j][0] = ubyte_to_float(res[j][0]);
  246       quadColor[j][1] = ubyte_to_float(res[j][1]);
  247       quadColor[j][2] = ubyte_to_float(res[j][2]);
  248       quadColor[j][3] = ubyte_to_float(res[j][3]);
  249    }
  250 }
  251 
  252 
  253 
  254 /**
  255  * Do blending for a 2x2 quad for one color buffer.
  256  * \param quadColor  the incoming quad colors
  257  * \param dest  the destination/framebuffer quad colors
  258  * \param const_blend_color  the constant blend color
  259  * \param blend_index  which set of blending terms to use
  260  */
  261 static void
  262 blend_quad(struct quad_stage *qs, 
  263            float (*quadColor)[4],
  264            float (*quadColor2)[4],
  265            float (*dest)[4],
  266            const float const_blend_color[4],
  267            unsigned blend_index)
  268 {
  269    static const float zero[4] = { 0, 0, 0, 0 };
  270    static const float one[4] = { 1, 1, 1, 1 };
  271    struct softpipe_context *softpipe = qs->softpipe;
  272    float source[4][TGSI_QUAD_SIZE] = { { 0 } };
  273    float blend_dest[4][TGSI_QUAD_SIZE];
  274 
  275    /*
  276     * Compute src/first term RGB
  277     */
  278    switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
  279    case PIPE_BLENDFACTOR_ONE:
  280       VEC4_COPY(source[0], quadColor[0]); /* R */
  281       VEC4_COPY(source[1], quadColor[1]); /* G */
  282       VEC4_COPY(source[2], quadColor[2]); /* B */
  283       break;
  284    case PIPE_BLENDFACTOR_SRC_COLOR:
  285       VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
  286       VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
  287       VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
  288       break;
  289    case PIPE_BLENDFACTOR_SRC_ALPHA:
  290       {
  291          const float *alpha = quadColor[3];
  292          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
  293          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
  294          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
  295       }
  296       break;
  297    case PIPE_BLENDFACTOR_DST_COLOR:
  298       VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
  299       VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
  300       VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
  301       break;
  302    case PIPE_BLENDFACTOR_DST_ALPHA:
  303       {
  304          const float *alpha = dest[3];
  305          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
  306          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
  307          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
  308       } 
  309       break;
  310    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  311       {
  312          const float *alpha = quadColor[3];
  313          float diff[4], temp[4];
  314          VEC4_SUB(diff, one, dest[3]);
  315          VEC4_MIN(temp, alpha, diff);
  316          VEC4_MUL(source[0], quadColor[0], temp); /* R */
  317          VEC4_MUL(source[1], quadColor[1], temp); /* G */
  318          VEC4_MUL(source[2], quadColor[2], temp); /* B */
  319       }
  320       break;
  321    case PIPE_BLENDFACTOR_CONST_COLOR:
  322       {
  323          float comp[4];
  324          VEC4_SCALAR(comp, const_blend_color[0]); /* R */
  325          VEC4_MUL(source[0], quadColor[0], comp); /* R */
  326          VEC4_SCALAR(comp, const_blend_color[1]); /* G */
  327          VEC4_MUL(source[1], quadColor[1], comp); /* G */
  328          VEC4_SCALAR(comp, const_blend_color[2]); /* B */
  329          VEC4_MUL(source[2], quadColor[2], comp); /* B */
  330       }
  331       break;
  332    case PIPE_BLENDFACTOR_CONST_ALPHA:
  333       {
  334          float alpha[4];
  335          VEC4_SCALAR(alpha, const_blend_color[3]);
  336          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
  337          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
  338          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
  339       }
  340       break;
  341    case PIPE_BLENDFACTOR_SRC1_COLOR:
  342       VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */
  343       VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */
  344       VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */  
  345       break;
  346    case PIPE_BLENDFACTOR_SRC1_ALPHA:
  347       {
  348          const float *alpha = quadColor2[3];
  349          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
  350          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
  351          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
  352       }
  353       break;
  354    case PIPE_BLENDFACTOR_ZERO:
  355       VEC4_COPY(source[0], zero); /* R */
  356       VEC4_COPY(source[1], zero); /* G */
  357       VEC4_COPY(source[2], zero); /* B */
  358       break;
  359    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  360       {
  361          float inv_comp[4];
  362          VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
  363          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
  364          VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
  365          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
  366          VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
  367          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
  368       }
  369       break;
  370    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  371       {
  372          float inv_alpha[4];
  373          VEC4_SUB(inv_alpha, one, quadColor[3]);
  374          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
  375          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
  376          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
  377       }
  378       break;
  379    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  380       {
  381          float inv_alpha[4];
  382          VEC4_SUB(inv_alpha, one, dest[3]);
  383          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
  384          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
  385          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
  386       }
  387       break;
  388    case PIPE_BLENDFACTOR_INV_DST_COLOR:
  389       {
  390          float inv_comp[4];
  391          VEC4_SUB(inv_comp, one, dest[0]); /* R */
  392          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
  393          VEC4_SUB(inv_comp, one, dest[1]); /* G */
  394          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
  395          VEC4_SUB(inv_comp, one, dest[2]); /* B */
  396          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
  397       }
  398       break;
  399    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  400       {
  401          float inv_comp[4];
  402          /* R */
  403          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
  404          VEC4_MUL(source[0], quadColor[0], inv_comp);
  405          /* G */
  406          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
  407          VEC4_MUL(source[1], quadColor[1], inv_comp);
  408          /* B */
  409          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
  410          VEC4_MUL(source[2], quadColor[2], inv_comp);
  411       }
  412       break;
  413    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  414       {
  415          float inv_alpha[4];
  416          VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
  417          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
  418          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
  419          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
  420       }
  421       break;
  422    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
  423       {
  424          float inv_comp[4];
  425          VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
  426          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
  427          VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
  428          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
  429          VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
  430          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
  431       }
  432       break;
  433    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
  434       {
  435          float inv_alpha[4];
  436          VEC4_SUB(inv_alpha, one, quadColor2[3]);
  437          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
  438          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
  439          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
  440       }
  441       break;
  442    default:
  443       assert(0 && "invalid rgb src factor");
  444    }
  445 
  446    /*
  447     * Compute src/first term A
  448     */
  449    switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
  450    case PIPE_BLENDFACTOR_ONE:
  451       VEC4_COPY(source[3], quadColor[3]); /* A */
  452       break;
  453    case PIPE_BLENDFACTOR_SRC_COLOR:
  454       /* fall-through */
  455    case PIPE_BLENDFACTOR_SRC_ALPHA:
  456       {
  457          const float *alpha = quadColor[3];
  458          VEC4_MUL(source[3], quadColor[3], alpha); /* A */
  459       }
  460       break;
  461    case PIPE_BLENDFACTOR_DST_COLOR:
  462       /* fall-through */
  463    case PIPE_BLENDFACTOR_DST_ALPHA:
  464       VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
  465       break;
  466    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  467       /* multiply alpha by 1.0 */
  468       VEC4_COPY(source[3], quadColor[3]); /* A */
  469       break;
  470    case PIPE_BLENDFACTOR_CONST_COLOR:
  471       /* fall-through */
  472    case PIPE_BLENDFACTOR_CONST_ALPHA:
  473       {
  474          float comp[4];
  475          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
  476          VEC4_MUL(source[3], quadColor[3], comp); /* A */
  477       }
  478       break;
  479    case PIPE_BLENDFACTOR_ZERO:
  480       VEC4_COPY(source[3], zero); /* A */
  481       break;
  482    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  483       /* fall-through */
  484    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  485       {
  486          float inv_alpha[4];
  487          VEC4_SUB(inv_alpha, one, quadColor[3]);
  488          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
  489       }
  490       break;
  491    case PIPE_BLENDFACTOR_INV_DST_COLOR:
  492       /* fall-through */
  493    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  494       {
  495          float inv_alpha[4];
  496          VEC4_SUB(inv_alpha, one, dest[3]);
  497          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
  498       }
  499       break;
  500    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  501       /* fall-through */
  502    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  503       {
  504          float inv_comp[4];
  505          /* A */
  506          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
  507          VEC4_MUL(source[3], quadColor[3], inv_comp);
  508       }
  509       break;
  510    case PIPE_BLENDFACTOR_SRC1_COLOR:
  511       /* fall-through */
  512    case PIPE_BLENDFACTOR_SRC1_ALPHA:
  513       {
  514          const float *alpha = quadColor2[3];
  515          VEC4_MUL(source[3], quadColor[3], alpha); /* A */
  516       }
  517       break;
  518    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
  519       /* fall-through */
  520    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
  521       {
  522          float inv_alpha[4];
  523          VEC4_SUB(inv_alpha, one, quadColor2[3]);
  524          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
  525       }
  526       break;
  527    default:
  528       assert(0 && "invalid alpha src factor");
  529    }
  530 
  531    /* Save the original dest for use in masking */
  532    VEC4_COPY(blend_dest[0], dest[0]);
  533    VEC4_COPY(blend_dest[1], dest[1]);
  534    VEC4_COPY(blend_dest[2], dest[2]);
  535    VEC4_COPY(blend_dest[3], dest[3]);
  536 
  537 
  538    /*
  539     * Compute blend_dest/second term RGB
  540     */
  541    switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
  542    case PIPE_BLENDFACTOR_ONE:
  543       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
  544       break;
  545    case PIPE_BLENDFACTOR_SRC_COLOR:
  546       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
  547       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
  548       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
  549       break;
  550    case PIPE_BLENDFACTOR_SRC_ALPHA:
  551       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
  552       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
  553       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
  554       break;
  555    case PIPE_BLENDFACTOR_DST_ALPHA:
  556       VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
  557       VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
  558       VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
  559       break;
  560    case PIPE_BLENDFACTOR_DST_COLOR:
  561       VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
  562       VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
  563       VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
  564       break;
  565    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  566       {
  567          const float *alpha = quadColor[3];
  568          float diff[4], temp[4];
  569          VEC4_SUB(diff, one, blend_dest[3]);
  570          VEC4_MIN(temp, alpha, diff);
  571          VEC4_MUL(blend_dest[0], blend_dest[0], temp); /* R */
  572          VEC4_MUL(blend_dest[1], blend_dest[1], temp); /* G */
  573          VEC4_MUL(blend_dest[2], blend_dest[2], temp); /* B */
  574       }
  575       break;
  576    case PIPE_BLENDFACTOR_CONST_COLOR:
  577       {
  578          float comp[4];
  579          VEC4_SCALAR(comp, const_blend_color[0]); /* R */
  580          VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
  581          VEC4_SCALAR(comp, const_blend_color[1]); /* G */
  582          VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
  583          VEC4_SCALAR(comp, const_blend_color[2]); /* B */
  584          VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
  585       }
  586       break;
  587    case PIPE_BLENDFACTOR_CONST_ALPHA:
  588       {
  589          float comp[4];
  590          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
  591          VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
  592          VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
  593          VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
  594       }
  595       break;
  596    case PIPE_BLENDFACTOR_ZERO:
  597       VEC4_COPY(blend_dest[0], zero); /* R */
  598       VEC4_COPY(blend_dest[1], zero); /* G */
  599       VEC4_COPY(blend_dest[2], zero); /* B */
  600       break;
  601    case PIPE_BLENDFACTOR_SRC1_COLOR:
  602       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */
  603       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */
  604       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */
  605       break;
  606    case PIPE_BLENDFACTOR_SRC1_ALPHA:
  607       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */
  608       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */
  609       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */
  610       break;
  611    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  612       {
  613          float inv_comp[4];
  614          VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
  615          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
  616          VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
  617          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
  618          VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
  619          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
  620       }
  621       break;
  622    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  623       {
  624          float one_minus_alpha[TGSI_QUAD_SIZE];
  625          VEC4_SUB(one_minus_alpha, one, quadColor[3]);
  626          VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
  627          VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
  628          VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
  629       }
  630       break;
  631    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  632       {
  633          float inv_comp[4];
  634          VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
  635          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
  636          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
  637          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
  638       }
  639       break;
  640    case PIPE_BLENDFACTOR_INV_DST_COLOR:
  641       {
  642          float inv_comp[4];
  643          VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
  644          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
  645          VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
  646          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
  647          VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
  648          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
  649       }
  650       break;
  651    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  652       {
  653          float inv_comp[4];
  654          /* R */
  655          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
  656          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
  657          /* G */
  658          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
  659          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
  660          /* B */
  661          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
  662          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
  663       }
  664       break;
  665    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  666       {
  667          float inv_comp[4];
  668          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
  669          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
  670          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
  671          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
  672       }
  673       break;
  674    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
  675       {
  676          float inv_comp[4];
  677          VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
  678          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
  679          VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
  680          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
  681          VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
  682          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
  683       }
  684       break;
  685    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
  686       {
  687          float one_minus_alpha[TGSI_QUAD_SIZE];
  688          VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
  689          VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
  690          VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
  691          VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
  692       }
  693       break;
  694    default:
  695       assert(0 && "invalid rgb dst factor");
  696    }
  697 
  698    /*
  699     * Compute blend_dest/second term A
  700     */
  701    switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
  702    case PIPE_BLENDFACTOR_ONE:
  703       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
  704       break;
  705    case PIPE_BLENDFACTOR_SRC_COLOR:
  706       /* fall-through */
  707    case PIPE_BLENDFACTOR_SRC_ALPHA:
  708       VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
  709       break;
  710    case PIPE_BLENDFACTOR_DST_COLOR:
  711       /* fall-through */
  712    case PIPE_BLENDFACTOR_DST_ALPHA:
  713       VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
  714       break;
  715    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  716       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
  717       break;
  718    case PIPE_BLENDFACTOR_CONST_COLOR:
  719       /* fall-through */
  720    case PIPE_BLENDFACTOR_CONST_ALPHA:
  721       {
  722          float comp[4];
  723          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
  724          VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
  725       }
  726       break;
  727    case PIPE_BLENDFACTOR_ZERO:
  728       VEC4_COPY(blend_dest[3], zero); /* A */
  729       break;
  730    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  731       /* fall-through */
  732    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  733       {
  734          float one_minus_alpha[TGSI_QUAD_SIZE];
  735          VEC4_SUB(one_minus_alpha, one, quadColor[3]);
  736          VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
  737       }
  738       break;
  739    case PIPE_BLENDFACTOR_INV_DST_COLOR:
  740       /* fall-through */
  741    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  742       {
  743          float inv_comp[4];
  744          VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
  745          VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
  746       }
  747       break;
  748    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  749       /* fall-through */
  750    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  751       {
  752          float inv_comp[4];
  753          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
  754          VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
  755       }
  756       break;
  757    case PIPE_BLENDFACTOR_SRC1_COLOR:
  758       /* fall-through */
  759    case PIPE_BLENDFACTOR_SRC1_ALPHA:
  760       VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */
  761       break;
  762    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
  763       /* fall-through */
  764    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
  765       {
  766          float one_minus_alpha[TGSI_QUAD_SIZE];
  767          VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
  768          VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
  769       }
  770       break;
  771    default:
  772       assert(0 && "invalid alpha dst factor");
  773    }
  774 
  775    /*
  776     * Combine RGB terms
  777     */
  778    switch (softpipe->blend->rt[blend_index].rgb_func) {
  779    case PIPE_BLEND_ADD:
  780       VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */
  781       VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */
  782       VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */
  783       break;
  784    case PIPE_BLEND_SUBTRACT:
  785       VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */
  786       VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */
  787       VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */
  788       break;
  789    case PIPE_BLEND_REVERSE_SUBTRACT:
  790       VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */
  791       VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */
  792       VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */
  793       break;
  794    case PIPE_BLEND_MIN:
  795       VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
  796       VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
  797       VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
  798       break;
  799    case PIPE_BLEND_MAX:
  800       VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
  801       VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
  802       VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
  803       break;
  804    default:
  805       assert(0 && "invalid rgb blend func");
  806    }
  807 
  808    /*
  809     * Combine A terms
  810     */
  811    switch (softpipe->blend->rt[blend_index].alpha_func) {
  812    case PIPE_BLEND_ADD:
  813       VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */
  814       break;
  815    case PIPE_BLEND_SUBTRACT:
  816       VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */
  817       break;
  818    case PIPE_BLEND_REVERSE_SUBTRACT:
  819       VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */
  820       break;
  821    case PIPE_BLEND_MIN:
  822       VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
  823       break;
  824    case PIPE_BLEND_MAX:
  825       VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
  826       break;
  827    default:
  828       assert(0 && "invalid alpha blend func");
  829    }
  830 }
  831 
  832 static void
  833 colormask_quad(unsigned colormask,
  834                float (*quadColor)[4],
  835                float (*dest)[4])
  836 {
  837    /* R */
  838    if (!(colormask & PIPE_MASK_R))
  839       COPY_4V(quadColor[0], dest[0]);
  840 
  841    /* G */
  842    if (!(colormask & PIPE_MASK_G))
  843       COPY_4V(quadColor[1], dest[1]);
  844 
  845    /* B */
  846    if (!(colormask & PIPE_MASK_B))
  847       COPY_4V(quadColor[2], dest[2]);
  848 
  849    /* A */
  850    if (!(colormask & PIPE_MASK_A))
  851       COPY_4V(quadColor[3], dest[3]);
  852 }
  853 
  854 
  855 /**
  856  * Clamp all colors in a quad to [0, 1]
  857  */
  858 static void
  859 clamp_colors(float (*quadColor)[4])
  860 {
  861    unsigned i, j;
  862 
  863    for (i = 0; i < 4; i++) {
  864       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  865          quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
  866       }
  867    }
  868 }
  869 
  870 
  871 /**
  872  * If we're drawing to a luminance, luminance/alpha or intensity surface
  873  * we have to adjust (rebase) the fragment/quad colors before writing them
  874  * to the tile cache.  The tile cache always stores RGBA colors but if
  875  * we're caching a L/A surface (for example) we need to be sure that R=G=B
  876  * so that subsequent reads from the surface cache appear to return L/A
  877  * values.
  878  * The piglit fbo-blending-formats test will exercise this.
  879  */
  880 static void
  881 rebase_colors(enum format base_format, float (*quadColor)[4])
  882 {
  883    unsigned i;
  884 
  885    switch (base_format) {
  886    case RGB:
  887       for (i = 0; i < 4; i++) {
  888          /* A = 1 */
  889          quadColor[3][i] = 1.0F;
  890       }
  891       break;
  892    case LUMINANCE:
  893       for (i = 0; i < 4; i++) {
  894          /* B = G = R */
  895          quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
  896          /* A = 1 */
  897          quadColor[3][i] = 1.0F;
  898       }
  899       break;
  900    case LUMINANCE_ALPHA:
  901       for (i = 0; i < 4; i++) {
  902          /* B = G = R */
  903          quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
  904       }
  905       break;
  906    case INTENSITY:
  907       for (i = 0; i < 4; i++) {
  908          /* A = B = G = R */
  909          quadColor[3][i] = quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
  910       }
  911       break;
  912    default:
  913       ; /* nothing */
  914    }
  915 }
  916 
  917 static void
  918 blend_fallback(struct quad_stage *qs, 
  919                struct quad_header *quads[],
  920                unsigned nr)
  921 {
  922    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
  923    struct softpipe_context *softpipe = qs->softpipe;
  924    const struct pipe_blend_state *blend = softpipe->blend;
  925    unsigned cbuf;
  926    boolean write_all =
  927       softpipe->fs_variant->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
  928 
  929    for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
  930       if (softpipe->framebuffer.cbufs[cbuf]) {
  931          /* which blend/mask state index to use: */
  932          const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
  933          float dest[4][TGSI_QUAD_SIZE];
  934          struct softpipe_cached_tile *tile
  935             = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
  936                                  quads[0]->input.x0, 
  937                                  quads[0]->input.y0, quads[0]->input.layer);
  938          const boolean clamp = bqs->clamp[cbuf];
  939          const float *blend_color;
  940          const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf);
  941          uint q, i, j;
  942 
  943          if (clamp)
  944             blend_color = softpipe->blend_color_clamped.color;
  945          else
  946             blend_color = softpipe->blend_color.color;
  947 
  948          for (q = 0; q < nr; q++) {
  949             struct quad_header *quad = quads[q];
  950             float (*quadColor)[4];
  951             float (*quadColor2)[4] = NULL;
  952             float temp_quad_color[TGSI_QUAD_SIZE][4];
  953             const int itx = (quad->input.x0 & (TILE_SIZE-1));
  954             const int ity = (quad->input.y0 & (TILE_SIZE-1));
  955 
  956             if (write_all) {
  957                for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  958                   for (i = 0; i < 4; i++) {
  959                      temp_quad_color[i][j] = quad->output.color[0][i][j];
  960                   }
  961                }
  962                quadColor = temp_quad_color;
  963             } else {
  964                quadColor = quad->output.color[cbuf];
  965                if (dual_source_blend)
  966                   quadColor2 = quad->output.color[cbuf + 1];
  967             }
  968 
  969             /* If fixed-point dest color buffer, need to clamp the incoming
  970              * fragment colors now.
  971              */
  972             if (clamp || softpipe->rasterizer->clamp_fragment_color) {
  973                clamp_colors(quadColor);
  974             }
  975 
  976             /* get/swizzle dest colors
  977              */
  978             for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  979                int x = itx + (j & 1);
  980                int y = ity + (j >> 1);
  981                for (i = 0; i < 4; i++) {
  982                   dest[i][j] = tile->data.color[y][x][i];
  983                }
  984             }
  985 
  986 
  987             if (blend->logicop_enable) {
  988                if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) {
  989                   logicop_quad( qs, quadColor, dest );
  990                }
  991             }
  992             else if (blend->rt[blend_buf].blend_enable) {
  993                blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf);
  994 
  995                /* If fixed-point dest color buffer, need to clamp the outgoing
  996                 * fragment colors now.
  997                 */
  998                if (clamp) {
  999                   clamp_colors(quadColor);
 1000                }
 1001             }
 1002 
 1003             rebase_colors(bqs->base_format[cbuf], quadColor);
 1004 
 1005             if (blend->rt[blend_buf].colormask != 0xf)
 1006                colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
 1007 
 1008             /* Output color values
 1009              */
 1010             for (j = 0; j < TGSI_QUAD_SIZE; j++) {
 1011                if (quad->inout.mask & (1 << j)) {
 1012                   int x = itx + (j & 1);
 1013                   int y = ity + (j >> 1);
 1014                   for (i = 0; i < 4; i++) { /* loop over color chans */
 1015                      tile->data.color[y][x][i] = quadColor[i][j];
 1016                   }
 1017                }
 1018             }
 1019          }
 1020       }
 1021    }
 1022 }
 1023 
 1024 
 1025 static void
 1026 blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs, 
 1027                                          struct quad_header *quads[],
 1028                                          unsigned nr)
 1029 {
 1030    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
 1031    static const float one[4] = { 1, 1, 1, 1 };
 1032    float one_minus_alpha[TGSI_QUAD_SIZE];
 1033    float dest[4][TGSI_QUAD_SIZE];
 1034    float source[4][TGSI_QUAD_SIZE];
 1035    uint i, j, q;
 1036 
 1037    struct softpipe_cached_tile *tile
 1038       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
 1039                            quads[0]->input.x0, 
 1040                            quads[0]->input.y0, quads[0]->input.layer);
 1041 
 1042    for (q = 0; q < nr; q++) {
 1043       struct quad_header *quad = quads[q];
 1044       float (*quadColor)[4] = quad->output.color[0];
 1045       const float *alpha = quadColor[3];
 1046       const int itx = (quad->input.x0 & (TILE_SIZE-1));
 1047       const int ity = (quad->input.y0 & (TILE_SIZE-1));
 1048       
 1049       /* get/swizzle dest colors */
 1050       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
 1051          int x = itx + (j & 1);
 1052          int y = ity + (j >> 1);
 1053          for (i = 0; i < 4; i++) {
 1054             dest[i][j] = tile->data.color[y][x][i];
 1055          }
 1056       }
 1057 
 1058       /* If fixed-point dest color buffer, need to clamp the incoming
 1059        * fragment colors now.
 1060        */
 1061       if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
 1062          clamp_colors(quadColor);
 1063       }
 1064 
 1065       VEC4_MUL(source[0], quadColor[0], alpha); /* R */
 1066       VEC4_MUL(source[1], quadColor[1], alpha); /* G */
 1067       VEC4_MUL(source[2], quadColor[2], alpha); /* B */
 1068       VEC4_MUL(source[3], quadColor[3], alpha); /* A */
 1069 
 1070       VEC4_SUB(one_minus_alpha, one, alpha);
 1071       VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
 1072       VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
 1073       VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
 1074       VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
 1075 
 1076       VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
 1077       VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
 1078       VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
 1079       VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
 1080 
 1081       /* If fixed-point dest color buffer, need to clamp the outgoing
 1082        * fragment colors now.
 1083        */
 1084       if (bqs->clamp[0]) {
 1085          clamp_colors(quadColor);
 1086       }
 1087 
 1088       rebase_colors(bqs->base_format[0], quadColor);
 1089 
 1090       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
 1091          if (quad->inout.mask & (1 << j)) {
 1092             int x = itx + (j & 1);
 1093             int y = ity + (j >> 1);
 1094             for (i = 0; i < 4; i++) { /* loop over color chans */
 1095                tile->data.color[y][x][i] = quadColor[i][j];
 1096             }
 1097          }
 1098       }
 1099    }
 1100 }
 1101 
 1102 static void
 1103 blend_single_add_one_one(struct quad_stage *qs, 
 1104                          struct quad_header *quads[],
 1105                          unsigned nr)
 1106 {
 1107    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
 1108    float dest[4][TGSI_QUAD_SIZE];
 1109    uint i, j, q;
 1110 
 1111    struct softpipe_cached_tile *tile
 1112       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
 1113                            quads[0]->input.x0, 
 1114                            quads[0]->input.y0, quads[0]->input.layer);
 1115 
 1116    for (q = 0; q < nr; q++) {
 1117       struct quad_header *quad = quads[q];
 1118       float (*quadColor)[4] = quad->output.color[0];
 1119       const int itx = (quad->input.x0 & (TILE_SIZE-1));
 1120       const int ity = (quad->input.y0 & (TILE_SIZE-1));
 1121       
 1122       /* get/swizzle dest colors */
 1123       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
 1124          int x = itx + (j & 1);
 1125          int y = ity + (j >> 1);
 1126          for (i = 0; i < 4; i++) {
 1127             dest[i][j] = tile->data.color[y][x][i];
 1128          }
 1129       }
 1130      
 1131       /* If fixed-point dest color buffer, need to clamp the incoming
 1132        * fragment colors now.
 1133        */
 1134       if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
 1135          clamp_colors(quadColor);
 1136       }
 1137 
 1138       VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */
 1139       VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */
 1140       VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */
 1141       VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */
 1142 
 1143       /* If fixed-point dest color buffer, need to clamp the outgoing
 1144        * fragment colors now.
 1145        */
 1146       if (bqs->clamp[0]) {
 1147          clamp_colors(quadColor);
 1148       }
 1149 
 1150       rebase_colors(bqs->base_format[0], quadColor);
 1151 
 1152       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
 1153          if (quad->inout.mask & (1 << j)) {
 1154             int x = itx + (j & 1);
 1155             int y = ity + (j >> 1);
 1156             for (i = 0; i < 4; i++) { /* loop over color chans */
 1157                tile->data.color[y][x][i] = quadColor[i][j];
 1158             }
 1159          }
 1160       }
 1161    }
 1162 }
 1163 
 1164 
 1165 /**
 1166  * Just copy the quad color to the framebuffer tile (respecting the writemask),
 1167  * for one color buffer.
 1168  * Clamping will be done, if needed (depending on the color buffer's
 1169  * datatype) when we write/pack the colors later.
 1170  */
 1171 static void
 1172 single_output_color(struct quad_stage *qs, 
 1173                     struct quad_header *quads[],
 1174                     unsigned nr)
 1175 {
 1176    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
 1177    uint i, j, q;
 1178 
 1179    struct softpipe_cached_tile *tile
 1180       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
 1181                            quads[0]->input.x0, 
 1182                            quads[0]->input.y0, quads[0]->input.layer);
 1183 
 1184    for (q = 0; q < nr; q++) {
 1185       struct quad_header *quad = quads[q];
 1186       float (*quadColor)[4] = quad->output.color[0];
 1187       const int itx = (quad->input.x0 & (TILE_SIZE-1));
 1188       const int ity = (quad->input.y0 & (TILE_SIZE-1));
 1189 
 1190       if (qs->softpipe->rasterizer->clamp_fragment_color)
 1191          clamp_colors(quadColor);
 1192 
 1193       rebase_colors(bqs->base_format[0], quadColor);
 1194 
 1195       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
 1196          if (quad->inout.mask & (1 << j)) {
 1197             int x = itx + (j & 1);
 1198             int y = ity + (j >> 1);
 1199             for (i = 0; i < 4; i++) { /* loop over color chans */
 1200                tile->data.color[y][x][i] = quadColor[i][j];
 1201             }
 1202          }
 1203       }
 1204    }
 1205 }
 1206 
 1207 static void
 1208 blend_noop(struct quad_stage *qs, 
 1209            struct quad_header *quads[],
 1210            unsigned nr)
 1211 {
 1212 }
 1213 
 1214 
 1215 static void
 1216 choose_blend_quad(struct quad_stage *qs, 
 1217                   struct quad_header *quads[],
 1218                   unsigned nr)
 1219 {
 1220    struct blend_quad_stage *bqs = blend_quad_stage(qs);
 1221    struct softpipe_context *softpipe = qs->softpipe;
 1222    const struct pipe_blend_state *blend = softpipe->blend;
 1223    unsigned i;
 1224 
 1225    qs->run = blend_fallback;
 1226    
 1227    if (softpipe->framebuffer.nr_cbufs == 0) {
 1228       qs->run = blend_noop;
 1229    }
 1230    else if (!softpipe->blend->logicop_enable &&
 1231             softpipe->blend->rt[0].colormask == 0xf &&
 1232             softpipe->framebuffer.nr_cbufs == 1)
 1233    {
 1234       if (softpipe->framebuffer.cbufs[0] == NULL) {
 1235          qs->run = blend_noop;
 1236       }
 1237       else if (!blend->rt[0].blend_enable) {
 1238          qs->run = single_output_color;
 1239       }
 1240       else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
 1241                blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
 1242                blend->rt[0].rgb_func == blend->rt[0].alpha_func)
 1243       {
 1244          if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
 1245             if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
 1246                 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
 1247                qs->run = blend_single_add_one_one;
 1248             }
 1249             else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
 1250                 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
 1251                qs->run = blend_single_add_src_alpha_inv_src_alpha;
 1252 
 1253          }
 1254       }
 1255    }
 1256 
 1257    /* For each color buffer, determine if the buffer has destination alpha and
 1258     * whether color clamping is needed.
 1259     */
 1260    for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
 1261       if (softpipe->framebuffer.cbufs[i]) {
 1262          const enum pipe_format format = softpipe->framebuffer.cbufs[i]->format;
 1263          const struct util_format_description *desc =
 1264             util_format_description(format);
 1265          /* assuming all or no color channels are normalized: */
 1266          bqs->clamp[i] = desc->channel[0].normalized;
 1267          bqs->format_type[i] = desc->channel[0].type;
 1268 
 1269          if (util_format_is_intensity(format))
 1270             bqs->base_format[i] = INTENSITY;
 1271          else if (util_format_is_luminance(format))
 1272             bqs->base_format[i] = LUMINANCE;
 1273          else if (util_format_is_luminance_alpha(format))
 1274             bqs->base_format[i] = LUMINANCE_ALPHA;
 1275          else if (!util_format_has_alpha(format))
 1276             bqs->base_format[i] = RGB;
 1277          else
 1278             bqs->base_format[i] = RGBA;
 1279       }
 1280    }
 1281 
 1282    qs->run(qs, quads, nr);
 1283 }
 1284 
 1285 
 1286 static void blend_begin(struct quad_stage *qs)
 1287 {
 1288    qs->run = choose_blend_quad;
 1289 }
 1290 
 1291 
 1292 static void blend_destroy(struct quad_stage *qs)
 1293 {
 1294    FREE( qs );
 1295 }
 1296 
 1297 
 1298 struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
 1299 {
 1300    struct blend_quad_stage *stage = CALLOC_STRUCT(blend_quad_stage);
 1301 
 1302    if (!stage)
 1303       return NULL;
 1304 
 1305    stage->base.softpipe = softpipe;
 1306    stage->base.begin = blend_begin;
 1307    stage->base.run = choose_blend_quad;
 1308    stage->base.destroy = blend_destroy;
 1309 
 1310    return &stage->base;
 1311 }