"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c" (16 Sep 2020, 33863 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "lp_bld_format_aos.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /**************************************************************************
    2  *
    3  * Copyright 2009 VMware, Inc.
    4  * All Rights Reserved.
    5  *
    6  * Permission is hereby granted, free of charge, to any person obtaining a
    7  * copy of this software and associated documentation files (the
    8  * "Software"), to deal in the Software without restriction, including
    9  * without limitation the rights to use, copy, modify, merge, publish,
   10  * distribute, sub license, and/or sell copies of the Software, and to
   11  * permit persons to whom the Software is furnished to do so, subject to
   12  * the following conditions:
   13  *
   14  * The above copyright notice and this permission notice (including the
   15  * next paragraph) shall be included in all copies or substantial portions
   16  * of the Software.
   17  *
   18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
   21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
   22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   25  *
   26  **************************************************************************/
   27 
   28 /**
   29  * @file
   30  * AoS pixel format manipulation.
   31  *
   32  * @author Jose Fonseca <jfonseca@vmware.com>
   33  */
   34 
   35 
   36 #include "util/format/u_format.h"
   37 #include "util/u_memory.h"
   38 #include "util/u_math.h"
   39 #include "util/u_pointer.h"
   40 #include "util/u_string.h"
   41 #include "util/u_cpu_detect.h"
   42 
   43 #include "lp_bld_arit.h"
   44 #include "lp_bld_init.h"
   45 #include "lp_bld_type.h"
   46 #include "lp_bld_flow.h"
   47 #include "lp_bld_const.h"
   48 #include "lp_bld_conv.h"
   49 #include "lp_bld_swizzle.h"
   50 #include "lp_bld_gather.h"
   51 #include "lp_bld_debug.h"
   52 #include "lp_bld_format.h"
   53 #include "lp_bld_pack.h"
   54 #include "lp_bld_intr.h"
   55 #include "lp_bld_logic.h"
   56 #include "lp_bld_bitarit.h"
   57 
   58 
   59 /**
   60  * Basic swizzling.  Rearrange the order of the unswizzled array elements
   61  * according to the format description.  PIPE_SWIZZLE_0/1 are supported
   62  * too.
   63  * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
   64  */
   65 LLVMValueRef
   66 lp_build_format_swizzle_aos(const struct util_format_description *desc,
   67                             struct lp_build_context *bld,
   68                             LLVMValueRef unswizzled)
   69 {
   70    unsigned char swizzles[4];
   71    unsigned chan;
   72 
   73    assert(bld->type.length % 4 == 0);
   74 
   75    for (chan = 0; chan < 4; ++chan) {
   76       enum pipe_swizzle swizzle;
   77 
   78       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
   79          /*
   80           * For ZS formats do RGBA = ZZZ1
   81           */
   82          if (chan == 3) {
   83             swizzle = PIPE_SWIZZLE_1;
   84          } else if (desc->swizzle[0] == PIPE_SWIZZLE_NONE) {
   85             swizzle = PIPE_SWIZZLE_0;
   86          } else {
   87             swizzle = desc->swizzle[0];
   88          }
   89       } else {
   90          swizzle = desc->swizzle[chan];
   91       }
   92       swizzles[chan] = swizzle;
   93    }
   94 
   95    return lp_build_swizzle_aos(bld, unswizzled, swizzles);
   96 }
   97 
   98 
   99 /**
  100  * Whether the format matches the vector type, apart from swizzles.
  101  */
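      /*
       * For example, PIPE_FORMAT_R8G8B8A8_UNORM should match an 8-bit,
       * normalized, unsigned lp_type (of length 4 or a multiple of 4),
       * while PIPE_FORMAT_B5G6R5_UNORM can never match, since its channels
       * have different widths.
       */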
  102 static inline boolean
  103 format_matches_type(const struct util_format_description *desc,
  104                     struct lp_type type)
  105 {
  106    enum util_format_type chan_type;
  107    unsigned chan;
  108 
  109    assert(type.length % 4 == 0);
  110 
  111    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
  112        desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
  113        desc->block.width != 1 ||
  114        desc->block.height != 1) {
  115       return FALSE;
  116    }
  117 
  118    if (type.floating) {
  119       chan_type = UTIL_FORMAT_TYPE_FLOAT;
  120    } else if (type.fixed) {
  121       chan_type = UTIL_FORMAT_TYPE_FIXED;
  122    } else if (type.sign) {
  123       chan_type = UTIL_FORMAT_TYPE_SIGNED;
  124    } else {
  125       chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
  126    }
  127 
  128    for (chan = 0; chan < desc->nr_channels; ++chan) {
  129       if (desc->channel[chan].size != type.width) {
  130          return FALSE;
  131       }
  132 
  133       if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
  134          if (desc->channel[chan].type != chan_type ||
  135              desc->channel[chan].normalized != type.norm) {
  136             return FALSE;
  137          }
  138       }
  139    }
  140 
  141    return TRUE;
  142 }
  143 
  144 /*
  145  * Do rounding when converting small unorm values to larger ones.
  146  * Not quite 100% accurate, as it's done by replicating the most significant
  147  * source bits into the new low bits, but should be good enough.
  148  */
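      /*
       * E.g. scaling a 4-bit unorm value x up to 8 bits yields (x << 4) | x,
       * so 0x9 becomes 0x99 and the maximum 0xf becomes 0xff; for 5 -> 8 bits
       * the result is (x << 3) | (x >> 2), so 0x1f still maps to 0xff and only
       * the low bits are approximate.
       */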
  149 
  150 static inline LLVMValueRef
  151 scale_bits_up(struct gallivm_state *gallivm,
  152               int src_bits,
  153               int dst_bits,
  154               LLVMValueRef src,
  155               struct lp_type src_type)
  156 {
  157    LLVMBuilderRef builder = gallivm->builder;
  158    LLVMValueRef result = src;
  159 
  160    if (src_bits == 1 && dst_bits > 1) {
  161       /*
  162        * Useful for a1 - we'd need quite some repeated copies otherwise.
  163        */
  164       struct lp_build_context bld;
  165       LLVMValueRef dst_mask;
  166       lp_build_context_init(&bld, gallivm, src_type);
  167       dst_mask = lp_build_const_int_vec(gallivm, src_type,
  168                                         (1 << dst_bits) - 1);
  169       result = lp_build_cmp(&bld, PIPE_FUNC_EQUAL, src,
  170                             lp_build_const_int_vec(gallivm, src_type, 0));
  171       result = lp_build_andnot(&bld, dst_mask, result);
  172    }
  173    else if (dst_bits > src_bits) {
  174       /* Scale up bits */
  175       int db = dst_bits - src_bits;
  176 
  177       /* Shift left by difference in bits */
  178       result = LLVMBuildShl(builder,
  179                             src,
  180                             lp_build_const_int_vec(gallivm, src_type, db),
  181                             "");
  182 
  183       if (db <= src_bits) {
  184          /* Enough bits in src to fill the remainder */
  185          LLVMValueRef lower = LLVMBuildLShr(builder,
  186                                             src,
  187                                             lp_build_const_int_vec(gallivm, src_type,
  188                                                                    src_bits - db),
  189                                             "");
  190 
  191          result = LLVMBuildOr(builder, result, lower, "");
  192       } else if (db > src_bits) {
  193          /* Need to repeatedly copy src bits to fill remainder in dst */
  194          unsigned n;
  195 
  196          for (n = src_bits; n < dst_bits; n *= 2) {
  197             LLVMValueRef shuv = lp_build_const_int_vec(gallivm, src_type, n);
  198 
  199             result = LLVMBuildOr(builder,
  200                                  result,
  201                                  LLVMBuildLShr(builder, result, shuv, ""),
  202                                  "");
  203          }
  204       }
  205    } else {
  206       assert (dst_bits == src_bits);
  207    }
  208 
  209    return result;
  210 }
  211 
  212 /**
  213  * Unpack a single pixel into its XYZW components.
  214  *
  215  * @param desc  the pixel format for the packed pixel value
  216  * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
  217  *
  218  * @return XYZW in a float[4] vector.
  219  */
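      /*
       * Rough example, assuming PIPE_FORMAT_B8G8R8A8_UNORM: a packed value of
       * 0x01020304 is broadcast, shifted and masked into the per-channel
       * integers {0x04, 0x03, 0x02, 0x01} (B, G, R, A in storage order) and
       * then scaled to the floats {4/255, 3/255, 2/255, 1/255}.  Swizzling to
       * RGBA order is done separately, e.g. by lp_build_format_swizzle_aos().
       */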
  220 static inline LLVMValueRef
  221 lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
  222                                const struct util_format_description *desc,
  223                                LLVMValueRef packed)
  224 {
  225    LLVMBuilderRef builder = gallivm->builder;
  226    LLVMValueRef shifted, casted, scaled, masked;
  227    LLVMValueRef shifts[4];
  228    LLVMValueRef masks[4];
  229    LLVMValueRef scales[4];
  230    LLVMTypeRef vec32_type;
  231 
  232    boolean normalized;
  233    boolean needs_uitofp;
  234    unsigned i;
  235 
  236    /* TODO: Support more formats */
  237    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
  238    assert(desc->block.width == 1);
  239    assert(desc->block.height == 1);
  240    assert(desc->block.bits <= 32);
  241 
  242    /* Do the intermediate integer computations with 32bit integers since it
  243     * matches floating point size */
  244    assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));
  245 
  246    vec32_type = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
  247 
  248    /* Broadcast the packed value to all four channels
  249     * before: packed = BGRA
  250     * after: packed = {BGRA, BGRA, BGRA, BGRA}
  251     */
  252    packed = LLVMBuildInsertElement(builder, LLVMGetUndef(vec32_type), packed,
  253                                    LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
  254                                    "");
  255    packed = LLVMBuildShuffleVector(builder, packed, LLVMGetUndef(vec32_type),
  256                                    LLVMConstNull(vec32_type),
  257                                    "");
  258 
  259    /* Initialize vector constants */
  260    normalized = FALSE;
  261    needs_uitofp = FALSE;
  262 
  263    /* Loop over 4 color components */
  264    for (i = 0; i < 4; ++i) {
  265       unsigned bits = desc->channel[i].size;
  266       unsigned shift = desc->channel[i].shift;
  267 
  268       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
  269          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  270          masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
  271          scales[i] =  LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
  272       }
  273       else {
  274          unsigned long long mask = (1ULL << bits) - 1;
  275 
  276          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
  277 
  278          if (bits == 32) {
  279             needs_uitofp = TRUE;
  280          }
  281 
  282          shifts[i] = lp_build_const_int32(gallivm, shift);
  283          masks[i] = lp_build_const_int32(gallivm, mask);
  284 
  285          if (desc->channel[i].normalized) {
  286             scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
  287             normalized = TRUE;
  288          }
  289          else
  290             scales[i] =  lp_build_const_float(gallivm, 1.0);
  291       }
  292    }
  293 
  294    /* Ex: convert packed = {XYZW, XYZW, XYZW, XYZW}
  295     * into masked = {X, Y, Z, W}
  296     */
  297    if (desc->block.bits < 32 && normalized) {
  298       /*
  299        * Note: we cannot do the shift below on x86 natively until AVX2.
  300        *
  301        * Old llvm versions will resort to scalar extract/shift insert,
  302        * which is definitely terrible, new versions will just do
  303        * several vector shifts and shuffle/blend results together.
  304        * We could turn this into a variable left shift plus a constant
  305        * right shift, and llvm would then turn the variable left shift
  306        * into a mul for us (albeit without sse41 the mul needs emulation
  307        * too...). However, since we're going to do a float mul
  308        * anyway, we just adjust that mul instead (plus the mask), skipping
  309        * the shift completely.
  310        * We could also use an extra mul when the format isn't normalized and
  311        * we don't have AVX2 support, but don't bother for now. Unfortunately,
  312        * this strategy doesn't work for 32bit formats (such as rgb10a2 or even
  313        * rgba8 if it ends up here), as that would require UIToFP, albeit that
  314        * would be fixable with an easy 16bit shuffle (unless there are channels
  315        * crossing 16bit boundaries).
  316        */
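            /*
             * Worked example, assuming PIPE_FORMAT_B5G6R5_UNORM: the green
             * channel has 6 bits at shift 5, so instead of computing
             * ((packed >> 5) & 0x3f) * (1.0 / 0x3f) we mask with 0x7e0 and
             * multiply by 1.0 / 0x7e0.  Since (packed & 0x7e0) is the 6-bit
             * value times 32, both expressions give the same float, with no
             * shift needed.
             */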
  317       for (i = 0; i < 4; ++i) {
  318          if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
  319             unsigned bits = desc->channel[i].size;
  320             unsigned shift = desc->channel[i].shift;
  321             unsigned long long mask = ((1ULL << bits) - 1) << shift;
  322             scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
  323             masks[i] = lp_build_const_int32(gallivm, mask);
  324          }
  325       }
  326       masked = LLVMBuildAnd(builder, packed, LLVMConstVector(masks, 4), "");
  327    } else {
  328       shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
  329       masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
  330    }
  331 
  332    if (!needs_uitofp) {
  333       /* UIToFP can't be expressed in SSE2 */
  334       casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
  335    } else {
  336       casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
  337    }
  338 
  339    /*
  340     * At this point 'casted' may be a vector of floats such as
  341     * {255.0, 255.0, 255.0, 255.0}. (Normalized values may be multiplied
  342     * by powers of two). Next, if the pixel values are normalized
  343     * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
  344     */
  345 
  346    if (normalized)
  347       scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
  348    else
  349       scaled = casted;
  350 
  351    return scaled;
  352 }
  353 
  354 
  355 /**
  356  * Pack a single pixel.
  357  *
  358  * @param rgba 4 float vector with the unpacked components.
  359  *
  360  * XXX: This is mostly for reference and testing -- operating on a single
  361  * pixel at a time is rarely if ever needed.
  362  */
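      /*
       * Rough example, assuming PIPE_FORMAT_R8G8B8A8_UNORM: packing the float
       * vector {1.0, 0.5, 0.0, 1.0} scales the channels by 255, truncates to
       * {255, 127, 0, 255} and shifts/ORs them into the 32-bit value
       * 0xff007fff (R in bits 0..7, A in bits 24..31).
       */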
  363 LLVMValueRef
  364 lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
  365                        const struct util_format_description *desc,
  366                        LLVMValueRef rgba)
  367 {
  368    LLVMBuilderRef builder = gallivm->builder;
  369    LLVMTypeRef type;
  370    LLVMValueRef packed = NULL;
  371    LLVMValueRef swizzles[4];
  372    LLVMValueRef shifted, casted, scaled, unswizzled;
  373    LLVMValueRef shifts[4];
  374    LLVMValueRef scales[4];
  375    boolean normalized;
  376    unsigned i, j;
  377 
  378    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
  379    assert(desc->block.width == 1);
  380    assert(desc->block.height == 1);
  381 
  382    type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);
  383 
  384    /* Unswizzle the color components into the source vector. */
  385    for (i = 0; i < 4; ++i) {
  386       for (j = 0; j < 4; ++j) {
  387          if (desc->swizzle[j] == i)
  388             break;
  389       }
  390       if (j < 4)
  391          swizzles[i] = lp_build_const_int32(gallivm, j);
  392       else
  393          swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  394    }
  395 
  396    unswizzled = LLVMBuildShuffleVector(builder, rgba,
  397                                        LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),
  398                                        LLVMConstVector(swizzles, 4), "");
  399 
  400    normalized = FALSE;
  401    for (i = 0; i < 4; ++i) {
  402       unsigned bits = desc->channel[i].size;
  403       unsigned shift = desc->channel[i].shift;
  404 
  405       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
  406          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  407          scales[i] =  LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));
  408       }
  409       else {
  410          unsigned mask = (1 << bits) - 1;
  411 
  412          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
  413          assert(bits < 32);
  414 
  415          shifts[i] = lp_build_const_int32(gallivm, shift);
  416 
  417          if (desc->channel[i].normalized) {
  418             scales[i] = lp_build_const_float(gallivm, mask);
  419             normalized = TRUE;
  420          }
  421          else
  422             scales[i] = lp_build_const_float(gallivm, 1.0);
  423       }
  424    }
  425 
  426    if (normalized)
  427       scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
  428    else
  429       scaled = unswizzled;
  430 
  431    casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");
  432 
  433    shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
  434    
  435    /* Bitwise or all components */
  436    for (i = 0; i < 4; ++i) {
  437       if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
  438          LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
  439                                                lp_build_const_int32(gallivm, i), "");
  440          if (packed)
  441             packed = LLVMBuildOr(builder, packed, component, "");
  442          else
  443             packed = component;
  444       }
  445    }
  446 
  447    if (!packed)
  448       packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  449 
  450    if (desc->block.bits < 32)
  451       packed = LLVMBuildTrunc(builder, packed, type, "");
  452 
  453    return packed;
  454 }
  455 
  456 
  457 
  458 
  459 /**
  460  * Fetch a pixel into a 4 float AoS.
  461  *
  462  * \param format_desc  describes format of the image we're fetching from
  463  * \param aligned  whether the data is guaranteed to be aligned
  464  * \param ptr  address of the pixel block (or the texel if uncompressed)
  465  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
  466  *              these will always be (0, 0).
  467  * \param cache  optional value pointing to a lp_build_format_cache structure
  468  * \return  a 4 element vector with the pixel's RGBA values.
  469  */
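      /*
       * A typical single-pixel use might look roughly like this (a sketch,
       * with hypothetical caller-provided variables):
       *
       *    struct lp_type type = lp_float32_vec4_type();
       *    LLVMValueRef rgba = lp_build_fetch_rgba_aos(gallivm, format_desc,
       *                                                type, TRUE, base_ptr,
       *                                                offset, i, j, NULL);
       *
       * which yields one <4 x float> RGBA vector per texel; with
       * type.length == 4 * n, n pixels are fetched at once.
       */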
  470 LLVMValueRef
  471 lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
  472                         const struct util_format_description *format_desc,
  473                         struct lp_type type,
  474                         boolean aligned,
  475                         LLVMValueRef base_ptr,
  476                         LLVMValueRef offset,
  477                         LLVMValueRef i,
  478                         LLVMValueRef j,
  479                         LLVMValueRef cache)
  480 {
  481    LLVMBuilderRef builder = gallivm->builder;
  482    unsigned num_pixels = type.length / 4;
  483    struct lp_build_context bld;
  484 
  485    assert(type.length <= LP_MAX_VECTOR_LENGTH);
  486    assert(type.length % 4 == 0);
  487 
  488    lp_build_context_init(&bld, gallivm, type);
  489 
  490    /*
  491     * Trivial case
  492     *
  493     * The format matches the type (apart from a swizzle) so no need for
  494     * scaling or converting.
  495     */
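         /*
          * E.g. fetching PIPE_FORMAT_R8G8B8A8_UNORM into an 8-bit normalized
          * unsigned vector should take this path: the 32-bit texels are
          * gathered as-is and only the (identity) swizzle is applied;
          * PIPE_FORMAT_B8G8R8A8_UNORM takes the same path with a real swizzle.
          */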
  496 
  497    if (format_matches_type(format_desc, type) &&
  498        format_desc->block.bits <= type.width * 4 &&
  499        /* XXX this shouldn't be needed */
  500        util_is_power_of_two_or_zero(format_desc->block.bits)) {
  501       LLVMValueRef packed;
  502       LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type);
  503       struct lp_type fetch_type;
  504       unsigned vec_len = type.width * type.length;
  505 
  506       /*
  507        * The format matches the type (apart from a swizzle) so no need for
  508        * scaling or converting.
  509        */
  510 
  511       fetch_type = lp_type_uint(type.width*4);
  512       packed = lp_build_gather(gallivm, type.length/4,
  513                                format_desc->block.bits, fetch_type,
  514                                aligned, base_ptr, offset, TRUE);
  515 
  516       assert(format_desc->block.bits <= vec_len);
  517       (void) vec_len; /* silence unused var warning for non-debug build */
  518 
  519       packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, "");
  520       return lp_build_format_swizzle_aos(format_desc, &bld, packed);
  521    }
  522 
  523    /*
  524     * Bit arithmetic for converting small_unorm to unorm8.
  525     *
  526     * This misses some opportunities for optimizations (like skipping the mask
  527     * for the highest channel, for instance, or doing bit scaling in parallel
  528     * for channels with the same bit width) but it should be passable for
  529     * all arithmetic formats.
  530     */
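         /*
          * E.g. for PIPE_FORMAT_B5G6R5_UNORM with a unorm8 destination, the
          * 16-bit texels are gathered, each 5/6-bit channel is shifted and
          * masked out and scaled up to 8 bits with scale_bits_up()
          * (0x1f -> 0xff, 0x3f -> 0xff), and the missing alpha is filled in
          * as 0xff via PIPE_SWIZZLE_1 below.
          */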
  531    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
  532        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
  533        util_format_fits_8unorm(format_desc) &&
  534        type.width == 8 && type.norm == 1 && type.sign == 0 &&
  535        type.fixed == 0 && type.floating == 0) {
  536       LLVMValueRef packed, res = NULL, chans[4], rgba[4];
  537       LLVMTypeRef dst_vec_type, conv_vec_type;
  538       struct lp_type fetch_type, conv_type;
  539       struct lp_build_context bld_conv;
  540       unsigned j;
  541 
  542       fetch_type = lp_type_uint(type.width*4);
  543       conv_type = lp_type_int_vec(type.width*4, type.width * type.length);
  544       dst_vec_type = lp_build_vec_type(gallivm, type);
  545       conv_vec_type = lp_build_vec_type(gallivm, conv_type);
  546       lp_build_context_init(&bld_conv, gallivm, conv_type);
  547 
  548       packed = lp_build_gather(gallivm, type.length/4,
  549                                format_desc->block.bits, fetch_type,
  550                                aligned, base_ptr, offset, TRUE);
  551 
  552       assert(format_desc->block.bits * type.length / 4 <=
  553              type.width * type.length);
  554 
  555       packed = LLVMBuildBitCast(gallivm->builder, packed, conv_vec_type, "");
  556 
  557       for (j = 0; j < format_desc->nr_channels; ++j) {
  558          unsigned mask = 0;
  559          unsigned sa = format_desc->channel[j].shift;
  560 
  561          mask = (1 << format_desc->channel[j].size) - 1;
  562 
  563          /* Extract bits from source */
  564          chans[j] = LLVMBuildLShr(builder, packed,
  565                                   lp_build_const_int_vec(gallivm, conv_type, sa),
  566                                   "");
  567 
  568          chans[j] = LLVMBuildAnd(builder, chans[j],
  569                                  lp_build_const_int_vec(gallivm, conv_type, mask),
  570                                  "");
  571 
  572          /* Scale bits */
  573          if (type.norm) {
  574             chans[j] = scale_bits_up(gallivm, format_desc->channel[j].size,
  575                                      type.width, chans[j], conv_type);
  576          }
  577       }
  578       /*
  579        * This is a hacked lp_build_format_swizzle_soa() since we need a
  580        * normalized 1 but only 8 bits in a 32bit vector...
  581        */
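            /*
             * I.e. a swizzle of PIPE_SWIZZLE_1 becomes the constant
             * (1 << 8) - 1 = 0xff rather than an integer 1, and the four
             * byte-sized channels are then shifted and OR'ed back into one
             * 32-bit word per pixel (R | G << 8 | B << 16 | A << 24).
             */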
  582       for (j = 0; j < 4; ++j) {
  583          enum pipe_swizzle swizzle = format_desc->swizzle[j];
  584          if (swizzle == PIPE_SWIZZLE_1) {
  585             rgba[j] = lp_build_const_int_vec(gallivm, conv_type, (1 << type.width) - 1);
  586          } else {
  587             rgba[j] = lp_build_swizzle_soa_channel(&bld_conv, chans, swizzle);
  588          }
  589          if (j == 0) {
  590             res = rgba[j];
  591          } else {
  592             rgba[j] = LLVMBuildShl(builder, rgba[j],
  593                                    lp_build_const_int_vec(gallivm, conv_type,
  594                                                           j * type.width), "");
  595             res = LLVMBuildOr(builder, res, rgba[j], "");
  596          }
  597       }
  598       res = LLVMBuildBitCast(gallivm->builder, res, dst_vec_type, "");
  599 
  600       return res;
  601    }
  602 
  603    /*
  604     * Bit arithmetic
  605     */
  606 
  607    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
  608        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
  609         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
  610        format_desc->block.width == 1 &&
  611        format_desc->block.height == 1 &&
  612        /* XXX this shouldn't be needed */
  613        util_is_power_of_two_or_zero(format_desc->block.bits) &&
  614        format_desc->block.bits <= 32 &&
  615        format_desc->is_bitmask &&
  616        !format_desc->is_mixed &&
  617        (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
  618         format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED) &&
  619        !format_desc->channel[0].pure_integer) {
  620 
  621       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
  622       LLVMValueRef res[LP_MAX_VECTOR_WIDTH / 128];
  623       struct lp_type conv_type;
  624       unsigned k, num_conv_src, num_conv_dst;
  625 
  626       /*
  627        * Note this path is generally terrible for fetching multiple pixels.
  628        * We should make sure we cannot hit this code path for anything but
  629        * single pixels.
  630        */
  631 
  632       /*
  633        * Unpack a pixel at a time into a <4 x float> RGBA vector
  634        */
  635 
  636       for (k = 0; k < num_pixels; ++k) {
  637          LLVMValueRef packed;
  638 
  639          packed = lp_build_gather_elem(gallivm, num_pixels,
  640                                        format_desc->block.bits, 32, aligned,
  641                                        base_ptr, offset, k, FALSE);
  642 
  643          tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
  644                                                   format_desc,
  645                                                   packed);
  646       }
  647 
  648       /*
  649        * Type conversion.
  650        *
  651        * TODO: We could avoid floating conversion for integer to
  652        * integer conversions.
  653        */
  654 
  655       if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
  656          debug_printf("%s: unpacking %s with floating point\n",
  657                       __FUNCTION__, format_desc->short_name);
  658       }
  659 
  660       conv_type = lp_float32_vec4_type();
  661       num_conv_src = num_pixels;
  662       num_conv_dst = 1;
  663 
  664       if (num_pixels % 8 == 0) {
  665          lp_build_concat_n(gallivm, lp_float32_vec4_type(),
  666                            tmps, num_pixels, tmps, num_pixels / 2);
  667          conv_type.length *= num_pixels / 4;
  668          num_conv_src = 4 * num_pixels / 8;
  669          if (type.width == 8 && type.floating == 0 && type.fixed == 0) {
  670             /*
  671              * FIXME: The fast float->unorm path (which is basically
  672              * skipping the MIN/MAX which are extremely pointless in any
  673              * case) requires that there are 2 destinations...
  674              * In any case, we really should make sure we don't hit this
  675              * code with multiple pixels for unorm8 dst types, it's
  676              * completely hopeless even if we do hit the right conversion.
  677              */
  678             type.length /= num_pixels / 4;
  679             num_conv_dst = num_pixels / 4;
  680          }
  681       }
  682 
  683       lp_build_conv(gallivm, conv_type, type,
  684                     tmps, num_conv_src, res, num_conv_dst);
  685 
  686       if (num_pixels % 8 == 0 &&
  687           (type.width == 8 && type.floating == 0 && type.fixed == 0)) {
  688          lp_build_concat_n(gallivm, type, res, num_conv_dst, res, 1);
  689       }
  690 
  691       return lp_build_format_swizzle_aos(format_desc, &bld, res[0]);
  692    }
  693 
  694    /* If all channels are of the same type and we are not using half-floats */
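         /*
          * E.g. PIPE_FORMAT_R16G16B16A16_UNORM or PIPE_FORMAT_R32G32B32_FLOAT
          * would presumably end up here: all channels have the same type and
          * width, so the texel can be fetched as a plain array of elements.
          */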
  695    if (format_desc->is_array &&
  696        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) {
  697       assert(!format_desc->is_mixed);
  698       return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset);
  699    }
  700 
  701    /*
  702     * YUV / subsampled formats
  703     */
  704 
  705    if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
  706       struct lp_type tmp_type;
  707       LLVMValueRef tmp;
  708 
  709       memset(&tmp_type, 0, sizeof tmp_type);
  710       tmp_type.width = 8;
  711       tmp_type.length = num_pixels * 4;
  712       tmp_type.norm = TRUE;
  713 
  714       tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
  715                                                format_desc,
  716                                                num_pixels,
  717                                                base_ptr,
  718                                                offset,
  719                                                i, j);
  720 
  721       lp_build_conv(gallivm,
  722                     tmp_type, type,
  723                     &tmp, 1, &tmp, 1);
  724 
  725       return tmp;
  726    }
  727 
  728    /*
  729     * s3tc rgb formats
  730     */
  731 
  732    if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
  733       struct lp_type tmp_type;
  734       LLVMValueRef tmp;
  735 
  736       memset(&tmp_type, 0, sizeof tmp_type);
  737       tmp_type.width = 8;
  738       tmp_type.length = num_pixels * 4;
  739       tmp_type.norm = TRUE;
  740 
  741       tmp = lp_build_fetch_s3tc_rgba_aos(gallivm,
  742                                          format_desc,
  743                                          num_pixels,
  744                                          base_ptr,
  745                                          offset,
  746                                          i, j,
  747                                          cache);
  748 
  749       lp_build_conv(gallivm,
  750                     tmp_type, type,
  751                     &tmp, 1, &tmp, 1);
  752 
  753       return tmp;
  754    }
  755 
  756    /*
  757     * rgtc rgb formats
  758     */
  759 
  760    if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
  761       struct lp_type tmp_type;
  762       LLVMValueRef tmp;
  763 
  764       memset(&tmp_type, 0, sizeof tmp_type);
  765       tmp_type.width = 8;
  766       tmp_type.length = num_pixels * 4;
  767       tmp_type.norm = TRUE;
  768       tmp_type.sign = (format_desc->format == PIPE_FORMAT_RGTC1_SNORM ||
  769                        format_desc->format == PIPE_FORMAT_RGTC2_SNORM ||
  770                        format_desc->format == PIPE_FORMAT_LATC1_SNORM ||
  771                        format_desc->format == PIPE_FORMAT_LATC2_SNORM);
  772 
  773       tmp = lp_build_fetch_rgtc_rgba_aos(gallivm,
  774                                          format_desc,
  775                                          num_pixels,
  776                                          base_ptr,
  777                                          offset,
  778                                          i, j,
  779                                          cache);
  780 
  781       lp_build_conv(gallivm,
  782                     tmp_type, type,
  783                     &tmp, 1, &tmp, 1);
  784 
  785       return tmp;
  786    }
  787 
  788    /*
  789     * Fallback to util_format_description::fetch_rgba_8unorm().
  790     */
  791 
  792    if (format_desc->fetch_rgba_8unorm &&
  793        !type.floating && type.width == 8 && !type.sign && type.norm) {
  794       /*
  795        * Fallback to calling util_format_description::fetch_rgba_8unorm.
  796        *
  797        * This is definitely not the most efficient way of fetching pixels, as
  798        * we miss the opportunity to do vectorization, but it is convenient
  799        * for formats or scenarios for which there was no opportunity
  800        * or incentive to optimize.
  801        */
  802 
  803       LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
  804       LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
  805       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  806       LLVMValueRef function;
  807       LLVMValueRef tmp_ptr;
  808       LLVMValueRef tmp;
  809       LLVMValueRef res;
  810       unsigned k;
  811 
  812       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
  813          debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",
  814                       __FUNCTION__, format_desc->short_name);
  815       }
  816 
  817       /*
  818        * Declare and bind format_desc->fetch_rgba_8unorm().
  819        */
  820 
  821       {
  822          /*
  823           * Function to call looks like:
  824           *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
  825           */
  826          LLVMTypeRef ret_type;
  827          LLVMTypeRef arg_types[4];
  828          LLVMTypeRef function_type;
  829 
  830          ret_type = LLVMVoidTypeInContext(gallivm->context);
  831          arg_types[0] = pi8t;
  832          arg_types[1] = pi8t;
  833          arg_types[2] = i32t;
  834          arg_types[3] = i32t;
  835          function_type = LLVMFunctionType(ret_type, arg_types,
  836                                           ARRAY_SIZE(arg_types), 0);
  837 
  838          /* make const pointer for the C fetch_rgba_8unorm function */
  839          function = lp_build_const_int_pointer(gallivm,
  840             func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
  841 
  842          /* cast the callee pointer to the function's type */
  843          function = LLVMBuildBitCast(builder, function,
  844                                      LLVMPointerType(function_type, 0),
  845                                      "cast callee");
  846       }
  847 
  848       tmp_ptr = lp_build_alloca(gallivm, i32t, "");
  849 
  850       res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
  851 
  852       /*
  853        * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
  854        * in the AoS result vector.
  855        */
  856 
  857       for (k = 0; k < num_pixels; ++k) {
  858          LLVMValueRef index = lp_build_const_int32(gallivm, k);
  859          LLVMValueRef args[4];
  860 
  861          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
  862          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
  863                                             base_ptr, offset, k);
  864 
  865          if (num_pixels == 1) {
  866             args[2] = i;
  867             args[3] = j;
  868          }
  869          else {
  870             args[2] = LLVMBuildExtractElement(builder, i, index, "");
  871             args[3] = LLVMBuildExtractElement(builder, j, index, "");
  872          }
  873 
  874          LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");
  875 
  876          tmp = LLVMBuildLoad(builder, tmp_ptr, "");
  877 
  878          if (num_pixels == 1) {
  879             res = tmp;
  880          }
  881          else {
  882             res = LLVMBuildInsertElement(builder, res, tmp, index, "");
  883          }
  884       }
  885 
  886       /* Bitcast from <n x i32> to <4n x i8> */
  887       res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
  888 
  889       return res;
  890    }
  891 
  892    /*
  893     * Fallback to util_format_description::fetch_rgba_float().
  894     */
  895 
  896    if (format_desc->fetch_rgba_float) {
  897       /*
  898        * Fallback to calling util_format_description::fetch_rgba_float.
  899        *
  900        * This is definitely not the most efficient way of fetching pixels, as
  901        * we miss the opportunity to do vectorization, but it is convenient
  902        * for formats or scenarios for which there was no opportunity
  903        * or incentive to optimize.
  904        */
  905 
  906       LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
  907       LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
  908       LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
  909       LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
  910       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  911       LLVMValueRef function;
  912       LLVMValueRef tmp_ptr;
  913       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
  914       LLVMValueRef res;
  915       unsigned k;
  916 
  917       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
  918          debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",
  919                       __FUNCTION__, format_desc->short_name);
  920       }
  921 
  922       /*
  923        * Declare and bind format_desc->fetch_rgba_float().
  924        */
  925 
  926       {
  927          /*
  928           * Function to call looks like:
  929           *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
  930           */
  931          LLVMTypeRef ret_type;
  932          LLVMTypeRef arg_types[4];
  933 
  934          ret_type = LLVMVoidTypeInContext(gallivm->context);
  935          arg_types[0] = pf32t;
  936          arg_types[1] = pi8t;
  937          arg_types[2] = i32t;
  938          arg_types[3] = i32t;
  939 
  940          function = lp_build_const_func_pointer(gallivm,
  941                                                 func_to_pointer((func_pointer) format_desc->fetch_rgba_float),
  942                                                 ret_type,
  943                                                 arg_types, ARRAY_SIZE(arg_types),
  944                                                 format_desc->short_name);
  945       }
  946 
  947       tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
  948 
  949       /*
  950        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
  951        * in the per-pixel AoS vectors.
  952        */
  953 
  954       for (k = 0; k < num_pixels; ++k) {
  955          LLVMValueRef args[4];
  956 
  957          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
  958          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
  959                                             base_ptr, offset, k);
  960 
  961          if (num_pixels == 1) {
  962             args[2] = i;
  963             args[3] = j;
  964          }
  965          else {
  966             LLVMValueRef index = lp_build_const_int32(gallivm, k);
  967             args[2] = LLVMBuildExtractElement(builder, i, index, "");
  968             args[3] = LLVMBuildExtractElement(builder, j, index, "");
  969          }
  970 
  971          LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");
  972 
  973          tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
  974       }
  975 
  976       lp_build_conv(gallivm,
  977                     lp_float32_vec4_type(),
  978                     type,
  979                     tmps, num_pixels, &res, 1);
  980 
  981       return res;
  982    }
  983 
  984    assert(!util_format_is_pure_integer(format_desc->format));
  985 
  986    assert(0);
  987    return lp_build_undef(gallivm, type);
  988 }