"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c" (16 Sep 2020, 29124 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "lp_bld_tgsi_aos.c", see the Fossies "Dox" file reference documentation.

    1 /**************************************************************************
    2  *
    3  * Copyright 2010 VMware, Inc.
    4  * All Rights Reserved.
    5  *
    6  * Permission is hereby granted, free of charge, to any person obtaining a
    7  * copy of this software and associated documentation files (the
    8  * "Software"), to deal in the Software without restriction, including
    9  * without limitation the rights to use, copy, modify, merge, publish,
   10  * distribute, sub license, and/or sell copies of the Software, and to
   11  * permit persons to whom the Software is furnished to do so, subject to
   12  * the following conditions:
   13  *
   14  * The above copyright notice and this permission notice (including the
   15  * next paragraph) shall be included in all copies or substantial portions
   16  * of the Software.
   17  *
   18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
   21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
   22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   25  *
   26  **************************************************************************/
   27 
   28 /**
   29  * @file
   30  * TGSI to LLVM IR translation -- AoS.
   31  *
   32  * FIXME:
   33  * - No control flow support: the existing control flow code should be factored
   34  * out of the SoA code into a common module and shared.
   35  * - No derivatives. Derivative logic should be pluggable, just like the samplers.
   36  *
   37  * @author Jose Fonseca <jfonseca@vmware.com>
   38  */
   39 
   40 #include "pipe/p_config.h"
   41 #include "pipe/p_shader_tokens.h"
   42 #include "util/u_debug.h"
   43 #include "util/u_math.h"
   44 #include "util/u_memory.h"
   45 #include "tgsi/tgsi_dump.h"
   46 #include "tgsi/tgsi_info.h"
   47 #include "tgsi/tgsi_parse.h"
   48 #include "tgsi/tgsi_util.h"
   49 #include "tgsi/tgsi_scan.h"
   50 #include "lp_bld_type.h"
   51 #include "lp_bld_const.h"
   52 #include "lp_bld_arit.h"
   53 #include "lp_bld_logic.h"
   54 #include "lp_bld_swizzle.h"
   55 #include "lp_bld_flow.h"
   56 #include "lp_bld_quad.h"
   57 #include "lp_bld_tgsi.h"
   58 #include "lp_bld_debug.h"
   59 #include "lp_bld_sample.h"
   60 
   61 
   62 /**
   63  * Wrapper around lp_build_swizzle_aos which translates swizzles to another 
   64  * ordering.
   65  */
   66 static LLVMValueRef
   67 swizzle_aos(struct lp_build_tgsi_context *bld_base,
   68             LLVMValueRef a,
   69             unsigned swizzle_x,
   70             unsigned swizzle_y,
   71             unsigned swizzle_z,
   72             unsigned swizzle_w)
   73 {
   74    unsigned char swizzles[4];
   75    struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
   76 
   77    assert(swizzle_x < 4);
   78    assert(swizzle_y < 4);
   79    assert(swizzle_z < 4);
   80    assert(swizzle_w < 4);
   81 
   82    swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
   83    swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
   84    swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
   85    swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
   86 
   87    return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
   88 }
   89 
   90 
   91 static LLVMValueRef
   92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
   93                    LLVMValueRef a,
   94                    unsigned chan)
   95 {
   96    chan = bld->swizzles[chan];
   97    return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
   98 }
   99 
  100 
  101 static LLVMValueRef
  102 emit_fetch_constant(
  103    struct lp_build_tgsi_context * bld_base,
  104    const struct tgsi_full_src_register * reg,
  105    enum tgsi_opcode_type stype,
  106    unsigned swizzle)
  107 {
  108    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
  109    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
  110    struct lp_type type = bld_base->base.type;
  111    LLVMValueRef res;
  112    unsigned chan;
  113 
  114    assert(!reg->Register.Indirect);
  115 
  116    /*
  117     * Get the constants components
  118     */
  119 
  120    res = bld->bld_base.base.undef;
  121    for (chan = 0; chan < 4; ++chan) {
  122       LLVMValueRef index;
  123       LLVMValueRef scalar_ptr;
  124       LLVMValueRef scalar;
  125       LLVMValueRef swizzle;
  126 
  127       index = lp_build_const_int32(bld->bld_base.base.gallivm,
  128                                    reg->Register.Index * 4 + chan);
  129 
  130       scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
  131 
  132       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
  133 
  134       lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
  135 
  136       /*
  137        * NOTE: constants array is always assumed to be RGBA
  138        */
  139 
  140       swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
  141                                      bld->swizzles[chan]);
  142 
  143       res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
  144    }
  145 
  146    /*
  147     * Broadcast the first quaternion to all others.
  148     *
  149     * XXX: could be factored into a reusable function.
  150     */
  151 
  152    if (type.length > 4) {
  153       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  154       unsigned i;
  155 
  156       for (chan = 0; chan < 4; ++chan) {
  157          shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
  158       }
  159 
  160       for (i = 4; i < type.length; ++i) {
  161          shuffles[i] = shuffles[i % 4];
  162       }
  163 
  164       res = LLVMBuildShuffleVector(builder,
  165                                    res, bld->bld_base.base.undef,
  166                                    LLVMConstVector(shuffles, type.length),
  167                                    "");
  168    }
  169    return res;
  170 }
  171 
  172 static LLVMValueRef
  173 emit_fetch_immediate(
  174    struct lp_build_tgsi_context * bld_base,
  175    const struct tgsi_full_src_register * reg,
  176    enum tgsi_opcode_type stype,
  177    unsigned swizzle)
  178 {
  179    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
  180    LLVMValueRef res = bld->immediates[reg->Register.Index];
  181    assert(res);
  182    return res;
  183 }
  184 
  185 static LLVMValueRef
  186 emit_fetch_input(
  187    struct lp_build_tgsi_context * bld_base,
  188    const struct tgsi_full_src_register * reg,
  189    enum tgsi_opcode_type stype,
  190    unsigned swizzle)
  191 {
  192    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
  193    LLVMValueRef res = bld->inputs[reg->Register.Index];
  194    assert(!reg->Register.Indirect);
  195    assert(res);
  196    return res;
  197 }
  198 
  199 static LLVMValueRef
  200 emit_fetch_temporary(
  201    struct lp_build_tgsi_context * bld_base,
  202    const struct tgsi_full_src_register * reg,
  203    enum tgsi_opcode_type stype,
  204    unsigned swizzle)
  205 {
  206    struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
  207    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
  208    LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
  209    LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
  210    assert(!reg->Register.Indirect);
  211    if (!res)
  212       return bld->bld_base.base.undef;
  213 
  214    return res;
  215 }
  216 
  217 /**
  218  * Register store.
  219  */
  220 void
  221 lp_emit_store_aos(
  222    struct lp_build_tgsi_aos_context *bld,
  223    const struct tgsi_full_instruction *inst,
  224    unsigned index,
  225    LLVMValueRef value)
  226 {
  227    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  228    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
  229    LLVMValueRef mask = NULL;
  230    LLVMValueRef ptr;
  231 
  232    /*
  233     * Saturate the value
  234     */
  235    if (inst->Instruction.Saturate) {
  236       value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
  237       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
  238    }
  239 
  240    /*
  241     * Translate the register file
  242     */
  243 
  244    assert(!reg->Register.Indirect);
  245 
  246    switch (reg->Register.File) {
  247    case TGSI_FILE_OUTPUT:
  248       ptr = bld->outputs[reg->Register.Index];
  249       break;
  250 
  251    case TGSI_FILE_TEMPORARY:
  252       ptr = bld->temps[reg->Register.Index];
  253       break;
  254 
  255    case TGSI_FILE_ADDRESS:
  256       ptr = bld->addr[reg->Indirect.Index];
  257       break;
  258 
  259    default:
  260       assert(0);
  261       return;
  262    }
  263 
  264    if (!ptr)
  265       return;
  266 
  267    /*
  268     * Writemask
  269     */
  270 
  271    if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
  272       LLVMValueRef writemask;
  273 
  274       writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
  275                                                    bld->bld_base.base.type,
  276                                                    reg->Register.WriteMask,
  277                                                    TGSI_NUM_CHANNELS,
  278                                                    bld->swizzles);
  279 
  280       if (mask) {
  281          mask = LLVMBuildAnd(builder, mask, writemask, "");
  282       } else {
  283          mask = writemask;
  284       }
  285    }
  286 
  287    if (mask) {
  288       LLVMValueRef orig_value;
  289 
  290       orig_value = LLVMBuildLoad(builder, ptr, "");
  291       value = lp_build_select(&bld->bld_base.base,
  292                               mask, value, orig_value);
  293    }
  294 
  295    LLVMBuildStore(builder, value, ptr);
  296 }
  297 
  298 
  299 /**
  300  * High-level instruction translators.
  301  */
  302 
  303 static LLVMValueRef
  304 emit_tex(struct lp_build_tgsi_aos_context *bld,
  305          const struct tgsi_full_instruction *inst,
  306          enum lp_build_tex_modifier modifier)
  307 {
  308    unsigned target;
  309    unsigned unit;
  310    LLVMValueRef coords;
  311    struct lp_derivatives derivs = { {NULL}, {NULL} };
  312 
  313    if (!bld->sampler) {
  314       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
  315       return bld->bld_base.base.undef;
  316    }
  317 
  318    target = inst->Texture.Texture;
  319 
  320    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
  321 
  322    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
  323       /* probably not going to work */
  324       derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
  325       derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
  326       unit = inst->Src[3].Register.Index;
  327    }
  328    else {
  329       unit = inst->Src[1].Register.Index;
  330    }
  331    return bld->sampler->emit_fetch_texel(bld->sampler,
  332                                          &bld->bld_base.base,
  333                                          target, unit,
  334                                          coords, derivs,
  335                                          modifier);
  336 }
  337 
  338 
  339 static LLVMValueRef
  340 emit_sample(struct lp_build_tgsi_aos_context *bld,
  341             const struct tgsi_full_instruction *inst,
  342             enum lp_build_tex_modifier modifier)
  343 {
  344    unsigned target;
  345    unsigned unit;
  346    LLVMValueRef coords;
  347    struct lp_derivatives derivs = { {NULL}, {NULL} };
  348 
  349    if (!bld->sampler) {
  350       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
  351       return bld->bld_base.base.undef;
  352    }
  353 
  354    coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
  355 
  356    /* ignore modifiers, can't handle different sampler / sampler view, etc... */
  357    unit = inst->Src[1].Register.Index;
  358    assert(inst->Src[2].Register.Index == unit);
  359 
  360    target = bld->sv[unit].Resource;
  361 
  362    return bld->sampler->emit_fetch_texel(bld->sampler,
  363                                          &bld->bld_base.base,
  364                                          target, unit,
  365                                          coords, derivs,
  366                                          modifier);
  367 }
  368 
  369 
  370 void
  371 lp_emit_declaration_aos(
  372    struct lp_build_tgsi_aos_context *bld,
  373    const struct tgsi_full_declaration *decl)
  374 {
  375    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  376    LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
  377 
  378    unsigned first = decl->Range.First;
  379    unsigned last = decl->Range.Last;
  380    unsigned idx;
  381 
  382    for (idx = first; idx <= last; ++idx) {
  383       switch (decl->Declaration.File) {
  384       case TGSI_FILE_TEMPORARY:
  385          assert(idx < LP_MAX_INLINED_TEMPS);
  386          if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
  387             LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
  388             bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
  389                                                      vec_type, array_size, "");
  390          } else {
  391             bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
  392          }
  393          break;
  394 
  395       case TGSI_FILE_OUTPUT:
  396          bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
  397          break;
  398 
  399       case TGSI_FILE_ADDRESS:
  400          assert(idx < LP_MAX_TGSI_ADDRS);
  401          bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
  402          break;
  403 
  404       case TGSI_FILE_SAMPLER_VIEW:
  405          /*
  406           * The target stored here MUST match whatever there actually
  407           * is in the set sampler views (what about return type?).
  408           */
  409          assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
  410          for (idx = first; idx <= last; ++idx) {
  411             bld->sv[idx] = decl->SamplerView;
  412          }
  413          break;
  414 
  415       default:
  416          /* don't need to declare other vars */
  417          break;
  418       }
  419    }
  420 }
  421 
  422 
  423 /**
  424  * Emit LLVM for one TGSI instruction.
  425  * \param return TRUE for success, FALSE otherwise
  426  */
  427 boolean
  428 lp_emit_instruction_aos(
  429    struct lp_build_tgsi_aos_context *bld,
  430    const struct tgsi_full_instruction *inst,
  431    const struct tgsi_opcode_info *info,
  432    int *pc)
  433 {
  434    LLVMValueRef src0, src1, src2;
  435    LLVMValueRef tmp0;
  436    LLVMValueRef dst0 = NULL;
  437 
  438    /*
  439     * Stores and write masks are handled in a general fashion after the long
  440     * instruction opcode switch statement.
  441     *
  442     * Although not stricitly necessary, we avoid generating instructions for
  443     * channels which won't be stored, in cases where's that easy. For some
  444     * complex instructions, like texture sampling, it is more convenient to
  445     * assume a full writemask and then let LLVM optimization passes eliminate
  446     * redundant code.
  447     */
  448 
  449    (*pc)++;
  450 
  451    assert(info->num_dst <= 1);
  452    if (info->num_dst) {
  453       dst0 = bld->bld_base.base.undef;
  454    }
  455 
  456    switch (inst->Instruction.Opcode) {
  457    case TGSI_OPCODE_ARL:
  458       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  459       dst0 = lp_build_floor(&bld->bld_base.base, src0);
  460       break;
  461 
  462    case TGSI_OPCODE_MOV:
  463       dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  464       break;
  465 
  466    case TGSI_OPCODE_LIT:
  467       return FALSE;
  468 
  469    case TGSI_OPCODE_RCP:
  470    /* TGSI_OPCODE_RECIP */
  471       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  472       dst0 = lp_build_rcp(&bld->bld_base.base, src0);
  473       break;
  474 
  475    case TGSI_OPCODE_RSQ:
  476    /* TGSI_OPCODE_RECIPSQRT */
  477       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  478       tmp0 = lp_build_abs(&bld->bld_base.base, src0);
  479       dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
  480       break;
  481 
  482    case TGSI_OPCODE_EXP:
  483       return FALSE;
  484 
  485    case TGSI_OPCODE_LOG:
  486       return FALSE;
  487 
  488    case TGSI_OPCODE_MUL:
  489       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  490       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  491       dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
  492       break;
  493 
  494    case TGSI_OPCODE_ADD:
  495       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  496       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  497       dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
  498       break;
  499 
  500    case TGSI_OPCODE_DP3:
  501    /* TGSI_OPCODE_DOT3 */
  502       return FALSE;
  503 
  504    case TGSI_OPCODE_DP4:
  505    /* TGSI_OPCODE_DOT4 */
  506       return FALSE;
  507 
  508    case TGSI_OPCODE_DST:
  509       return FALSE;
  510 
  511    case TGSI_OPCODE_MIN:
  512       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  513       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  514       dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
  515       break;
  516 
  517    case TGSI_OPCODE_MAX:
  518       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  519       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  520       dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
  521       break;
  522 
  523    case TGSI_OPCODE_SLT:
  524    /* TGSI_OPCODE_SETLT */
  525       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  526       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  527       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
  528       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
  529       break;
  530 
  531    case TGSI_OPCODE_SGE:
  532    /* TGSI_OPCODE_SETGE */
  533       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  534       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  535       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
  536       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
  537       break;
  538 
  539    case TGSI_OPCODE_MAD:
  540    /* TGSI_OPCODE_MADD */
  541       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  542       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  543       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
  544       tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
  545       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
  546       break;
  547 
  548    case TGSI_OPCODE_LRP:
  549       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  550       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  551       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
  552       tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
  553       tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
  554       dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
  555       break;
  556 
  557    case TGSI_OPCODE_FRC:
  558       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  559       tmp0 = lp_build_floor(&bld->bld_base.base, src0);
  560       dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
  561       break;
  562 
  563    case TGSI_OPCODE_FLR:
  564       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  565       dst0 = lp_build_floor(&bld->bld_base.base, src0);
  566       break;
  567 
  568    case TGSI_OPCODE_ROUND:
  569       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  570       dst0 = lp_build_round(&bld->bld_base.base, src0);
  571       break;
  572 
  573    case TGSI_OPCODE_EX2:
  574       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  575       tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
  576       dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
  577       break;
  578 
  579    case TGSI_OPCODE_LG2:
  580       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  581       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
  582       dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
  583       break;
  584 
  585    case TGSI_OPCODE_POW:
  586       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  587       src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
  588       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  589       src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
  590       dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
  591       break;
  592 
  593    case TGSI_OPCODE_COS:
  594       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  595       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
  596       dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
  597       break;
  598 
  599    case TGSI_OPCODE_DDX:
  600       return FALSE;
  601 
  602    case TGSI_OPCODE_DDY:
  603       return FALSE;
  604 
  605    case TGSI_OPCODE_KILL:
  606       return FALSE;
  607 
  608    case TGSI_OPCODE_KILL_IF:
  609       return FALSE;
  610 
  611    case TGSI_OPCODE_PK2H:
  612       return FALSE;
  613       break;
  614 
  615    case TGSI_OPCODE_PK2US:
  616       return FALSE;
  617       break;
  618 
  619    case TGSI_OPCODE_PK4B:
  620       return FALSE;
  621       break;
  622 
  623    case TGSI_OPCODE_PK4UB:
  624       return FALSE;
  625 
  626    case TGSI_OPCODE_SEQ:
  627       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  628       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  629       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
  630       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
  631       break;
  632 
  633    case TGSI_OPCODE_SGT:
  634       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  635       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  636       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
  637       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
  638       break;
  639 
  640    case TGSI_OPCODE_SIN:
  641       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  642       tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
  643       dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
  644       break;
  645 
  646    case TGSI_OPCODE_SLE:
  647       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  648       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  649       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
  650       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
  651       break;
  652 
  653    case TGSI_OPCODE_SNE:
  654       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  655       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  656       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
  657       dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
  658       break;
  659 
  660    case TGSI_OPCODE_TEX:
  661       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
  662       break;
  663 
  664    case TGSI_OPCODE_TXD:
  665       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
  666       break;
  667 
  668    case TGSI_OPCODE_UP2H:
  669       /* deprecated */
  670       assert (0);
  671       return FALSE;
  672       break;
  673 
  674    case TGSI_OPCODE_UP2US:
  675       /* deprecated */
  676       assert(0);
  677       return FALSE;
  678       break;
  679 
  680    case TGSI_OPCODE_UP4B:
  681       /* deprecated */
  682       assert(0);
  683       return FALSE;
  684       break;
  685 
  686    case TGSI_OPCODE_UP4UB:
  687       /* deprecated */
  688       assert(0);
  689       return FALSE;
  690       break;
  691 
  692    case TGSI_OPCODE_ARR:
  693       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  694       dst0 = lp_build_round(&bld->bld_base.base, src0);
  695       break;
  696 
  697    case TGSI_OPCODE_CAL:
  698       return FALSE;
  699 
  700    case TGSI_OPCODE_RET:
  701       /* safe to ignore at end */
  702       break;
  703 
  704    case TGSI_OPCODE_END:
  705       *pc = -1;
  706       break;
  707 
  708    case TGSI_OPCODE_SSG:
  709    /* TGSI_OPCODE_SGN */
  710       tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  711       dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
  712       break;
  713 
  714    case TGSI_OPCODE_CMP:
  715       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  716       src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
  717       src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
  718       tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
  719       dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
  720       break;
  721 
  722    case TGSI_OPCODE_TXB:
  723       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
  724       break;
  725 
  726    case TGSI_OPCODE_DIV:
  727       assert(0);
  728       return FALSE;
  729       break;
  730 
  731    case TGSI_OPCODE_DP2:
  732       return FALSE;
  733 
  734    case TGSI_OPCODE_TXL:
  735       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
  736       break;
  737 
  738    case TGSI_OPCODE_TXP:
  739       dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
  740       break;
  741 
  742    case TGSI_OPCODE_BRK:
  743       return FALSE;
  744 
  745    case TGSI_OPCODE_IF:
  746    case TGSI_OPCODE_UIF:
  747       return FALSE;
  748 
  749    case TGSI_OPCODE_BGNLOOP:
  750       return FALSE;
  751 
  752    case TGSI_OPCODE_BGNSUB:
  753       return FALSE;
  754 
  755    case TGSI_OPCODE_ELSE:
  756       return FALSE;
  757 
  758    case TGSI_OPCODE_ENDIF:
  759       return FALSE;
  760 
  761    case TGSI_OPCODE_ENDLOOP:
  762       return FALSE;
  763 
  764    case TGSI_OPCODE_ENDSUB:
  765       return FALSE;
  766 
  767    case TGSI_OPCODE_CEIL:
  768       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  769       dst0 = lp_build_ceil(&bld->bld_base.base, src0);
  770       break;
  771 
  772    case TGSI_OPCODE_I2F:
  773       assert(0);
  774       return FALSE;
  775       break;
  776 
  777    case TGSI_OPCODE_NOT:
  778       assert(0);
  779       return FALSE;
  780       break;
  781 
  782    case TGSI_OPCODE_TRUNC:
  783       src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
  784       dst0 = lp_build_trunc(&bld->bld_base.base, src0);
  785       break;
  786 
  787    case TGSI_OPCODE_SHL:
  788       assert(0);
  789       return FALSE;
  790       break;
  791 
  792    case TGSI_OPCODE_ISHR:
  793       assert(0);
  794       return FALSE;
  795       break;
  796 
  797    case TGSI_OPCODE_AND:
  798       assert(0);
  799       return FALSE;
  800       break;
  801 
  802    case TGSI_OPCODE_OR:
  803       assert(0);
  804       return FALSE;
  805       break;
  806 
  807    case TGSI_OPCODE_MOD:
  808       assert(0);
  809       return FALSE;
  810       break;
  811 
  812    case TGSI_OPCODE_XOR:
  813       assert(0);
  814       return FALSE;
  815       break;
  816 
  817    case TGSI_OPCODE_TXF:
  818       assert(0);
  819       return FALSE;
  820       break;
  821 
  822    case TGSI_OPCODE_TXQ:
  823       assert(0);
  824       return FALSE;
  825       break;
  826 
  827    case TGSI_OPCODE_CONT:
  828       return FALSE;
  829 
  830    case TGSI_OPCODE_EMIT:
  831       return FALSE;
  832       break;
  833 
  834    case TGSI_OPCODE_ENDPRIM:
  835       return FALSE;
  836       break;
  837 
  838    case TGSI_OPCODE_NOP:
  839       break;
  840 
  841    case TGSI_OPCODE_SAMPLE:
  842       dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
  843       break;
  844 
  845    default:
  846       return FALSE;
  847    }
  848    
  849    if (info->num_dst) {
  850       lp_emit_store_aos(bld, inst, 0, dst0);
  851    }
  852 
  853    return TRUE;
  854 }
  855 
  856 
  857 void
  858 lp_build_tgsi_aos(struct gallivm_state *gallivm,
  859                   const struct tgsi_token *tokens,
  860                   struct lp_type type,
  861                   const unsigned char swizzles[4],
  862                   LLVMValueRef consts_ptr,
  863                   const LLVMValueRef *inputs,
  864                   LLVMValueRef *outputs,
  865                   const struct lp_build_sampler_aos *sampler,
  866                   const struct tgsi_shader_info *info)
  867 {
  868    struct lp_build_tgsi_aos_context bld;
  869    struct tgsi_parse_context parse;
  870    uint num_immediates = 0;
  871    unsigned chan;
  872    int pc = 0;
  873 
  874    /* Setup build context */
  875    memset(&bld, 0, sizeof bld);
  876    lp_build_context_init(&bld.bld_base.base, gallivm, type);
  877    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
  878    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
  879    lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
  880 
  881    for (chan = 0; chan < 4; ++chan) {
  882       bld.swizzles[chan] = swizzles[chan];
  883       bld.inv_swizzles[swizzles[chan]] = chan;
  884    }
  885 
  886    bld.inputs = inputs;
  887    bld.outputs = outputs;
  888    bld.consts_ptr = consts_ptr;
  889    bld.sampler = sampler;
  890    bld.indirect_files = info->indirect_files;
  891    bld.bld_base.emit_swizzle = swizzle_aos;
  892    bld.bld_base.info = info;
  893 
  894    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
  895    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
  896    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
  897    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
  898 
  899    /* Set opcode actions */
  900    lp_set_default_actions_cpu(&bld.bld_base);
  901 
  902    if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
  903       return;
  904    }
  905 
  906    tgsi_parse_init(&parse, tokens);
  907 
  908    while (!tgsi_parse_end_of_tokens(&parse)) {
  909       tgsi_parse_token(&parse);
  910 
  911       switch(parse.FullToken.Token.Type) {
  912       case TGSI_TOKEN_TYPE_DECLARATION:
  913          /* Inputs already interpolated */
  914          lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
  915          break;
  916 
  917       case TGSI_TOKEN_TYPE_INSTRUCTION:
  918          /* save expanded instruction */
  919          lp_bld_tgsi_add_instruction(&bld.bld_base,
  920                                      &parse.FullToken.FullInstruction);
  921          break;
  922 
  923       case TGSI_TOKEN_TYPE_IMMEDIATE:
  924          /* simply copy the immediate values into the next immediates[] slot */
  925          {
  926             const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
  927             float imm[4];
  928             assert(size <= 4);
  929             assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
  930             for (chan = 0; chan < 4; ++chan) {
  931                imm[chan] = 0.0f;
  932             }
  933             for (chan = 0; chan < size; ++chan) {
  934                unsigned swizzle = bld.swizzles[chan];
  935                imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
  936             }
  937             bld.immediates[num_immediates] =
  938                      lp_build_const_aos(gallivm, type,
  939                                         imm[0], imm[1], imm[2], imm[3],
  940                                         NULL);
  941             num_immediates++;
  942          }
  943          break;
  944 
  945       case TGSI_TOKEN_TYPE_PROPERTY:
  946          break;
  947 
  948       default:
  949          assert(0);
  950       }
  951    }
  952 
  953    while (pc != -1) {
  954       struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
  955       const struct tgsi_opcode_info *opcode_info =
  956          tgsi_get_opcode_info(instr->Instruction.Opcode);
  957       if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
  958          _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
  959                        tgsi_get_opcode_name(instr->Instruction.Opcode));
  960    }
  961 
  962    if (0) {
  963       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
  964       LLVMValueRef function = LLVMGetBasicBlockParent(block);
  965       debug_printf("11111111111111111111111111111 \n");
  966       tgsi_dump(tokens, 0);
  967       lp_debug_dump_value(function);
  968       debug_printf("2222222222222222222222222222 \n");
  969    }
  970    tgsi_parse_free(&parse);
  971    FREE(bld.bld_base.instructions);
  972 
  973    if (0) {
  974       LLVMModuleRef module = LLVMGetGlobalParent(
  975          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
  976       LLVMDumpModule(module);
  977    }
  978 
  979 }
  980