"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp" (16 Sep 2020, 54568 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "nv50_ir_emit_nv50.cpp", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright 2011 Christoph Bumiller
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice shall be included in
   12  * all copies or substantial portions of the Software.
   13  *
   14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
   18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   20  * OTHER DEALINGS IN THE SOFTWARE.
   21  */
   22 
   23 #include "codegen/nv50_ir.h"
   24 #include "codegen/nv50_ir_target_nv50.h"
   25 
   26 namespace nv50_ir {
   27 
// Encoding forms understood by setSrcFileBits(); each emitForm_*() helper
// passes the constant matching the instruction layout it produces.
#define NV50_OP_ENC_LONG     0 // passed by emitForm_MAD
#define NV50_OP_ENC_SHORT    1 // passed by emitForm_MUL
#define NV50_OP_ENC_IMM      2 // passed by emitForm_IMM
#define NV50_OP_ENC_LONG_ALT 3 // passed by emitForm_ADD
   32 
   33 class CodeEmitterNV50 : public CodeEmitter
   34 {
   35 public:
   36    CodeEmitterNV50(const TargetNV50 *);
   37 
   38    virtual bool emitInstruction(Instruction *);
   39 
   40    virtual uint32_t getMinEncodingSize(const Instruction *) const;
   41 
   42    inline void setProgramType(Program::Type pType) { progType = pType; }
   43 
   44    virtual void prepareEmission(Function *);
   45 
   46 private:
   47    Program::Type progType;
   48 
   49    const TargetNV50 *targNV50;
   50 
   51 private:
   52    inline void defId(const ValueDef&, const int pos);
   53    inline void srcId(const ValueRef&, const int pos);
   54    inline void srcId(const ValueRef *, const int pos);
   55 
   56    inline void srcAddr16(const ValueRef&, bool adj, const int pos);
   57    inline void srcAddr8(const ValueRef&, const int pos);
   58 
   59    void emitFlagsRd(const Instruction *);
   60    void emitFlagsWr(const Instruction *);
   61 
   62    void emitCondCode(CondCode cc, DataType ty, int pos);
   63 
   64    inline void setARegBits(unsigned int);
   65 
   66    void setAReg16(const Instruction *, int s);
   67    void setImmediate(const Instruction *, int s);
   68 
   69    void setDst(const Value *);
   70    void setDst(const Instruction *, int d);
   71    void setSrcFileBits(const Instruction *, int enc);
   72    void setSrc(const Instruction *, unsigned int s, int slot);
   73 
   74    void emitForm_MAD(const Instruction *);
   75    void emitForm_ADD(const Instruction *);
   76    void emitForm_MUL(const Instruction *);
   77    void emitForm_IMM(const Instruction *);
   78 
   79    void emitLoadStoreSizeLG(DataType ty, int pos);
   80    void emitLoadStoreSizeCS(DataType ty);
   81 
   82    void roundMode_MAD(const Instruction *);
   83    void roundMode_CVT(RoundMode);
   84 
   85    void emitMNeg12(const Instruction *);
   86 
   87    void emitLOAD(const Instruction *);
   88    void emitSTORE(const Instruction *);
   89    void emitMOV(const Instruction *);
   90    void emitRDSV(const Instruction *);
   91    void emitNOP();
   92    void emitINTERP(const Instruction *);
   93    void emitPFETCH(const Instruction *);
   94    void emitOUT(const Instruction *);
   95 
   96    void emitUADD(const Instruction *);
   97    void emitAADD(const Instruction *);
   98    void emitFADD(const Instruction *);
   99    void emitDADD(const Instruction *);
  100    void emitIMUL(const Instruction *);
  101    void emitFMUL(const Instruction *);
  102    void emitDMUL(const Instruction *);
  103    void emitFMAD(const Instruction *);
  104    void emitDMAD(const Instruction *);
  105    void emitIMAD(const Instruction *);
  106    void emitISAD(const Instruction *);
  107 
  108    void emitMINMAX(const Instruction *);
  109 
  110    void emitPreOp(const Instruction *);
  111    void emitSFnOp(const Instruction *, uint8_t subOp);
  112 
  113    void emitShift(const Instruction *);
  114    void emitARL(const Instruction *, unsigned int shl);
  115    void emitLogicOp(const Instruction *);
  116    void emitNOT(const Instruction *);
  117 
  118    void emitCVT(const Instruction *);
  119    void emitSET(const Instruction *);
  120 
  121    void emitTEX(const TexInstruction *);
  122    void emitTXQ(const TexInstruction *);
  123    void emitTEXPREP(const TexInstruction *);
  124 
  125    void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
  126 
  127    void emitFlow(const Instruction *, uint8_t flowOp);
  128    void emitPRERETEmu(const FlowInstruction *);
  129    void emitBAR(const Instruction *);
  130 
  131    void emitATOM(const Instruction *);
  132 };
  133 
// Shorthands for the register data of an operand's representative value.
// Both expand to the same expression; SDATA is used on source references,
// DDATA on definitions.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
  136 
  137 void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
  138 {
  139    assert(src.get());
  140    code[pos / 32] |= SDATA(src).id << (pos % 32);
  141 }
  142 
  143 void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
  144 {
  145    assert(src->get());
  146    code[pos / 32] |= SDATA(*src).id << (pos % 32);
  147 }
  148 
  149 void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
  150 {
  151    assert(src.get());
  152 
  153    int32_t offset = SDATA(src).offset;
  154 
  155    assert(!adj || src.get()->reg.size <= 4);
  156    if (adj)
  157       offset /= src.get()->reg.size;
  158 
  159    assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
  160 
  161    if (offset < 0)
  162       offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
  163 
  164    code[pos / 32] |= offset << (pos % 32);
  165 }
  166 
  167 void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
  168 {
  169    assert(src.get());
  170 
  171    uint32_t offset = SDATA(src).offset;
  172 
  173    assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
  174 
  175    code[pos / 32] |= (offset >> 2) << (pos % 32);
  176 }
  177 
  178 void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
  179 {
  180    assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
  181 
  182    code[pos / 32] |= DDATA(def).id << (pos % 32);
  183 }
  184 
  185 void
  186 CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
  187 {
  188    switch (insn->rnd) {
  189    case ROUND_M: code[1] |= 1 << 22; break;
  190    case ROUND_P: code[1] |= 2 << 22; break;
  191    case ROUND_Z: code[1] |= 3 << 22; break;
  192    default:
  193       assert(insn->rnd == ROUND_N);
  194       break;
  195    }
  196 }
  197 
  198 void
  199 CodeEmitterNV50::emitMNeg12(const Instruction *i)
  200 {
  201    code[1] |= i->src(0).mod.neg() << 26;
  202    code[1] |= i->src(1).mod.neg() << 27;
  203 }
  204 
  205 void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
  206 {
  207    uint8_t enc;
  208 
  209    assert(pos >= 32 || pos <= 27);
  210 
  211    switch (cc) {
  212    case CC_LT:  enc = 0x1; break;
  213    case CC_LTU: enc = 0x9; break;
  214    case CC_EQ:  enc = 0x2; break;
  215    case CC_EQU: enc = 0xa; break;
  216    case CC_LE:  enc = 0x3; break;
  217    case CC_LEU: enc = 0xb; break;
  218    case CC_GT:  enc = 0x4; break;
  219    case CC_GTU: enc = 0xc; break;
  220    case CC_NE:  enc = 0x5; break;
  221    case CC_NEU: enc = 0xd; break;
  222    case CC_GE:  enc = 0x6; break;
  223    case CC_GEU: enc = 0xe; break;
  224    case CC_TR:  enc = 0xf; break;
  225    case CC_FL:  enc = 0x0; break;
  226 
  227    case CC_O:  enc = 0x10; break;
  228    case CC_C:  enc = 0x11; break;
  229    case CC_A:  enc = 0x12; break;
  230    case CC_S:  enc = 0x13; break;
  231    case CC_NS: enc = 0x1c; break;
  232    case CC_NA: enc = 0x1d; break;
  233    case CC_NC: enc = 0x1e; break;
  234    case CC_NO: enc = 0x1f; break;
  235 
  236    default:
  237       enc = 0;
  238       assert(!"invalid condition code");
  239       break;
  240    }
  241    if (ty != TYPE_NONE && !isFloatType(ty))
  242       enc &= ~0x8; // unordered only exists for float types
  243 
  244    code[pos / 32] |= enc << (pos % 32);
  245 }
  246 
  247 void
  248 CodeEmitterNV50::emitFlagsRd(const Instruction *i)
  249 {
  250    int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
  251 
  252    assert(!(code[1] & 0x00003f80));
  253 
  254    if (s >= 0) {
  255       assert(i->getSrc(s)->reg.file == FILE_FLAGS);
  256       emitCondCode(i->cc, TYPE_NONE, 32 + 7);
  257       srcId(i->src(s), 32 + 12);
  258    } else {
  259       code[1] |= 0x0780;
  260    }
  261 }
  262 
  263 void
  264 CodeEmitterNV50::emitFlagsWr(const Instruction *i)
  265 {
  266    assert(!(code[1] & 0x70));
  267 
  268    int flagsDef = i->flagsDef;
  269 
  270    // find flags definition and check that it is the last def
  271    if (flagsDef < 0) {
  272       for (int d = 0; i->defExists(d); ++d)
  273          if (i->def(d).getFile() == FILE_FLAGS)
  274             flagsDef = d;
  275       if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
  276          WARN("Instruction::flagsDef was not set properly\n");
  277    }
  278    if (flagsDef == 0 && i->defExists(1))
  279       WARN("flags def should not be the primary definition\n");
  280 
  281    if (flagsDef >= 0)
  282       code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
  283 
  284 }
  285 
  286 void
  287 CodeEmitterNV50::setARegBits(unsigned int u)
  288 {
  289    code[0] |= (u & 3) << 26;
  290    code[1] |= (u & 4);
  291 }
  292 
  293 void
  294 CodeEmitterNV50::setAReg16(const Instruction *i, int s)
  295 {
  296    if (i->srcExists(s)) {
  297       s = i->src(s).indirect[0];
  298       if (s >= 0)
  299          setARegBits(SDATA(i->src(s)).id + 1);
  300    }
  301 }
  302 
  303 void
  304 CodeEmitterNV50::setImmediate(const Instruction *i, int s)
  305 {
  306    const ImmediateValue *imm = i->src(s).get()->asImm();
  307    assert(imm);
  308 
  309    uint32_t u = imm->reg.data.u32;
  310 
  311    if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
  312       u = ~u;
  313 
  314    code[1] |= 3;
  315    code[0] |= (u & 0x3f) << 16;
  316    code[1] |= (u >> 6) << 2;
  317 }
  318 
  319 void
  320 CodeEmitterNV50::setDst(const Value *dst)
  321 {
  322    const Storage *reg = &dst->join->reg;
  323 
  324    assert(reg->file != FILE_ADDRESS);
  325 
  326    if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
  327       code[0] |= (127 << 2) | 1;
  328       code[1] |= 8;
  329    } else {
  330       int id;
  331       if (reg->file == FILE_SHADER_OUTPUT) {
  332          code[1] |= 8;
  333          id = reg->data.offset / 4;
  334       } else {
  335          id = reg->data.id;
  336       }
  337       code[0] |= id << 2;
  338    }
  339 }
  340 
  341 void
  342 CodeEmitterNV50::setDst(const Instruction *i, int d)
  343 {
  344    if (i->defExists(d)) {
  345       setDst(i->getDef(d));
  346    } else
  347    if (!d) {
  348       code[0] |= 0x01fc; // bit bucket
  349       code[1] |= 0x0008;
  350    }
  351 }
  352 
  353 // 3 * 2 bits:
  354 // 0: r
  355 // 1: a/s
  356 // 2: c
  357 // 3: i
  358 void
  359 CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
  360 {
  361    uint8_t mode = 0;
  362 
  363    for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
  364       switch (i->src(s).getFile()) {
  365       case FILE_GPR:
  366          break;
  367       case FILE_MEMORY_SHARED:
  368       case FILE_SHADER_INPUT:
  369          mode |= 1 << (s * 2);
  370          break;
  371       case FILE_MEMORY_CONST:
  372          mode |= 2 << (s * 2);
  373          break;
  374       case FILE_IMMEDIATE:
  375          mode |= 3 << (s * 2);
  376          break;
  377       default:
  378          ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
  379          assert(0);
  380          break;
  381       }
  382    }
  383    switch (mode) {
  384    case 0x00: // rrr
  385       break;
  386    case 0x01: // arr/grr
  387       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
  388          code[0] |= 0x01800000;
  389          if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
  390             code[1] |= 0x00200000;
  391       } else {
  392          if (enc == NV50_OP_ENC_SHORT)
  393             code[0] |= 0x01000000;
  394          else
  395             code[1] |= 0x00200000;
  396       }
  397       break;
  398    case 0x03: // irr
  399       assert(i->op == OP_MOV);
  400       return;
  401    case 0x0c: // rir
  402       break;
  403    case 0x0d: // gir
  404       assert(progType == Program::TYPE_GEOMETRY ||
  405              progType == Program::TYPE_COMPUTE);
  406       code[0] |= 0x01000000;
  407       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
  408          int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
  409          assert(reg < 3);
  410          code[0] |= (reg + 1) << 26;
  411       }
  412       break;
  413    case 0x08: // rcr
  414       code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
  415       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
  416       break;
  417    case 0x09: // acr/gcr
  418       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
  419          code[0] |= 0x01800000;
  420       } else {
  421          code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
  422          code[1] |= 0x00200000;
  423       }
  424       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
  425       break;
  426    case 0x20: // rrc
  427       code[0] |= 0x01000000;
  428       code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
  429       break;
  430    case 0x21: // arc
  431       code[0] |= 0x01000000;
  432       code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
  433       assert(progType != Program::TYPE_GEOMETRY);
  434       break;
  435    default:
  436       ERROR("not encodable: %x\n", mode);
  437       assert(0);
  438       break;
  439    }
  440    if (progType != Program::TYPE_COMPUTE)
  441       return;
  442 
  443    if ((mode & 3) == 1) {
  444       const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
  445 
  446       switch (i->sType) {
  447       case TYPE_U8:
  448          break;
  449       case TYPE_U16:
  450          code[0] |= 1 << pos;
  451          break;
  452       case TYPE_S16:
  453          code[0] |= 2 << pos;
  454          break;
  455       default:
  456          code[0] |= 3 << pos;
  457          assert(i->getSrc(0)->reg.size == 4);
  458          break;
  459       }
  460    }
  461 }
  462 
  463 void
  464 CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
  465 {
  466    if (Target::operationSrcNr[i->op] <= s)
  467       return;
  468    const Storage *reg = &i->src(s).rep()->reg;
  469 
  470    unsigned int id = (reg->file == FILE_GPR) ?
  471       reg->data.id :
  472       reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
  473 
  474    switch (slot) {
  475    case 0: code[0] |= id << 9; break;
  476    case 1: code[0] |= id << 16; break;
  477    case 2: code[1] |= id << 14; break;
  478    default:
  479       assert(0);
  480       break;
  481    }
  482 }
  483 
  484 // the default form:
  485 //  - long instruction
  486 //  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
  487 //  - address & flags
  488 void
  489 CodeEmitterNV50::emitForm_MAD(const Instruction *i)
  490 {
  491    assert(i->encSize == 8);
  492    code[0] |= 1;
  493 
  494    emitFlagsRd(i);
  495    emitFlagsWr(i);
  496 
  497    setDst(i, 0);
  498 
  499    setSrcFileBits(i, NV50_OP_ENC_LONG);
  500    setSrc(i, 0, 0);
  501    setSrc(i, 1, 1);
  502    setSrc(i, 2, 2);
  503 
  504    if (i->getIndirect(0, 0)) {
  505       assert(!i->srcExists(1) || !i->getIndirect(1, 0));
  506       assert(!i->srcExists(2) || !i->getIndirect(2, 0));
  507       setAReg16(i, 0);
  508    } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
  509       assert(!i->srcExists(2) || !i->getIndirect(2, 0));
  510       setAReg16(i, 1);
  511    } else {
  512       setAReg16(i, 2);
  513    }
  514 }
  515 
  516 // like default form, but 2nd source in slot 2, and no 3rd source
  517 void
  518 CodeEmitterNV50::emitForm_ADD(const Instruction *i)
  519 {
  520    assert(i->encSize == 8);
  521    code[0] |= 1;
  522 
  523    emitFlagsRd(i);
  524    emitFlagsWr(i);
  525 
  526    setDst(i, 0);
  527 
  528    setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
  529    setSrc(i, 0, 0);
  530    if (i->predSrc != 1)
  531       setSrc(i, 1, 2);
  532 
  533    if (i->getIndirect(0, 0)) {
  534       assert(!i->getIndirect(1, 0));
  535       setAReg16(i, 0);
  536    } else {
  537       setAReg16(i, 1);
  538    }
  539 }
  540 
  541 // default short form (rr, ar, rc, gr)
  542 void
  543 CodeEmitterNV50::emitForm_MUL(const Instruction *i)
  544 {
  545    assert(i->encSize == 4 && !(code[0] & 1));
  546    assert(i->defExists(0));
  547    assert(!i->getPredicate());
  548 
  549    setDst(i, 0);
  550 
  551    setSrcFileBits(i, NV50_OP_ENC_SHORT);
  552    setSrc(i, 0, 0);
  553    setSrc(i, 1, 1);
  554 }
  555 
  556 // usual immediate form
  557 // - 1 to 3 sources where second is immediate (rir, gir)
  558 // - no address or predicate possible
  559 void
  560 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
  561 {
  562    assert(i->encSize == 8);
  563    code[0] |= 1;
  564 
  565    assert(i->defExists(0) && i->srcExists(0));
  566 
  567    setDst(i, 0);
  568 
  569    setSrcFileBits(i, NV50_OP_ENC_IMM);
  570    if (Target::operationSrcNr[i->op] > 1) {
  571       setSrc(i, 0, 0);
  572       setImmediate(i, 1);
  573       // If there is another source, it has to be the same as the dest reg.
  574    } else {
  575       setImmediate(i, 0);
  576    }
  577 }
  578 
  579 void
  580 CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
  581 {
  582    uint8_t enc;
  583 
  584    switch (ty) {
  585    case TYPE_F32: // fall through
  586    case TYPE_S32: // fall through
  587    case TYPE_U32:  enc = 0x6; break;
  588    case TYPE_B128: enc = 0x5; break;
  589    case TYPE_F64: // fall through
  590    case TYPE_S64: // fall through
  591    case TYPE_U64:  enc = 0x4; break;
  592    case TYPE_S16:  enc = 0x3; break;
  593    case TYPE_U16:  enc = 0x2; break;
  594    case TYPE_S8:   enc = 0x1; break;
  595    case TYPE_U8:   enc = 0x0; break;
  596    default:
  597       enc = 0;
  598       assert(!"invalid load/store type");
  599       break;
  600    }
  601    code[pos / 32] |= enc << (pos % 32);
  602 }
  603 
  604 void
  605 CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
  606 {
  607    switch (ty) {
  608    case TYPE_U8: break;
  609    case TYPE_U16: code[1] |= 0x4000; break;
  610    case TYPE_S16: code[1] |= 0x8000; break;
  611    case TYPE_F32:
  612    case TYPE_S32:
  613    case TYPE_U32: code[1] |= 0xc000; break;
  614    default:
  615       assert(0);
  616       break;
  617    }
  618 }
  619 
  620 void
  621 CodeEmitterNV50::emitLOAD(const Instruction *i)
  622 {
  623    DataFile sf = i->src(0).getFile();
  624    ASSERTED int32_t offset = i->getSrc(0)->reg.data.offset;
  625 
  626    switch (sf) {
  627    case FILE_SHADER_INPUT:
  628       if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
  629          code[0] = 0x11800001;
  630       else
  631          // use 'mov' where we can
  632          code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
  633       code[1] = 0x00200000 | (i->lanes << 14);
  634       if (typeSizeof(i->dType) == 4)
  635          code[1] |= 0x04000000;
  636       break;
  637    case FILE_MEMORY_SHARED:
  638       if (targ->getChipset() >= 0x84) {
  639          assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
  640          code[0] = 0x10000001;
  641          code[1] = 0x40000000;
  642 
  643          if (typeSizeof(i->dType) == 4)
  644             code[1] |= 0x04000000;
  645 
  646          emitLoadStoreSizeCS(i->sType);
  647       } else {
  648          assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
  649          code[0] = 0x10000001;
  650          code[1] = 0x00200000 | (i->lanes << 14);
  651          emitLoadStoreSizeCS(i->sType);
  652       }
  653       break;
  654    case FILE_MEMORY_CONST:
  655       code[0] = 0x10000001;
  656       code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
  657       if (typeSizeof(i->dType) == 4)
  658          code[1] |= 0x04000000;
  659       emitLoadStoreSizeCS(i->sType);
  660       break;
  661    case FILE_MEMORY_LOCAL:
  662       code[0] = 0xd0000001;
  663       code[1] = 0x40000000;
  664       break;
  665    case FILE_MEMORY_GLOBAL:
  666       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
  667       code[1] = 0x80000000;
  668       break;
  669    default:
  670       assert(!"invalid load source file");
  671       break;
  672    }
  673    if (sf == FILE_MEMORY_LOCAL ||
  674        sf == FILE_MEMORY_GLOBAL)
  675       emitLoadStoreSizeLG(i->sType, 21 + 32);
  676 
  677    setDst(i, 0);
  678 
  679    emitFlagsRd(i);
  680    emitFlagsWr(i);
  681 
  682    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
  683       srcId(*i->src(0).getIndirect(0), 9);
  684    } else {
  685       setAReg16(i, 0);
  686       srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
  687    }
  688 }
  689 
  690 void
  691 CodeEmitterNV50::emitSTORE(const Instruction *i)
  692 {
  693    DataFile f = i->getSrc(0)->reg.file;
  694    int32_t offset = i->getSrc(0)->reg.data.offset;
  695 
  696    switch (f) {
  697    case FILE_SHADER_OUTPUT:
  698       code[0] = 0x00000001 | ((offset >> 2) << 9);
  699       code[1] = 0x80c00000;
  700       srcId(i->src(1), 32 + 14);
  701       break;
  702    case FILE_MEMORY_GLOBAL:
  703       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
  704       code[1] = 0xa0000000;
  705       emitLoadStoreSizeLG(i->dType, 21 + 32);
  706       srcId(i->src(1), 2);
  707       break;
  708    case FILE_MEMORY_LOCAL:
  709       code[0] = 0xd0000001;
  710       code[1] = 0x60000000;
  711       emitLoadStoreSizeLG(i->dType, 21 + 32);
  712       srcId(i->src(1), 2);
  713       break;
  714    case FILE_MEMORY_SHARED:
  715       code[0] = 0x00000001;
  716       code[1] = 0xe0000000;
  717       switch (typeSizeof(i->dType)) {
  718       case 1:
  719          code[0] |= offset << 9;
  720          code[1] |= 0x00400000;
  721          break;
  722       case 2:
  723          code[0] |= (offset >> 1) << 9;
  724          break;
  725       case 4:
  726          code[0] |= (offset >> 2) << 9;
  727          code[1] |= 0x04200000;
  728          break;
  729       default:
  730          assert(0);
  731          break;
  732       }
  733       srcId(i->src(1), 32 + 14);
  734       break;
  735    default:
  736       assert(!"invalid store destination file");
  737       break;
  738    }
  739 
  740    if (f == FILE_MEMORY_GLOBAL)
  741       srcId(*i->src(0).getIndirect(0), 9);
  742    else
  743       setAReg16(i, 0);
  744 
  745    if (f == FILE_MEMORY_LOCAL)
  746       srcAddr16(i->src(0), false, 9);
  747 
  748    emitFlagsRd(i);
  749 }
  750 
  751 void
  752 CodeEmitterNV50::emitMOV(const Instruction *i)
  753 {
  754    DataFile sf = i->getSrc(0)->reg.file;
  755    DataFile df = i->getDef(0)->reg.file;
  756 
  757    assert(sf == FILE_GPR || df == FILE_GPR);
  758 
  759    if (sf == FILE_FLAGS) {
  760       assert(i->flagsSrc >= 0);
  761       code[0] = 0x00000001;
  762       code[1] = 0x20000000;
  763       defId(i->def(0), 2);
  764       emitFlagsRd(i);
  765    } else
  766    if (sf == FILE_ADDRESS) {
  767       code[0] = 0x00000001;
  768       code[1] = 0x40000000;
  769       defId(i->def(0), 2);
  770       setARegBits(SDATA(i->src(0)).id + 1);
  771       emitFlagsRd(i);
  772    } else
  773    if (df == FILE_FLAGS) {
  774       assert(i->flagsDef >= 0);
  775       code[0] = 0x00000001;
  776       code[1] = 0xa0000000;
  777       srcId(i->src(0), 9);
  778       emitFlagsRd(i);
  779       emitFlagsWr(i);
  780    } else
  781    if (sf == FILE_IMMEDIATE) {
  782       code[0] = 0x10008001;
  783       code[1] = 0x00000003;
  784       emitForm_IMM(i);
  785    } else {
  786       if (i->encSize == 4) {
  787          code[0] = 0x10008000;
  788       } else {
  789          code[0] = 0x10000001;
  790          code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
  791          code[1] |= (i->lanes << 14);
  792          emitFlagsRd(i);
  793       }
  794       defId(i->def(0), 2);
  795       srcId(i->src(0), 9);
  796    }
  797    if (df == FILE_SHADER_OUTPUT) {
  798       assert(i->encSize == 8);
  799       code[1] |= 0x8;
  800    }
  801 }
  802 
  803 static inline uint8_t getSRegEncoding(const ValueRef &ref)
  804 {
  805    switch (SDATA(ref).sv.sv) {
  806    case SV_PHYSID:        return 0;
  807    case SV_CLOCK:         return 1;
  808    case SV_VERTEX_STRIDE: return 3;
  809 // case SV_PM_COUNTER:    return 4 + SDATA(ref).sv.index;
  810    case SV_SAMPLE_INDEX:  return 8;
  811    default:
  812       assert(!"no sreg for system value");
  813       return 0;
  814    }
  815 }
  816 
  817 void
  818 CodeEmitterNV50::emitRDSV(const Instruction *i)
  819 {
  820    code[0] = 0x00000001;
  821    code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
  822    defId(i->def(0), 2);
  823    emitFlagsRd(i);
  824 }
  825 
  826 void
  827 CodeEmitterNV50::emitNOP()
  828 {
  829    code[0] = 0xf0000001;
  830    code[1] = 0xe0000000;
  831 }
  832 
  833 void
  834 CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
  835 {
  836    code[0] = 0xc0000000 | (lane << 16);
  837    code[1] = 0x80000000;
  838 
  839    code[0] |= (quOp & 0x03) << 20;
  840    code[1] |= (quOp & 0xfc) << 20;
  841 
  842    emitForm_ADD(i);
  843 
  844    if (!i->srcExists(1) || i->predSrc == 1)
  845       srcId(i->src(0), 32 + 14);
  846 }
  847 
  848 /* NOTE: This returns the base address of a vertex inside the primitive.
  849  * src0 is an immediate, the index (not offset) of the vertex
  850  * inside the primitive. XXX: signed or unsigned ?
  851  * src1 (may be NULL) should use whatever units the hardware requires
  852  * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
  853  */
  854 void
  855 CodeEmitterNV50::emitPFETCH(const Instruction *i)
  856 {
  857    const uint32_t prim = i->src(0).get()->reg.data.u32;
  858    assert(prim <= 127);
  859 
  860    if (i->def(0).getFile() == FILE_ADDRESS) {
  861       // shl $aX a[] 0
  862       code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
  863       code[1] = 0xc0200000;
  864       code[0] |= prim << 9;
  865       assert(!i->srcExists(1));
  866    } else
  867    if (i->srcExists(1)) {
  868       // ld b32 $rX a[$aX+base]
  869       code[0] = 0x00000001;
  870       code[1] = 0x04200000 | (0xf << 14);
  871       defId(i->def(0), 2);
  872       code[0] |= prim << 9;
  873       setARegBits(SDATA(i->src(1)).id + 1);
  874    } else {
  875       // mov b32 $rX a[]
  876       code[0] = 0x10000001;
  877       code[1] = 0x04200000 | (0xf << 14);
  878       defId(i->def(0), 2);
  879       code[0] |= prim << 9;
  880    }
  881    emitFlagsRd(i);
  882 }
  883 
  884 static void
  885 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
  886 {
  887    int ipa = entry->ipa;
  888    int encSize = entry->reg;
  889    int loc = entry->loc;
  890 
  891    if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
  892        (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
  893       if (data.force_persample_interp) {
  894          if (encSize == 8)
  895             code[loc + 1] |= 1 << 16;
  896          else
  897             code[loc + 0] |= 1 << 24;
  898       } else {
  899          if (encSize == 8)
  900             code[loc + 1] &= ~(1 << 16);
  901          else
  902             code[loc + 0] &= ~(1 << 24);
  903       }
  904    }
  905 }
  906 
  907 void
  908 CodeEmitterNV50::emitINTERP(const Instruction *i)
  909 {
  910    code[0] = 0x80000000;
  911 
  912    defId(i->def(0), 2);
  913    srcAddr8(i->src(0), 16);
  914    setAReg16(i, 0);
  915 
  916    if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
  917       code[0] |= 1 << 8;
  918    } else {
  919       if (i->op == OP_PINTERP) {
  920          code[0] |= 1 << 25;
  921          srcId(i->src(1), 9);
  922       }
  923       if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
  924          code[0] |= 1 << 24;
  925    }
  926 
  927    if (i->encSize == 8) {
  928       if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
  929          code[1] = 4 << 16;
  930       else
  931          code[1] = (code[0] & (3 << 24)) >> (24 - 16);
  932       code[0] &= ~0x03000000;
  933       code[0] |= 1;
  934       emitFlagsRd(i);
  935    }
  936 
  937    addInterp(i->ipa, i->encSize, interpApply);
  938 }
  939 
  940 void
  941 CodeEmitterNV50::emitMINMAX(const Instruction *i)
  942 {
  943    if (i->dType == TYPE_F64) {
  944       code[0] = 0xe0000000;
  945       code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
  946    } else {
  947       code[0] = 0x30000000;
  948       code[1] = 0x80000000;
  949       if (i->op == OP_MIN)
  950          code[1] |= 0x20000000;
  951 
  952       switch (i->dType) {
  953       case TYPE_F32: code[0] |= 0x80000000; break;
  954       case TYPE_S32: code[1] |= 0x8c000000; break;
  955       case TYPE_U32: code[1] |= 0x84000000; break;
  956       case TYPE_S16: code[1] |= 0x80000000; break;
  957       case TYPE_U16: break;
  958       default:
  959          assert(0);
  960          break;
  961       }
  962    }
  963 
  964    code[1] |= i->src(0).mod.abs() << 20;
  965    code[1] |= i->src(0).mod.neg() << 26;
  966    code[1] |= i->src(1).mod.abs() << 19;
  967    code[1] |= i->src(1).mod.neg() << 27;
  968 
  969    emitForm_MAD(i);
  970 }
  971 
  972 void
  973 CodeEmitterNV50::emitFMAD(const Instruction *i)
  974 {
  975    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
  976    const int neg_add = i->src(2).mod.neg();
  977 
  978    code[0] = 0xe0000000;
  979 
  980    if (i->src(1).getFile() == FILE_IMMEDIATE) {
  981       code[1] = 0;
  982       emitForm_IMM(i);
  983       code[0] |= neg_mul << 15;
  984       code[0] |= neg_add << 22;
  985       if (i->saturate)
  986          code[0] |= 1 << 8;
  987    } else
  988    if (i->encSize == 4) {
  989       emitForm_MUL(i);
  990       code[0] |= neg_mul << 15;
  991       code[0] |= neg_add << 22;
  992       if (i->saturate)
  993          code[0] |= 1 << 8;
  994    } else {
  995       code[1]  = neg_mul << 26;
  996       code[1] |= neg_add << 27;
  997       if (i->saturate)
  998          code[1] |= 1 << 29;
  999       emitForm_MAD(i);
 1000    }
 1001 }
 1002 
 1003 void
 1004 CodeEmitterNV50::emitDMAD(const Instruction *i)
 1005 {
 1006    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
 1007    const int neg_add = i->src(2).mod.neg();
 1008 
 1009    assert(i->encSize == 8);
 1010    assert(!i->saturate);
 1011 
 1012    code[1] = 0x40000000;
 1013    code[0] = 0xe0000000;
 1014 
 1015    code[1] |= neg_mul << 26;
 1016    code[1] |= neg_add << 27;
 1017 
 1018    roundMode_MAD(i);
 1019 
 1020    emitForm_MAD(i);
 1021 }
 1022 
 1023 void
 1024 CodeEmitterNV50::emitFADD(const Instruction *i)
 1025 {
 1026    const int neg0 = i->src(0).mod.neg();
 1027    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
 1028 
 1029    code[0] = 0xb0000000;
 1030 
 1031    assert(!(i->src(0).mod | i->src(1).mod).abs());
 1032 
 1033    if (i->src(1).getFile() == FILE_IMMEDIATE) {
 1034       code[1] = 0;
 1035       emitForm_IMM(i);
 1036       code[0] |= neg0 << 15;
 1037       code[0] |= neg1 << 22;
 1038       if (i->saturate)
 1039          code[0] |= 1 << 8;
 1040    } else
 1041    if (i->encSize == 8) {
 1042       code[1] = 0;
 1043       emitForm_ADD(i);
 1044       code[1] |= neg0 << 26;
 1045       code[1] |= neg1 << 27;
 1046       if (i->saturate)
 1047          code[1] |= 1 << 29;
 1048    } else {
 1049       emitForm_MUL(i);
 1050       code[0] |= neg0 << 15;
 1051       code[0] |= neg1 << 22;
 1052       if (i->saturate)
 1053          code[0] |= 1 << 8;
 1054    }
 1055 }
 1056 
 1057 void
 1058 CodeEmitterNV50::emitDADD(const Instruction *i)
 1059 {
 1060    const int neg0 = i->src(0).mod.neg();
 1061    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
 1062 
 1063    assert(!(i->src(0).mod | i->src(1).mod).abs());
 1064    assert(!i->saturate);
 1065    assert(i->encSize == 8);
 1066 
 1067    code[1] = 0x60000000;
 1068    code[0] = 0xe0000000;
 1069 
 1070    emitForm_ADD(i);
 1071 
 1072    code[1] |= neg0 << 26;
 1073    code[1] |= neg1 << 27;
 1074 }
 1075 
 1076 void
 1077 CodeEmitterNV50::emitUADD(const Instruction *i)
 1078 {
 1079    const int neg0 = i->src(0).mod.neg();
 1080    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
 1081 
 1082    code[0] = 0x20008000;
 1083 
 1084    if (i->src(1).getFile() == FILE_IMMEDIATE) {
 1085       code[1] = 0;
 1086       emitForm_IMM(i);
 1087    } else
 1088    if (i->encSize == 8) {
 1089       code[0] = 0x20000000;
 1090       code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
 1091       emitForm_ADD(i);
 1092    } else {
 1093       emitForm_MUL(i);
 1094    }
 1095    assert(!(neg0 && neg1));
 1096    code[0] |= neg0 << 28;
 1097    code[0] |= neg1 << 22;
 1098 
 1099    if (i->flagsSrc >= 0) {
 1100       // addc == sub | subr
 1101       assert(!(code[0] & 0x10400000) && !i->getPredicate());
 1102       code[0] |= 0x10400000;
 1103       srcId(i->src(i->flagsSrc), 32 + 12);
 1104    }
 1105 }
 1106 
 1107 void
 1108 CodeEmitterNV50::emitAADD(const Instruction *i)
 1109 {
 1110    const int s = (i->op == OP_MOV) ? 0 : 1;
 1111 
 1112    code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
 1113    code[1] = 0x20000000;
 1114 
 1115    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
 1116 
 1117    emitFlagsRd(i);
 1118 
 1119    if (s && i->srcExists(0))
 1120       setARegBits(SDATA(i->src(0)).id + 1);
 1121 }
 1122 
 1123 void
 1124 CodeEmitterNV50::emitIMUL(const Instruction *i)
 1125 {
 1126    code[0] = 0x40000000;
 1127 
 1128    if (i->src(1).getFile() == FILE_IMMEDIATE) {
 1129       if (i->sType == TYPE_S16)
 1130          code[0] |= 0x8100;
 1131       code[1] = 0;
 1132       emitForm_IMM(i);
 1133    } else
 1134    if (i->encSize == 8) {
 1135       code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
 1136       emitForm_MAD(i);
 1137    } else {
 1138       if (i->sType == TYPE_S16)
 1139          code[0] |= 0x8100;
 1140       emitForm_MUL(i);
 1141    }
 1142 }
 1143 
 1144 void
 1145 CodeEmitterNV50::emitFMUL(const Instruction *i)
 1146 {
 1147    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
 1148 
 1149    code[0] = 0xc0000000;
 1150 
 1151    if (i->src(1).getFile() == FILE_IMMEDIATE) {
 1152       code[1] = 0;
 1153       emitForm_IMM(i);
 1154       if (neg)
 1155          code[0] |= 0x8000;
 1156       if (i->saturate)
 1157          code[0] |= 1 << 8;
 1158    } else
 1159    if (i->encSize == 8) {
 1160       code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
 1161       if (neg)
 1162          code[1] |= 0x08000000;
 1163       if (i->saturate)
 1164          code[1] |= 1 << 20;
 1165       emitForm_MAD(i);
 1166    } else {
 1167       emitForm_MUL(i);
 1168       if (neg)
 1169          code[0] |= 0x8000;
 1170       if (i->saturate)
 1171          code[0] |= 1 << 8;
 1172    }
 1173 }
 1174 
 1175 void
 1176 CodeEmitterNV50::emitDMUL(const Instruction *i)
 1177 {
 1178    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
 1179 
 1180    assert(!i->saturate);
 1181    assert(i->encSize == 8);
 1182 
 1183    code[1] = 0x80000000;
 1184    code[0] = 0xe0000000;
 1185 
 1186    if (neg)
 1187       code[1] |= 0x08000000;
 1188 
 1189    roundMode_CVT(i->rnd);
 1190 
 1191    emitForm_MAD(i);
 1192 }
 1193 
 1194 void
 1195 CodeEmitterNV50::emitIMAD(const Instruction *i)
 1196 {
 1197    int mode;
 1198    code[0] = 0x60000000;
 1199 
 1200    assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
 1201    if (!isSignedType(i->sType))
 1202       mode = 0;
 1203    else if (i->saturate)
 1204       mode = 2;
 1205    else
 1206       mode = 1;
 1207 
 1208    if (i->src(1).getFile() == FILE_IMMEDIATE) {
 1209       code[1] = 0;
 1210       emitForm_IMM(i);
 1211       code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
 1212       if (i->flagsSrc >= 0) {
 1213          assert(!(code[0] & 0x10400000));
 1214          assert(SDATA(i->src(i->flagsSrc)).id == 0);
 1215          code[0] |= 0x10400000;
 1216       }
 1217    } else
 1218    if (i->encSize == 4) {
 1219       emitForm_MUL(i);
 1220       code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
 1221       if (i->flagsSrc >= 0) {
 1222          assert(!(code[0] & 0x10400000));
 1223          assert(SDATA(i->src(i->flagsSrc)).id == 0);
 1224          code[0] |= 0x10400000;
 1225       }
 1226    } else {
 1227       code[1] = mode << 29;
 1228       emitForm_MAD(i);
 1229 
 1230       if (i->flagsSrc >= 0) {
 1231          // add with carry from $cX
 1232          assert(!(code[1] & 0x0c000000) && !i->getPredicate());
 1233          code[1] |= 0xc << 24;
 1234          srcId(i->src(i->flagsSrc), 32 + 12);
 1235       }
 1236    }
 1237 }
 1238 
 1239 void
 1240 CodeEmitterNV50::emitISAD(const Instruction *i)
 1241 {
 1242    if (i->encSize == 8) {
 1243       code[0] = 0x50000000;
 1244       switch (i->sType) {
 1245       case TYPE_U32: code[1] = 0x04000000; break;
 1246       case TYPE_S32: code[1] = 0x0c000000; break;
 1247       case TYPE_U16: code[1] = 0x00000000; break;
 1248       case TYPE_S16: code[1] = 0x08000000; break;
 1249       default:
 1250          assert(0);
 1251          break;
 1252       }
 1253       emitForm_MAD(i);
 1254    } else {
 1255       switch (i->sType) {
 1256       case TYPE_U32: code[0] = 0x50008000; break;
 1257       case TYPE_S32: code[0] = 0x50008100; break;
 1258       case TYPE_U16: code[0] = 0x50000000; break;
 1259       case TYPE_S16: code[0] = 0x50000100; break;
 1260       default:
 1261          assert(0);
 1262          break;
 1263       }
 1264       emitForm_MUL(i);
 1265    }
 1266 }
 1267 
 1268 static void
 1269 alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 1270 {
 1271    int loc = entry->loc;
 1272    int enc;
 1273 
 1274    switch (data.alphatest) {
 1275    case PIPE_FUNC_NEVER: enc = 0x0; break;
 1276    case PIPE_FUNC_LESS: enc = 0x1; break;
 1277    case PIPE_FUNC_EQUAL: enc = 0x2; break;
 1278    case PIPE_FUNC_LEQUAL: enc = 0x3; break;
 1279    case PIPE_FUNC_GREATER: enc = 0x4; break;
 1280    case PIPE_FUNC_NOTEQUAL: enc = 0x5; break;
 1281    case PIPE_FUNC_GEQUAL: enc = 0x6; break;
 1282    default:
 1283    case PIPE_FUNC_ALWAYS: enc = 0xf; break;
 1284    }
 1285 
 1286    code[loc + 1] &= ~(0x1f << 14);
 1287    code[loc + 1] |= enc << 14;
 1288 }
 1289 
 1290 void
 1291 CodeEmitterNV50::emitSET(const Instruction *i)
 1292 {
 1293    code[0] = 0x30000000;
 1294    code[1] = 0x60000000;
 1295 
 1296    switch (i->sType) {
 1297    case TYPE_F64:
 1298       code[0] = 0xe0000000;
 1299       code[1] = 0xe0000000;
 1300       break;
 1301    case TYPE_F32: code[0] |= 0x80000000; break;
 1302    case TYPE_S32: code[1] |= 0x0c000000; break;
 1303    case TYPE_U32: code[1] |= 0x04000000; break;
 1304    case TYPE_S16: code[1] |= 0x08000000; break;
 1305    case TYPE_U16: break;
 1306    default:
 1307       assert(0);
 1308       break;
 1309    }
 1310 
 1311    emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
 1312 
 1313    if (i->src(0).mod.neg()) code[1] |= 0x04000000;
 1314    if (i->src(1).mod.neg()) code[1] |= 0x08000000;
 1315    if (i->src(0).mod.abs()) code[1] |= 0x00100000;
 1316    if (i->src(1).mod.abs()) code[1] |= 0x00080000;
 1317 
 1318    emitForm_MAD(i);
 1319 
 1320    if (i->subOp == 1) {
 1321       addInterp(0, 0, alphatestSet);
 1322    }
 1323 }
 1324 
 1325 void
 1326 CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
 1327 {
 1328    switch (rnd) {
 1329    case ROUND_NI: code[1] |= 0x08000000; break;
 1330    case ROUND_M:  code[1] |= 0x00020000; break;
 1331    case ROUND_MI: code[1] |= 0x08020000; break;
 1332    case ROUND_P:  code[1] |= 0x00040000; break;
 1333    case ROUND_PI: code[1] |= 0x08040000; break;
 1334    case ROUND_Z:  code[1] |= 0x00060000; break;
 1335    case ROUND_ZI: code[1] |= 0x08060000; break;
 1336    default:
 1337       assert(rnd == ROUND_N);
 1338       break;
 1339    }
 1340 }
 1341 
// Encode a conversion. Besides OP_CVT this also handles OP_CEIL, OP_FLOOR,
// OP_TRUNC, OP_ABS, OP_NEG and OP_SAT.
//
// The high word is looked up from the (destination type, source type) pair;
// unsupported pairs hit assert(0). Rounding, abs/neg/sat bits and the source
// modifiers are then merged in before the MAD-form operand encoding.
void
CodeEmitterNV50::emitCVT(const Instruction *i)
{
   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
   RoundMode rnd;
   DataType dType;

   // CEIL/FLOOR/TRUNC imply a rounding mode; float-to-float conversions use
   // the *I variants of the modes. Everything else uses the instruction's
   // own rounding mode.
   switch (i->op) {
   case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
   case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
   case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
   default:
      rnd = i->rnd;
      break;
   }

   // A NEG with U32 destination is encoded with an S32 destination.
   if (i->op == OP_NEG && i->dType == TYPE_U32)
      dType = TYPE_S32;
   else
      dType = i->dType;

   code[0] = 0xa0000000;

   // Select the high word by destination type, then by source type.
   switch (dType) {
   case TYPE_F64:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0xc4404000; break;
      case TYPE_S64: code[1] = 0x44414000; break;
      case TYPE_U64: code[1] = 0x44404000; break;
      case TYPE_F32: code[1] = 0xc4400000; break;
      case TYPE_S32: code[1] = 0x44410000; break;
      case TYPE_U32: code[1] = 0x44400000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_S64:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x8c404000; break;
      case TYPE_F32: code[1] = 0x8c400000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_U64:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x84404000; break;
      case TYPE_F32: code[1] = 0x84400000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_F32:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0xc0404000; break;
      case TYPE_S64: code[1] = 0x40414000; break;
      case TYPE_U64: code[1] = 0x40404000; break;
      case TYPE_F32: code[1] = 0xc4004000; break;
      case TYPE_S32: code[1] = 0x44014000; break;
      case TYPE_U32: code[1] = 0x44004000; break;
      case TYPE_F16: code[1] = 0xc4000000; break;
      case TYPE_U16: code[1] = 0x44000000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_S32:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x88404000; break;
      case TYPE_F32: code[1] = 0x8c004000; break;
      case TYPE_S32: code[1] = 0x0c014000; break;
      case TYPE_U32: code[1] = 0x0c004000; break;
      case TYPE_F16: code[1] = 0x8c000000; break;
      case TYPE_S16: code[1] = 0x0c010000; break;
      case TYPE_U16: code[1] = 0x0c000000; break;
      case TYPE_S8:  code[1] = 0x0c018000; break;
      case TYPE_U8:  code[1] = 0x0c008000; break;
      default:
         assert(0);
         break;
      }
      break;
   case TYPE_U32:
      switch (i->sType) {
      case TYPE_F64: code[1] = 0x80404000; break;
      case TYPE_F32: code[1] = 0x84004000; break;
      case TYPE_S32: code[1] = 0x04014000; break;
      case TYPE_U32: code[1] = 0x04004000; break;
      case TYPE_F16: code[1] = 0x84000000; break;
      case TYPE_S16: code[1] = 0x04010000; break;
      case TYPE_U16: code[1] = 0x04000000; break;
      case TYPE_S8:  code[1] = 0x04018000; break;
      case TYPE_U8:  code[1] = 0x04008000; break;
      default:
         assert(0);
         break;
      }
      break;
   // 8 and 16 bit destinations are not handled by this encoder.
   case TYPE_S16:
   case TYPE_U16:
   case TYPE_S8:
   case TYPE_U8:
   default:
      assert(0);
      break;
   }
   // A byte-typed source that lives in a 4 byte register also sets bit 14.
   if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
      code[1] |= 0x00004000;

   roundMode_CVT(rnd);

   // Operation bits: abs is bit 20, saturate is bit 19, negate is bit 29.
   switch (i->op) {
   case OP_ABS: code[1] |= 1 << 20; break;
   case OP_SAT: code[1] |= 1 << 19; break;
   case OP_NEG: code[1] |= 1 << 29; break;
   default:
      break;
   }
   // The source neg modifier is XORed in, so OP_NEG applied to a negated
   // source clears the negate bit again. This must stay after the OP_NEG
   // handling above.
   code[1] ^= i->src(0).mod.neg() << 29;
   code[1] |= i->src(0).mod.abs() << 20;
   if (i->saturate)
      code[1] |= 1 << 19;

   // OP_ABS combined with a neg source modifier is not supported.
   assert(i->op != OP_ABS || !i->src(0).mod.neg());

   emitForm_MAD(i);
}
 1473 
 1474 void
 1475 CodeEmitterNV50::emitPreOp(const Instruction *i)
 1476 {
 1477    code[0] = 0xb0000000;
 1478    code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
 1479 
 1480    code[1] |= i->src(0).mod.abs() << 20;
 1481    code[1] |= i->src(0).mod.neg() << 26;
 1482 
 1483    emitForm_MAD(i);
 1484 }
 1485 
 1486 void
 1487 CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
 1488 {
 1489    code[0] = 0x90000000;
 1490 
 1491    if (i->encSize == 4) {
 1492       assert(i->op == OP_RCP);
 1493       assert(!i->saturate);
 1494       code[0] |= i->src(0).mod.abs() << 15;
 1495       code[0] |= i->src(0).mod.neg() << 22;
 1496       emitForm_MUL(i);
 1497    } else {
 1498       code[1] = subOp << 29;
 1499       code[1] |= i->src(0).mod.abs() << 20;
 1500       code[1] |= i->src(0).mod.neg() << 26;
 1501       if (i->saturate) {
 1502          assert(subOp == 6 && i->op == OP_EX2);
 1503          code[1] |= 1 << 27;
 1504       }
 1505       emitForm_MAD(i);
 1506    }
 1507 }
 1508 
 1509 void
 1510 CodeEmitterNV50::emitNOT(const Instruction *i)
 1511 {
 1512    code[0] = 0xd0000000;
 1513    code[1] = 0x0002c000;
 1514 
 1515    switch (i->sType) {
 1516    case TYPE_U32:
 1517    case TYPE_S32:
 1518       code[1] |= 0x04000000;
 1519       break;
 1520    default:
 1521       break;
 1522    }
 1523    emitForm_MAD(i);
 1524    setSrc(i, 0, 1);
 1525 }
 1526 
 1527 void
 1528 CodeEmitterNV50::emitLogicOp(const Instruction *i)
 1529 {
 1530    code[0] = 0xd0000000;
 1531    code[1] = 0;
 1532 
 1533    if (i->src(1).getFile() == FILE_IMMEDIATE) {
 1534       switch (i->op) {
 1535       case OP_OR:  code[0] |= 0x0100; break;
 1536       case OP_XOR: code[0] |= 0x8000; break;
 1537       default:
 1538          assert(i->op == OP_AND);
 1539          break;
 1540       }
 1541       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
 1542          code[0] |= 1 << 22;
 1543 
 1544       emitForm_IMM(i);
 1545    } else {
 1546       switch (i->op) {
 1547       case OP_AND: code[1] = 0x04000000; break;
 1548       case OP_OR:  code[1] = 0x04004000; break;
 1549       case OP_XOR: code[1] = 0x04008000; break;
 1550       default:
 1551          assert(0);
 1552          break;
 1553       }
 1554       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
 1555          code[1] |= 1 << 16;
 1556       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
 1557          code[1] |= 1 << 17;
 1558 
 1559       emitForm_MAD(i);
 1560    }
 1561 }
 1562 
 1563 void
 1564 CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
 1565 {
 1566    code[0] = 0x00000001 | (shl << 16);
 1567    code[1] = 0xc0000000;
 1568 
 1569    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
 1570 
 1571    setSrcFileBits(i, NV50_OP_ENC_IMM);
 1572    setSrc(i, 0, 0);
 1573    emitFlagsRd(i);
 1574 }
 1575 
 1576 void
 1577 CodeEmitterNV50::emitShift(const Instruction *i)
 1578 {
 1579    if (i->def(0).getFile() == FILE_ADDRESS) {
 1580       assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
 1581       emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
 1582    } else {
 1583       code[0] = 0x30000001;
 1584       code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
 1585       if (i->op == OP_SHR && isSignedType(i->sType))
 1586           code[1] |= 1 << 27;
 1587 
 1588       if (i->src(1).getFile() == FILE_IMMEDIATE) {
 1589          code[1] |= 1 << 20;
 1590          code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
 1591          defId(i->def(0), 2);
 1592          srcId(i->src(0), 9);
 1593          emitFlagsRd(i);
 1594       } else {
 1595          emitForm_MAD(i);
 1596       }
 1597    }
 1598 }
 1599 
 1600 void
 1601 CodeEmitterNV50::emitOUT(const Instruction *i)
 1602 {
 1603    code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
 1604    code[1] = 0xc0000000;
 1605 
 1606    emitFlagsRd(i);
 1607 }
 1608 
 1609 void
 1610 CodeEmitterNV50::emitTEX(const TexInstruction *i)
 1611 {
 1612    code[0] = 0xf0000001;
 1613    code[1] = 0x00000000;
 1614 
 1615    switch (i->op) {
 1616    case OP_TXB:
 1617       code[1] = 0x20000000;
 1618       break;
 1619    case OP_TXL:
 1620       code[1] = 0x40000000;
 1621       break;
 1622    case OP_TXF:
 1623       code[0] |= 0x01000000;
 1624       break;
 1625    case OP_TXG:
 1626       code[0] |= 0x01000000;
 1627       code[1] = 0x80000000;
 1628       break;
 1629    case OP_TXLQ:
 1630       code[1] = 0x60020000;
 1631       break;
 1632    default:
 1633       assert(i->op == OP_TEX);
 1634       break;
 1635    }
 1636 
 1637    code[0] |= i->tex.r << 9;
 1638    code[0] |= i->tex.s << 17;
 1639 
 1640    int argc = i->tex.target.getArgCount();
 1641 
 1642    if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
 1643       argc += 1;
 1644    if (i->tex.target.isShadow())
 1645       argc += 1;
 1646    assert(argc <= 4);
 1647 
 1648    code[0] |= (argc - 1) << 22;
 1649 
 1650    if (i->tex.target.isCube()) {
 1651       code[0] |= 0x08000000;
 1652    } else
 1653    if (i->tex.useOffsets) {
 1654       code[1] |= (i->tex.offset[0] & 0xf) << 24;
 1655       code[1] |= (i->tex.offset[1] & 0xf) << 20;
 1656       code[1] |= (i->tex.offset[2] & 0xf) << 16;
 1657    }
 1658 
 1659    code[0] |= (i->tex.mask & 0x3) << 25;
 1660    code[1] |= (i->tex.mask & 0xc) << 12;
 1661 
 1662    if (i->tex.liveOnly)
 1663       code[1] |= 1 << 2;
 1664    if (i->tex.derivAll)
 1665       code[1] |= 1 << 3;
 1666 
 1667    defId(i->def(0), 2);
 1668 
 1669    emitFlagsRd(i);
 1670 }
 1671 
 1672 void
 1673 CodeEmitterNV50::emitTXQ(const TexInstruction *i)
 1674 {
 1675    assert(i->tex.query == TXQ_DIMS);
 1676 
 1677    code[0] = 0xf0000001;
 1678    code[1] = 0x60000000;
 1679 
 1680    code[0] |= i->tex.r << 9;
 1681    code[0] |= i->tex.s << 17;
 1682 
 1683    code[0] |= (i->tex.mask & 0x3) << 25;
 1684    code[1] |= (i->tex.mask & 0xc) << 12;
 1685 
 1686    defId(i->def(0), 2);
 1687 
 1688    emitFlagsRd(i);
 1689 }
 1690 
 1691 void
 1692 CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
 1693 {
 1694    code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
 1695    code[1] = 0x60010000;
 1696 
 1697    code[0] |= (i->tex.mask & 0x3) << 25;
 1698    code[1] |= (i->tex.mask & 0xc) << 12;
 1699    defId(i->def(0), 2);
 1700 
 1701    emitFlagsRd(i);
 1702 }
 1703 
 1704 void
 1705 CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
 1706 {
 1707    uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
 1708 
 1709    code[0] = 0x10000003; // bra
 1710    code[1] = 0x00000780; // always
 1711 
 1712    switch (i->subOp) {
 1713    case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
 1714       break;
 1715    case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
 1716       pos += 8;
 1717       break;
 1718    default:
 1719       assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
 1720       code[0] = 0x20000003; // call
 1721       code[1] = 0x00000000; // no predicate
 1722       break;
 1723    }
 1724    addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
 1725    addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
 1726 }
 1727 
 1728 void
 1729 CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
 1730 {
 1731    const FlowInstruction *f = i->asFlow();
 1732    bool hasPred = false;
 1733    bool hasTarg = false;
 1734 
 1735    code[0] = 0x00000003 | (flowOp << 28);
 1736    code[1] = 0x00000000;
 1737 
 1738    switch (i->op) {
 1739    case OP_BRA:
 1740       hasPred = true;
 1741       hasTarg = true;
 1742       break;
 1743    case OP_BREAK:
 1744    case OP_BRKPT:
 1745    case OP_DISCARD:
 1746    case OP_RET:
 1747       hasPred = true;
 1748       break;
 1749    case OP_CALL:
 1750    case OP_PREBREAK:
 1751    case OP_JOINAT:
 1752       hasTarg = true;
 1753       break;
 1754    case OP_PRERET:
 1755       hasTarg = true;
 1756       if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
 1757          emitPRERETEmu(f);
 1758          return;
 1759       }
 1760       break;
 1761    default:
 1762       break;
 1763    }
 1764 
 1765    if (hasPred)
 1766       emitFlagsRd(i);
 1767 
 1768    if (hasTarg && f) {
 1769       uint32_t pos;
 1770 
 1771       if (f->op == OP_CALL) {
 1772          if (f->builtin) {
 1773             pos = targNV50->getBuiltinOffset(f->target.builtin);
 1774          } else {
 1775             pos = f->target.fn->binPos;
 1776          }
 1777       } else {
 1778          pos = f->target.bb->binPos;
 1779       }
 1780 
 1781       code[0] |= ((pos >>  2) & 0xffff) << 11;
 1782       code[1] |= ((pos >> 18) & 0x003f) << 14;
 1783 
 1784       RelocEntry::Type relocTy;
 1785 
 1786       relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
 1787 
 1788       addReloc(relocTy, 0, pos, 0x07fff800, 9);
 1789       addReloc(relocTy, 1, pos, 0x000fc000, -4);
 1790    }
 1791 }
 1792 
 1793 void
 1794 CodeEmitterNV50::emitBAR(const Instruction *i)
 1795 {
 1796    ImmediateValue *barId = i->getSrc(0)->asImm();
 1797    assert(barId);
 1798 
 1799    code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
 1800    code[1] = 0x00004000;
 1801 
 1802    if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
 1803       code[0] |= 1 << 26;
 1804 }
 1805 
 1806 void
 1807 CodeEmitterNV50::emitATOM(const Instruction *i)
 1808 {
 1809    uint8_t subOp;
 1810    switch (i->subOp) {
 1811    case NV50_IR_SUBOP_ATOM_ADD:  subOp = 0x0; break;
 1812    case NV50_IR_SUBOP_ATOM_MIN:  subOp = 0x7; break;
 1813    case NV50_IR_SUBOP_ATOM_MAX:  subOp = 0x6; break;
 1814    case NV50_IR_SUBOP_ATOM_INC:  subOp = 0x4; break;
 1815    case NV50_IR_SUBOP_ATOM_DEC:  subOp = 0x5; break;
 1816    case NV50_IR_SUBOP_ATOM_AND:  subOp = 0xa; break;
 1817    case NV50_IR_SUBOP_ATOM_OR:   subOp = 0xb; break;
 1818    case NV50_IR_SUBOP_ATOM_XOR:  subOp = 0xc; break;
 1819    case NV50_IR_SUBOP_ATOM_CAS:  subOp = 0x2; break;
 1820    case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
 1821    default:
 1822       assert(!"invalid subop");
 1823       return;
 1824    }
 1825    code[0] = 0xd0000001;
 1826    code[1] = 0xe0c00000 | (subOp << 2);
 1827    if (isSignedType(i->dType))
 1828       code[1] |= 1 << 21;
 1829 
 1830    // args
 1831    emitFlagsRd(i);
 1832    setDst(i, 0);
 1833    setSrc(i, 1, 1);
 1834    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
 1835       setSrc(i, 2, 2);
 1836 
 1837    // g[] pointer
 1838    code[0] |= i->getSrc(0)->reg.fileIndex << 23;
 1839    srcId(i->getIndirect(0, 0), 9);
 1840 }
 1841 
// Encode a single instruction into the output buffer.
//
// Dispatches on insn->op to the matching emit* helper, then applies the
// join/exit bits, and finally advances the write pointer and codeSize by
// the instruction's encoding size.
//
// Returns false without advancing the buffer if the instruction has no
// encoding size, does not fit into the remaining buffer, or has an operation
// that should have been eliminated or lowered earlier or is unknown.
bool
CodeEmitterNV50::emitInstruction(Instruction *insn)
{
   if (!insn->encSize) {
      ERROR("skipping unencodable instruction: "); insn->print();
      return false;
   } else
   if (codeSize + insn->encSize > codeSizeLimit) {
      ERROR("code emitter output buffer too small\n");
      return false;
   }

   if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
      INFO("EMIT: "); insn->print();
   }

   switch (insn->op) {
   case OP_MOV:
      emitMOV(insn);
      break;
   // EXIT and JOIN are emitted as NOPs; their flag bits are added after the
   // switch.
   case OP_EXIT:
   case OP_NOP:
   case OP_JOIN:
      emitNOP();
      break;
   case OP_VFETCH:
   case OP_LOAD:
      emitLOAD(insn);
      break;
   case OP_EXPORT:
   case OP_STORE:
      emitSTORE(insn);
      break;
   case OP_PFETCH:
      emitPFETCH(insn);
      break;
   case OP_RDSV:
      emitRDSV(insn);
      break;
   case OP_LINTERP:
   case OP_PINTERP:
      emitINTERP(insn);
      break;
   // Arithmetic ops pick the emitter by destination type; adds that write
   // an address register have their own encoding.
   case OP_ADD:
   case OP_SUB:
      if (insn->dType == TYPE_F64)
         emitDADD(insn);
      else if (isFloatType(insn->dType))
         emitFADD(insn);
      else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
         emitAADD(insn);
      else
         emitUADD(insn);
      break;
   case OP_MUL:
      if (insn->dType == TYPE_F64)
         emitDMUL(insn);
      else if (isFloatType(insn->dType))
         emitFMUL(insn);
      else
         emitIMUL(insn);
      break;
   case OP_MAD:
   case OP_FMA:
      if (insn->dType == TYPE_F64)
         emitDMAD(insn);
      else if (isFloatType(insn->dType))
         emitFMAD(insn);
      else
         emitIMAD(insn);
      break;
   case OP_SAD:
      emitISAD(insn);
      break;
   case OP_NOT:
      emitNOT(insn);
      break;
   case OP_AND:
   case OP_OR:
   case OP_XOR:
      emitLogicOp(insn);
      break;
   case OP_SHL:
   case OP_SHR:
      emitShift(insn);
      break;
   case OP_SET:
      emitSET(insn);
      break;
   case OP_MIN:
   case OP_MAX:
      emitMINMAX(insn);
      break;
   // These are all encoded as conversions.
   case OP_CEIL:
   case OP_FLOOR:
   case OP_TRUNC:
   case OP_ABS:
   case OP_NEG:
   case OP_SAT:
      emitCVT(insn);
      break;
   // A CVT to an address register is an ARL without shift; a CVT involving
   // the flags file or reading an address register is emitted as a MOV.
   case OP_CVT:
      if (insn->def(0).getFile() == FILE_ADDRESS)
         emitARL(insn, 0);
      else
      if (insn->def(0).getFile() == FILE_FLAGS ||
          insn->src(0).getFile() == FILE_FLAGS ||
          insn->src(0).getFile() == FILE_ADDRESS)
         emitMOV(insn);
      else
         emitCVT(insn);
      break;
   // Special function ops; the second argument is the sub-operation passed
   // to emitSFnOp.
   case OP_RCP:
      emitSFnOp(insn, 0);
      break;
   case OP_RSQ:
      emitSFnOp(insn, 2);
      break;
   case OP_LG2:
      emitSFnOp(insn, 3);
      break;
   case OP_SIN:
      emitSFnOp(insn, 4);
      break;
   case OP_COS:
      emitSFnOp(insn, 5);
      break;
   case OP_EX2:
      emitSFnOp(insn, 6);
      break;
   case OP_PRESIN:
   case OP_PREEX2:
      emitPreOp(insn);
      break;
   case OP_TEX:
   case OP_TXB:
   case OP_TXL:
   case OP_TXF:
   case OP_TXG:
   case OP_TXLQ:
      emitTEX(insn->asTex());
      break;
   case OP_TXQ:
      emitTXQ(insn->asTex());
      break;
   case OP_TEXPREP:
      emitTEXPREP(insn->asTex());
      break;
   case OP_EMIT:
   case OP_RESTART:
      emitOUT(insn);
      break;
   // Control flow ops; the second argument is the flow opcode passed to
   // emitFlow.
   case OP_DISCARD:
      emitFlow(insn, 0x0);
      break;
   case OP_BRA:
      emitFlow(insn, 0x1);
      break;
   case OP_CALL:
      emitFlow(insn, 0x2);
      break;
   case OP_RET:
      emitFlow(insn, 0x3);
      break;
   case OP_PREBREAK:
      emitFlow(insn, 0x4);
      break;
   case OP_BREAK:
      emitFlow(insn, 0x5);
      break;
   case OP_QUADON:
      emitFlow(insn, 0x6);
      break;
   case OP_QUADPOP:
      emitFlow(insn, 0x7);
      break;
   case OP_JOINAT:
      emitFlow(insn, 0xa);
      break;
   case OP_PRERET:
      emitFlow(insn, 0xd);
      break;
   case OP_QUADOP:
      emitQUADOP(insn, insn->lanes, insn->subOp);
      break;
   // Derivatives are quad operations with fixed arguments; a neg modifier
   // on the source selects the alternate constant.
   case OP_DFDX:
      emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
      break;
   case OP_DFDY:
      emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
      break;
   case OP_ATOM:
      emitATOM(insn);
      break;
   case OP_BAR:
      emitBAR(insn);
      break;
   case OP_PHI:
   case OP_UNION:
   case OP_CONSTRAINT:
      ERROR("operation should have been eliminated\n");
      return false;
   case OP_EXP:
   case OP_LOG:
   case OP_SQRT:
   case OP_POW:
   case OP_SELP:
   case OP_SLCT:
   case OP_TXD:
   case OP_PRECONT:
   case OP_CONT:
   case OP_POPCNT:
   case OP_INSBF:
   case OP_EXTBF:
      ERROR("operation should have been lowered\n");
      return false;
   default:
      ERROR("unknown op: %u\n", insn->op);
      return false;
   }
   // Join takes precedence over exit; each sets one bit in the high word.
   if (insn->join || insn->op == OP_JOIN)
      code[1] |= 0x2;
   else
   if (insn->exit || insn->op == OP_EXIT)
      code[1] |= 0x1;

   // Bit 0 of the low word must be set exactly for 8 byte encodings.
   assert((insn->encSize == 8) == (code[0] & 1));

   // Advance the write pointer (in 32 bit words) and the byte count.
   code += insn->encSize / 4;
   codeSize += insn->encSize;
   return true;
}
 2074 
 2075 uint32_t
 2076 CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
 2077 {
 2078    const Target::OpInfo &info = targ->getOpInfo(i);
 2079 
 2080    if (info.minEncSize > 4 || i->dType == TYPE_F64)
 2081       return 8;
 2082 
 2083    // check constraints on dst and src operands
 2084    for (int d = 0; i->defExists(d); ++d) {
 2085       if (i->def(d).rep()->reg.data.id > 63 ||
 2086           i->def(d).rep()->reg.file != FILE_GPR)
 2087          return 8;
 2088    }
 2089 
 2090    for (int s = 0; i->srcExists(s); ++s) {
 2091       DataFile sf = i->src(s).getFile();
 2092       if (sf != FILE_GPR)
 2093          if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
 2094             return 8;
 2095       if (i->src(s).rep()->reg.data.id > 63)
 2096          return 8;
 2097    }
 2098 
 2099    // check modifiers & rounding
 2100    if (i->join || i->lanes != 0xf || i->exit)
 2101       return 8;
 2102    if (i->op == OP_MUL && i->rnd != ROUND_N)
 2103       return 8;
 2104 
 2105    if (i->asTex())
 2106       return 8; // TODO: short tex encoding
 2107 
 2108    // check constraints on short MAD
 2109    if (info.srcNr >= 2 && i->srcExists(2)) {
 2110       if (!i->defExists(0) ||
 2111           (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
 2112           DDATA(i->def(0)).id != SDATA(i->src(2)).id)
 2113          return 8;
 2114    }
 2115 
 2116    return info.minEncSize;
 2117 }
 2118 
 2119 // Change the encoding size of an instruction after BBs have been scheduled.
 2120 static void
 2121 makeInstructionLong(Instruction *insn)
 2122 {
 2123    if (insn->encSize == 8)
 2124       return;
 2125    Function *fn = insn->bb->getFunction();
 2126    int n = 0;
 2127    int adj = 4;
 2128 
 2129    for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
 2130 
 2131    if (n & 1) {
 2132       adj = 8;
 2133       insn->next->encSize = 8;
 2134    } else
 2135    if (insn->prev && insn->prev->encSize == 4) {
 2136       adj = 8;
 2137       insn->prev->encSize = 8;
 2138    }
 2139    insn->encSize = 8;
 2140 
 2141    for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
 2142       fn->bbArray[i]->binPos += adj;
 2143    }
 2144    fn->binSize += adj;
 2145    insn->bb->binSize += adj;
 2146 }
 2147 
 2148 static bool
 2149 trySetExitModifier(Instruction *insn)
 2150 {
 2151    if (insn->op == OP_DISCARD ||
 2152        insn->op == OP_QUADON ||
 2153        insn->op == OP_QUADPOP)
 2154       return false;
 2155    for (int s = 0; insn->srcExists(s); ++s)
 2156       if (insn->src(s).getFile() == FILE_IMMEDIATE)
 2157          return false;
 2158    if (insn->asFlow()) {
 2159       if (insn->op == OP_CALL) // side effects !
 2160          return false;
 2161       if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
 2162          return false;
 2163       insn->op = OP_EXIT;
 2164    }
 2165    insn->exit = 1;
 2166    makeInstructionLong(insn);
 2167    return true;
 2168 }
 2169 
 2170 static void
 2171 replaceExitWithModifier(Function *func)
 2172 {
 2173    BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
 2174 
 2175    if (!epilogue->getExit() ||
 2176        epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
 2177       return;
 2178 
 2179    if (epilogue->getEntry()->op != OP_EXIT) {
 2180       Instruction *insn = epilogue->getExit()->prev;
 2181       if (!insn || !trySetExitModifier(insn))
 2182          return;
 2183       insn->exit = 1;
 2184    } else {
 2185       for (Graph::EdgeIterator ei = func->cfgExit->incident();
 2186            !ei.end(); ei.next()) {
 2187          BasicBlock *bb = BasicBlock::get(ei.getNode());
 2188          Instruction *i = bb->getExit();
 2189 
 2190          if (!i || !trySetExitModifier(i))
 2191             return;
 2192       }
 2193    }
 2194 
 2195    int adj = epilogue->getExit()->encSize;
 2196    epilogue->binSize -= adj;
 2197    func->binSize -= adj;
 2198    delete_Instruction(func->getProgram(), epilogue->getExit());
 2199 
 2200    // There may be BB's that are laid out after the exit block
 2201    for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
 2202       func->bbArray[i]->binPos -= adj;
 2203    }
 2204 }
 2205 
 2206 void
 2207 CodeEmitterNV50::prepareEmission(Function *func)
 2208 {
 2209    CodeEmitter::prepareEmission(func);
 2210 
 2211    replaceExitWithModifier(func);
 2212 }
 2213 
 2214 CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
 2215    CodeEmitter(target), targNV50(target)
 2216 {
 2217    targ = target; // specialized
 2218    code = NULL;
 2219    codeSize = codeSizeLimit = 0;
 2220    relocInfo = NULL;
 2221 }
 2222 
 2223 CodeEmitter *
 2224 TargetNV50::getCodeEmitter(Program::Type type)
 2225 {
 2226    CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
 2227    emit->setProgramType(type);
 2228    return emit;
 2229 }
 2230 
 2231 } // namespace nv50_ir