"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp" (16 Sep 2020, 88041 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "nv50_ir_emit_nvc0.cpp", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright 2011 Christoph Bumiller
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice shall be included in
   12  * all copies or substantial portions of the Software.
   13  *
   14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
   18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   20  * OTHER DEALINGS IN THE SOFTWARE.
   21  */
   22 
   23 #include "codegen/nv50_ir_target_nvc0.h"
   24 
   25 namespace nv50_ir {
   26 
   27 // Argh, all these assertions ...
   28 
   29 class CodeEmitterNVC0 : public CodeEmitter
   30 {
   31 public:
   32    CodeEmitterNVC0(const TargetNVC0 *);
   33 
   34    virtual bool emitInstruction(Instruction *);
   35    virtual uint32_t getMinEncodingSize(const Instruction *) const;
   36    virtual void prepareEmission(Function *);
   37 
   38    inline void setProgramType(Program::Type pType) { progType = pType; }
   39 
   40 private:
   41    const TargetNVC0 *targNVC0;
   42 
   43    Program::Type progType;
   44 
   45    const bool writeIssueDelays;
   46 
   47 private:
   48    void emitForm_A(const Instruction *, uint64_t);
   49    void emitForm_B(const Instruction *, uint64_t);
   50    void emitForm_S(const Instruction *, uint32_t, bool pred);
   51 
   52    void emitPredicate(const Instruction *);
   53 
   54    void setAddress16(const ValueRef&);
   55    void setAddress24(const ValueRef&);
   56    void setAddressByFile(const ValueRef&);
   57    void setImmediate(const Instruction *, const int s); // needs op already set
   58    void setImmediateS8(const ValueRef&);
   59    void setSUConst16(const Instruction *, const int s);
   60    void setSUPred(const Instruction *, const int s);
   61    void setPDSTL(const Instruction *, const int d);
   62 
   63    void emitCondCode(CondCode cc, int pos);
   64    void emitInterpMode(const Instruction *);
   65    void emitLoadStoreType(DataType ty);
   66    void emitSUGType(DataType);
   67    void emitSUAddr(const TexInstruction *);
   68    void emitSUDim(const TexInstruction *);
   69    void emitCachingMode(CacheMode c);
   70 
   71    void emitShortSrc2(const ValueRef&);
   72 
   73    inline uint8_t getSRegEncoding(const ValueRef&);
   74 
   75    void roundMode_A(const Instruction *);
   76    void roundMode_C(const Instruction *);
   77    void roundMode_CS(const Instruction *);
   78 
   79    void emitNegAbs12(const Instruction *);
   80 
   81    void emitNOP(const Instruction *);
   82 
   83    void emitLOAD(const Instruction *);
   84    void emitSTORE(const Instruction *);
   85    void emitMOV(const Instruction *);
   86    void emitATOM(const Instruction *);
   87    void emitMEMBAR(const Instruction *);
   88    void emitCCTL(const Instruction *);
   89 
   90    void emitINTERP(const Instruction *);
   91    void emitAFETCH(const Instruction *);
   92    void emitPFETCH(const Instruction *);
   93    void emitVFETCH(const Instruction *);
   94    void emitEXPORT(const Instruction *);
   95    void emitOUT(const Instruction *);
   96 
   97    void emitUADD(const Instruction *);
   98    void emitFADD(const Instruction *);
   99    void emitDADD(const Instruction *);
  100    void emitUMUL(const Instruction *);
  101    void emitFMUL(const Instruction *);
  102    void emitDMUL(const Instruction *);
  103    void emitIMAD(const Instruction *);
  104    void emitISAD(const Instruction *);
  105    void emitSHLADD(const Instruction *a);
  106    void emitFMAD(const Instruction *);
  107    void emitDMAD(const Instruction *);
  108    void emitMADSP(const Instruction *);
  109 
  110    void emitNOT(Instruction *);
  111    void emitLogicOp(const Instruction *, uint8_t subOp);
  112    void emitPOPC(const Instruction *);
  113    void emitINSBF(const Instruction *);
  114    void emitEXTBF(const Instruction *);
  115    void emitBFIND(const Instruction *);
  116    void emitPERMT(const Instruction *);
  117    void emitShift(const Instruction *);
  118 
  119    void emitSFnOp(const Instruction *, uint8_t subOp);
  120 
  121    void emitCVT(Instruction *);
  122    void emitMINMAX(const Instruction *);
  123    void emitPreOp(const Instruction *);
  124 
  125    void emitSET(const CmpInstruction *);
  126    void emitSLCT(const CmpInstruction *);
  127    void emitSELP(const Instruction *);
  128 
  129    void emitTEXBAR(const Instruction *);
  130    void emitTEX(const TexInstruction *);
  131    void emitTEXCSAA(const TexInstruction *);
  132    void emitTXQ(const TexInstruction *);
  133 
  134    void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
  135 
  136    void emitFlow(const Instruction *);
  137    void emitBAR(const Instruction *);
  138 
  139    void emitSUCLAMPMode(uint16_t);
  140    void emitSUCalc(Instruction *);
  141    void emitSULDGB(const TexInstruction *);
  142    void emitSUSTGx(const TexInstruction *);
  143 
  144    void emitSULDB(const TexInstruction *);
  145    void emitSUSTx(const TexInstruction *);
  146    void emitSULEA(const TexInstruction *);
  147 
  148    void emitVSHL(const Instruction *);
  149    void emitVectorSubOp(const Instruction *);
  150 
  151    void emitPIXLD(const Instruction *);
  152 
  153    void emitSHFL(const Instruction *);
  154 
  155    void emitVOTE(const Instruction *);
  156 
  157    inline void defId(const ValueDef&, const int pos);
  158    inline void defId(const Instruction *, int d, const int pos);
  159    inline void srcId(const ValueRef&, const int pos);
  160    inline void srcId(const ValueRef *, const int pos);
  161    inline void srcId(const Instruction *, int s, const int pos);
  162    inline void srcAddr32(const ValueRef&, int pos, int shr);
  163 
  164    inline bool isLIMM(const ValueRef&, DataType ty);
  165 };
  166 
  167 // for better visibility
  168 #define HEX64(h, l) 0x##h##l##ULL
  169 
  170 #define SDATA(a) ((a).rep()->reg.data)
  171 #define DDATA(a) ((a).rep()->reg.data)
  172 
  173 void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
  174 {
  175    code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
  176 }
  177 
  178 void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
  179 {
  180    code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
  181 }
  182 
  183 void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
  184 {
  185    int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
  186    code[pos / 32] |= r << (pos % 32);
  187 }
  188 
  189 void
  190 CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
  191 {
  192    const uint32_t offset = SDATA(src).offset >> shr;
  193 
  194    code[pos / 32] |= offset << (pos % 32);
  195    if (pos && (pos < 32))
  196       code[1] |= offset >> (32 - pos);
  197 }
  198 
  199 void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
  200 {
  201    code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : 63) << (pos % 32);
  202 }
  203 
  204 void CodeEmitterNVC0::defId(const Instruction *insn, int d, const int pos)
  205 {
  206    if (insn->defExists(d))
  207       defId(insn->def(d), pos);
  208    else
  209       code[pos / 32] |= 63 << (pos % 32);
  210 }
  211 
  212 bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
  213 {
  214    const ImmediateValue *imm = ref.get()->asImm();
  215 
  216    if (ty == TYPE_F32)
  217       return imm && imm->reg.data.u32 & 0xfff;
  218    else
  219       return imm && (imm->reg.data.s32 > 0x7ffff ||
  220                      imm->reg.data.s32 < -0x80000);
  221 }
  222 
  223 void
  224 CodeEmitterNVC0::roundMode_A(const Instruction *insn)
  225 {
  226    switch (insn->rnd) {
  227    case ROUND_M: code[1] |= 1 << 23; break;
  228    case ROUND_P: code[1] |= 2 << 23; break;
  229    case ROUND_Z: code[1] |= 3 << 23; break;
  230    default:
  231       assert(insn->rnd == ROUND_N);
  232       break;
  233    }
  234 }
  235 
  236 void
  237 CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
  238 {
  239    if (i->src(1).mod.abs()) code[0] |= 1 << 6;
  240    if (i->src(0).mod.abs()) code[0] |= 1 << 7;
  241    if (i->src(1).mod.neg()) code[0] |= 1 << 8;
  242    if (i->src(0).mod.neg()) code[0] |= 1 << 9;
  243 }
  244 
  245 void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
  246 {
  247    uint8_t val;
  248 
  249    switch (cc) {
  250    case CC_LT:  val = 0x1; break;
  251    case CC_LTU: val = 0x9; break;
  252    case CC_EQ:  val = 0x2; break;
  253    case CC_EQU: val = 0xa; break;
  254    case CC_LE:  val = 0x3; break;
  255    case CC_LEU: val = 0xb; break;
  256    case CC_GT:  val = 0x4; break;
  257    case CC_GTU: val = 0xc; break;
  258    case CC_NE:  val = 0x5; break;
  259    case CC_NEU: val = 0xd; break;
  260    case CC_GE:  val = 0x6; break;
  261    case CC_GEU: val = 0xe; break;
  262    case CC_TR:  val = 0xf; break;
  263    case CC_FL:  val = 0x0; break;
  264 
  265    case CC_A:  val = 0x14; break;
  266    case CC_NA: val = 0x13; break;
  267    case CC_S:  val = 0x15; break;
  268    case CC_NS: val = 0x12; break;
  269    case CC_C:  val = 0x16; break;
  270    case CC_NC: val = 0x11; break;
  271    case CC_O:  val = 0x17; break;
  272    case CC_NO: val = 0x10; break;
  273 
  274    default:
  275       val = 0;
  276       assert(!"invalid condition code");
  277       break;
  278    }
  279    code[pos / 32] |= val << (pos % 32);
  280 }
  281 
  282 void
  283 CodeEmitterNVC0::emitPredicate(const Instruction *i)
  284 {
  285    if (i->predSrc >= 0) {
  286       assert(i->getPredicate()->reg.file == FILE_PREDICATE);
  287       srcId(i->src(i->predSrc), 10);
  288       if (i->cc == CC_NOT_P)
  289          code[0] |= 0x2000; // negate
  290    } else {
  291       code[0] |= 0x1c00;
  292    }
  293 }
  294 
  295 void
  296 CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
  297 {
  298    switch (src.getFile()) {
  299    case FILE_MEMORY_GLOBAL:
  300       srcAddr32(src, 26, 0);
  301       break;
  302    case FILE_MEMORY_LOCAL:
  303    case FILE_MEMORY_SHARED:
  304       setAddress24(src);
  305       break;
  306    default:
  307       assert(src.getFile() == FILE_MEMORY_CONST);
  308       setAddress16(src);
  309       break;
  310    }
  311 }
  312 
  313 void
  314 CodeEmitterNVC0::setAddress16(const ValueRef& src)
  315 {
  316    Symbol *sym = src.get()->asSym();
  317 
  318    assert(sym);
  319 
  320    code[0] |= (sym->reg.data.offset & 0x003f) << 26;
  321    code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
  322 }
  323 
  324 void
  325 CodeEmitterNVC0::setAddress24(const ValueRef& src)
  326 {
  327    Symbol *sym = src.get()->asSym();
  328 
  329    assert(sym);
  330 
  331    code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
  332    code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
  333 }
  334 
  335 void
  336 CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
  337 {
  338    const ImmediateValue *imm = i->src(s).get()->asImm();
  339    uint32_t u32;
  340 
  341    assert(imm);
  342    u32 = imm->reg.data.u32;
  343 
  344    if ((code[0] & 0xf) == 0x1) {
  345       // double immediate
  346       uint64_t u64 = imm->reg.data.u64;
  347       assert(!(u64 & 0x00000fffffffffffULL));
  348       assert(!(code[1] & 0xc000));
  349       code[0] |= ((u64 >> 44) & 0x3f) << 26;
  350       code[1] |= 0xc000 | (u64 >> 50);
  351    } else
  352    if ((code[0] & 0xf) == 0x2) {
  353       // LIMM
  354       code[0] |= (u32 & 0x3f) << 26;
  355       code[1] |= u32 >> 6;
  356    } else
  357    if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
  358       // integer immediate
  359       assert((u32 & 0xfff80000) == 0 || (u32 & 0xfff80000) == 0xfff80000);
  360       assert(!(code[1] & 0xc000));
  361       u32 &= 0xfffff;
  362       code[0] |= (u32 & 0x3f) << 26;
  363       code[1] |= 0xc000 | (u32 >> 6);
  364    } else {
  365       // float immediate
  366       assert(!(u32 & 0x00000fff));
  367       assert(!(code[1] & 0xc000));
  368       code[0] |= ((u32 >> 12) & 0x3f) << 26;
  369       code[1] |= 0xc000 | (u32 >> 18);
  370    }
  371 }
  372 
  373 void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
  374 {
  375    const ImmediateValue *imm = ref.get()->asImm();
  376 
  377    int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);
  378 
  379    assert(s8 == imm->reg.data.s32);
  380 
  381    code[0] |= (s8 & 0x3f) << 26;
  382    code[0] |= (s8 >> 6) << 8;
  383 }
  384 
  385 void CodeEmitterNVC0::setPDSTL(const Instruction *i, const int d)
  386 {
  387    assert(d < 0 || (i->defExists(d) && i->def(d).getFile() == FILE_PREDICATE));
  388 
  389    uint32_t pred = d >= 0 ? DDATA(i->def(d)).id : 7;
  390 
  391    code[0] |= (pred & 3) << 8;
  392    code[1] |= (pred & 4) << (26 - 2);
  393 }
  394 
  395 void
  396 CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
  397 {
  398    code[0] = opc;
  399    code[1] = opc >> 32;
  400 
  401    emitPredicate(i);
  402 
  403    defId(i->def(0), 14);
  404 
  405    int s1 = 26;
  406    if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
  407       s1 = 49;
  408 
  409    for (int s = 0; s < 3 && i->srcExists(s); ++s) {
  410       switch (i->getSrc(s)->reg.file) {
  411       case FILE_MEMORY_CONST:
  412          assert(!(code[1] & 0xc000));
  413          code[1] |= (s == 2) ? 0x8000 : 0x4000;
  414          code[1] |= i->getSrc(s)->reg.fileIndex << 10;
  415          setAddress16(i->src(s));
  416          break;
  417       case FILE_IMMEDIATE:
  418          assert(s == 1 ||
  419                 i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
  420          assert(!(code[1] & 0xc000));
  421          setImmediate(i, s);
  422          break;
  423       case FILE_GPR:
  424          if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
  425             break;
  426          srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
  427          break;
  428       default:
  429          if (i->op == OP_SELP) {
  430             // OP_SELP is used to implement shared+atomics on Fermi.
  431             assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);
  432             srcId(i->src(s), 49);
  433          }
  434          // ignore here, can be predicate or flags, but must not be address
  435          break;
  436       }
  437    }
  438 }
  439 
  440 void
  441 CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
  442 {
  443    code[0] = opc;
  444    code[1] = opc >> 32;
  445 
  446    emitPredicate(i);
  447 
  448    defId(i->def(0), 14);
  449 
  450    switch (i->src(0).getFile()) {
  451    case FILE_MEMORY_CONST:
  452       assert(!(code[1] & 0xc000));
  453       code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
  454       setAddress16(i->src(0));
  455       break;
  456    case FILE_IMMEDIATE:
  457       assert(!(code[1] & 0xc000));
  458       setImmediate(i, 0);
  459       break;
  460    case FILE_GPR:
  461       srcId(i->src(0), 26);
  462       break;
  463    default:
  464       // ignore here, can be predicate or flags, but must not be address
  465       break;
  466    }
  467 }
  468 
  469 void
  470 CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
  471 {
  472    code[0] = opc;
  473 
  474    int ss2a = 0;
  475    if (opc == 0x0d || opc == 0x0e)
  476       ss2a = 2;
  477 
  478    defId(i->def(0), 14);
  479    srcId(i->src(0), 20);
  480 
  481    assert(pred || (i->predSrc < 0));
  482    if (pred)
  483       emitPredicate(i);
  484 
  485    for (int s = 1; s < 3 && i->srcExists(s); ++s) {
  486       if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
  487          assert(!(code[0] & (0x300 >> ss2a)));
  488          switch (i->src(s).get()->reg.fileIndex) {
  489          case 0:  code[0] |= 0x100 >> ss2a; break;
  490          case 1:  code[0] |= 0x200 >> ss2a; break;
  491          case 16: code[0] |= 0x300 >> ss2a; break;
  492          default:
  493             ERROR("invalid c[] space for short form\n");
  494             break;
  495          }
  496          if (s == 1)
  497             code[0] |= i->getSrc(s)->reg.data.offset << 24;
  498          else
  499             code[0] |= i->getSrc(s)->reg.data.offset << 6;
  500       } else
  501       if (i->src(s).getFile() == FILE_IMMEDIATE) {
  502          assert(s == 1);
  503          setImmediateS8(i->src(s));
  504       } else
  505       if (i->src(s).getFile() == FILE_GPR) {
  506          srcId(i->src(s), (s == 1) ? 26 : 8);
  507       }
  508    }
  509 }
  510 
  511 void
  512 CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
  513 {
  514    if (src.getFile() == FILE_MEMORY_CONST) {
  515       switch (src.get()->reg.fileIndex) {
  516       case 0:  code[0] |= 0x100; break;
  517       case 1:  code[0] |= 0x200; break;
  518       case 16: code[0] |= 0x300; break;
  519       default:
  520          assert(!"unsupported file index for short op");
  521          break;
  522       }
  523       srcAddr32(src, 20, 2);
  524    } else {
  525       srcId(src, 20);
  526       assert(src.getFile() == FILE_GPR);
  527    }
  528 }
  529 
  530 void
  531 CodeEmitterNVC0::emitNOP(const Instruction *i)
  532 {
  533    code[0] = 0x000001e4;
  534    code[1] = 0x40000000;
  535    emitPredicate(i);
  536 }
  537 
  538 void
  539 CodeEmitterNVC0::emitFMAD(const Instruction *i)
  540 {
  541    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
  542 
  543    if (i->encSize == 8) {
  544       if (isLIMM(i->src(1), TYPE_F32)) {
  545          emitForm_A(i, HEX64(20000000, 00000002));
  546       } else {
  547          emitForm_A(i, HEX64(30000000, 00000000));
  548 
  549          if (i->src(2).mod.neg())
  550             code[0] |= 1 << 8;
  551       }
  552       roundMode_A(i);
  553 
  554       if (neg1)
  555          code[0] |= 1 << 9;
  556 
  557       if (i->saturate)
  558          code[0] |= 1 << 5;
  559 
  560       if (i->dnz)
  561          code[0] |= 1 << 7;
  562       else
  563       if (i->ftz)
  564          code[0] |= 1 << 6;
  565    } else {
  566       assert(!i->saturate && !i->src(2).mod.neg());
  567       emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
  568                  false);
  569       if (neg1)
  570          code[0] |= 1 << 4;
  571    }
  572 }
  573 
  574 void
  575 CodeEmitterNVC0::emitDMAD(const Instruction *i)
  576 {
  577    bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
  578 
  579    emitForm_A(i, HEX64(20000000, 00000001));
  580 
  581    if (i->src(2).mod.neg())
  582       code[0] |= 1 << 8;
  583 
  584    roundMode_A(i);
  585 
  586    if (neg1)
  587       code[0] |= 1 << 9;
  588 
  589    assert(!i->saturate);
  590    assert(!i->ftz);
  591 }
  592 
  593 void
  594 CodeEmitterNVC0::emitFMUL(const Instruction *i)
  595 {
  596    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
  597 
  598    assert(i->postFactor >= -3 && i->postFactor <= 3);
  599 
  600    if (i->encSize == 8) {
  601       if (isLIMM(i->src(1), TYPE_F32)) {
  602          assert(i->postFactor == 0); // constant folded, hopefully
  603          emitForm_A(i, HEX64(30000000, 00000002));
  604       } else {
  605          emitForm_A(i, HEX64(58000000, 00000000));
  606          roundMode_A(i);
  607          code[1] |= ((i->postFactor > 0) ?
  608                      (7 - i->postFactor) : (0 - i->postFactor)) << 17;
  609       }
  610       if (neg)
  611          code[1] ^= 1 << 25; // aliases with LIMM sign bit
  612 
  613       if (i->saturate)
  614          code[0] |= 1 << 5;
  615 
  616       if (i->dnz)
  617          code[0] |= 1 << 7;
  618       else
  619       if (i->ftz)
  620          code[0] |= 1 << 6;
  621    } else {
  622       assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
  623       emitForm_S(i, 0xa8, true);
  624    }
  625 }
  626 
  627 void
  628 CodeEmitterNVC0::emitDMUL(const Instruction *i)
  629 {
  630    bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
  631 
  632    emitForm_A(i, HEX64(50000000, 00000001));
  633    roundMode_A(i);
  634 
  635    if (neg)
  636       code[0] |= 1 << 9;
  637 
  638    assert(!i->saturate);
  639    assert(!i->ftz);
  640    assert(!i->dnz);
  641    assert(!i->postFactor);
  642 }
  643 
  644 void
  645 CodeEmitterNVC0::emitUMUL(const Instruction *i)
  646 {
  647    if (i->encSize == 8) {
  648       if (isLIMM(i->src(1), TYPE_U32)) {
  649          emitForm_A(i, HEX64(10000000, 00000002));
  650       } else {
  651          emitForm_A(i, HEX64(50000000, 00000003));
  652       }
  653       if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
  654          code[0] |= 1 << 6;
  655       if (i->sType == TYPE_S32)
  656          code[0] |= 1 << 5;
  657       if (i->dType == TYPE_S32)
  658          code[0] |= 1 << 7;
  659    } else {
  660       emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
  661 
  662       if (i->sType == TYPE_S32)
  663          code[0] |= 1 << 6;
  664    }
  665 }
  666 
  667 void
  668 CodeEmitterNVC0::emitFADD(const Instruction *i)
  669 {
  670    if (i->encSize == 8) {
  671       if (isLIMM(i->src(1), TYPE_F32)) {
  672          assert(!i->saturate);
  673          emitForm_A(i, HEX64(28000000, 00000002));
  674 
  675          code[0] |= i->src(0).mod.abs() << 7;
  676          code[0] |= i->src(0).mod.neg() << 9;
  677 
  678          if (i->src(1).mod.abs())
  679             code[1] &= 0xfdffffff;
  680          if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))
  681             code[1] ^= 0x02000000;
  682       } else {
  683          emitForm_A(i, HEX64(50000000, 00000000));
  684 
  685          roundMode_A(i);
  686          if (i->saturate)
  687             code[1] |= 1 << 17;
  688 
  689          emitNegAbs12(i);
  690          if (i->op == OP_SUB) code[0] ^= 1 << 8;
  691       }
  692       if (i->ftz)
  693          code[0] |= 1 << 5;
  694    } else {
  695       assert(!i->saturate && i->op != OP_SUB &&
  696              !i->src(0).mod.abs() &&
  697              !i->src(1).mod.neg() && !i->src(1).mod.abs());
  698 
  699       emitForm_S(i, 0x49, true);
  700 
  701       if (i->src(0).mod.neg())
  702          code[0] |= 1 << 7;
  703    }
  704 }
  705 
  706 void
  707 CodeEmitterNVC0::emitDADD(const Instruction *i)
  708 {
  709    assert(i->encSize == 8);
  710    emitForm_A(i, HEX64(48000000, 00000001));
  711    roundMode_A(i);
  712    assert(!i->saturate);
  713    assert(!i->ftz);
  714    emitNegAbs12(i);
  715    if (i->op == OP_SUB)
  716       code[0] ^= 1 << 8;
  717 }
  718 
  719 void
  720 CodeEmitterNVC0::emitUADD(const Instruction *i)
  721 {
  722    uint32_t addOp = 0;
  723 
  724    assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
  725 
  726    if (i->src(0).mod.neg())
  727       addOp |= 0x200;
  728    if (i->src(1).mod.neg())
  729       addOp |= 0x100;
  730    if (i->op == OP_SUB)
  731       addOp ^= 0x100;
  732 
  733    assert(addOp != 0x300); // would be add-plus-one
  734 
  735    if (i->encSize == 8) {
  736       if (isLIMM(i->src(1), TYPE_U32)) {
  737          emitForm_A(i, HEX64(08000000, 00000002));
  738          if (i->flagsDef >= 0)
  739             code[1] |= 1 << 26; // write carry
  740       } else {
  741          emitForm_A(i, HEX64(48000000, 00000003));
  742          if (i->flagsDef >= 0)
  743             code[1] |= 1 << 16; // write carry
  744       }
  745       code[0] |= addOp;
  746 
  747       if (i->saturate)
  748          code[0] |= 1 << 5;
  749       if (i->flagsSrc >= 0) // add carry
  750          code[0] |= 1 << 6;
  751    } else {
  752       assert(!(addOp & 0x100));
  753       emitForm_S(i, (addOp >> 3) |
  754                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
  755    }
  756 }
  757 
  758 void
  759 CodeEmitterNVC0::emitIMAD(const Instruction *i)
  760 {
  761    uint8_t addOp =
  762       i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
  763 
  764    assert(i->encSize == 8);
  765    emitForm_A(i, HEX64(20000000, 00000003));
  766 
  767    assert(addOp != 3);
  768    code[0] |= addOp << 8;
  769 
  770    if (isSignedType(i->dType))
  771       code[0] |= 1 << 7;
  772    if (isSignedType(i->sType))
  773       code[0] |= 1 << 5;
  774 
  775    code[1] |= i->saturate << 24;
  776 
  777    if (i->flagsDef >= 0) code[1] |= 1 << 16;
  778    if (i->flagsSrc >= 0) code[1] |= 1 << 23;
  779 
  780    if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
  781       code[0] |= 1 << 6;
  782 }
  783 
  784 void
  785 CodeEmitterNVC0::emitSHLADD(const Instruction *i)
  786 {
  787    uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
  788    const ImmediateValue *imm = i->src(1).get()->asImm();
  789    assert(imm);
  790 
  791    code[0] = 0x00000003;
  792    code[1] = 0x40000000 | addOp << 23;
  793 
  794    emitPredicate(i);
  795 
  796    defId(i->def(0), 14);
  797    srcId(i->src(0), 20);
  798 
  799    if (i->flagsDef >= 0)
  800       code[1] |= 1 << 16;
  801 
  802    assert(!(imm->reg.data.u32 & 0xffffffe0));
  803    code[0] |= imm->reg.data.u32 << 5;
  804 
  805    switch (i->src(2).getFile()) {
  806    case FILE_GPR:
  807       srcId(i->src(2), 26);
  808       break;
  809    case FILE_MEMORY_CONST:
  810       code[1] |= 0x4000;
  811       code[1] |= i->getSrc(2)->reg.fileIndex << 10;
  812       setAddress16(i->src(2));
  813       break;
  814    case FILE_IMMEDIATE:
  815       setImmediate(i, 2);
  816       break;
  817    default:
  818       assert(!"bad src2 file");
  819       break;
  820    }
  821 }
  822 
  823 void
  824 CodeEmitterNVC0::emitMADSP(const Instruction *i)
  825 {
  826    assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
  827 
  828    emitForm_A(i, HEX64(00000000, 00000003));
  829 
  830    if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
  831       code[1] |= 0x01800000;
  832    } else {
  833       code[0] |= (i->subOp & 0x00f) << 7;
  834       code[0] |= (i->subOp & 0x0f0) << 1;
  835       code[0] |= (i->subOp & 0x100) >> 3;
  836       code[0] |= (i->subOp & 0x200) >> 2;
  837       code[1] |= (i->subOp & 0xc00) << 13;
  838    }
  839 
  840    if (i->flagsDef >= 0)
  841       code[1] |= 1 << 16;
  842 }
  843 
  844 void
  845 CodeEmitterNVC0::emitISAD(const Instruction *i)
  846 {
  847    assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
  848    assert(i->encSize == 8);
  849 
  850    emitForm_A(i, HEX64(38000000, 00000003));
  851 
  852    if (i->dType == TYPE_S32)
  853       code[0] |= 1 << 5;
  854 }
  855 
  856 void
  857 CodeEmitterNVC0::emitNOT(Instruction *i)
  858 {
  859    assert(i->encSize == 8);
  860    if (i->getPredicate())
  861       i->moveSources(1, 1);
  862    i->setSrc(1, i->src(0));
  863    emitForm_A(i, HEX64(68000000, 000001c3));
  864 }
  865 
  866 void
  867 CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
  868 {
  869    if (i->def(0).getFile() == FILE_PREDICATE) {
  870       code[0] = 0x00000004 | (subOp << 30);
  871       code[1] = 0x0c000000;
  872 
  873       emitPredicate(i);
  874 
  875       defId(i->def(0), 17);
  876       srcId(i->src(0), 20);
  877       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
  878       srcId(i->src(1), 26);
  879       if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
  880 
  881       if (i->defExists(1)) {
  882          defId(i->def(1), 14);
  883       } else {
  884          code[0] |= 7 << 14;
  885       }
  886       // (a OP b) OP c
  887       if (i->predSrc != 2 && i->srcExists(2)) {
  888          code[1] |= subOp << 21;
  889          srcId(i->src(2), 49);
  890          if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
  891       } else {
  892          code[1] |= 0x000e0000;
  893       }
  894    } else
  895    if (i->encSize == 8) {
  896       if (isLIMM(i->src(1), TYPE_U32)) {
  897          emitForm_A(i, HEX64(38000000, 00000002));
  898 
  899          if (i->flagsDef >= 0)
  900             code[1] |= 1 << 26;
  901       } else {
  902          emitForm_A(i, HEX64(68000000, 00000003));
  903 
  904          if (i->flagsDef >= 0)
  905             code[1] |= 1 << 16;
  906       }
  907       code[0] |= subOp << 6;
  908 
  909       if (i->flagsSrc >= 0) // carry
  910          code[0] |= 1 << 5;
  911 
  912       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
  913       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
  914    } else {
  915       emitForm_S(i, (subOp << 5) |
  916                  ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
  917    }
  918 }
  919 
  920 void
  921 CodeEmitterNVC0::emitPOPC(const Instruction *i)
  922 {
  923    emitForm_A(i, HEX64(54000000, 00000004));
  924 
  925    if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
  926    if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
  927 }
  928 
  929 void
  930 CodeEmitterNVC0::emitINSBF(const Instruction *i)
  931 {
  932    emitForm_A(i, HEX64(28000000, 00000003));
  933 }
  934 
  935 void
  936 CodeEmitterNVC0::emitEXTBF(const Instruction *i)
  937 {
  938    emitForm_A(i, HEX64(70000000, 00000003));
  939 
  940    if (i->dType == TYPE_S32)
  941       code[0] |= 1 << 5;
  942    if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
  943       code[0] |= 1 << 8;
  944 }
  945 
  946 void
  947 CodeEmitterNVC0::emitBFIND(const Instruction *i)
  948 {
  949    emitForm_B(i, HEX64(78000000, 00000003));
  950 
  951    if (i->dType == TYPE_S32)
  952       code[0] |= 1 << 5;
  953    if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
  954       code[0] |= 1 << 8;
  955    if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
  956       code[0] |= 1 << 6;
  957 }
  958 
  959 void
  960 CodeEmitterNVC0::emitPERMT(const Instruction *i)
  961 {
  962    emitForm_A(i, HEX64(24000000, 00000004));
  963 
  964    code[0] |= i->subOp << 5;
  965 }
  966 
  967 void
  968 CodeEmitterNVC0::emitShift(const Instruction *i)
  969 {
  970    if (i->op == OP_SHR) {
  971       emitForm_A(i, HEX64(58000000, 00000003)
  972                  | (isSignedType(i->dType) ? 0x20 : 0x00));
  973    } else {
  974       emitForm_A(i, HEX64(60000000, 00000003));
  975    }
  976 
  977    if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
  978       code[0] |= 1 << 9;
  979 }
  980 
  981 void
  982 CodeEmitterNVC0::emitPreOp(const Instruction *i)
  983 {
  984    if (i->encSize == 8) {
  985       emitForm_B(i, HEX64(60000000, 00000000));
  986 
  987       if (i->op == OP_PREEX2)
  988          code[0] |= 0x20;
  989 
  990       if (i->src(0).mod.abs()) code[0] |= 1 << 6;
  991       if (i->src(0).mod.neg()) code[0] |= 1 << 8;
  992    } else {
  993       emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
  994    }
  995 }
  996 
  997 void
  998 CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
  999 {
 1000    if (i->encSize == 8) {
 1001       code[0] = 0x00000000 | (subOp << 26);
 1002       code[1] = 0xc8000000;
 1003 
 1004       emitPredicate(i);
 1005 
 1006       defId(i->def(0), 14);
 1007       srcId(i->src(0), 20);
 1008 
 1009       assert(i->src(0).getFile() == FILE_GPR);
 1010 
 1011       if (i->saturate) code[0] |= 1 << 5;
 1012 
 1013       if (i->src(0).mod.abs()) code[0] |= 1 << 7;
 1014       if (i->src(0).mod.neg()) code[0] |= 1 << 9;
 1015    } else {
 1016       emitForm_S(i, 0x80000008 | (subOp << 26), true);
 1017 
 1018       assert(!i->src(0).mod.neg());
 1019       if (i->src(0).mod.abs()) code[0] |= 1 << 30;
 1020    }
 1021 }
 1022 
 1023 void
 1024 CodeEmitterNVC0::emitMINMAX(const Instruction *i)
 1025 {
 1026    uint64_t op;
 1027 
 1028    assert(i->encSize == 8);
 1029 
 1030    op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
 1031 
 1032    if (i->ftz)
 1033       op |= 1 << 5;
 1034    else
 1035    if (!isFloatType(i->dType)) {
 1036       op |= isSignedType(i->dType) ? 0x23 : 0x03;
 1037       op |= i->subOp << 6;
 1038    }
 1039    if (i->dType == TYPE_F64)
 1040       op |= 0x01;
 1041 
 1042    emitForm_A(i, op);
 1043    emitNegAbs12(i);
 1044 
 1045    if (i->flagsDef >= 0)
 1046       code[1] |= 1 << 16;
 1047 }
 1048 
 1049 void
 1050 CodeEmitterNVC0::roundMode_C(const Instruction *i)
 1051 {
 1052    switch (i->rnd) {
 1053    case ROUND_M:  code[1] |= 1 << 17; break;
 1054    case ROUND_P:  code[1] |= 2 << 17; break;
 1055    case ROUND_Z:  code[1] |= 3 << 17; break;
 1056    case ROUND_NI: code[0] |= 1 << 7; break;
 1057    case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
 1058    case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
 1059    case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
 1060    case ROUND_N: break;
 1061    default:
 1062       assert(!"invalid round mode");
 1063       break;
 1064    }
 1065 }
 1066 
 1067 void
 1068 CodeEmitterNVC0::roundMode_CS(const Instruction *i)
 1069 {
 1070    switch (i->rnd) {
 1071    case ROUND_M:
 1072    case ROUND_MI: code[0] |= 1 << 16; break;
 1073    case ROUND_P:
 1074    case ROUND_PI: code[0] |= 2 << 16; break;
 1075    case ROUND_Z:
 1076    case ROUND_ZI: code[0] |= 3 << 16; break;
 1077    default:
 1078       break;
 1079    }
 1080 }
 1081 
 1082 void
 1083 CodeEmitterNVC0::emitCVT(Instruction *i)
 1084 {
 1085    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
 1086    DataType dType;
 1087 
 1088    switch (i->op) {
 1089    case OP_CEIL:  i->rnd = f2f ? ROUND_PI : ROUND_P; break;
 1090    case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
 1091    case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
 1092    default:
 1093       break;
 1094    }
 1095 
 1096    const bool sat = (i->op == OP_SAT) || i->saturate;
 1097    const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
 1098    const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
 1099 
 1100    if (i->op == OP_NEG && i->dType == TYPE_U32)
 1101       dType = TYPE_S32;
 1102    else
 1103       dType = i->dType;
 1104 
 1105    if (i->encSize == 8) {
 1106       emitForm_B(i, HEX64(10000000, 00000004));
 1107 
 1108       roundMode_C(i);
 1109 
 1110       // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
 1111       code[0] |= util_logbase2(typeSizeof(dType)) << 20;
 1112       code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
 1113 
 1114       // for 8/16 source types, the byte/word is in subOp. word 1 is
 1115       // represented as 2.
 1116       if (!isFloatType(i->sType))
 1117          code[1] |= i->subOp << 0x17;
 1118       else
 1119          code[1] |= i->subOp << 0x18;
 1120 
 1121       if (sat)
 1122          code[0] |= 0x20;
 1123       if (abs)
 1124          code[0] |= 1 << 6;
 1125       if (neg && i->op != OP_ABS)
 1126          code[0] |= 1 << 8;
 1127 
 1128       if (i->ftz)
 1129          code[1] |= 1 << 23;
 1130 
 1131       if (isSignedIntType(dType))
 1132          code[0] |= 0x080;
 1133       if (isSignedIntType(i->sType))
 1134          code[0] |= 0x200;
 1135 
 1136       if (isFloatType(dType)) {
 1137          if (!isFloatType(i->sType))
 1138             code[1] |= 0x08000000;
 1139       } else {
 1140          if (isFloatType(i->sType))
 1141             code[1] |= 0x04000000;
 1142          else
 1143             code[1] |= 0x0c000000;
 1144       }
 1145    } else {
 1146       if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
 1147          code[0] = 0x298;
 1148       } else
 1149       if (isFloatType(dType)) {
 1150          if (isFloatType(i->sType))
 1151             code[0] = 0x098;
 1152          else
 1153             code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
 1154       } else {
 1155          assert(isFloatType(i->sType));
 1156 
 1157          code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
 1158       }
 1159 
 1160       if (neg) code[0] |= 1 << 16;
 1161       if (sat) code[0] |= 1 << 18;
 1162       if (abs) code[0] |= 1 << 19;
 1163 
 1164       roundMode_CS(i);
 1165    }
 1166 }
 1167 
 1168 void
 1169 CodeEmitterNVC0::emitSET(const CmpInstruction *i)
 1170 {
 1171    uint32_t hi;
 1172    uint32_t lo = 0;
 1173 
 1174    if (i->sType == TYPE_F64)
 1175       lo = 0x1;
 1176    else
 1177    if (!isFloatType(i->sType))
 1178       lo = 0x3;
 1179 
 1180    if (isSignedIntType(i->sType))
 1181       lo |= 0x20;
 1182    if (isFloatType(i->dType)) {
 1183       if (isFloatType(i->sType))
 1184          lo |= 0x20;
 1185       else
 1186          lo |= 0x80;
 1187    }
 1188 
 1189    switch (i->op) {
 1190    case OP_SET_AND: hi = 0x10000000; break;
 1191    case OP_SET_OR:  hi = 0x10200000; break;
 1192    case OP_SET_XOR: hi = 0x10400000; break;
 1193    default:
 1194       hi = 0x100e0000;
 1195       break;
 1196    }
 1197    emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);
 1198 
 1199    if (i->op != OP_SET)
 1200       srcId(i->src(2), 32 + 17);
 1201 
 1202    if (i->def(0).getFile() == FILE_PREDICATE) {
 1203       if (i->sType == TYPE_F32)
 1204          code[1] += 0x10000000;
 1205       else
 1206          code[1] += 0x08000000;
 1207 
 1208       code[0] &= ~0xfc000;
 1209       defId(i->def(0), 17);
 1210       if (i->defExists(1))
 1211          defId(i->def(1), 14);
 1212       else
 1213          code[0] |= 0x1c000;
 1214    }
 1215 
 1216    if (i->ftz)
 1217       code[1] |= 1 << 27;
 1218    if (i->flagsSrc >= 0)
 1219       code[0] |= 1 << 6;
 1220 
 1221    emitCondCode(i->setCond, 32 + 23);
 1222    emitNegAbs12(i);
 1223 }
 1224 
 1225 void
 1226 CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
 1227 {
 1228    uint64_t op;
 1229 
 1230    switch (i->dType) {
 1231    case TYPE_S32:
 1232       op = HEX64(30000000, 00000023);
 1233       break;
 1234    case TYPE_U32:
 1235       op = HEX64(30000000, 00000003);
 1236       break;
 1237    case TYPE_F32:
 1238       op = HEX64(38000000, 00000000);
 1239       break;
 1240    default:
 1241       assert(!"invalid type for SLCT");
 1242       op = 0;
 1243       break;
 1244    }
 1245    emitForm_A(i, op);
 1246 
 1247    CondCode cc = i->setCond;
 1248 
 1249    if (i->src(2).mod.neg())
 1250       cc = reverseCondCode(cc);
 1251 
 1252    emitCondCode(cc, 32 + 23);
 1253 
 1254    if (i->ftz)
 1255       code[0] |= 1 << 5;
 1256 }
 1257 
 1258 static void
 1259 selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 1260 {
 1261    int loc = entry->loc;
 1262    if (data.force_persample_interp)
 1263       code[loc + 1] |= 1 << 20;
 1264    else
 1265       code[loc + 1] &= ~(1 << 20);
 1266 }
 1267 
 1268 void CodeEmitterNVC0::emitSELP(const Instruction *i)
 1269 {
 1270    emitForm_A(i, HEX64(20000000, 00000004));
 1271 
 1272    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
 1273       code[1] |= 1 << 20;
 1274 
 1275    if (i->subOp == 1) {
 1276       addInterp(0, 0, selpFlip);
 1277    }
 1278 }
 1279 
 1280 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
 1281 {
 1282    code[0] = 0x00000006 | (i->subOp << 26);
 1283    code[1] = 0xf0000000;
 1284    emitPredicate(i);
 1285    emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
 1286 }
 1287 
 1288 void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
 1289 {
 1290    code[0] = 0x00000086;
 1291    code[1] = 0xd0000000;
 1292 
 1293    code[1] |= i->tex.r;
 1294    code[1] |= i->tex.s << 8;
 1295 
 1296    if (i->tex.liveOnly)
 1297       code[0] |= 1 << 9;
 1298 
 1299    defId(i->def(0), 14);
 1300    srcId(i->src(0), 20);
 1301 }
 1302 
 1303 static inline bool
 1304 isNextIndependentTex(const TexInstruction *i)
 1305 {
 1306    if (!i->next || !isTextureOp(i->next->op))
 1307       return false;
 1308    if (i->getDef(0)->interfers(i->next->getSrc(0)))
 1309       return false;
 1310    return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
 1311 }
 1312 
 1313 void
 1314 CodeEmitterNVC0::emitTEX(const TexInstruction *i)
 1315 {
 1316    code[0] = 0x00000006;
 1317 
 1318    if (isNextIndependentTex(i))
 1319       code[0] |= 0x080; // t mode
 1320    else
 1321       code[0] |= 0x100; // p mode
 1322 
 1323    if (i->tex.liveOnly)
 1324       code[0] |= 1 << 9;
 1325 
 1326    switch (i->op) {
 1327    case OP_TEX: code[1] = 0x80000000; break;
 1328    case OP_TXB: code[1] = 0x84000000; break;
 1329    case OP_TXL: code[1] = 0x86000000; break;
 1330    case OP_TXF: code[1] = 0x90000000; break;
 1331    case OP_TXG: code[1] = 0xa0000000; break;
 1332    case OP_TXLQ: code[1] = 0xb0000000; break;
 1333    case OP_TXD: code[1] = 0xe0000000; break;
 1334    default:
 1335       assert(!"invalid texture op");
 1336       break;
 1337    }
 1338    if (i->op == OP_TXF) {
 1339       if (!i->tex.levelZero)
 1340          code[1] |= 0x02000000;
 1341    } else
 1342    if (i->tex.levelZero) {
 1343       code[1] |= 0x02000000;
 1344    }
 1345 
 1346    if (i->op != OP_TXD && i->tex.derivAll)
 1347       code[1] |= 1 << 13;
 1348 
 1349    defId(i->def(0), 14);
 1350    srcId(i->src(0), 20);
 1351 
 1352    emitPredicate(i);
 1353 
 1354    if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
 1355 
 1356    code[1] |= i->tex.mask << 14;
 1357 
 1358    code[1] |= i->tex.r;
 1359    code[1] |= i->tex.s << 8;
 1360    if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
 1361       code[1] |= 1 << 18; // in 1st source (with array index)
 1362 
 1363    // texture target:
 1364    code[1] |= (i->tex.target.getDim() - 1) << 20;
 1365    if (i->tex.target.isCube())
 1366       code[1] += 2 << 20;
 1367    if (i->tex.target.isArray())
 1368       code[1] |= 1 << 19;
 1369    if (i->tex.target.isShadow())
 1370       code[1] |= 1 << 24;
 1371 
 1372    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
 1373 
 1374    if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
 1375       // lzero
 1376       if (i->op == OP_TXL)
 1377          code[1] &= ~(1 << 26);
 1378       else
 1379       if (i->op == OP_TXF)
 1380          code[1] &= ~(1 << 25);
 1381    }
 1382    if (i->tex.target == TEX_TARGET_2D_MS ||
 1383        i->tex.target == TEX_TARGET_2D_MS_ARRAY)
 1384       code[1] |= 1 << 23;
 1385 
 1386    if (i->tex.useOffsets == 1)
 1387       code[1] |= 1 << 22;
 1388    if (i->tex.useOffsets == 4)
 1389       code[1] |= 1 << 23;
 1390 
 1391    srcId(i, src1, 26);
 1392 }
 1393 
 1394 void
 1395 CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
 1396 {
 1397    code[0] = 0x00000086;
 1398    code[1] = 0xc0000000;
 1399 
 1400    switch (i->tex.query) {
 1401    case TXQ_DIMS:            code[1] |= 0 << 22; break;
 1402    case TXQ_TYPE:            code[1] |= 1 << 22; break;
 1403    case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
 1404    case TXQ_FILTER:          code[1] |= 3 << 22; break;
 1405    case TXQ_LOD:             code[1] |= 4 << 22; break;
 1406    case TXQ_BORDER_COLOUR:   code[1] |= 5 << 22; break;
 1407    default:
 1408       assert(!"invalid texture query");
 1409       break;
 1410    }
 1411 
 1412    code[1] |= i->tex.mask << 14;
 1413 
 1414    code[1] |= i->tex.r;
 1415    code[1] |= i->tex.s << 8;
 1416    if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
 1417       code[1] |= 1 << 18;
 1418 
 1419    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
 1420 
 1421    defId(i->def(0), 14);
 1422    srcId(i->src(0), 20);
 1423    srcId(i, src1, 26);
 1424 
 1425    emitPredicate(i);
 1426 }
 1427 
 1428 void
 1429 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
 1430 {
 1431    code[0] = 0x00000200 | (laneMask << 6); // dall
 1432    code[1] = 0x48000000 | qOp;
 1433 
 1434    defId(i->def(0), 14);
 1435    srcId(i->src(0), 20);
 1436    srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);
 1437 
 1438    emitPredicate(i);
 1439 }
 1440 
 1441 void
 1442 CodeEmitterNVC0::emitFlow(const Instruction *i)
 1443 {
 1444    const FlowInstruction *f = i->asFlow();
 1445 
 1446    unsigned mask; // bit 0: predicate, bit 1: target
 1447 
 1448    code[0] = 0x00000007;
 1449 
 1450    switch (i->op) {
 1451    case OP_BRA:
 1452       code[1] = f->absolute ? 0x00000000 : 0x40000000;
 1453       if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
 1454          code[0] |= 0x4000;
 1455       mask = 3;
 1456       break;
 1457    case OP_CALL:
 1458       code[1] = f->absolute ? 0x10000000 : 0x50000000;
 1459       if (f->indirect)
 1460          code[0] |= 0x4000; // indirect calls always use c[] source
 1461       mask = 2;
 1462       break;
 1463 
 1464    case OP_EXIT:    code[1] = 0x80000000; mask = 1; break;
 1465    case OP_RET:     code[1] = 0x90000000; mask = 1; break;
 1466    case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
 1467    case OP_BREAK:   code[1] = 0xa8000000; mask = 1; break;
 1468    case OP_CONT:    code[1] = 0xb0000000; mask = 1; break;
 1469 
 1470    case OP_JOINAT:   code[1] = 0x60000000; mask = 2; break;
 1471    case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
 1472    case OP_PRECONT:  code[1] = 0x70000000; mask = 2; break;
 1473    case OP_PRERET:   code[1] = 0x78000000; mask = 2; break;
 1474 
 1475    case OP_QUADON:  code[1] = 0xc0000000; mask = 0; break;
 1476    case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
 1477    case OP_BRKPT:   code[1] = 0xd0000000; mask = 0; break;
 1478    default:
 1479       assert(!"invalid flow operation");
 1480       return;
 1481    }
 1482 
 1483    if (mask & 1) {
 1484       emitPredicate(i);
 1485       if (i->flagsSrc < 0)
 1486          code[0] |= 0x1e0;
 1487    }
 1488 
 1489    if (!f)
 1490       return;
 1491 
 1492    if (f->allWarp)
 1493       code[0] |= 1 << 15;
 1494    if (f->limit)
 1495       code[0] |= 1 << 16;
 1496 
 1497    if (f->indirect) {
 1498       if (code[0] & 0x4000) {
 1499          assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
 1500          setAddress16(i->src(0));
 1501          code[1] |= i->getSrc(0)->reg.fileIndex << 10;
 1502          if (f->op == OP_BRA)
 1503             srcId(f->src(0).getIndirect(0), 20);
 1504       } else {
 1505          srcId(f, 0, 20);
 1506       }
 1507    }
 1508 
 1509    if (f->op == OP_CALL) {
 1510       if (f->indirect) {
 1511          // nothing
 1512       } else
 1513       if (f->builtin) {
 1514          assert(f->absolute);
 1515          uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
 1516          addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
 1517          addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
 1518       } else {
 1519          assert(!f->absolute);
 1520          int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
 1521          code[0] |= (pcRel & 0x3f) << 26;
 1522          code[1] |= (pcRel >> 6) & 0x3ffff;
 1523       }
 1524    } else
 1525    if (mask & 2) {
 1526       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
 1527       if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
 1528          pcRel += 8;
 1529       // currently we don't want absolute branches
 1530       assert(!f->absolute);
 1531       code[0] |= (pcRel & 0x3f) << 26;
 1532       code[1] |= (pcRel >> 6) & 0x3ffff;
 1533    }
 1534 }
 1535 
 1536 void
 1537 CodeEmitterNVC0::emitBAR(const Instruction *i)
 1538 {
 1539    Value *rDef = NULL, *pDef = NULL;
 1540 
 1541    switch (i->subOp) {
 1542    case NV50_IR_SUBOP_BAR_ARRIVE:   code[0] = 0x84; break;
 1543    case NV50_IR_SUBOP_BAR_RED_AND:  code[0] = 0x24; break;
 1544    case NV50_IR_SUBOP_BAR_RED_OR:   code[0] = 0x44; break;
 1545    case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
 1546    default:
 1547       code[0] = 0x04;
 1548       assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
 1549       break;
 1550    }
 1551    code[1] = 0x50000000;
 1552 
 1553    code[0] |= 63 << 14;
 1554    code[1] |= 7 << 21;
 1555 
 1556    emitPredicate(i);
 1557 
 1558    // barrier id
 1559    if (i->src(0).getFile() == FILE_GPR) {
 1560       srcId(i->src(0), 20);
 1561    } else {
 1562       ImmediateValue *imm = i->getSrc(0)->asImm();
 1563       assert(imm);
 1564       code[0] |= imm->reg.data.u32 << 20;
 1565       code[1] |= 0x8000;
 1566    }
 1567 
 1568    // thread count
 1569    if (i->src(1).getFile() == FILE_GPR) {
 1570       srcId(i->src(1), 26);
 1571    } else {
 1572       ImmediateValue *imm = i->getSrc(1)->asImm();
 1573       assert(imm);
 1574       assert(imm->reg.data.u32 <= 0xfff);
 1575       code[0] |= imm->reg.data.u32 << 26;
 1576       code[1] |= imm->reg.data.u32 >> 6;
 1577       code[1] |= 0x4000;
 1578    }
 1579 
 1580    if (i->srcExists(2) && (i->predSrc != 2)) {
 1581       srcId(i->src(2), 32 + 17);
 1582       if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
 1583          code[1] |= 1 << 20;
 1584    } else {
 1585       code[1] |= 7 << 17;
 1586    }
 1587 
 1588    if (i->defExists(0)) {
 1589       if (i->def(0).getFile() == FILE_GPR)
 1590          rDef = i->getDef(0);
 1591       else
 1592          pDef = i->getDef(0);
 1593 
 1594       if (i->defExists(1)) {
 1595          if (i->def(1).getFile() == FILE_GPR)
 1596             rDef = i->getDef(1);
 1597          else
 1598             pDef = i->getDef(1);
 1599       }
 1600    }
 1601    if (rDef) {
 1602       code[0] &= ~(63 << 14);
 1603       defId(rDef, 14);
 1604    }
 1605    if (pDef) {
 1606       code[1] &= ~(7 << 21);
 1607       defId(pDef, 32 + 21);
 1608    }
 1609 }
 1610 
 1611 void
 1612 CodeEmitterNVC0::emitAFETCH(const Instruction *i)
 1613 {
 1614    code[0] = 0x00000006;
 1615    code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
 1616 
 1617    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
 1618       code[0] |= 0x200;
 1619 
 1620    emitPredicate(i);
 1621 
 1622    defId(i->def(0), 14);
 1623    srcId(i->src(0).getIndirect(0), 20);
 1624 }
 1625 
 1626 void
 1627 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
 1628 {
 1629    uint32_t prim = i->src(0).get()->reg.data.u32;
 1630 
 1631    code[0] = 0x00000006 | ((prim & 0x3f) << 26);
 1632    code[1] = 0x00000000 | (prim >> 6);
 1633 
 1634    emitPredicate(i);
 1635 
 1636    const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
 1637 
 1638    defId(i->def(0), 14);
 1639    srcId(i, src1, 20);
 1640 }
 1641 
 1642 void
 1643 CodeEmitterNVC0::emitVFETCH(const Instruction *i)
 1644 {
 1645    code[0] = 0x00000006;
 1646    code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
 1647 
 1648    if (i->perPatch)
 1649       code[0] |= 0x100;
 1650    if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
 1651       code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
 1652 
 1653    emitPredicate(i);
 1654 
 1655    code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
 1656 
 1657    defId(i->def(0), 14);
 1658    srcId(i->src(0).getIndirect(0), 20);
 1659    srcId(i->src(0).getIndirect(1), 26); // vertex address
 1660 }
 1661 
 1662 void
 1663 CodeEmitterNVC0::emitEXPORT(const Instruction *i)
 1664 {
 1665    unsigned int size = typeSizeof(i->dType);
 1666 
 1667    code[0] = 0x00000006 | ((size / 4 - 1) << 5);
 1668    code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
 1669 
 1670    assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
 1671 
 1672    if (i->perPatch)
 1673       code[0] |= 0x100;
 1674 
 1675    emitPredicate(i);
 1676 
 1677    assert(i->src(1).getFile() == FILE_GPR);
 1678 
 1679    srcId(i->src(0).getIndirect(0), 20);
 1680    srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
 1681    srcId(i->src(1), 26);
 1682 }
 1683 
 1684 void
 1685 CodeEmitterNVC0::emitOUT(const Instruction *i)
 1686 {
 1687    code[0] = 0x00000006;
 1688    code[1] = 0x1c000000;
 1689 
 1690    emitPredicate(i);
 1691 
 1692    defId(i->def(0), 14); // new secret address
 1693    srcId(i->src(0), 20); // old secret address, should be 0 initially
 1694 
 1695    assert(i->src(0).getFile() == FILE_GPR);
 1696 
 1697    if (i->op == OP_EMIT)
 1698       code[0] |= 1 << 5;
 1699    if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
 1700       code[0] |= 1 << 6;
 1701 
 1702    // vertex stream
 1703    if (i->src(1).getFile() == FILE_IMMEDIATE) {
 1704       unsigned int stream = SDATA(i->src(1)).u32;
 1705       assert(stream < 4);
 1706       if (stream) {
 1707          code[1] |= 0xc000;
 1708          code[0] |= stream << 26;
 1709       } else {
 1710          srcId(NULL, 26);
 1711       }
 1712    } else {
 1713       srcId(i->src(1), 26);
 1714    }
 1715 }
 1716 
 1717 void
 1718 CodeEmitterNVC0::emitInterpMode(const Instruction *i)
 1719 {
 1720    if (i->encSize == 8) {
 1721       code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
 1722    } else {
 1723       if (i->getInterpMode() == NV50_IR_INTERP_SC)
 1724          code[0] |= 0x80;
 1725       assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
 1726    }
 1727 }
 1728 
 1729 static void
 1730 interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
 1731 {
 1732    int ipa = entry->ipa;
 1733    int reg = entry->reg;
 1734    int loc = entry->loc;
 1735 
 1736    if (data.flatshade &&
 1737        (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
 1738       ipa = NV50_IR_INTERP_FLAT;
 1739       reg = 0x3f;
 1740    } else if (data.force_persample_interp &&
 1741               (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
 1742               (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
 1743       ipa |= NV50_IR_INTERP_CENTROID;
 1744    }
 1745    code[loc + 0] &= ~(0xf << 6);
 1746    code[loc + 0] |= ipa << 6;
 1747    code[loc + 0] &= ~(0x3f << 26);
 1748    code[loc + 0] |= reg << 26;
 1749 }
 1750 
 1751 void
 1752 CodeEmitterNVC0::emitINTERP(const Instruction *i)
 1753 {
 1754    const uint32_t base = i->getSrc(0)->reg.data.offset;
 1755 
 1756    if (i->encSize == 8) {
 1757       code[0] = 0x00000000;
 1758       code[1] = 0xc0000000 | (base & 0xffff);
 1759 
 1760       if (i->saturate)
 1761          code[0] |= 1 << 5;
 1762 
 1763       if (i->op == OP_PINTERP) {
 1764          srcId(i->src(1), 26);
 1765          addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
 1766       } else {
 1767          code[0] |= 0x3f << 26;
 1768          addInterp(i->ipa, 0x3f, interpApply);
 1769       }
 1770 
 1771       srcId(i->src(0).getIndirect(0), 20);
 1772    } else {
 1773       assert(i->op == OP_PINTERP);
 1774       code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
 1775       srcId(i->src(1), 20);
 1776    }
 1777    emitInterpMode(i);
 1778 
 1779    emitPredicate(i);
 1780    defId(i->def(0), 14);
 1781 
 1782    if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
 1783       srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
 1784    else
 1785       code[1] |= 0x3f << 17;
 1786 }
 1787 
 1788 void
 1789 CodeEmitterNVC0::emitLoadStoreType(DataType ty)
 1790 {
 1791    uint8_t val;
 1792 
 1793    switch (ty) {
 1794    case TYPE_U8:
 1795       val = 0x00;
 1796       break;
 1797    case TYPE_S8:
 1798       val = 0x20;
 1799       break;
 1800    case TYPE_F16:
 1801    case TYPE_U16:
 1802       val = 0x40;
 1803       break;
 1804    case TYPE_S16:
 1805       val = 0x60;
 1806       break;
 1807    case TYPE_F32:
 1808    case TYPE_U32:
 1809    case TYPE_S32:
 1810       val = 0x80;
 1811       break;
 1812    case TYPE_F64:
 1813    case TYPE_U64:
 1814    case TYPE_S64:
 1815       val = 0xa0;
 1816       break;
 1817    case TYPE_B128:
 1818       val = 0xc0;
 1819       break;
 1820    default:
 1821       val = 0x80;
 1822       assert(!"invalid type");
 1823       break;
 1824    }
 1825    code[0] |= val;
 1826 }
 1827 
 1828 void
 1829 CodeEmitterNVC0::emitCachingMode(CacheMode c)
 1830 {
 1831    uint32_t val;
 1832 
 1833    switch (c) {
 1834    case CACHE_CA:
 1835 // case CACHE_WB:
 1836       val = 0x000;
 1837       break;
 1838    case CACHE_CG:
 1839       val = 0x100;
 1840       break;
 1841    case CACHE_CS:
 1842       val = 0x200;
 1843       break;
 1844    case CACHE_CV:
 1845 // case CACHE_WT:
 1846       val = 0x300;
 1847       break;
 1848    default:
 1849       val = 0;
 1850       assert(!"invalid caching mode");
 1851       break;
 1852    }
 1853    code[0] |= val;
 1854 }
 1855 
 1856 static inline bool
 1857 uses64bitAddress(const Instruction *ldst)
 1858 {
 1859    return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
 1860       ldst->src(0).isIndirect(0) &&
 1861       ldst->getIndirect(0, 0)->reg.size == 8;
 1862 }
 1863 
 1864 void
 1865 CodeEmitterNVC0::emitSTORE(const Instruction *i)
 1866 {
 1867    uint32_t opc;
 1868 
 1869    switch (i->src(0).getFile()) {
 1870    case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
 1871    case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
 1872    case FILE_MEMORY_SHARED:
 1873       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
 1874          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
 1875             opc = 0xb8000000;
 1876          else
 1877             opc = 0xcc000000;
 1878       } else {
 1879          opc = 0xc9000000;
 1880       }
 1881       break;
 1882    default:
 1883       assert(!"invalid memory file");
 1884       opc = 0;
 1885       break;
 1886    }
 1887    code[0] = 0x00000005;
 1888    code[1] = opc;
 1889 
 1890    if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
 1891       // Unlocked store on shared memory can fail.
 1892       if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
 1893           i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
 1894          assert(i->defExists(0));
 1895          setPDSTL(i, 0);
 1896       }
 1897    }
 1898 
 1899    setAddressByFile(i->src(0));
 1900    srcId(i->src(1), 14);
 1901    srcId(i->src(0).getIndirect(0), 20);
 1902    if (uses64bitAddress(i))
 1903       code[1] |= 1 << 26;
 1904 
 1905    emitPredicate(i);
 1906 
 1907    emitLoadStoreType(i->dType);
 1908    emitCachingMode(i->cache);
 1909 }
 1910 
 1911 void
 1912 CodeEmitterNVC0::emitLOAD(const Instruction *i)
 1913 {
 1914    uint32_t opc;
 1915 
 1916    code[0] = 0x00000005;
 1917 
 1918    switch (i->src(0).getFile()) {
 1919    case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
 1920    case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
 1921    case FILE_MEMORY_SHARED:
 1922       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
 1923          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
 1924             opc = 0xa8000000;
 1925          else
 1926             opc = 0xc4000000;
 1927       } else {
 1928          opc = 0xc1000000;
 1929       }
 1930       break;
 1931    case FILE_MEMORY_CONST:
 1932       if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
 1933          emitMOV(i); // not sure if this is any better
 1934          return;
 1935       }
 1936       opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
 1937       code[0] = 0x00000006 | (i->subOp << 8);
 1938       break;
 1939    default:
 1940       assert(!"invalid memory file");
 1941       opc = 0;
 1942       break;
 1943    }
 1944    code[1] = opc;
 1945 
 1946    int r = 0, p = -1;
 1947    if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
 1948       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
 1949          if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
 1950             r = -1;
 1951             p = 0;
 1952          } else if (i->defExists(1)) { // r, p
 1953             p = 1;
 1954          } else {
 1955             assert(!"Expected predicate dest for load locked");
 1956          }
 1957       }
 1958    }
 1959 
 1960    if (r >= 0)
 1961       defId(i->def(r), 14);
 1962    else
 1963       code[0] |= 63 << 14;
 1964 
 1965    if (p >= 0) {
 1966       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
 1967          setPDSTL(i, p);
 1968       else
 1969          defId(i->def(p), 32 + 18);
 1970    }
 1971 
 1972    setAddressByFile(i->src(0));
 1973    srcId(i->src(0).getIndirect(0), 20);
 1974    if (uses64bitAddress(i))
 1975       code[1] |= 1 << 26;
 1976 
 1977    emitPredicate(i);
 1978 
 1979    emitLoadStoreType(i->dType);
 1980    emitCachingMode(i->cache);
 1981 }
 1982 
 1983 uint8_t
 1984 CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
 1985 {
 1986    switch (SDATA(ref).sv.sv) {
 1987    case SV_LANEID:        return 0x00;
 1988    case SV_PHYSID:        return 0x03;
 1989    case SV_VERTEX_COUNT:  return 0x10;
 1990    case SV_INVOCATION_ID: return 0x11;
 1991    case SV_YDIR:          return 0x12;
 1992    case SV_THREAD_KILL:   return 0x13;
 1993    case SV_COMBINED_TID:  return 0x20;
 1994    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
 1995    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
 1996    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
 1997    case SV_GRIDID:        return 0x2c;
 1998    case SV_NCTAID:        return 0x2d + SDATA(ref).sv.index;
 1999    case SV_LBASE:         return 0x34;
 2000    case SV_SBASE:         return 0x30;
 2001    case SV_LANEMASK_EQ:   return 0x38;
 2002    case SV_LANEMASK_LT:   return 0x39;
 2003    case SV_LANEMASK_LE:   return 0x3a;
 2004    case SV_LANEMASK_GT:   return 0x3b;
 2005    case SV_LANEMASK_GE:   return 0x3c;
 2006    case SV_CLOCK:         return 0x50 + SDATA(ref).sv.index;
 2007    default:
 2008       assert(!"no sreg for system value");
 2009       return 0;
 2010    }
 2011 }
 2012 
 2013 void
 2014 CodeEmitterNVC0::emitMOV(const Instruction *i)
 2015 {
 2016    assert(!i->saturate);
 2017    if (i->def(0).getFile() == FILE_PREDICATE) {
 2018       if (i->src(0).getFile() == FILE_GPR) {
 2019          code[0] = 0xfc01c003;
 2020          code[1] = 0x1a8e0000;
 2021          srcId(i->src(0), 20);
 2022       } else {
 2023          code[0] = 0x0001c004;
 2024          code[1] = 0x0c0e0000;
 2025          if (i->src(0).getFile() == FILE_IMMEDIATE) {
 2026             code[0] |= 7 << 20;
 2027             if (!i->getSrc(0)->reg.data.u32)
 2028                code[0] |= 1 << 23;
 2029          } else {
 2030             srcId(i->src(0), 20);
 2031          }
 2032       }
 2033       defId(i->def(0), 17);
 2034       emitPredicate(i);
 2035    } else
 2036    if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
 2037       uint8_t sr = getSRegEncoding(i->src(0));
 2038 
 2039       if (i->encSize == 8) {
 2040          code[0] = 0x00000004 | (sr << 26);
 2041          code[1] = 0x2c000000;
 2042       } else {
 2043          code[0] = 0x40000008 | (sr << 20);
 2044       }
 2045       defId(i->def(0), 14);
 2046 
 2047       emitPredicate(i);
 2048    } else
 2049    if (i->encSize == 8) {
 2050       uint64_t opc;
 2051 
 2052       if (i->src(0).getFile() == FILE_IMMEDIATE)
 2053          opc = HEX64(18000000, 000001e2);
 2054       else
 2055       if (i->src(0).getFile() == FILE_PREDICATE)
 2056          opc = HEX64(080e0000, 1c000004);
 2057       else
 2058          opc = HEX64(28000000, 00000004);
 2059 
 2060       if (i->src(0).getFile() != FILE_PREDICATE)
 2061          opc |= i->lanes << 5;
 2062 
 2063       emitForm_B(i, opc);
 2064 
 2065       // Explicitly emit the predicate source as emitForm_B skips it.
 2066       if (i->src(0).getFile() == FILE_PREDICATE)
 2067          srcId(i->src(0), 20);
 2068    } else {
 2069       uint32_t imm;
 2070 
 2071       if (i->src(0).getFile() == FILE_IMMEDIATE) {
 2072          imm = SDATA(i->src(0)).u32;
 2073          if (imm & 0xfff00000) {
 2074             assert(!(imm & 0x000fffff));
 2075             code[0] = 0x00000318 | imm;
 2076          } else {
 2077             assert(imm < 0x800 && ((int32_t)imm >= -0x800));
 2078             code[0] = 0x00000118 | (imm << 20);
 2079          }
 2080       } else {
 2081          code[0] = 0x0028;
 2082          emitShortSrc2(i->src(0));
 2083       }
 2084       defId(i->def(0), 14);
 2085 
 2086       emitPredicate(i);
 2087    }
 2088 }
 2089 
 2090 void
 2091 CodeEmitterNVC0::emitATOM(const Instruction *i)
 2092 {
 2093    const bool hasDst = i->defExists(0);
 2094    const bool casOrExch =
 2095       i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
 2096       i->subOp == NV50_IR_SUBOP_ATOM_CAS;
 2097 
 2098    if (i->dType == TYPE_U64) {
 2099       switch (i->subOp) {
 2100       case NV50_IR_SUBOP_ATOM_ADD:
 2101          code[0] = 0x205;
 2102          if (hasDst)
 2103             code[1] = 0x507e0000;
 2104          else
 2105             code[1] = 0x10000000;
 2106          break;
 2107       case NV50_IR_SUBOP_ATOM_EXCH:
 2108          code[0] = 0x305;
 2109          code[1] = 0x507e0000;
 2110          break;
 2111       case NV50_IR_SUBOP_ATOM_CAS:
 2112          code[0] = 0x325;
 2113          code[1] = 0x50000000;
 2114          break;
 2115       default:
 2116          assert(!"invalid u64 red op");
 2117          break;
 2118       }
 2119    } else
 2120    if (i->dType == TYPE_U32) {
 2121       switch (i->subOp) {
 2122       case NV50_IR_SUBOP_ATOM_EXCH:
 2123          code[0] = 0x105;
 2124          code[1] = 0x507e0000;
 2125          break;
 2126       case NV50_IR_SUBOP_ATOM_CAS:
 2127          code[0] = 0x125;
 2128          code[1] = 0x50000000;
 2129          break;
 2130       default:
 2131          code[0] = 0x5 | (i->subOp << 5);
 2132          if (hasDst)
 2133             code[1] = 0x507e0000;
 2134          else
 2135             code[1] = 0x10000000;
 2136          break;
 2137       }
 2138    } else
 2139    if (i->dType == TYPE_S32) {
 2140       assert(i->subOp <= 2);
 2141       code[0] = 0x205 | (i->subOp << 5);
 2142       if (hasDst)
 2143          code[1] = 0x587e0000;
 2144       else
 2145          code[1] = 0x18000000;
 2146    } else
 2147    if (i->dType == TYPE_F32) {
 2148       assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
 2149       code[0] = 0x205;
 2150       if (hasDst)
 2151          code[1] = 0x687e0000;
 2152       else
 2153          code[1] = 0x28000000;
 2154    }
 2155 
 2156    emitPredicate(i);
 2157 
 2158    srcId(i->src(1), 14);
 2159 
 2160    if (hasDst)
 2161       defId(i->def(0), 32 + 11);
 2162    else
 2163    if (casOrExch)
 2164       code[1] |= 63 << 11;
 2165 
 2166    if (hasDst || casOrExch) {
 2167       const int32_t offset = SDATA(i->src(0)).offset;
 2168       assert(offset < 0x80000 && offset >= -0x80000);
 2169       code[0] |= offset << 26;
 2170       code[1] |= (offset & 0x1ffc0) >> 6;
 2171       code[1] |= (offset & 0xe0000) << 6;
 2172    } else {
 2173       srcAddr32(i->src(0), 26, 0);
 2174    }
 2175    if (i->getIndirect(0, 0)) {
 2176       srcId(i->getIndirect(0, 0), 20);
 2177       if (i->getIndirect(0, 0)->reg.size == 8)
 2178          code[1] |= 1 << 26;
 2179    } else {
 2180       code[0] |= 63 << 20;
 2181    }
 2182 
 2183    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
 2184       assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
 2185       code[1] |= (SDATA(i->src(1)).id + 1) << 17;
 2186    }
 2187 }
 2188 
 2189 void
 2190 CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
 2191 {
 2192    switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
 2193    case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
 2194    case NV50_IR_SUBOP_MEMBAR_GL:  code[0] = 0x25; break;
 2195    default:
 2196       code[0] = 0x45;
 2197       assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
 2198       break;
 2199    }
 2200    code[1] = 0xe0000000;
 2201 
 2202    emitPredicate(i);
 2203 }
 2204 
 2205 void
 2206 CodeEmitterNVC0::emitCCTL(const Instruction *i)
 2207 {
 2208    code[0] = 0x00000005 | (i->subOp << 5);
 2209 
 2210    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
 2211       code[1] = 0x98000000;
 2212       srcAddr32(i->src(0), 28, 2);
 2213    } else {
 2214       code[1] = 0xd0000000;
 2215       setAddress24(i->src(0));
 2216    }
 2217    if (uses64bitAddress(i))
 2218       code[1] |= 1 << 26;
 2219    srcId(i->src(0).getIndirect(0), 20);
 2220 
 2221    emitPredicate(i);
 2222 
 2223    defId(i, 0, 14);
 2224 }
 2225 
 2226 void
 2227 CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
 2228 {
 2229    uint8_t m;
 2230    switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
 2231    case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
 2232    case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
 2233    case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
 2234    case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
 2235    case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
 2236    case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
 2237    case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
 2238    case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
 2239    case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
 2240    case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
 2241    case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
 2242    case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
 2243    case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
 2244    case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
 2245    case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
 2246    default:
 2247       return;
 2248    }
 2249    code[0] |= m << 5;
 2250    if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
 2251       code[1] |= 1 << 16;
 2252 }
 2253 
 2254 void
 2255 CodeEmitterNVC0::emitSUCalc(Instruction *i)
 2256 {
 2257    ImmediateValue *imm = NULL;
 2258    uint64_t opc;
 2259 
 2260    if (i->srcExists(2)) {
 2261       imm = i->getSrc(2)->asImm();
 2262       if (imm)
 2263          i->setSrc(2, NULL); // special case, make emitForm_A not assert
 2264    }
 2265 
 2266    switch (i->op) {
 2267    case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
 2268    case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
 2269    case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
 2270    default:
 2271       assert(0);
 2272       return;
 2273    }
 2274    emitForm_A(i, opc);
 2275 
 2276    if (i->op == OP_SUCLAMP) {
 2277       if (i->dType == TYPE_S32)
 2278          code[0] |= 1 << 9;
 2279       emitSUCLAMPMode(i->subOp);
 2280    }
 2281 
 2282    if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
 2283          code[1] |= 1 << 16;
 2284 
 2285    if (i->op != OP_SUEAU) {
 2286       if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
 2287          code[0] |= 63 << 14;
 2288          code[1] |= i->getDef(0)->reg.data.id << 23;
 2289       } else
 2290       if (i->defExists(1)) { // r, p
 2291          assert(i->def(1).getFile() == FILE_PREDICATE);
 2292          code[1] |= i->getDef(1)->reg.data.id << 23;
 2293       } else { // r, #
 2294          code[1] |= 7 << 23;
 2295       }
 2296    }
 2297    if (imm) {
 2298       assert(i->op == OP_SUCLAMP);
 2299       i->setSrc(2, imm);
 2300       code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
 2301    }
 2302 }
 2303 
 2304 void
 2305 CodeEmitterNVC0::emitSUGType(DataType ty)
 2306 {
 2307    switch (ty) {
 2308    case TYPE_S32: code[1] |= 1 << 13; break;
 2309    case TYPE_U8:  code[1] |= 2 << 13; break;
 2310    case TYPE_S8:  code[1] |= 3 << 13; break;
 2311    default:
 2312       assert(ty == TYPE_U32);
 2313       break;
 2314    }
 2315 }
 2316 
 2317 void
 2318 CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
 2319 {
 2320    const uint32_t offset = i->getSrc(s)->reg.data.offset;
 2321 
 2322    assert(i->src(s).getFile() == FILE_MEMORY_CONST);
 2323    assert(offset == (offset & 0xfffc));
 2324 
 2325    code[1] |= 1 << 21;
 2326    code[0] |= offset << 24;
 2327    code[1] |= offset >> 8;
 2328    code[1] |= i->getSrc(s)->reg.fileIndex << 8;
 2329 }
 2330 
 2331 void
 2332 CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
 2333 {
 2334    if (!i->srcExists(s) || (i->predSrc == s)) {
 2335       code[1] |= 0x7 << 17;
 2336    } else {
 2337       if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
 2338          code[1] |= 1 << 20;
 2339       srcId(i->src(s), 32 + 17);
 2340    }
 2341 }
 2342 
 2343 void
 2344 CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
 2345 {
 2346    code[0] = 0x5;
 2347    code[1] = 0xd4000000 | (i->subOp << 15);
 2348 
 2349    emitLoadStoreType(i->dType);
 2350    emitSUGType(i->sType);
 2351    emitCachingMode(i->cache);
 2352 
 2353    emitPredicate(i);
 2354    defId(i->def(0), 14); // destination
 2355    srcId(i->src(0), 20); // address
 2356    // format
 2357    if (i->src(1).getFile() == FILE_GPR)
 2358       srcId(i->src(1), 26);
 2359    else
 2360       setSUConst16(i, 1);
 2361    setSUPred(i, 2);
 2362 }
 2363 
 2364 void
 2365 CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
 2366 {
 2367    code[0] = 0x5;
 2368    code[1] = 0xdc000000 | (i->subOp << 15);
 2369 
 2370    if (i->op == OP_SUSTP)
 2371       code[1] |= i->tex.mask << 22;
 2372    else
 2373       emitLoadStoreType(i->dType);
 2374    emitSUGType(i->sType);
 2375    emitCachingMode(i->cache);
 2376 
 2377    emitPredicate(i);
 2378    srcId(i->src(0), 20); // address
 2379    // format
 2380    if (i->src(1).getFile() == FILE_GPR)
 2381       srcId(i->src(1), 26);
 2382    else
 2383       setSUConst16(i, 1);
 2384    srcId(i->src(3), 14); // values
 2385    setSUPred(i, 2);
 2386 }
 2387 
 2388 void
 2389 CodeEmitterNVC0::emitSUAddr(const TexInstruction *i)
 2390 {
 2391    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
 2392 
 2393    if (i->tex.rIndirectSrc < 0) {
 2394       code[1] |= 0x00004000;
 2395       code[0] |= i->tex.r << 26;
 2396    } else {
 2397       srcId(i, i->tex.rIndirectSrc, 26);
 2398    }
 2399 }
 2400 
 2401 void
 2402 CodeEmitterNVC0::emitSUDim(const TexInstruction *i)
 2403 {
 2404    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
 2405 
 2406    code[1] |= (i->tex.target.getDim() - 1) << 12;
 2407    if (i->tex.target.isArray() || i->tex.target.isCube() ||
 2408        i->tex.target.getDim() == 3) {
 2409       // use e2d mode for 3-dim images, arrays and cubes.
 2410       code[1] |= 3 << 12;
 2411    }
 2412 
 2413    srcId(i->src(0), 20);
 2414 }
 2415 
 2416 void
 2417 CodeEmitterNVC0::emitSULEA(const TexInstruction *i)
 2418 {
 2419    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
 2420 
 2421    code[0] = 0x5;
 2422    code[1] = 0xf0000000;
 2423 
 2424    emitPredicate(i);
 2425    emitLoadStoreType(i->sType);
 2426 
 2427    defId(i->def(0), 14);
 2428 
 2429    if (i->defExists(1)) {
 2430       defId(i->def(1), 32 + 22);
 2431    } else {
 2432       code[1] |= 7 << 22;
 2433    }
 2434 
 2435    emitSUAddr(i);
 2436    emitSUDim(i);
 2437 }
 2438 
 2439 void
 2440 CodeEmitterNVC0::emitSULDB(const TexInstruction *i)
 2441 {
 2442    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
 2443 
 2444    code[0] = 0x5;
 2445    code[1] = 0xd4000000 | (i->subOp << 15);
 2446 
 2447    emitPredicate(i);
 2448    emitLoadStoreType(i->dType);
 2449 
 2450    defId(i->def(0), 14);
 2451 
 2452    emitCachingMode(i->cache);
 2453    emitSUAddr(i);
 2454    emitSUDim(i);
 2455 }
 2456 
 2457 void
 2458 CodeEmitterNVC0::emitSUSTx(const TexInstruction *i)
 2459 {
 2460    assert(targ->getChipset() < NVISA_GK104_CHIPSET);
 2461 
 2462    code[0] = 0x5;
 2463    code[1] = 0xdc000000 | (i->subOp << 15);
 2464 
 2465    if (i->op == OP_SUSTP)
 2466       code[1] |= i->tex.mask << 17;
 2467    else
 2468       emitLoadStoreType(i->dType);
 2469 
 2470    emitPredicate(i);
 2471 
 2472    srcId(i->src(1), 14);
 2473 
 2474    emitCachingMode(i->cache);
 2475    emitSUAddr(i);
 2476    emitSUDim(i);
 2477 }
 2478 
 2479 void
 2480 CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
 2481 {
 2482    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
 2483    case 0:
 2484       code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
 2485       code[1] |= (i->subOp & 0x00e0) >> 5;  // vsrc2
 2486       code[1] |= (i->subOp & 0x0100) << 7;  // vsrc2
 2487       code[1] |= (i->subOp & 0x3c00) << 13; // vdst
 2488       break;
 2489    case 1:
 2490       code[1] |= (i->subOp & 0x000f) << 8;  // v2src1
 2491       code[1] |= (i->subOp & 0x0010) << 11; // v2src1
 2492       code[1] |= (i->subOp & 0x01e0) >> 1;  // v2src2
 2493       code[1] |= (i->subOp & 0x0200) << 6;  // v2src2
 2494       code[1] |= (i->subOp & 0x3c00) << 2;  // v4dst
 2495       code[1] |= (i->mask & 0x3) << 2;
 2496       break;
 2497    case 2:
 2498       code[1] |= (i->subOp & 0x000f) << 8; // v4src1
 2499       code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
 2500       code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
 2501       code[1] |= (i->mask & 0x3) << 2;
 2502       code[1] |= (i->mask & 0xc) << 21;
 2503       break;
 2504    default:
 2505       assert(0);
 2506       break;
 2507    }
 2508 }
 2509 
 2510 void
 2511 CodeEmitterNVC0::emitVSHL(const Instruction *i)
 2512 {
 2513    uint64_t opc = 0x4;
 2514 
 2515    switch (NV50_IR_SUBOP_Vn(i->subOp)) {
 2516    case 0: opc |= 0xe8ULL << 56; break;
 2517    case 1: opc |= 0xb4ULL << 56; break;
 2518    case 2: opc |= 0x94ULL << 56; break;
 2519    default:
 2520       assert(0);
 2521       break;
 2522    }
 2523    if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
 2524       if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
 2525       if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
 2526    } else {
 2527       if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
 2528       if (isSignedType(i->sType)) opc |= 1 << 6;
 2529    }
 2530    emitForm_A(i, opc);
 2531    emitVectorSubOp(i);
 2532 
 2533    if (i->saturate)
 2534       code[0] |= 1 << 9;
 2535    if (i->flagsDef >= 0)
 2536       code[1] |= 1 << 16;
 2537 }
 2538 
 2539 void
 2540 CodeEmitterNVC0::emitPIXLD(const Instruction *i)
 2541 {
 2542    assert(i->encSize == 8);
 2543    emitForm_A(i, HEX64(10000000, 00000006));
 2544    code[0] |= i->subOp << 5;
 2545    code[1] |= 0x00e00000;
 2546 }
 2547 
 2548 void
 2549 CodeEmitterNVC0::emitSHFL(const Instruction *i)
 2550 {
 2551    const ImmediateValue *imm;
 2552 
 2553    assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
 2554 
 2555    code[0] = 0x00000005;
 2556    code[1] = 0x88000000 | (i->subOp << 23);
 2557 
 2558    emitPredicate(i);
 2559 
 2560    defId(i->def(0), 14);
 2561    srcId(i->src(0), 20);
 2562 
 2563    switch (i->src(1).getFile()) {
 2564    case FILE_GPR:
 2565       srcId(i->src(1), 26);
 2566       break;
 2567    case FILE_IMMEDIATE:
 2568       imm = i->getSrc(1)->asImm();
 2569       assert(imm && imm->reg.data.u32 < 0x20);
 2570       code[0] |= imm->reg.data.u32 << 26;
 2571       code[0] |= 1 << 5;
 2572       break;
 2573    default:
 2574       assert(!"invalid src1 file");
 2575       break;
 2576    }
 2577 
 2578    switch (i->src(2).getFile()) {
 2579    case FILE_GPR:
 2580       srcId(i->src(2), 49);
 2581       break;
 2582    case FILE_IMMEDIATE:
 2583       imm = i->getSrc(2)->asImm();
 2584       assert(imm && imm->reg.data.u32 < 0x2000);
 2585       code[1] |= imm->reg.data.u32 << 10;
 2586       code[0] |= 1 << 6;
 2587       break;
 2588    default:
 2589       assert(!"invalid src2 file");
 2590       break;
 2591    }
 2592 
 2593    setPDSTL(i, i->defExists(1) ? 1 : -1);
 2594 }
 2595 
 2596 void
 2597 CodeEmitterNVC0::emitVOTE(const Instruction *i)
 2598 {
 2599    const ImmediateValue *imm;
 2600    uint32_t u32;
 2601 
 2602    code[0] = 0x00000004 | (i->subOp << 5);
 2603    code[1] = 0x48000000;
 2604 
 2605    emitPredicate(i);
 2606 
 2607    unsigned rp = 0;
 2608    for (int d = 0; i->defExists(d); d++) {
 2609       if (i->def(d).getFile() == FILE_PREDICATE) {
 2610          assert(!(rp & 2));
 2611          rp |= 2;
 2612          defId(i->def(d), 32 + 22);
 2613       } else if (i->def(d).getFile() == FILE_GPR) {
 2614          assert(!(rp & 1));
 2615          rp |= 1;
 2616          defId(i->def(d), 14);
 2617       } else {
 2618          assert(!"Unhandled def");
 2619       }
 2620    }
 2621    if (!(rp & 1))
 2622       code[0] |= 63 << 14;
 2623    if (!(rp & 2))
 2624       code[1] |= 7 << 22;
 2625 
 2626    switch (i->src(0).getFile()) {
 2627    case FILE_PREDICATE:
 2628       if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
 2629          code[0] |= 1 << 23;
 2630       srcId(i->src(0), 20);
 2631       break;
 2632    case FILE_IMMEDIATE:
 2633       imm = i->getSrc(0)->asImm();
 2634       assert(imm);
 2635       u32 = imm->reg.data.u32;
 2636       assert(u32 == 0 || u32 == 1);
 2637       code[0] |= (u32 == 1 ? 0x7 : 0xf) << 20;
 2638       break;
 2639    default:
 2640       assert(!"Unhandled src");
 2641       break;
 2642    }
 2643 }
 2644 
 2645 bool
 2646 CodeEmitterNVC0::emitInstruction(Instruction *insn)
 2647 {
 2648    unsigned int size = insn->encSize;
 2649 
 2650    if (writeIssueDelays && !(codeSize & 0x3f))
 2651       size += 8;
 2652 
 2653    if (!insn->encSize) {
 2654       ERROR("skipping unencodable instruction: "); insn->print();
 2655       return false;
 2656    } else
 2657    if (codeSize + size > codeSizeLimit) {
 2658       ERROR("code emitter output buffer too small\n");
 2659       return false;
 2660    }
 2661 
 2662    if (writeIssueDelays) {
 2663       if (!(codeSize & 0x3f)) {
 2664          code[0] = 0x00000007; // cf issue delay "instruction"
 2665          code[1] = 0x20000000;
 2666          code += 2;
 2667          codeSize += 8;
 2668       }
 2669       const unsigned int id = (codeSize & 0x3f) / 8 - 1;
 2670       uint32_t *data = code - (id * 2 + 2);
 2671       if (id <= 2) {
 2672          data[0] |= insn->sched << (id * 8 + 4);
 2673       } else
 2674       if (id == 3) {
 2675          data[0] |= insn->sched << 28;
 2676          data[1] |= insn->sched >> 4;
 2677       } else {
 2678          data[1] |= insn->sched << ((id - 4) * 8 + 4);
 2679       }
 2680    }
 2681 
 2682    // assert that instructions with multiple defs don't corrupt registers
 2683    for (int d = 0; insn->defExists(d); ++d)
 2684       assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
 2685 
 2686    switch (insn->op) {
 2687    case OP_MOV:
 2688    case OP_RDSV:
 2689       emitMOV(insn);
 2690       break;
 2691    case OP_NOP:
 2692       break;
 2693    case OP_LOAD:
 2694       emitLOAD(insn);
 2695       break;
 2696    case OP_STORE:
 2697       emitSTORE(insn);
 2698       break;
 2699    case OP_LINTERP:
 2700    case OP_PINTERP:
 2701       emitINTERP(insn);
 2702       break;
 2703    case OP_VFETCH:
 2704       emitVFETCH(insn);
 2705       break;
 2706    case OP_EXPORT:
 2707       emitEXPORT(insn);
 2708       break;
 2709    case OP_PFETCH:
 2710       emitPFETCH(insn);
 2711       break;
 2712    case OP_AFETCH:
 2713       emitAFETCH(insn);
 2714       break;
 2715    case OP_EMIT:
 2716    case OP_RESTART:
 2717       emitOUT(insn);
 2718       break;
 2719    case OP_ADD:
 2720    case OP_SUB:
 2721       if (insn->dType == TYPE_F64)
 2722          emitDADD(insn);
 2723       else if (isFloatType(insn->dType))
 2724          emitFADD(insn);
 2725       else
 2726          emitUADD(insn);
 2727       break;
 2728    case OP_MUL:
 2729       if (insn->dType == TYPE_F64)
 2730          emitDMUL(insn);
 2731       else if (isFloatType(insn->dType))
 2732          emitFMUL(insn);
 2733       else
 2734          emitUMUL(insn);
 2735       break;
 2736    case OP_MAD:
 2737    case OP_FMA:
 2738       if (insn->dType == TYPE_F64)
 2739          emitDMAD(insn);
 2740       else if (isFloatType(insn->dType))
 2741          emitFMAD(insn);
 2742       else
 2743          emitIMAD(insn);
 2744       break;
 2745    case OP_SAD:
 2746       emitISAD(insn);
 2747       break;
 2748    case OP_SHLADD:
 2749       emitSHLADD(insn);
 2750       break;
 2751    case OP_NOT:
 2752       emitNOT(insn);
 2753       break;
 2754    case OP_AND:
 2755       emitLogicOp(insn, 0);
 2756       break;
 2757    case OP_OR:
 2758       emitLogicOp(insn, 1);
 2759       break;
 2760    case OP_XOR:
 2761       emitLogicOp(insn, 2);
 2762       break;
 2763    case OP_SHL:
 2764    case OP_SHR:
 2765       emitShift(insn);
 2766       break;
 2767    case OP_SET:
 2768    case OP_SET_AND:
 2769    case OP_SET_OR:
 2770    case OP_SET_XOR:
 2771       emitSET(insn->asCmp());
 2772       break;
 2773    case OP_SELP:
 2774       emitSELP(insn);
 2775       break;
 2776    case OP_SLCT:
 2777       emitSLCT(insn->asCmp());
 2778       break;
 2779    case OP_MIN:
 2780    case OP_MAX:
 2781       emitMINMAX(insn);
 2782       break;
 2783    case OP_ABS:
 2784    case OP_NEG:
 2785    case OP_CEIL:
 2786    case OP_FLOOR:
 2787    case OP_TRUNC:
 2788    case OP_SAT:
 2789       emitCVT(insn);
 2790       break;
 2791    case OP_CVT:
 2792       if (insn->def(0).getFile() == FILE_PREDICATE ||
 2793           insn->src(0).getFile() == FILE_PREDICATE)
 2794          emitMOV(insn);
 2795       else
 2796          emitCVT(insn);
 2797       break;
 2798    case OP_RSQ:
 2799       emitSFnOp(insn, 5 + 2 * insn->subOp);
 2800       break;
 2801    case OP_RCP:
 2802       emitSFnOp(insn, 4 + 2 * insn->subOp);
 2803       break;
 2804    case OP_LG2:
 2805       emitSFnOp(insn, 3);
 2806       break;
 2807    case OP_EX2:
 2808       emitSFnOp(insn, 2);
 2809       break;
 2810    case OP_SIN:
 2811       emitSFnOp(insn, 1);
 2812       break;
 2813    case OP_COS:
 2814       emitSFnOp(insn, 0);
 2815       break;
 2816    case OP_PRESIN:
 2817    case OP_PREEX2:
 2818       emitPreOp(insn);
 2819       break;
 2820    case OP_TEX:
 2821    case OP_TXB:
 2822    case OP_TXL:
 2823    case OP_TXD:
 2824    case OP_TXF:
 2825    case OP_TXG:
 2826    case OP_TXLQ:
 2827       emitTEX(insn->asTex());
 2828       break;
 2829    case OP_TXQ:
 2830       emitTXQ(insn->asTex());
 2831       break;
 2832    case OP_TEXBAR:
 2833       emitTEXBAR(insn);
 2834       break;
 2835    case OP_SUBFM:
 2836    case OP_SUCLAMP:
 2837    case OP_SUEAU:
 2838       emitSUCalc(insn);
 2839       break;
 2840    case OP_MADSP:
 2841       emitMADSP(insn);
 2842       break;
 2843    case OP_SULDB:
 2844       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
 2845          emitSULDGB(insn->asTex());
 2846       else
 2847          emitSULDB(insn->asTex());
 2848       break;
 2849    case OP_SUSTB:
 2850    case OP_SUSTP:
 2851       if (targ->getChipset() >= NVISA_GK104_CHIPSET)
 2852          emitSUSTGx(insn->asTex());
 2853       else
 2854          emitSUSTx(insn->asTex());
 2855       break;
 2856    case OP_SULEA:
 2857       emitSULEA(insn->asTex());
 2858       break;
 2859    case OP_ATOM:
 2860       emitATOM(insn);
 2861       break;
 2862    case OP_BRA:
 2863    case OP_CALL:
 2864    case OP_PRERET:
 2865    case OP_RET:
 2866    case OP_DISCARD:
 2867    case OP_EXIT:
 2868    case OP_PRECONT:
 2869    case OP_CONT:
 2870    case OP_PREBREAK:
 2871    case OP_BREAK:
 2872    case OP_JOINAT:
 2873    case OP_BRKPT:
 2874    case OP_QUADON:
 2875    case OP_QUADPOP:
 2876       emitFlow(insn);
 2877       break;
 2878    case OP_QUADOP:
 2879       emitQUADOP(insn, insn->subOp, insn->lanes);
 2880       break;
 2881    case OP_DFDX:
 2882       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
 2883       break;
 2884    case OP_DFDY:
 2885       emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
 2886       break;
 2887    case OP_POPCNT:
 2888       emitPOPC(insn);
 2889       break;
 2890    case OP_INSBF:
 2891       emitINSBF(insn);
 2892       break;
 2893    case OP_EXTBF:
 2894       emitEXTBF(insn);
 2895       break;
 2896    case OP_BFIND:
 2897       emitBFIND(insn);
 2898       break;
 2899    case OP_PERMT:
 2900       emitPERMT(insn);
 2901       break;
 2902    case OP_JOIN:
 2903       emitNOP(insn);
 2904       insn->join = 1;
 2905       break;
 2906    case OP_BAR:
 2907       emitBAR(insn);
 2908       break;
 2909    case OP_MEMBAR:
 2910       emitMEMBAR(insn);
 2911       break;
 2912    case OP_CCTL:
 2913       emitCCTL(insn);
 2914       break;
 2915    case OP_VSHL:
 2916       emitVSHL(insn);
 2917       break;
 2918    case OP_PIXLD:
 2919       emitPIXLD(insn);
 2920       break;
 2921    case OP_SHFL:
 2922       emitSHFL(insn);
 2923       break;
 2924    case OP_VOTE:
 2925       emitVOTE(insn);
 2926       break;
 2927    case OP_PHI:
 2928    case OP_UNION:
 2929    case OP_CONSTRAINT:
 2930       ERROR("operation should have been eliminated");
 2931       return false;
 2932    case OP_EXP:
 2933    case OP_LOG:
 2934    case OP_SQRT:
 2935    case OP_POW:
 2936       ERROR("operation should have been lowered\n");
 2937       return false;
 2938    default:
 2939       ERROR("unknown op: %u\n", insn->op);
 2940       return false;
 2941    }
 2942 
 2943    if (insn->join) {
 2944       code[0] |= 0x10;
 2945       assert(insn->encSize == 8);
 2946    }
 2947 
 2948    code += insn->encSize / 4;
 2949    codeSize += insn->encSize;
 2950    return true;
 2951 }
 2952 
 2953 uint32_t
 2954 CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
 2955 {
 2956    const Target::OpInfo &info = targ->getOpInfo(i);
 2957 
 2958    if (writeIssueDelays || info.minEncSize == 8 || 1)
 2959       return 8;
 2960 
 2961    if (i->ftz || i->saturate || i->join)
 2962       return 8;
 2963    if (i->rnd != ROUND_N)
 2964       return 8;
 2965    if (i->predSrc >= 0 && i->op == OP_MAD)
 2966       return 8;
 2967 
 2968    if (i->op == OP_PINTERP) {
 2969       if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
 2970          return 8;
 2971    } else
 2972    if (i->op == OP_MOV && i->lanes != 0xf) {
 2973       return 8;
 2974    }
 2975 
 2976    for (int s = 0; i->srcExists(s); ++s) {
 2977       if (i->src(s).isIndirect(0))
 2978          return 8;
 2979 
 2980       if (i->src(s).getFile() == FILE_MEMORY_CONST) {
 2981          if (SDATA(i->src(s)).offset >= 0x100)
 2982             return 8;
 2983          if (i->getSrc(s)->reg.fileIndex > 1 &&
 2984              i->getSrc(s)->reg.fileIndex != 16)
 2985              return 8;
 2986       } else
 2987       if (i->src(s).getFile() == FILE_IMMEDIATE) {
 2988          if (i->dType == TYPE_F32) {
 2989             if (SDATA(i->src(s)).u32 >= 0x100)
 2990                return 8;
 2991          } else {
 2992             if (SDATA(i->src(s)).u32 > 0xff)
 2993                return 8;
 2994          }
 2995       }
 2996 
 2997       if (i->op == OP_CVT)
 2998          continue;
 2999       if (i->src(s).mod != Modifier(0)) {
 3000          if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
 3001             if (i->op != OP_RSQ)
 3002                return 8;
 3003          if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
 3004             if (i->op != OP_ADD || s != 0)
 3005                return 8;
 3006       }
 3007    }
 3008 
 3009    return 4;
 3010 }
 3011 
 3012 // Simplified, erring on safe side.
 3013 class SchedDataCalculator : public Pass
 3014 {
 3015 public:
 3016    SchedDataCalculator(const Target *targ) : targ(targ) { }
 3017 
 3018 private:
 3019    struct RegScores
 3020    {
 3021       struct Resource {
 3022          int st[DATA_FILE_COUNT]; // LD to LD delay 3
 3023          int ld[DATA_FILE_COUNT]; // ST to ST delay 3
 3024          int tex; // TEX to non-TEX delay 17 (0x11)
 3025          int sfu; // SFU to SFU delay 3 (except PRE-ops)
 3026          int imul; // integer MUL to MUL delay 3
 3027       } res;
 3028       struct ScoreData {
 3029          int r[256];
 3030          int p[8];
 3031          int c;
 3032       } rd, wr;
 3033       int base;
 3034       int regs;
 3035 
 3036       void rebase(const int base)
 3037       {
 3038          const int delta = this->base - base;
 3039          if (!delta)
 3040             return;
 3041          this->base = 0;
 3042 
 3043          for (int i = 0; i < regs; ++i) {
 3044             rd.r[i] += delta;
 3045             wr.r[i] += delta;
 3046          }
 3047          for (int i = 0; i < 8; ++i) {
 3048             rd.p[i] += delta;
 3049             wr.p[i] += delta;
 3050          }
 3051          rd.c += delta;
 3052          wr.c += delta;
 3053 
 3054          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
 3055             res.ld[f] += delta;
 3056             res.st[f] += delta;
 3057          }
 3058          res.sfu += delta;
 3059          res.imul += delta;
 3060          res.tex += delta;
 3061       }
 3062       void wipe(int regs)
 3063       {
 3064          memset(&rd, 0, sizeof(rd));
 3065          memset(&wr, 0, sizeof(wr));
 3066          memset(&res, 0, sizeof(res));
 3067          this->regs = regs;
 3068       }
 3069       int getLatest(const ScoreData& d) const
 3070       {
 3071          int max = 0;
 3072          for (int i = 0; i < regs; ++i)
 3073             if (d.r[i] > max)
 3074                max = d.r[i];
 3075          for (int i = 0; i < 8; ++i)
 3076             if (d.p[i] > max)
 3077                max = d.p[i];
 3078          if (d.c > max)
 3079             max = d.c;
 3080          return max;
 3081       }
 3082       inline int getLatestRd() const
 3083       {
 3084          return getLatest(rd);
 3085       }
 3086       inline int getLatestWr() const
 3087       {
 3088          return getLatest(wr);
 3089       }
 3090       inline int getLatest() const
 3091       {
 3092          const int a = getLatestRd();
 3093          const int b = getLatestWr();
 3094 
 3095          int max = MAX2(a, b);
 3096          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
 3097             max = MAX2(res.ld[f], max);
 3098             max = MAX2(res.st[f], max);
 3099          }
 3100          max = MAX2(res.sfu, max);
 3101          max = MAX2(res.imul, max);
 3102          max = MAX2(res.tex, max);
 3103          return max;
 3104       }
 3105       void setMax(const RegScores *that)
 3106       {
 3107          for (int i = 0; i < regs; ++i) {
 3108             rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
 3109             wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
 3110          }
 3111          for (int i = 0; i < 8; ++i) {
 3112             rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
 3113             wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
 3114          }
 3115          rd.c = MAX2(rd.c, that->rd.c);
 3116          wr.c = MAX2(wr.c, that->wr.c);
 3117 
 3118          for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
 3119             res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
 3120             res.st[f] = MAX2(res.st[f], that->res.st[f]);
 3121          }
 3122          res.sfu = MAX2(res.sfu, that->res.sfu);
 3123          res.imul = MAX2(res.imul, that->res.imul);
 3124          res.tex = MAX2(res.tex, that->res.tex);
 3125       }
 3126       void print(int cycle)
 3127       {
 3128          for (int i = 0; i < regs; ++i) {
 3129             if (rd.r[i] > cycle)
 3130                INFO("rd $r%i @ %i\n", i, rd.r[i]);
 3131             if (wr.r[i] > cycle)
 3132                INFO("wr $r%i @ %i\n", i, wr.r[i]);
 3133          }
 3134          for (int i = 0; i < 8; ++i) {
 3135             if (rd.p[i] > cycle)
 3136                INFO("rd $p%i @ %i\n", i, rd.p[i]);
 3137             if (wr.p[i] > cycle)
 3138                INFO("wr $p%i @ %i\n", i, wr.p[i]);
 3139          }
 3140          if (rd.c > cycle)
 3141             INFO("rd $c @ %i\n", rd.c);
 3142          if (wr.c > cycle)
 3143             INFO("wr $c @ %i\n", wr.c);
 3144          if (res.sfu > cycle)
 3145             INFO("sfu @ %i\n", res.sfu);
 3146          if (res.imul > cycle)
 3147             INFO("imul @ %i\n", res.imul);
 3148          if (res.tex > cycle)
 3149             INFO("tex @ %i\n", res.tex);
 3150       }
 3151    };
 3152 
 3153    RegScores *score; // for current BB
 3154    std::vector<RegScores> scoreBoards;
 3155    int prevData;
 3156    operation prevOp;
 3157 
 3158    const Target *targ;
 3159 
 3160    bool visit(Function *);
 3161    bool visit(BasicBlock *);
 3162 
 3163    void commitInsn(const Instruction *, int cycle);
 3164    int calcDelay(const Instruction *, int cycle) const;
 3165    void setDelay(Instruction *, int delay, Instruction *next);
 3166 
 3167    void recordRd(const Value *, const int ready);
 3168    void recordWr(const Value *, const int ready);
 3169    void checkRd(const Value *, int cycle, int& delay) const;
 3170    void checkWr(const Value *, int cycle, int& delay) const;
 3171 
 3172    int getCycles(const Instruction *, int origDelay) const;
 3173 };
 3174 
 3175 void
 3176 SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
 3177 {
 3178    if (insn->op == OP_EXIT || insn->op == OP_RET)
 3179       delay = MAX2(delay, 14);
 3180 
 3181    if (insn->op == OP_TEXBAR) {
 3182       // TODO: except if results not used before EXIT
 3183       insn->sched = 0xc2;
 3184    } else
 3185    if (insn->op == OP_JOIN || insn->join) {
 3186       insn->sched = 0x00;
 3187    } else
 3188    if (delay >= 0 || prevData == 0x04 ||
 3189        !next || !targ->canDualIssue(insn, next)) {
 3190       insn->sched = static_cast<uint8_t>(MAX2(delay, 0));
 3191       if (prevOp == OP_EXPORT)
 3192          insn->sched |= 0x40;
 3193       else
 3194          insn->sched |= 0x20;
 3195    } else {
 3196       insn->sched = 0x04; // dual-issue
 3197    }
 3198 
 3199    if (prevData != 0x04 || prevOp != OP_EXPORT)
 3200       if (insn->sched != 0x04 || insn->op == OP_EXPORT)
 3201          prevOp = insn->op;
 3202 
 3203    prevData = insn->sched;
 3204 }
 3205 
 3206 int
 3207 SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
 3208 {
 3209    if (insn->sched & 0x80) {
 3210       int c = (insn->sched & 0x0f) * 2 + 1;
 3211       if (insn->op == OP_TEXBAR && origDelay > 0)
 3212          c += origDelay;
 3213       return c;
 3214    }
 3215    if (insn->sched & 0x60)
 3216       return (insn->sched & 0x1f) + 1;
 3217    return (insn->sched == 0x04) ? 0 : 32;
 3218 }
 3219 
 3220 bool
 3221 SchedDataCalculator::visit(Function *func)
 3222 {
 3223    int regs = targ->getFileSize(FILE_GPR) + 1;
 3224    scoreBoards.resize(func->cfg.getSize());
 3225    for (size_t i = 0; i < scoreBoards.size(); ++i)
 3226       scoreBoards[i].wipe(regs);
 3227    return true;
 3228 }
 3229 
 3230 bool
 3231 SchedDataCalculator::visit(BasicBlock *bb)
 3232 {
 3233    Instruction *insn;
 3234    Instruction *next = NULL;
 3235 
 3236    int cycle = 0;
 3237 
 3238    prevData = 0x00;
 3239    prevOp = OP_NOP;
 3240    score = &scoreBoards.at(bb->getId());
 3241 
 3242    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
 3243       // back branches will wait until all target dependencies are satisfied
 3244       if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
 3245          continue;
 3246       BasicBlock *in = BasicBlock::get(ei.getNode());
 3247       if (in->getExit()) {
 3248          if (prevData != 0x04)
 3249             prevData = in->getExit()->sched;
 3250          prevOp = in->getExit()->op;
 3251       }
 3252       score->setMax(&scoreBoards.at(in->getId()));
 3253    }
 3254    if (bb->cfg.incidentCount() > 1)
 3255       prevOp = OP_NOP;
 3256 
 3257 #ifdef NVC0_DEBUG_SCHED_DATA
 3258    INFO("=== BB:%i initial scores\n", bb->getId());
 3259    score->print(cycle);
 3260 #endif
 3261 
 3262    for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
 3263       next = insn->next;
 3264 
 3265       commitInsn(insn, cycle);
 3266       int delay = calcDelay(next, cycle);
 3267       setDelay(insn, delay, next);
 3268       cycle += getCycles(insn, delay);
 3269 
 3270 #ifdef NVC0_DEBUG_SCHED_DATA
 3271       INFO("cycle %i, sched %02x\n", cycle, insn->sched);
 3272       insn->print();
 3273       next->print();
 3274 #endif
 3275    }
 3276    if (!insn)
 3277       return true;
 3278    commitInsn(insn, cycle);
 3279 
 3280    int bbDelay = -1;
 3281 
 3282    for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
 3283       BasicBlock *out = BasicBlock::get(ei.getNode());
 3284 
 3285       if (ei.getType() != Graph::Edge::BACK) {
 3286          // only test the first instruction of the outgoing block
 3287          next = out->getEntry();
 3288          if (next)
 3289             bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
 3290       } else {
 3291          // wait until all dependencies are satisfied
 3292          const int regsFree = score->getLatest();
 3293          next = out->getFirst();
 3294          for (int c = cycle; next && c < regsFree; next = next->next) {
 3295             bbDelay = MAX2(bbDelay, calcDelay(next, c));
 3296             c += getCycles(next, bbDelay);
 3297          }
 3298          next = NULL;
 3299       }
 3300    }
 3301    if (bb->cfg.outgoingCount() != 1)
 3302       next = NULL;
 3303    setDelay(insn, bbDelay, next);
 3304    cycle += getCycles(insn, bbDelay);
 3305 
 3306    score->rebase(cycle); // common base for initializing out blocks' scores
 3307    return true;
 3308 }
 3309 
 3310 #define NVE4_MAX_ISSUE_DELAY 0x1f
 3311 int
 3312 SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
 3313 {
 3314    int delay = 0, ready = cycle;
 3315 
 3316    for (int s = 0; insn->srcExists(s); ++s)
 3317       checkRd(insn->getSrc(s), cycle, delay);
 3318    // WAR & WAW don't seem to matter
 3319    // for (int s = 0; insn->srcExists(s); ++s)
 3320    //   recordRd(insn->getSrc(s), cycle);
 3321 
 3322    switch (Target::getOpClass(insn->op)) {
 3323    case OPCLASS_SFU:
 3324       ready = score->res.sfu;
 3325       break;
 3326    case OPCLASS_ARITH:
 3327       if (insn->op == OP_MUL && !isFloatType(insn->dType))
 3328          ready = score->res.imul;
 3329       break;
 3330    case OPCLASS_TEXTURE:
 3331       ready = score->res.tex;
 3332       break;
 3333    case OPCLASS_LOAD:
 3334       ready = score->res.ld[insn->src(0).getFile()];
 3335       break;
 3336    case OPCLASS_STORE:
 3337       ready = score->res.st[insn->src(0).getFile()];
 3338       break;
 3339    default:
 3340       break;
 3341    }
 3342    if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
 3343       ready = MAX2(ready, score->res.tex);
 3344 
 3345    delay = MAX2(delay, ready - cycle);
 3346 
 3347    // if can issue next cycle, delay is 0, not 1
 3348    return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
 3349 }
 3350 
 3351 void
 3352 SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
 3353 {
 3354    const int ready = cycle + targ->getLatency(insn);
 3355 
 3356    for (int d = 0; insn->defExists(d); ++d)
 3357       recordWr(insn->getDef(d), ready);
 3358    // WAR & WAW don't seem to matter
 3359    // for (int s = 0; insn->srcExists(s); ++s)
 3360    //   recordRd(insn->getSrc(s), cycle);
 3361 
 3362    switch (Target::getOpClass(insn->op)) {
 3363    case OPCLASS_SFU:
 3364       score->res.sfu = cycle + 4;
 3365       break;
 3366    case OPCLASS_ARITH:
 3367       if (insn->op == OP_MUL && !isFloatType(insn->dType))
 3368          score->res.imul = cycle + 4;
 3369       break;
 3370    case OPCLASS_TEXTURE:
 3371       score->res.tex = cycle + 18;
 3372       break;
 3373    case OPCLASS_LOAD:
 3374       if (insn->src(0).getFile() == FILE_MEMORY_CONST)
 3375          break;
 3376       score->res.ld[insn->src(0).getFile()] = cycle + 4;
 3377       score->res.st[insn->src(0).getFile()] = ready;
 3378       break;
 3379    case OPCLASS_STORE:
 3380       score->res.st[insn->src(0).getFile()] = cycle + 4;
 3381       score->res.ld[insn->src(0).getFile()] = ready;
 3382       break;
 3383    case OPCLASS_OTHER:
 3384       if (insn->op == OP_TEXBAR)
 3385          score->res.tex = cycle;
 3386       break;
 3387    default:
 3388       break;
 3389    }
 3390 
 3391 #ifdef NVC0_DEBUG_SCHED_DATA
 3392    score->print(cycle);
 3393 #endif
 3394 }
 3395 
 3396 void
 3397 SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
 3398 {
 3399    int ready = cycle;
 3400    int a, b;
 3401 
 3402    switch (v->reg.file) {
 3403    case FILE_GPR:
 3404       a = v->reg.data.id;
 3405       b = a + v->reg.size / 4;
 3406       for (int r = a; r < b; ++r)
 3407          ready = MAX2(ready, score->rd.r[r]);
 3408       break;
 3409    case FILE_PREDICATE:
 3410       ready = MAX2(ready, score->rd.p[v->reg.data.id]);
 3411       break;
 3412    case FILE_FLAGS:
 3413       ready = MAX2(ready, score->rd.c);
 3414       break;
 3415    case FILE_SHADER_INPUT:
 3416    case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
 3417    case FILE_MEMORY_LOCAL:
 3418    case FILE_MEMORY_CONST:
 3419    case FILE_MEMORY_SHARED:
 3420    case FILE_MEMORY_GLOBAL:
 3421    case FILE_SYSTEM_VALUE:
 3422       // TODO: any restrictions here ?
 3423       break;
 3424    case FILE_IMMEDIATE:
 3425       break;
 3426    default:
 3427       assert(0);
 3428       break;
 3429    }
 3430    if (cycle < ready)
 3431       delay = MAX2(delay, ready - cycle);
 3432 }
 3433 
 3434 void
 3435 SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
 3436 {
 3437    int ready = cycle;
 3438    int a, b;
 3439 
 3440    switch (v->reg.file) {
 3441    case FILE_GPR:
 3442       a = v->reg.data.id;
 3443       b = a + v->reg.size / 4;
 3444       for (int r = a; r < b; ++r)
 3445          ready = MAX2(ready, score->wr.r[r]);
 3446       break;
 3447    case FILE_PREDICATE:
 3448       ready = MAX2(ready, score->wr.p[v->reg.data.id]);
 3449       break;
 3450    default:
 3451       assert(v->reg.file == FILE_FLAGS);
 3452       ready = MAX2(ready, score->wr.c);
 3453       break;
 3454    }
 3455    if (cycle < ready)
 3456       delay = MAX2(delay, ready - cycle);
 3457 }
 3458 
 3459 void
 3460 SchedDataCalculator::recordWr(const Value *v, const int ready)
 3461 {
 3462    int a = v->reg.data.id;
 3463 
 3464    if (v->reg.file == FILE_GPR) {
 3465       int b = a + v->reg.size / 4;
 3466       for (int r = a; r < b; ++r)
 3467          score->rd.r[r] = ready;
 3468    } else
 3469    // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
 3470    if (v->reg.file == FILE_PREDICATE) {
 3471       score->rd.p[a] = ready + 4;
 3472    } else {
 3473       assert(v->reg.file == FILE_FLAGS);
 3474       score->rd.c = ready + 4;
 3475    }
 3476 }
 3477 
 3478 void
 3479 SchedDataCalculator::recordRd(const Value *v, const int ready)
 3480 {
 3481    int a = v->reg.data.id;
 3482 
 3483    if (v->reg.file == FILE_GPR) {
 3484       int b = a + v->reg.size / 4;
 3485       for (int r = a; r < b; ++r)
 3486          score->wr.r[r] = ready;
 3487    } else
 3488    if (v->reg.file == FILE_PREDICATE) {
 3489       score->wr.p[a] = ready;
 3490    } else
 3491    if (v->reg.file == FILE_FLAGS) {
 3492       score->wr.c = ready;
 3493    }
 3494 }
 3495 
 3496 bool
 3497 calculateSchedDataNVC0(const Target *targ, Function *func)
 3498 {
 3499    SchedDataCalculator sched(targ);
 3500    return sched.run(func, true, true);
 3501 }
 3502 
 3503 void
 3504 CodeEmitterNVC0::prepareEmission(Function *func)
 3505 {
 3506    CodeEmitter::prepareEmission(func);
 3507 
 3508    if (targ->hasSWSched)
 3509       calculateSchedDataNVC0(targ, func);
 3510 }
 3511 
 3512 CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
 3513    : CodeEmitter(target),
 3514      targNVC0(target),
 3515      writeIssueDelays(target->hasSWSched)
 3516 {
 3517    code = NULL;
 3518    codeSize = codeSizeLimit = 0;
 3519    relocInfo = NULL;
 3520 }
 3521 
 3522 CodeEmitter *
 3523 TargetNVC0::createCodeEmitterNVC0(Program::Type type)
 3524 {
 3525    CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
 3526    emit->setProgramType(type);
 3527    return emit;
 3528 }
 3529 
 3530 CodeEmitter *
 3531 TargetNVC0::getCodeEmitter(Program::Type type)
 3532 {
 3533    if (chipset >= NVISA_GK20A_CHIPSET)
 3534       return createCodeEmitterGK110(type);
 3535    return createCodeEmitterNVC0(type);
 3536 }
 3537 
 3538 } // namespace nv50_ir