"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp" (16 Sep 2020, 48136 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "sfn_ir_to_assembly.cpp" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /* -*- mesa-c++  -*-
    2  *
    3  * Copyright (c) 2018 Collabora LTD
    4  *
    5  * Author: Gert Wollny <gert.wollny@collabora.com>
    6  *
    7  * Permission is hereby granted, free of charge, to any person obtaining a
    8  * copy of this software and associated documentation files (the "Software"),
    9  * to deal in the Software without restriction, including without limitation
   10  * on the rights to use, copy, modify, merge, publish, distribute, sub
   11  * license, and/or sell copies of the Software, and to permit persons to whom
   12  * the Software is furnished to do so, subject to the following conditions:
   13  *
   14  * The above copyright notice and this permission notice (including the next
   15  * paragraph) shall be included in all copies or substantial portions of the
   16  * Software.
   17  *
   18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
   21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
   22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
   25  */
   26 
   27 #include "sfn_ir_to_assembly.h"
   28 #include "sfn_conditionaljumptracker.h"
   29 #include "sfn_callstack.h"
   30 #include "sfn_instruction_gds.h"
   31 #include "sfn_instruction_misc.h"
   32 #include "sfn_instruction_fetch.h"
   33 #include "sfn_instruction_lds.h"
   34 
   35 #include "../r600_shader.h"
   36 #include "../r600_sq.h"
   37 
   38 namespace r600 {
   39 
   40 using std::vector;
   41 
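/* Helper that lowers the sfn IR instruction stream to r600 bytecode. It wraps
 * the r600_bytecode emission functions and keeps the per-shader state that has
 * to survive between instructions (pending address register, literal count of
 * the current group, jump tracker and call stack). */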
   42 struct AssemblyFromShaderLegacyImpl {
   43 
   44    AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);
   45    bool emit(const Instruction::Pointer i);
   46    void reset_addr_register() {m_last_addr.reset();}
   47 
   48 private:
   49    bool emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op);
   50    bool emit_export(const ExportInstruction & exi);
   51    bool emit_streamout(const StreamOutIntruction& instr);
   52    bool emit_memringwrite(const MemRingOutIntruction& instr);
   53    bool emit_tex(const TexInstruction & tex_instr);
   54    bool emit_vtx(const FetchInstruction& fetch_instr);
   55    bool emit_if_start(const IfInstruction & if_instr);
   56    bool emit_else(const ElseInstruction & else_instr);
   57    bool emit_endif(const IfElseEndInstruction & endif_instr);
   58    bool emit_emit_vertex(const EmitVertex &instr);
   59 
   60    bool emit_loop_begin(const LoopBeginInstruction& instr);
   61    bool emit_loop_end(const LoopEndInstruction& instr);
   62    bool emit_loop_break(const LoopBreakInstruction& instr);
   63    bool emit_loop_continue(const LoopContInstruction& instr);
   64    bool emit_wait_ack(const WaitAck& instr);
   65    bool emit_wr_scratch(const WriteScratchInstruction& instr);
   66    bool emit_gds(const GDSInstr& instr);
   67    bool emit_rat(const RatInstruction& instr);
   68    bool emit_ldswrite(const LDSWriteInstruction& instr);
   69    bool emit_ldsread(const LDSReadInstruction& instr);
   70    bool emit_tf_write(const GDSStoreTessFactor& instr);
   71 
   72    bool emit_load_addr(PValue addr);
   73    bool emit_fs_pixel_export(const ExportInstruction & exi);
   74    bool emit_vs_pos_export(const ExportInstruction & exi);
   75    bool emit_vs_param_export(const ExportInstruction & exi);
   76    bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
   77    bool copy_src(r600_bytecode_alu_src& src, const Value& s);
   78 
   79 
   80 
   81    ConditionalJumpTracker m_jump_tracker;
   82    CallStack m_callstack;
   83 
   84 public:
   85    r600_bytecode *m_bc;
   86    r600_shader *m_shader;
   87    r600_shader_key *m_key;
   88    r600_bytecode_output m_output;
   89    unsigned m_max_color_exports;
   90    bool has_pos_output;
   91    bool has_param_output;
   92    PValue m_last_addr;
   93    int m_loop_nesting;
   94    int m_nliterals_in_group;
   95    std::set<int> vtx_fetch_results;
   96 };
   97 
   98 
   99 AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
  100                                                    r600_shader_key *key)
  101 {
  102    impl = new AssemblyFromShaderLegacyImpl(sh, key);
  103 }
  104 
  105 AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
  106 {
  107    delete impl;
  108 }
  109 
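/* Walk all IR blocks, emit every instruction, and make sure the program ends
 * in a CF instruction that can carry the end-of-program bit. */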
  110 bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir)
  111 {
  112    if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
  113        impl->m_shader->ninput > 0)
  114          r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);
  115 
  116 
  117    std::vector<Instruction::Pointer> exports;
  118 
  119    for (const auto& block : ir) {
  120       for (const auto& i : block) {
  121          if (!impl->emit(i))
  122             return false;
  123          if (i->type() != Instruction::alu)
  124             impl->reset_addr_register();
  125       }
  126    }
  127    /*
  128    for (const auto& i : exports) {
  129       if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
  130           return false;
  131    }*/
  132 
  133 
  134    const struct cf_op_info *last = nullptr;
  135    if (impl->m_bc->cf_last)
  136       last = r600_isa_cf(impl->m_bc->cf_last->op);
  137 
  138    /* ALU clause instructions don't have an EOP bit, so add a NOP */
  139    if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
  140        || impl->m_bc->cf_last->op == CF_OP_POP)
  141       r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);
  142 
  143    /* A fetch shader call alone can't carry the EOP bit (that results in a
  144     * hang), but we can replace it by a NOP */
  145    else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
  146       impl->m_bc->cf_last->op = CF_OP_NOP;
  147 
  148    impl->m_bc->cf_last->end_of_program = 1;
  149 
  150    return true;
  151 }
  152 
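/* Dispatch one IR instruction to the matching emit_* method. Any non-fetch
 * instruction invalidates the record of pending vertex fetch results. */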
  153 bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i)
  154 {
  155    if (i->type() != Instruction::vtx)
  156        vtx_fetch_results.clear();
  157 
  158    sfn_log << SfnLog::assembly << "Emit from '" << *i << "'\n";
  159    switch (i->type()) {
  160    case Instruction::alu:
  161       return emit_alu(static_cast<const AluInstruction&>(*i), cf_alu_undefined);
  162    case Instruction::exprt:
  163       return emit_export(static_cast<const ExportInstruction&>(*i));
  164    case Instruction::tex:
  165       return emit_tex(static_cast<const TexInstruction&>(*i));
  166    case Instruction::vtx:
  167       return emit_vtx(static_cast<const FetchInstruction&>(*i));
  168    case Instruction::cond_if:
  169       return emit_if_start(static_cast<const IfInstruction&>(*i));
  170    case Instruction::cond_else:
  171       return emit_else(static_cast<const ElseInstruction&>(*i));
  172    case Instruction::cond_endif:
  173       return emit_endif(static_cast<const IfElseEndInstruction&>(*i));
  174    case Instruction::loop_begin:
  175       return emit_loop_begin(static_cast<const LoopBeginInstruction&>(*i));
  176    case Instruction::loop_end:
  177       return emit_loop_end(static_cast<const LoopEndInstruction&>(*i));
  178    case Instruction::loop_break:
  179       return emit_loop_break(static_cast<const LoopBreakInstruction&>(*i));
  180    case Instruction::loop_continue:
  181       return emit_loop_continue(static_cast<const LoopContInstruction&>(*i));
  182    case Instruction::streamout:
  183       return emit_streamout(static_cast<const StreamOutIntruction&>(*i));
  184    case Instruction::ring:
  185       return emit_memringwrite(static_cast<const MemRingOutIntruction&>(*i));
  186    case Instruction::emit_vtx:
  187       return emit_emit_vertex(static_cast<const EmitVertex&>(*i));
  188    case Instruction::wait_ack:
  189       return emit_wait_ack(static_cast<const WaitAck&>(*i));
  190    case Instruction::mem_wr_scratch:
  191       return emit_wr_scratch(static_cast<const WriteScratchInstruction&>(*i));
  192    case Instruction::gds:
  193       return emit_gds(static_cast<const GDSInstr&>(*i));
  194    case Instruction::rat:
  195       return emit_rat(static_cast<const RatInstruction&>(*i));
  196    case Instruction::lds_write:
  197       return emit_ldswrite(static_cast<const LDSWriteInstruction&>(*i));
  198    case Instruction::lds_read:
  199       return emit_ldsread(static_cast<const LDSReadInstruction&>(*i));
  200    case Instruction::tf_write:
  201       return emit_tf_write(static_cast<const GDSStoreTessFactor&>(*i));
  202    default:
  203       return false;
  204    }
  205 }
  206 
  207 AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
  208                                                            r600_shader_key *key):
  209    m_callstack(sh->bc),
  210    m_bc(&sh->bc),
  211    m_shader(sh),
  212    m_key(key),
  213    has_pos_output(false),
  214    has_param_output(false),
  215    m_loop_nesting(0),
  216    m_nliterals_in_group(0)
  217 {
  218    m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);
  219 }
  220 
  221 extern const std::map<EAluOp, int> opcode_map;
  222 
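/* Record 'addr' as the pending address register source; clearing ar_loaded
 * tells the r600 assembler that AR has to be (re)loaded before the next use. */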
  223 bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
  224 {
  225    m_bc->ar_reg = addr->sel();
  226    m_bc->ar_chan = addr->chan();
  227    m_bc->ar_loaded = 0;
  228    m_last_addr = addr;
  229 
  230    sfn_log << SfnLog::assembly << "   Prepare " << *addr << " to address register\n";
  231 
  232    return true;
  233 }
  234 
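/* Lower one ALU IR instruction to an r600_bytecode_alu slot: enforce the
 * literal-per-group limit, handle indirect (AR-relative) GPR array access,
 * and select the CF opcode that owns the ALU clause. */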
  235 bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op)
  236 {
  237 
  238    struct r600_bytecode_alu alu;
  239    memset(&alu, 0, sizeof(alu));
  240    PValue addr_in_use;
  241 
  242    if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
  243       std::cerr << "Opcode not handled for " << ai <<"\n";
  244       return false;
  245    }
  246 
  247    for (unsigned i = 0; i < ai.n_sources(); ++i) {
  248       auto& s = ai.src(i);
  249       if (s.type() == Value::literal)
  250          ++m_nliterals_in_group;
  251    }
  252 
  253    /* This instruction group would exceed the limit of literals, so
  254     * force a new instruction group by adding a NOP as the last
  255     * instruction. This will no longer be needed with a real
  256     * scheduler */
  257    if (m_nliterals_in_group > 4) {
  258       sfn_log << SfnLog::assembly << "  Have " << m_nliterals_in_group << " literals, inject a NOP to start a new group\n";
  259       alu.op = ALU_OP0_NOP;
  260       alu.last = 1;
  261       int retval = r600_bytecode_add_alu(m_bc, &alu);
  262       if (retval)
  263          return false;
  264       memset(&alu, 0, sizeof(alu));
  265       m_nliterals_in_group = 0;
  266    }
  267 
  268    alu.op = opcode_map.at(ai.opcode());
  269 
  270    /* Missing test whether ai actually has a dest */
  271    auto dst = ai.dest();
  272 
  273    if (dst) {
  274       if (!copy_dst(alu.dst, *dst))
  275          return false;
  276 
  277       alu.dst.write = ai.flag(alu_write);
  278       alu.dst.clamp = ai.flag(alu_dst_clamp);
  279 
  280       if (dst->type() == Value::gpr_array_value) {
  281          auto& v = static_cast<const GPRArrayValue&>(*dst);
  282          PValue addr = v.indirect();
  283          if (addr) {
  284             if (!m_last_addr || *addr != *m_last_addr) {
  285                emit_load_addr(addr);
  286                addr_in_use = addr;
  287             }
  288             alu.dst.rel = addr ? 1 : 0;
  289          }
  290       }
  291    }
  292 
  293    alu.is_op3 = ai.n_sources() == 3;
  294 
  295    for (unsigned i = 0; i < ai.n_sources(); ++i) {
  296       auto& s = ai.src(i);
  297 
  298       if (!copy_src(alu.src[i], s))
  299          return false;
  300       alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);
  301 
  302       if (s.type() == Value::gpr_array_value) {
  303          auto& v = static_cast<const GPRArrayValue&>(s);
  304          PValue addr = v.indirect();
  305          if (addr) {
  306             assert(!addr_in_use || (*addr_in_use == *addr));
  307             if (!m_last_addr || *addr != *m_last_addr) {
  308                emit_load_addr(addr);
  309                addr_in_use = addr;
  310             }
  311             alu.src[i].rel = addr ? 1 : 0;
  312          }
  313       }
  314       if (!alu.is_op3)
  315          alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
  316    }
  317 
  318    if (ai.bank_swizzle() != alu_vec_unknown)
  319       alu.bank_swizzle_force = ai.bank_swizzle();
  320 
  321    alu.last = ai.flag(alu_last_instr);
  322    alu.update_pred = ai.flag(alu_update_pred);
  323    alu.execute_mask = ai.flag(alu_update_exec);
  324 
  325    /* If the destination register is equal to the last loaded address register
  326     * then clear the latter one, because the values will no longer be identical */
  327    if (m_last_addr)
  328       sfn_log << SfnLog::assembly << "  Current address register is " << *m_last_addr << "\n";
  329 
  330    if (dst)
  331       sfn_log << SfnLog::assembly << "  Current dst register is " << *dst << "\n";
  332 
  333    if (dst && m_last_addr)
  334       if (*dst == *m_last_addr) {
  335          sfn_log << SfnLog::assembly << "  Clear address register (was " << *m_last_addr << ")\n";
  336          m_last_addr.reset();
  337       }
  338 
  339    if (cf_op == cf_alu_undefined)
  340       cf_op = ai.cf_type();
  341 
  342    unsigned type = 0;
  343    switch (cf_op) {
  344    case cf_alu: type = CF_OP_ALU; break;
  345    case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
  346    case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
  347    case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
  348    case cf_alu_break: type = CF_OP_ALU_BREAK; break;
  349    case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
  350    case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
  351    case cf_alu_extended: type = CF_OP_ALU_EXT; break;
  352    default:
  353       assert(0 && "cf_alu_undefined should have been replaced");
  354    }
  355 
  356    if (alu.last)
  357       m_nliterals_in_group = 0;
  358 
  359    bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);
  360 
  361    if (ai.opcode() == op1_mova_int)
  362       m_bc->ar_loaded = 0;
  363 
  364    if (ai.opcode() == op1_set_cf_idx0)
  365       m_bc->index_loaded[0] = 1;
  366 
  367    if (ai.opcode() == op1_set_cf_idx1)
  368       m_bc->index_loaded[1] = 1;
  369 
  370 
  371    m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
  372                           ai.opcode() == op2_killne_int ||
  373                           ai.opcode() == op1_set_cf_idx0 ||
  374                           ai.opcode() == op1_set_cf_idx1);
  375    return retval;
  376 }
  377 
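/* Position exports are written to array_base 60 + location. */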
  378 bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
  379 {
  380    r600_bytecode_output output;
  381    memset(&output, 0, sizeof(output));
  382    assert(exi.gpr().type() == Value::gpr_vector);
  383    const auto& gpr = exi.gpr();
  384    output.gpr = gpr.sel();
  385    output.elem_size = 3;
  386    output.swizzle_x = gpr.chan_i(0);
  387    output.swizzle_y = gpr.chan_i(1);
  388    output.swizzle_z = gpr.chan_i(2);
  389    output.swizzle_w = gpr.chan_i(3);
  390    output.burst_count = 1;
  391    output.array_base = 60 + exi.location();
  392    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
  393    output.type = exi.export_type();
  394 
  395 
  396    if (r600_bytecode_add_output(m_bc, &output)) {
  397       R600_ERR("Error adding position export at location %d\n", exi.location());
  398       return false;
  399    }
  400 
  401    return true;
  402 }
  403 
  404 
  405 bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
  406 {
  407    r600_bytecode_output output;
  408    assert(exi.gpr().type() == Value::gpr_vector);
  409    const auto& gpr = exi.gpr();
  410 
  411    memset(&output, 0, sizeof(output));
  412    output.gpr = gpr.sel();
  413    output.elem_size = 3;
  414    output.swizzle_x = gpr.chan_i(0);
  415    output.swizzle_y = gpr.chan_i(1);
  416    output.swizzle_z = gpr.chan_i(2);
  417    output.swizzle_w = gpr.chan_i(3);
  418    output.burst_count = 1;
  419    output.array_base = exi.location();
  420    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
  421    output.type = exi.export_type();
  422 
  423 
  424    if (r600_bytecode_add_output(m_bc, &output)) {
  425       R600_ERR("Error adding parameter export at location %d\n", exi.location());
  426       return false;
  427    }
  428 
  429    return true;
  430 }
  431 
  432 
  433 bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
  434 {
  435    if (exi.location() >= m_max_color_exports && exi.location()  < 60) {
  436       R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
  437                exi.location(), m_max_color_exports);
  438       return true;
  439    }
  440 
  441    assert(exi.gpr().type() == Value::gpr_vector);
  442    const auto& gpr = exi.gpr();
  443 
  444    r600_bytecode_output output;
  445    memset(&output, 0, sizeof(output));
  446 
  447    output.gpr = gpr.sel();
  448    output.elem_size = 3;
  449    output.swizzle_x = gpr.chan_i(0);
  450    output.swizzle_y = gpr.chan_i(1);
  451    output.swizzle_z = gpr.chan_i(2);
  452    output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3);
  453    output.burst_count = 1;
  454    output.array_base = exi.location();
  455    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
  456    output.type = exi.export_type();
  457 
  458 
  459    if (r600_bytecode_add_output(m_bc, &output)) {
  460       R600_ERR("Error adding pixel export at location %d\n", exi.location());
  461       return false;
  462    }
  463 
  464    return true;
  465 }
  466 
  467 
  468 bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction & exi)
  469 {
  470    switch (exi.export_type()) {
  471    case ExportInstruction::et_pixel:
  472       return emit_fs_pixel_export(exi);
  473    case ExportInstruction::et_pos:
  474       return emit_vs_pos_export(exi);
  475    case ExportInstruction::et_param:
  476       return emit_vs_param_export(exi);
  477    default:
  478       R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
  479       return false;
  480    }
  481 }
  482 
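/* Open an 'if': emit the predicate ALU with push semantics plus a JUMP whose
 * target is patched later by the jump tracker. On some Evergreen parts a
 * stack-size corner case requires an explicit CF_OP_PUSH instead of
 * ALU_PUSH_BEFORE (the needs_workaround path). */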
  483 bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr)
  484 {
  485    assert(m_bc->chip_class == EVERGREEN);
  486 
  487    bool needs_workaround = false;
  488    int elems = m_callstack.push(FC_PUSH_VPM);
  489 
  490    if (m_bc->family != CHIP_HEMLOCK &&
  491        m_bc->family != CHIP_CYPRESS &&
  492        m_bc->family != CHIP_JUNIPER) {
  493       unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
  494       unsigned dmod2 = elems % m_bc->stack.entry_size;
  495 
  496       if (elems && (!dmod1 || !dmod2))
  497          needs_workaround = true;
  498    }
  499 
  500    auto& pred = if_instr.pred();
  501    auto op = cf_alu_push_before;
  502 
  503    if (needs_workaround) {
  504       r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
  505       m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
  506       op = cf_alu;
  507    }
  508    emit_alu(pred, op);
  509 
  510    r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
  511 
  512    m_jump_tracker.push(m_bc->cf_last, jt_if);
  513    return true;
  514 }
  515 
  516 bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED const ElseInstruction & else_instr)
  517 {
  518    r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
  519    m_bc->cf_last->pop_count = 1;
  520    return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
  521 }
  522 
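/* Close an 'if': fold the required POP into the preceding ALU clause as
 * ALU_POP_AFTER / ALU_POP2_AFTER when possible, otherwise emit an explicit
 * CF_OP_POP. */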
  523 bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED const IfElseEndInstruction & endif_instr)
  524 {
  525    m_callstack.pop(FC_PUSH_VPM);
  526 
  527    unsigned force_pop = m_bc->force_add_cf;
  528    if (!force_pop) {
  529       int alu_pop = 3;
  530       if (m_bc->cf_last) {
  531          if (m_bc->cf_last->op == CF_OP_ALU)
  532             alu_pop = 0;
  533          else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
  534             alu_pop = 1;
  535       }
  536       alu_pop += 1;
  537       if (alu_pop == 1) {
  538          m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
  539          m_bc->force_add_cf = 1;
  540       } else if (alu_pop == 2) {
  541          m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
  542          m_bc->force_add_cf = 1;
  543       } else {
  544          force_pop = 1;
  545       }
  546    }
  547 
  548    if (force_pop) {
  549       r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
  550       m_bc->cf_last->pop_count = 1;
  551       m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
  552    }
  553 
  554    return m_jump_tracker.pop(m_bc->cf_last, jt_if);
  555 }
  556 
  557 bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED const LoopBeginInstruction& instr)
  558 {
  559    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
  560    m_jump_tracker.push(m_bc->cf_last, jt_loop);
  561    m_callstack.push(FC_LOOP);
  562    ++m_loop_nesting;
  563    return true;
  564 }
  565 
  566 bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED const LoopEndInstruction& instr)
  567 {
  568    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
  569    m_callstack.pop(FC_LOOP);
  570    assert(m_loop_nesting);
  571    --m_loop_nesting;
  572    return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
  573 }
  574 
  575 bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED const LoopBreakInstruction& instr)
  576 {
  577    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
  578    return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
  579 }
  580 
  581 bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED const LoopContInstruction &instr)
  582 {
  583    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
  584    return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
  585 }
  586 
  587 bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction& so_instr)
  588 {
  589    struct r600_bytecode_output output;
  590    memset(&output, 0, sizeof(struct r600_bytecode_output));
  591 
  592    output.gpr = so_instr.gpr().sel();
  593    output.elem_size = so_instr.element_size();
  594    output.array_base = so_instr.array_base();
  595    output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
  596    output.burst_count = so_instr.burst_count();
  597    output.array_size = so_instr.array_size();
  598    output.comp_mask = so_instr.comp_mask();
  599    output.op = so_instr.op();
  600 
  601    assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
  602 
  603 
  604    if (r600_bytecode_add_output(m_bc, &output))  {
  605       R600_ERR("shader_from_nir: Error creating stream output instruction\n");
  606       return false;
  607    }
  608    return true;
  609 }
  610 
  611 
  612 bool AssemblyFromShaderLegacyImpl::emit_memringwrite(const MemRingOutIntruction& instr)
  613 {
  614    struct r600_bytecode_output output;
  615    memset(&output, 0, sizeof(struct r600_bytecode_output));
  616 
  617    output.gpr = instr.gpr().sel();
  618    output.type = instr.type();
  619    output.elem_size = instr.ncomp();
  620    output.comp_mask = 0xF;
  621    output.burst_count = 1;
  622    output.op = instr.op();
  623    if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
  624       output.index_gpr = instr.index_reg();
  625       output.array_size = 0xfff;
  626    }
  627    output.array_base = instr.array_base();
  628 
  629    if (r600_bytecode_add_output(m_bc, &output)) {
  630       R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
  631       return false;
  632    }
  633    return true;
  634 }
  635 
  636 
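/* Emit a texture instruction. A non-literal sampler offset is first loaded
 * via MOVA_INT + SET_CF_IDX1 so that sampler and resource can be addressed
 * through CF index register 1. */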
  637 bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction & tex_instr)
  638 {
  639    auto addr = tex_instr.sampler_offset();
  640    if (addr && (!m_bc->index_loaded[1] || m_loop_nesting
  641                 ||  m_bc->index_reg[1] != addr->sel())) {
  642       struct r600_bytecode_alu alu;
  643       memset(&alu, 0, sizeof(alu));
  644       alu.op = opcode_map.at(op1_mova_int);
  645       alu.dst.chan = 0;
  646       alu.src[0].sel = addr->sel();
  647       alu.src[0].chan = addr->chan();
  648       alu.last = 1;
  649       int r = r600_bytecode_add_alu(m_bc, &alu);
  650       if (r)
  651          return false;
  652 
  653       m_bc->ar_loaded = 0;
  654 
  655       alu.op = opcode_map.at(op1_set_cf_idx1);
  656       alu.dst.chan = 0;
  657       alu.src[0].sel = 0;
  658       alu.src[0].chan = 0;
  659       alu.last = 1;
  660 
  661       r = r600_bytecode_add_alu(m_bc, &alu);
  662       if (r)
  663          return false;
  664 
  665       m_bc->index_reg[1] = addr->sel();
  666       m_bc->index_loaded[1] = true;
  667    }
  668 
  669    r600_bytecode_tex tex;
  670    memset(&tex, 0, sizeof(struct r600_bytecode_tex));
  671    tex.op = tex_instr.opcode();
  672    tex.sampler_id = tex_instr.sampler_id();
  673    tex.sampler_index_mode = 0;
  674    tex.resource_id = tex_instr.resource_id();
  675    tex.resource_index_mode = 0;
  676    tex.src_gpr = tex_instr.src().sel();
  677    tex.dst_gpr = tex_instr.dst().sel();
  678    tex.dst_sel_x = tex_instr.dest_swizzle(0);
  679    tex.dst_sel_y = tex_instr.dest_swizzle(1);
  680    tex.dst_sel_z = tex_instr.dest_swizzle(2);
  681    tex.dst_sel_w = tex_instr.dest_swizzle(3);
  682    tex.src_sel_x = tex_instr.src().chan_i(0);
  683    tex.src_sel_y = tex_instr.src().chan_i(1);
  684    tex.src_sel_z = tex_instr.src().chan_i(2);
  685    tex.src_sel_w = tex_instr.src().chan_i(3);
  686    tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
  687    tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
  688    tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
  689    tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
  690    tex.offset_x = tex_instr.get_offset(0);
  691    tex.offset_y = tex_instr.get_offset(1);
  692    tex.offset_z = tex_instr.get_offset(2);
  693    tex.resource_index_mode = (!!addr) ? 2 : 0;
  694    tex.sampler_index_mode = tex.resource_index_mode;
  695 
  696    if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
  697        tex_instr.opcode() == TexInstruction::get_gradient_v)
  698       tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
  699    else
  700       tex.inst_mod = tex_instr.inst_mode();
  701    if (r600_bytecode_add_tex(m_bc, &tex)) {
  702       R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
  703       return false;
  704    }
  705    return true;
  706 }
  707 
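/* Emit a vertex/buffer fetch. A literal buffer offset is folded into the
 * buffer id, a dynamic one is loaded into CF index register 0. A new CF
 * clause is forced when a fetch would read a GPR that was written by an
 * earlier fetch in the same clause (tracked in vtx_fetch_results). */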
  708 bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr)
  709 {
  710    int buffer_offset = 0;
  711    auto addr = fetch_instr.buffer_offset();
  712    auto index_mode = fetch_instr.buffer_index_mode();
  713 
  714    if (addr) {
  715       if (addr->type() == Value::literal) {
  716          const auto& boffs = static_cast<const LiteralValue&>(*addr);
  717          buffer_offset = boffs.value();
  718       } else {
  719          index_mode = bim_zero;
  720          if ((!m_bc->index_loaded[0] || m_loop_nesting  ||  m_bc->index_reg[0] != addr->sel())) {
  721             struct r600_bytecode_alu alu;
  722             memset(&alu, 0, sizeof(alu));
  723             alu.op = opcode_map.at(op1_mova_int);
  724             alu.dst.chan = 0;
  725             alu.src[0].sel = addr->sel();
  726             alu.src[0].chan = addr->chan();
  727             alu.last = 1;
  728             int r = r600_bytecode_add_alu(m_bc, &alu);
  729             if (r)
  730                return false;
  731 
  732             m_bc->ar_loaded = 0;
  733 
  734             alu.op = opcode_map.at(op1_set_cf_idx0);
  735             alu.dst.chan = 0;
  736             alu.src[0].sel = 0;
  737             alu.src[0].chan = 0;
  738             alu.last = 1;
  739 
  740             r = r600_bytecode_add_alu(m_bc, &alu);
  741             if (r)
  742                return false;
  743 
  744             m_bc->index_reg[0] = addr->sel();
  745             m_bc->index_loaded[0] = true;
  746          }
  747       }
  748    }
  749 
  750    if (fetch_instr.has_prelude()) {
  751       for(auto &i : fetch_instr.prelude()) {
  752          if (!emit(i))
  753             return false;
  754       }
  755    }
  756 
  757    if (vtx_fetch_results.find(fetch_instr.src().sel()) !=
  758        vtx_fetch_results.end()) {
  759       m_bc->force_add_cf = 1;
  760       vtx_fetch_results.clear();
  761    }
  762    vtx_fetch_results.insert(fetch_instr.dst().sel());
  763 
  764    struct r600_bytecode_vtx vtx;
  765    memset(&vtx, 0, sizeof(vtx));
  766    vtx.op = fetch_instr.vc_opcode();
  767    vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
  768    vtx.fetch_type = fetch_instr.fetch_type();
  769    vtx.src_gpr = fetch_instr.src().sel();
  770    vtx.src_sel_x = fetch_instr.src().chan();
  771    vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
  772    vtx.dst_gpr = fetch_instr.dst().sel();
  773    vtx.dst_sel_x = fetch_instr.swz(0);      /* SEL_X */
  774    vtx.dst_sel_y = fetch_instr.swz(1);      /* SEL_Y */
  775    vtx.dst_sel_z = fetch_instr.swz(2);      /* SEL_Z */
  776    vtx.dst_sel_w = fetch_instr.swz(3);      /* SEL_W */
  777    vtx.use_const_fields = fetch_instr.use_const_fields();
  778    vtx.data_format = fetch_instr.data_format();
  779    vtx.num_format_all = fetch_instr.num_format();       /* NUM_FORMAT_SCALED */
  780    vtx.format_comp_all = fetch_instr.is_signed();   /* FORMAT_COMP_SIGNED */
  781    vtx.endian = fetch_instr.endian_swap();
  782    vtx.buffer_index_mode = index_mode;
  783    vtx.offset = fetch_instr.offset();
  784    vtx.indexed = fetch_instr.indexed();
  785    vtx.uncached = fetch_instr.uncached();
  786    vtx.elem_size = fetch_instr.elm_size();
  787    vtx.array_base = fetch_instr.array_base();
  788    vtx.array_size = fetch_instr.array_size();
  789    vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();
  790 
  791    if (fetch_instr.use_tc()) {
  792       if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
  793          R600_ERR("shader_from_nir: Error creating vertex fetch assembly instruction\n");
  794          return false;
  795       }
  796 
  797    } else {
  798       if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
  799          R600_ERR("shader_from_nir: Error creating vertex fetch assembly instruction\n");
  800          return false;
  801       }
  802    }
  803 
  804    m_bc->cf_last->vpm = fetch_instr.use_vpm();
  805    m_bc->cf_last->barrier = 1;
  806 
  807    return true;
  808 }
  809 
  810 bool AssemblyFromShaderLegacyImpl::emit_emit_vertex(const EmitVertex &instr)
  811 {
  812    int r = r600_bytecode_add_cfinst(m_bc, instr.op());
  813    if (!r)
  814       m_bc->cf_last->count = instr.stream();
  815    assert(m_bc->cf_last->count < 4);
  816 
  817    return r == 0;
  818 }
  819 
  820 bool AssemblyFromShaderLegacyImpl::emit_wait_ack(const WaitAck& instr)
  821 {
  822    int r = r600_bytecode_add_cfinst(m_bc, instr.op());
  823    if (!r)
  824       m_bc->cf_last->cf_addr = instr.n_ack();
  825 
  826    return r == 0;
  827 }
  828 
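/* Write to scratch memory via CF_OP_MEM_SCRATCH, either at a fixed
 * array_base or indirectly through an index GPR. */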
  829 bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction& instr)
  830 {
  831    struct r600_bytecode_output cf;
  832 
  833    memset(&cf, 0, sizeof(struct r600_bytecode_output));
  834 
  835    cf.op = CF_OP_MEM_SCRATCH;
  836    cf.elem_size = 3;
  837    cf.gpr = instr.gpr().sel();
  838    cf.mark = 1;
  839    cf.comp_mask = instr.write_mask();
  840    cf.swizzle_x = 0;
  841    cf.swizzle_y = 1;
  842    cf.swizzle_z = 2;
  843    cf.swizzle_w = 3;
  844    cf.burst_count = 1;
  845 
  846    if (instr.indirect()) {
  847       cf.type = 3;
  848       cf.index_gpr = instr.address();
  849 
  850       /* The documentation seems to be wrong here: with indirect addressing
  851        * the address_base field seems to hold the array_size */
  852       cf.array_size = instr.array_size();
  853    } else {
  854       cf.type = 2;
  855       cf.array_base = instr.location();
  856    }
  857    /* This should be 0, but the address calculation is apparently wrong */
  858 
  859 
  860    if (r600_bytecode_add_output(m_bc, &cf)){
  861       R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
  862       return false;
  863    }
  864 
  865    return true;
  866 }
  867 
  868 extern const std::map<ESDOp, int> ds_opcode_map;
  869 
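/* Emit a GDS atomic. A literal UAV id is used directly (divided by four); a
 * dynamic one is shifted right by two and loaded into CF index register 1. */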
  870 bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr& instr)
  871 {
  872    struct r600_bytecode_gds gds;
  873 
  874    int uav_idx = -1;
  875    auto addr = instr.uav_id();
  876    if (addr->type() != Value::literal) {
  877       if (!m_bc->index_loaded[1] || m_loop_nesting ||
  878           m_bc->index_reg[1] != addr->sel()) {
  879          struct r600_bytecode_alu alu;
  880 
  881          memset(&alu, 0, sizeof(alu));
  882          alu.op = opcode_map.at(op2_lshr_int);
  883          alu.dst.sel = addr->sel();
  884          alu.dst.chan = addr->chan();
  885          alu.src[0].sel = addr->sel();
  886          alu.src[0].chan = addr->chan();
  887          alu.src[1].sel = ALU_SRC_LITERAL;
  888          alu.src[1].value = 2;
  889          alu.last = 1;
  890          alu.dst.write = 1;
  891          int r = r600_bytecode_add_alu(m_bc, &alu);
  892          if (r)
  893             return false;
  894 
  895          memset(&alu, 0, sizeof(alu));
  896          alu.op = opcode_map.at(op1_mova_int);
  897          alu.dst.chan = 0;
  898          alu.src[0].sel = addr->sel();
  899          alu.src[0].chan = addr->chan();
  900          alu.last = 1;
  901          r = r600_bytecode_add_alu(m_bc, &alu);
  902          if (r)
  903             return false;
  904 
  905          m_bc->ar_loaded = 0;
  906 
  907          alu.op = opcode_map.at(op1_set_cf_idx1);
  908          alu.dst.chan = 0;
  909          alu.src[0].sel = 0;
  910          alu.src[0].chan = 0;
  911          alu.last = 1;
  912 
  913          r = r600_bytecode_add_alu(m_bc, &alu);
  914          if (r)
  915             return false;
  916 
  917          m_bc->index_reg[1] = addr->sel();
  918          m_bc->index_loaded[1] = true;
  919       }
  920    } else {
  921       const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
  922       uav_idx = addr_reg.value() >> 2;
  923    }
  924 
  925    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
  926 
  927    gds.op = ds_opcode_map.at(instr.op());
  928    gds.dst_gpr = instr.dest_sel();
  929    gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
  930    gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
  931    gds.src_gpr = instr.src_sel();
  932 
  933    if (instr.op() == DS_OP_CMP_XCHG_RET) {
  934       gds.src_sel_z = 1;
  935    } else {
  936       gds.src_sel_z = 7;
  937    }
  938 
  939    gds.src_sel_x = instr.src_swizzle(0);
  940    gds.src_sel_y = instr.src_swizzle(1);
  941 
  942    gds.dst_sel_x = 0;
  943    gds.dst_sel_y = 7;
  944    gds.dst_sel_z = 7;
  945    gds.dst_sel_w = 7;
  946    gds.src_gpr2 = 0;
  947    gds.alloc_consume = 1; // Not Cayman
  948 
  949    int r = r600_bytecode_add_gds(m_bc, &gds);
  950    if (r)
  951       return false;
  952    m_bc->cf_last->vpm = 1;
  953    return true;
  954 }
  955 
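/* Store tessellation factors to GDS. Channels are written in pairs, so a
 * second TF_WRITE is emitted when more than two channels are valid. */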
  956 bool AssemblyFromShaderLegacyImpl::emit_tf_write(const GDSStoreTessFactor& instr)
  957 {
  958    struct r600_bytecode_gds gds;
  959 
  960    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
  961    gds.src_gpr = instr.sel();
  962    gds.src_sel_x = instr.chan(0);
  963    gds.src_sel_y = instr.chan(1);
  964    gds.src_sel_z = 4;
  965    gds.dst_sel_x = 7;
  966    gds.dst_sel_y = 7;
  967    gds.dst_sel_z = 7;
  968    gds.dst_sel_w = 7;
  969    gds.op = FETCH_OP_TF_WRITE;
  970 
  971    if (r600_bytecode_add_gds(m_bc, &gds) != 0)
  972          return false;
  973 
  974    if (instr.chan(2) != 7) {
  975       memset(&gds, 0, sizeof(struct r600_bytecode_gds));
  976       gds.src_gpr = instr.sel();
  977       gds.src_sel_x = instr.chan(2);
  978       gds.src_sel_y = instr.chan(3);
  979       gds.src_sel_z = 4;
  980       gds.dst_sel_x = 7;
  981       gds.dst_sel_y = 7;
  982       gds.dst_sel_z = 7;
  983       gds.dst_sel_w = 7;
  984       gds.op = FETCH_OP_TF_WRITE;
  985 
  986       if (r600_bytecode_add_gds(m_bc, &gds))
  987          return false;
  988    }
  989    return true;
  990 }
  991 
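/* LDS write: a single component uses LDS_WRITE, otherwise LDS_WRITE_REL is
 * used with value1 passed as the third source. */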
  992 bool AssemblyFromShaderLegacyImpl::emit_ldswrite(const LDSWriteInstruction& instr)
  993 {
  994    r600_bytecode_alu alu;
  995    memset(&alu, 0, sizeof(r600_bytecode_alu));
  996 
  997    alu.last = true;
  998    alu.is_lds_idx_op = true;
  999    copy_src(alu.src[0], instr.address());
 1000    copy_src(alu.src[1], instr.value0());
 1001 
 1002    if (instr.num_components() == 1) {
 1003       alu.op = LDS_OP2_LDS_WRITE;
 1004    } else {
 1005       alu.op = LDS_OP3_LDS_WRITE_REL;
 1006       alu.lds_idx = 1;
 1007       copy_src(alu.src[2], instr.value1());
 1008    }
 1009 
 1010    return r600_bytecode_add_alu(m_bc, &alu) == 0;
 1011 }
 1012 
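/* LDS read: queue an LDS_READ_RET per value and then pop each result off the
 * LDS output queue with a MOV from LDS_OQ_A_POP. All fetches are issued
 * before the first read; a new CF clause is forced if the whole sequence
 * would not fit into the current one. */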
 1013 bool AssemblyFromShaderLegacyImpl::emit_ldsread(const LDSReadInstruction& instr)
 1014 {
 1015    int r;
 1016    unsigned nread = 0;
 1017    unsigned nfetch = 0;
 1018    unsigned n_values = instr.num_values();
 1019 
 1020    r600_bytecode_alu alu_fetch;
 1021    r600_bytecode_alu alu_read;
 1022 
 1023    /* We must add a new ALU clause if the fetch and read ops would otherwise
 1024     * be split, because r600_asm limits a clause to 120 slots = 240 dwords */
 1025    if (m_bc->cf_last->ndw > 240 - 4 * n_values)
 1026       m_bc->force_add_cf = 1;
 1027 
 1028    while (nread < n_values) {
 1029       if (nfetch < n_values) {
 1030          memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
 1031          alu_fetch.is_lds_idx_op = true;
 1032          alu_fetch.op = LDS_OP1_LDS_READ_RET;
 1033 
 1034          copy_src(alu_fetch.src[0], instr.address(nfetch));
 1035          alu_fetch.src[1].sel = V_SQ_ALU_SRC_0;
 1036          alu_fetch.src[2].sel = V_SQ_ALU_SRC_0;
 1037          alu_fetch.last = 1;
 1038          r = r600_bytecode_add_alu(m_bc, &alu_fetch);
 1039          m_bc->cf_last->nlds_read++;
 1040          if (r)
 1041             return false;
 1042       }
 1043 
 1044       if (nfetch >= n_values) {
 1045          memset(&alu_read, 0, sizeof(r600_bytecode_alu));
 1046          copy_dst(alu_read.dst, instr.dest(nread));
 1047          alu_read.op = ALU_OP1_MOV;
 1048          alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
 1049          alu_read.last = 1;
 1050          alu_read.dst.write = 1;
 1051          r = r600_bytecode_add_alu(m_bc, &alu_read);
 1052          m_bc->cf_last->nqueue_read++;
 1053          if (r)
 1054             return false;
 1055          ++nread;
 1056       }
 1057       ++nfetch;
 1058    }
 1059    assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read);
 1060 
 1061    return true;
 1062 }
 1063 
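/* Emit a RAT (UAV) memory operation as CF_OP_MEM_RAT. As in the GDS path, a
 * non-literal RAT id is routed through CF index register 1. */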
 1064 bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
 1065 {
 1066    struct r600_bytecode_gds gds;
 1067 
 1068    int rat_idx = -1;
 1069    EBufferIndexMode rat_index_mode = bim_none;
 1070    auto addr = instr.rat_id_offset();
 1071 
 1072    if (addr) {
 1073       if (addr->type() != Value::literal) {
 1074          rat_index_mode = bim_one;
 1075          if (!m_bc->index_loaded[1] || m_loop_nesting ||  m_bc->index_reg[1] != addr->sel()) {
 1076             struct r600_bytecode_alu alu;
 1077 
 1078             memset(&alu, 0, sizeof(alu));
 1079             alu.op = opcode_map.at(op1_mova_int);
 1080             alu.dst.chan = 0;
 1081             alu.src[0].sel = addr->sel();
 1082             alu.src[0].chan = addr->chan();
 1083             alu.last = 1;
 1084             int r = r600_bytecode_add_alu(m_bc, &alu);
 1085             if (r)
 1086                return false;
 1087 
 1088             m_bc->ar_loaded = 0;
 1089 
 1090             alu.op = opcode_map.at(op1_set_cf_idx1);
 1091             alu.dst.chan = 0;
 1092             alu.src[0].sel = 0;
 1093             alu.src[0].chan = 0;
 1094             alu.last = 1;
 1095 
 1096             r = r600_bytecode_add_alu(m_bc, &alu);
 1097             if (r)
 1098                return false;
 1099 
 1100             m_bc->index_reg[1] = addr->sel();
 1101             m_bc->index_loaded[1] = true;
 1102 
 1103          }
 1104       } else {
 1105          const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
 1106          rat_idx = addr_reg.value();
 1107       }
 1108    }
 1109    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
 1110 
 1111    r600_bytecode_add_cfinst(m_bc, CF_OP_MEM_RAT);
 1112    auto cf = m_bc->cf_last;
 1113    cf->rat.id = rat_idx + m_shader->rat_base;
 1114    cf->rat.inst = instr.rat_op();
 1115    cf->rat.index_mode = rat_index_mode;
 1116    cf->output.type = instr.need_ack() ? 3 : 1;
 1117    cf->output.gpr = instr.data_gpr();
 1118    cf->output.index_gpr = instr.index_gpr();
 1119    cf->output.comp_mask = instr.comp_mask();
 1120    cf->output.burst_count = instr.burst_count();
 1121    cf->output.swizzle_x = instr.data_swz(0);
 1122    cf->output.swizzle_y = instr.data_swz(1);
 1123    cf->output.swizzle_z = instr.data_swz(2);
 1124    cf->output.swizzle_w = instr.data_swz(3);
 1125    cf->vpm = 1;
 1126    cf->barrier = 1;
 1127    cf->mark = instr.need_ack();
 1128    cf->output.elem_size = instr.elm_size();
 1129    return true;
 1130 }
 1131 
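/* Fill the bytecode ALU destination from an IR value. Writing a GPR that is
 * currently mirrored in one of the CF index registers invalidates that
 * index register. */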
 1132 bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
 1133                                             const Value& d)
 1134 {
 1135    assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
 1136 
 1137    if (d.sel() > 124) {
 1138      R600_ERR("shader_from_nir: Don't support more than 124 GPRs, but trying to use %d\n", d.sel());
 1139       return false;
 1140    }
 1141 
 1142    dst.sel = d.sel();
 1143    dst.chan = d.chan();
 1144 
 1145    if (m_bc->index_reg[1] == dst.sel)
 1146       m_bc->index_loaded[1] = false;
 1147 
 1148    if (m_bc->index_reg[0] == dst.sel)
 1149       m_bc->index_loaded[0] = false;
 1150 
 1151    return true;
 1152 }
 1153 
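/* Fill a bytecode ALU source from an IR value. Small literals (0, 1, -1,
 * 0.5f, 1.0f) are replaced by hardware inline constants so that they don't
 * consume a literal slot in the instruction group. */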
 1154 bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
 1155 {
 1156 
 1157    if (s.type() == Value::gpr && s.sel() > 124) {
 1158      R600_ERR("shader_from_nir: Don't support more than 124 GPRs, trying to use %d\n", s.sel());
 1159       return false;
 1160    }
 1161 
 1162    if (s.type() == Value::lds_direct)  {
 1163       R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
 1164       return false;
 1165    }
 1166 
 1167    if (s.type() == Value::kconst && s.sel() < 512)  {
 1168       R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel());
 1169       return false;
 1170    }
 1171 
 1172    if (s.type() == Value::literal) {
 1173       auto& v = static_cast<const LiteralValue&>(s);
 1174       if (v.value() == 0) {
 1175          src.sel = ALU_SRC_0;
 1176          src.chan = 0;
 1177          --m_nliterals_in_group;
 1178          return true;
 1179       }
 1180       if (v.value() == 1) {
 1181          src.sel = ALU_SRC_1_INT;
 1182          src.chan = 0;
 1183          --m_nliterals_in_group;
 1184          return true;
 1185       }
 1186       if (v.value_float() == 1.0f) {
 1187          src.sel = ALU_SRC_1;
 1188          src.chan = 0;
 1189          --m_nliterals_in_group;
 1190          return true;
 1191       }
 1192       if (v.value_float() == 0.5f) {
 1193          src.sel = ALU_SRC_0_5;
 1194          src.chan = 0;
 1195          --m_nliterals_in_group;
 1196          return true;
 1197       }
 1198       if (v.value() == 0xffffffff) {
 1199          src.sel = ALU_SRC_M_1_INT;
 1200          src.chan = 0;
 1201          --m_nliterals_in_group;
 1202          return true;
 1203       }
 1204       src.value = v.value();
 1205    }
 1206 
 1207    src.sel = s.sel();
 1208    src.chan = s.chan();
 1209    if (s.type() == Value::kconst) {
 1210       const UniformValue& cv = static_cast<const UniformValue&>(s);
 1211       src.kc_bank = cv.kcache_bank();
 1212    }
 1213 
 1214    return true;
 1215 }
 1216 
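/* Mapping from the sfn IR ALU opcodes to the r600 ISA ALU opcodes. */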
 1217 const std::map<EAluOp, int> opcode_map = {
 1218 
 1219    {op2_add, ALU_OP2_ADD},
 1220    {op2_mul, ALU_OP2_MUL},
 1221    {op2_mul_ieee, ALU_OP2_MUL_IEEE},
 1222    {op2_max, ALU_OP2_MAX},
 1223    {op2_min, ALU_OP2_MIN},
 1224    {op2_max_dx10, ALU_OP2_MAX_DX10},
 1225    {op2_min_dx10, ALU_OP2_MIN_DX10},
 1226    {op2_sete, ALU_OP2_SETE},
 1227    {op2_setgt, ALU_OP2_SETGT},
 1228    {op2_setge, ALU_OP2_SETGE},
 1229    {op2_setne, ALU_OP2_SETNE},
 1230    {op2_sete_dx10, ALU_OP2_SETE_DX10},
 1231    {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
 1232    {op2_setge_dx10, ALU_OP2_SETGE_DX10},
 1233    {op2_setne_dx10, ALU_OP2_SETNE_DX10},
 1234    {op1_fract, ALU_OP1_FRACT},
 1235    {op1_trunc, ALU_OP1_TRUNC},
 1236    {op1_ceil, ALU_OP1_CEIL},
 1237    {op1_rndne, ALU_OP1_RNDNE},
 1238    {op1_floor, ALU_OP1_FLOOR},
 1239    {op2_ashr_int, ALU_OP2_ASHR_INT},
 1240    {op2_lshr_int, ALU_OP2_LSHR_INT},
 1241    {op2_lshl_int, ALU_OP2_LSHL_INT},
 1242    {op1_mov, ALU_OP1_MOV},
 1243    {op0_nop, ALU_OP0_NOP},
 1244    {op2_mul_64, ALU_OP2_MUL_64},
 1245    {op1_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
 1246    {op1v_flt64_to_flt32, ALU_OP1_FLT32_TO_FLT64},
 1247    {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
 1248    {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
 1249    {op2_pred_sete, ALU_OP2_PRED_SETE},
 1250    {op2_pred_setgt, ALU_OP2_PRED_SETGT},
 1251    {op2_pred_setge, ALU_OP2_PRED_SETGE},
 1252    {op2_pred_setne, ALU_OP2_PRED_SETNE},
 1253    //{op2_pred_set_inv, ALU_OP2_PRED_SET},
 1254    //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
 1255    //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
 1256    {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
 1257    {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
 1258    {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
 1259    {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
 1260    {op2_kille, ALU_OP2_KILLE},
 1261    {op2_killgt, ALU_OP2_KILLGT},
 1262    {op2_killge, ALU_OP2_KILLGE},
 1263    {op2_killne, ALU_OP2_KILLNE},
 1264    {op2_and_int, ALU_OP2_AND_INT},
 1265    {op2_or_int, ALU_OP2_OR_INT},
 1266    {op2_xor_int, ALU_OP2_XOR_INT},
 1267    {op1_not_int, ALU_OP1_NOT_INT},
 1268    {op2_add_int, ALU_OP2_ADD_INT},
 1269    {op2_sub_int, ALU_OP2_SUB_INT},
 1270    {op2_max_int, ALU_OP2_MAX_INT},
 1271    {op2_min_int, ALU_OP2_MIN_INT},
 1272    {op2_max_uint, ALU_OP2_MAX_UINT},
 1273    {op2_min_uint, ALU_OP2_MIN_UINT},
 1274    {op2_sete_int, ALU_OP2_SETE_INT},
 1275    {op2_setgt_int, ALU_OP2_SETGT_INT},
 1276    {op2_setge_int, ALU_OP2_SETGE_INT},
 1277    {op2_setne_int, ALU_OP2_SETNE_INT},
 1278    {op2_setgt_uint, ALU_OP2_SETGT_UINT},
 1279    {op2_setge_uint, ALU_OP2_SETGE_UINT},
 1280    {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
 1281    {op2_killge_uint, ALU_OP2_KILLGE_UINT},
 1282    //p2_prede_int, ALU_OP2_PREDE_INT},
 1283    {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
 1284    {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
 1285    {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
 1286    {op2_kille_int, ALU_OP2_KILLE_INT},
 1287    {op2_killgt_int, ALU_OP2_KILLGT_INT},
 1288    {op2_killge_int, ALU_OP2_KILLGE_INT},
 1289    {op2_killne_int, ALU_OP2_KILLNE_INT},
 1290    {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
 1291    {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
 1292    {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
 1293    {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
 1294    {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
 1295    {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
 1296    {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
 1297    {op1_bfrev_int, ALU_OP1_BFREV_INT},
 1298    {op2_addc_uint, ALU_OP2_ADDC_UINT},
 1299    {op2_subb_uint, ALU_OP2_SUBB_UINT},
 1300    {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
 1301    {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
 1302    {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
 1303    {op2_set_mode, ALU_OP2_SET_MODE},
 1304    {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
 1305    {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
 1306    {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
 1307    {op1_exp_ieee, ALU_OP1_EXP_IEEE},
 1308    {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
 1309    {op1_log_ieee, ALU_OP1_LOG_IEEE},
 1310    {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
 1311    {op1_recip_ff, ALU_OP1_RECIP_FF},
 1312    {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
 1313    {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
 1314    {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
 1315    {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
 1316    {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
 1317    {op1_sin, ALU_OP1_SIN},
 1318    {op1_cos, ALU_OP1_COS},
 1319    {op2_mullo_int, ALU_OP2_MULLO_INT},
 1320    {op2_mulhi_int, ALU_OP2_MULHI_INT},
 1321    {op2_mullo_uint, ALU_OP2_MULLO_UINT},
 1322    {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
 1323    {op1_recip_int, ALU_OP1_RECIP_INT},
 1324    {op1_recip_uint, ALU_OP1_RECIP_UINT},
 1325    {op1_recip_64, ALU_OP2_RECIP_64},
 1326    {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
 1327    {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
 1328    {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
 1329    {op1_sqrt_64, ALU_OP2_SQRT_64},
 1330    {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
 1331    {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
 1332    {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
 1333    {op2_bfm_int, ALU_OP2_BFM_INT},
 1334    {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
 1335    {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
 1336    {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
 1337    {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
 1338    {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
 1339    {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
 1340    {op1_bcnt_int, ALU_OP1_BCNT_INT},
 1341    {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
 1342    {op1_ffbl_int, ALU_OP1_FFBL_INT},
 1343    {op1_ffbh_int, ALU_OP1_FFBH_INT},
 1344    {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
 1345    {op2_dot_ieee, ALU_OP2_DOT_IEEE},
 1346    {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
 1347    {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
 1348    {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
 1349    {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
 1350    {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
 1351    {op2_mul_uint24, ALU_OP2_MUL_UINT24},
 1352    {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
 1353    {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
 1354    {op2_sete_64, ALU_OP2_SETE_64},
 1355    {op2_setne_64, ALU_OP2_SETNE_64},
 1356    {op2_setgt_64, ALU_OP2_SETGT_64},
 1357    {op2_setge_64, ALU_OP2_SETGE_64},
 1358    {op2_min_64, ALU_OP2_MIN_64},
 1359    {op2_max_64, ALU_OP2_MAX_64},
 1360    {op2_dot4, ALU_OP2_DOT4},
 1361    {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
 1362    {op2_cube, ALU_OP2_CUBE},
 1363    {op1_max4, ALU_OP1_MAX4},
 1364    {op1_frexp_64, ALU_OP1_FREXP_64},
 1365    {op1_ldexp_64, ALU_OP2_LDEXP_64},
 1366    {op1_fract_64, ALU_OP1_FRACT_64},
 1367    {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
 1368    {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
 1369    {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
 1370    {op2_add_64, ALU_OP2_ADD_64},
 1371    {op1_mova_int, ALU_OP1_MOVA_INT},
 1372    {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
 1373    {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
 1374    {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
 1375    {op2_dot, ALU_OP2_DOT},
 1376    //p2_mul_prev, ALU_OP2_MUL_PREV},
 1377    //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
 1378    //p2_add_prev, ALU_OP2_ADD_PREV},
 1379    {op2_muladd_prev, ALU_OP2_MULADD_PREV},
 1380    {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
 1381    {op2_interp_xy, ALU_OP2_INTERP_XY},
 1382    {op2_interp_zw, ALU_OP2_INTERP_ZW},
 1383    {op2_interp_x, ALU_OP2_INTERP_X},
 1384    {op2_interp_z, ALU_OP2_INTERP_Z},
 1385    {op0_store_flags, ALU_OP1_STORE_FLAGS},
 1386    {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
 1387    {op0_lds_1a, ALU_OP2_LDS_1A},
 1388    {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
 1389    {op0_lds_2a, ALU_OP2_LDS_2A},
 1390    {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
 1391    {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
 1392    {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
 1393       // {op 3 all left shift 6
 1394    {op3_bfe_uint, ALU_OP3_BFE_UINT},
 1395    {op3_bfe_int, ALU_OP3_BFE_INT},
 1396    {op3_bfi_int, ALU_OP3_BFI_INT},
 1397    {op3_fma, ALU_OP3_FMA},
 1398    {op3_cndne_64, ALU_OP3_CNDNE_64},
 1399    {op3_fma_64, ALU_OP3_FMA_64},
 1400    {op3_lerp_uint, ALU_OP3_LERP_UINT},
 1401    {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
 1402    {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
 1403    {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
 1404    {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
 1405    {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
 1406    {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
 1407    {op3_muladd, ALU_OP3_MULADD},
 1408    {op3_muladd_m2, ALU_OP3_MULADD_M2},
 1409    {op3_muladd_m4, ALU_OP3_MULADD_M4},
 1410    {op3_muladd_d2, ALU_OP3_MULADD_D2},
 1411    {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
 1412    {op3_cnde, ALU_OP3_CNDE},
 1413    {op3_cndgt, ALU_OP3_CNDGT},
 1414    {op3_cndge, ALU_OP3_CNDGE},
 1415    {op3_cnde_int, ALU_OP3_CNDE_INT},
 1416    {op3_cndgt_int, ALU_OP3_CNDGT_INT},
 1417    {op3_cndge_int, ALU_OP3_CNDGE_INT},
 1418    {op3_mul_lit, ALU_OP3_MUL_LIT},
 1419 };
 1420 
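/* Mapping from the sfn IR data-share (DS) opcodes to the r600 fetch opcodes
 * used for GDS instructions. */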
 1421 const std::map<ESDOp, int> ds_opcode_map = {
 1422    {DS_OP_ADD, FETCH_OP_GDS_ADD},
 1423    {DS_OP_SUB, FETCH_OP_GDS_SUB},
 1424    {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
 1425    {DS_OP_INC, FETCH_OP_GDS_INC},
 1426    {DS_OP_DEC, FETCH_OP_GDS_DEC},
 1427    {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
 1428    {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
 1429    {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
 1430    {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
 1431    {DS_OP_AND, FETCH_OP_GDS_AND},
 1432    {DS_OP_OR, FETCH_OP_GDS_OR},
 1433    {DS_OP_XOR, FETCH_OP_GDS_XOR},
 1434    {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
 1435    {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
 1436    {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
 1437    {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
 1438    {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
 1439    {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
 1440    {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
 1441    {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
 1442    {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
 1443    {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
 1444    {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
 1445    {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
 1446    {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
 1447    {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
 1448    {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
 1449    {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
 1450    {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
 1451    {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
 1452    {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
 1453    {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
 1454    {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
 1455    {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
 1456    {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
 1457    {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
 1458    {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
 1459    {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
 1460    {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
 1461    {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
 1462    {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
 1463    {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
 1464    {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
 1465    {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
 1466    {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
 1467    {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
 1468    {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
 1469    {DS_OP_INVALID, 0},
 1470 };
 1471 
 1472 }