"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp" (16 Sep 2020, 31481 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "sfn_shader_base.cpp" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X)  \
   do {\
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;


ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel)
{
   m_sh_info.processor_type = ptype;
}


ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

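/* Pre-scan a NIR instruction: note texture-buffer usage here and let the
 * stage-specific implementation scan for system value access. */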
bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

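/* Renumber the temporary registers: evaluate the live ranges, merge
 * registers with disjoint ranges, then compact the remaining used registers
 * into a dense range (keeping shader inputs fixed) and update both the IR
 * and the r600_shader info. */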
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t  i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers; these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

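/* Record the uniform type, reserve hardware atomic counter ranges for
 * atomic uniforms, and flag image/SSBO usage in the shader info. */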
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   // m_uniform_type_map
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_next_hwatomic_loc;
      atom.start = m_next_hwatomic_loc;
      atom.end = atom.start + natomics - 1;
      m_next_hwatomic_loc = atom.end + 1;
      //atom.array_id = uniform->type->is_array() ? 1 : 0;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   if (uniform->type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(instr->mode == nir_var_function_temp);
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s);


}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   m_var_mode[instr->var] = instr->mode;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return  nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

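/* Central emit helper: a pending ELSE first gets flushed into its own
 * block, then the instruction is appended to the current block. */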
void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{

   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }
   /* Clear the pending else; if the else branch was empty, none will be emitted */

   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   PValue src = get_temp_register();
   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));

   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
   emit_instruction(new FetchTCSIOParam(dest, src, offset));

   return true;
}

bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
{
   auto address = varvec_from_nir(instr->src[0], instr->num_components);
   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);

   emit_instruction(new LDSReadInstruction(address, dest_value));
   return true;
}

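/* LDS stores handle at most two consecutive components: select the lower
 * or the upper half of the source vector based on the write mask and emit
 * a one- or two-value LDS write. */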
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = from_nir(instr->src[1], 0);
   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
   write_mask |= write_mask >> 2;

   auto value = from_nir(instr->src[0], swizzle_base);
   if (!(write_mask & 2)) {
      emit_instruction(new LDSWriteInstruction(address, 0, value));
   } else {
      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
   }

   return true;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return reserve_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_r600:
      return emit_load_ubo(instr);
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_store_ssbo:
      m_sel.info.writes_memory = true;
      /* fallthrough */
   case nir_intrinsic_load_ssbo:
      return m_ssbo_instr.emit(&instr->instr);
      break;
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(instr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(instr, 16);
   case nir_intrinsic_load_local_shared_r600:
      return emit_load_local_shared(instr);
   case nir_intrinsic_store_local_shared_r600:
      return emit_store_local_shared(instr);
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
      return emit_barrier(instr);

   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
{
   AluInstruction *ir = new AluInstruction(op0_group_barrier);
   ir->set_flag(alu_last_instr);
   emit_instruction(ir);
   return true;
}


bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

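/* Scratch writes use either a literal address (directly encoded) or a GPR
 * address, in which case the allocated scratch size is also needed. */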
bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   std::unique_ptr<GPRVector> vec(vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                  swizzle_from_mask(instr->num_components)));
   GPRVector value(*vec);

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

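/* Build a GPR vector for a source value: if the x channel already lives in
 * channel 0 of a GPR the register is reused directly, otherwise the masked
 * channels are copied into a freshly allocated temporary. */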
GPRVector *ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                    UNUSED unsigned mask,
                                                                    const GPRVector::Swizzle& swizzle)
{
   GPRVector *result = nullptr;
   int sel = lookup_register_index(src);
   if (sel >= 0 && from_nir(src, 0)->type() == Value::gpr &&
       from_nir(src, 0)->chan() == 0) {
      /* If the x channel is really an x-channel register then we can be
       * pretty sure that the values come as we need them */
      result = new GPRVector(from_nir(src, 0)->sel(), swizzle);
   } else {
      AluInstruction *ir = nullptr;
      int sel = allocate_temp_register();
      GPRVector::Values v;
      for (int i = 0; i < 4; ++i) {
         v[i] = PValue(new GPRValue(sel, swizzle[i]));
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, v[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);

      result = new GPRVector(v);
   }
   return result;
}

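/* UBO loads come in three flavors: buffer id and offset both literal
 * (per-channel uniform values are registered lazily or moved directly),
 * a literal buffer id with an indirect offset, and a fully indirect
 * access that goes through a vertex fetch. */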
bool ShaderFromNirProcessor::emit_load_ubo(nir_intrinsic_instr* instr)
{
   nir_src& src0 = instr->src[0];
   nir_src& src1 = instr->src[1];

   int sel_bufid_reg = src0.is_ssa ? src0.ssa->index : src0.reg.reg->index;
   const nir_load_const_instr* literal0 = get_literal_constant(sel_bufid_reg);

   int ofs_reg = src1.is_ssa ? src1.ssa->index : src1.reg.reg->index;
   const nir_load_const_instr* literal1 = get_literal_constant(ofs_reg);
   if (literal0) {
      if (literal1) {
         uint bufid = literal0->value[0].u32;
         uint buf_ofs = literal1->value[0].u32 >> 4;
         int buf_cmp = ((literal1->value[0].u32 >> 2) & 3);
         AluInstruction *ir = nullptr;
         for (int i = 0; i < instr->num_components; ++i) {
            int cmp = buf_cmp + i;
            assert(cmp < 4);
            auto u = PValue(new UniformValue(512 + buf_ofs, cmp, bufid + 1));
            if (instr->dest.is_ssa)
               add_uniform((instr->dest.ssa.index << 2) + i, u);
            else {
               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
               emit_instruction(ir);
            }
         }
         if (ir)
            ir->set_flag(alu_last_instr);
         return true;

      } else {
         /* literal0 is lost ...*/
         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, literal0->value[0].u32 + 1);
      }
   } else {
      /* TODO: This can also be solved by using the CF index on the ALU block, and
       * this would probably make sense when there is more than one load with
       * the same buffer ID. */
      PValue bufid = from_nir(instr->src[0], 0, 0);
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      for (int i = 0; i < 4; ++i)
         trgt.set_reg_i(i, from_nir(instr->dest, i));

      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                     1, bufid, bim_zero);

      emit_instruction(ir);
      for (int i = 0; i < instr->num_components ; ++i) {
         add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
      }
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                          {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::reserve_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";


   /* If the target register is a SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */

   /* Try to find the literal that defines the array index */
   const nir_load_const_instr* literal = nullptr;
   if (instr->src[0].is_ssa)
      literal = get_literal_constant(instr->src[0].ssa->index);

   int base = nir_intrinsic_base(instr);
   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components ; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->value[0].u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            add_uniform((instr->dest.ssa.index << 2) + i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   for (int i = 0; i < 4; ++i)
      trgt.set_reg_i(i, from_nir(instr->dest, i));

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   /* FIXME: buffer index and index mode are not set correctly */
   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   emit_instruction(ir);
   for (int i = 0; i < instr->num_components ; ++i) {
      add_uniform((instr->dest.ssa.index << 2) + i, trgt.reg_i(i));
   }
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

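/* Emit moves for literal constants; common values (0, 1, 1.0f, 0.5f) map to
 * inline constants, everything else becomes a literal operand. */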
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components ; ++i) {
      if (writemask & (1 << i)) {
         PValue lsrc;
         switch (literal->def.bit_size) {

         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                     PValue(new LiteralValue( 0xffffffff, i)) :
                     Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

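/* Make sure the value lives in a GPR: non-GPR values are first copied into
 * a temporary register. */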
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      PValue retval = get_temp_register();
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry
    * and tessellation shaders need a specialized deref_array; for the other
    * shaders it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

void ShaderFromNirProcessor::load_uniform(const nir_alu_src &src)
{
   AluInstruction *ir = nullptr;
   PValue sv[4];

   assert(src.src.is_ssa);

   for (int i = 0; i < src.src.ssa->num_components ; ++i)  {
      unsigned uindex = (src.src.ssa->index << 2) + i;
      sv[i] = uniform(uindex);
      assert(sv[i]);
   }

   for (int i = 0; i < src.src.ssa->num_components ; ++i) {
      ir = new AluInstruction(op1_mov, create_register_from_nir_src(src.src, i), sv[i],
                              EmitInstruction::write);
      emit_instruction(ir);
   }
   if (ir)
      ir->set_flag(alu_last_instr);
}



bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval =  val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var <<  "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << *var <<  "\n";
   m_outputs[pos] = var;
}

void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

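/* Final pass: let the derived shader class finish up, patch the input and
 * output GPR numbers into the shader info, and append the export block. */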
void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second->sel();

   m_output.push_back(m_export_output);
}

}