mesa  18.2.8
About: Mesa is an open-source implementation of the OpenGL specification - a system for rendering interactive 3D graphics (main library code).
  Fossies Dox: mesa-18.2.8.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

vc4_qpu_emit.c File Reference
#include <inttypes.h>
#include "vc4_context.h"
#include "vc4_qir.h"
#include "vc4_qpu.h"
#include "util/ralloc.h"
Include dependency graph for vc4_qpu_emit.c:

Go to the source code of this file.


#define A(name)   [QOP_##name] = {QPU_A_##name}
#define M(name)   [QOP_##name] = {QPU_M_##name}


static void vc4_dump_program (struct vc4_compile *c)
static void queue (struct qblock *block, uint64_t inst)
static uint64_t * last_inst (struct qblock *block)
static void set_last_cond_add (struct qblock *block, uint32_t cond)
static void set_last_cond_mul (struct qblock *block, uint32_t cond)
static bool swap_file (struct qpu_reg *src)
static void setup_for_vpm_read (struct vc4_compile *c, struct qblock *block)
static void fixup_raddr_conflict (struct qblock *block, struct qpu_reg dst, struct qpu_reg *src0, struct qpu_reg *src1, struct qinst *inst, uint64_t *unpack)
static void set_last_dst_pack (struct qblock *block, struct qinst *inst)
static void handle_r4_qpu_write (struct qblock *block, struct qinst *qinst, struct qpu_reg dst)
static void vc4_generate_code_block (struct vc4_compile *c, struct qblock *block, struct qpu_reg *temp_registers)
void vc4_generate_code (struct vc4_context *vc4, struct vc4_compile *c)

Macro Definition Documentation

◆ A

#define A(name)   [QOP_##name] = {QPU_A_##name}

◆ M

#define M(name)   [QOP_##name] = {QPU_M_##name}

Function Documentation

◆ fixup_raddr_conflict()

static void fixup_raddr_conflict ( struct qblock * block,
struct qpu_reg  dst,
struct qpu_reg * src0,
struct qpu_reg * src1,
struct qinst * inst,
uint64_t *  unpack )

This is used to resolve the fact that we might register-allocate two different operands of an instruction to the same physical register file even though instructions have only one field for the register file source address.

In that case, we need to move one to a temporary that can be used in the instruction, instead. We reserve ra14/rb14 for this purpose.

Definition at line 163 of file vc4_qpu_emit.c.

References queued_qpu_inst::inst, last_inst(), qir_is_float_input(), qpu_a_MOV(), QPU_MUX_A, QPU_MUX_B, QPU_MUX_R5, QPU_MUX_SMALL_IMM, qpu_ra(), qpu_rb(), queue(), src0(), src1(), and swap_file().

Referenced by vc4_generate_code_block().

◆ handle_r4_qpu_write()

static void handle_r4_qpu_write ( struct qblock * block,
struct qinst * qinst,
struct qpu_reg  dst )

◆ last_inst()

static uint64_t * last_inst ( struct qblock * block )

◆ queue()

static void queue ( struct qblock * block,
uint64_t  inst )

Definition at line 47 of file vc4_qpu_emit.c.

References queued_qpu_inst::inst, list_addtail(), and rzalloc.

Referenced by add_to_atexit_list(), anv_queue_init(), anv_QueueBindSparse(), anv_QueuePresentKHR(), anv_QueueSignalReleaseImageANDROID(), anv_QueueSubmit(), anv_QueueWaitIdle(), anv_tramp_QueueBindSparse(), anv_tramp_QueuePresentKHR(), anv_tramp_QueueSubmit(), anv_tramp_QueueWaitIdle(), cl::Kernel::bind(), emit_rss_vgpu9(), emit_tex_binding_unit(), fill_geom_tess_rings(), fixup_raddr_conflict(), handle_r4_qpu_write(), lp_scene_dequeue(), lp_scene_enqueue(), lp_scene_queue_create(), lp_scene_queue_destroy(), radv_check_gpu_hangs(), radv_emit_compute_scratch(), radv_emit_global_shader_pointers(), radv_emit_gs_ring_sizes(), radv_emit_tess_factor_ring(), radv_get_preamble_cs(), radv_GetDeviceQueue2(), radv_gpu_hang_occured(), radv_queue_finish(), radv_queue_init(), radv_QueueBindSparse(), radv_QueuePresentKHR(), radv_QueueSignalReleaseImageANDROID(), radv_QueueSubmit(), radv_QueueWaitIdle(), radv_signal_fence(), cl::detail::ReferenceHandler< cl_command_queue >::release(), remove_from_atexit_list(), cl::detail::ReferenceHandler< cl_command_queue >::retain(), setup_for_vpm_read(), svga_reemit_tss_bindings(), u_vector_finish(), u_vector_length(), update_tss(), update_tss_binding(), util_queue_add_job(), util_queue_destroy(), util_queue_drop_job(), util_queue_finish(), util_queue_get_thread_time_nano(), util_queue_init(), util_queue_is_initialized(), util_queue_killall_and_wait(), util_queue_thread_func(), vc4_generate_code(), vc4_generate_code_block(), WorkOnCompute(), wsi_common_queue_present(), wsi_queue_destroy(), wsi_queue_init(), wsi_queue_pull(), and wsi_queue_push().

◆ set_last_cond_add()

static void set_last_cond_add ( struct qblock * block,
uint32_t  cond )

Definition at line 63 of file vc4_qpu_emit.c.

References last_inst(), and qpu_set_cond_add().

Referenced by handle_r4_qpu_write(), and vc4_generate_code_block().

◆ set_last_cond_mul()

static void set_last_cond_mul ( struct qblock * block,
uint32_t  cond )

Definition at line 69 of file vc4_qpu_emit.c.

References last_inst(), and qpu_set_cond_mul().

Referenced by vc4_generate_code_block().

◆ set_last_dst_pack()

static void set_last_dst_pack ( struct qblock * block,
struct qinst * inst )

◆ setup_for_vpm_read()

static void setup_for_vpm_read ( struct vc4_compile * c,
struct qblock * block )

Sets up the VPM read FIFO before we do any VPM read.

VPM reads (vertex attribute input) and VPM writes (varyings output) from the QPU reuse the VRI (varying interpolation) block's FIFOs to talk to the VPM block. In the VS/CS (unlike in the FS), the block starts out uninitialized, and you need to emit setup to the block before any VPM reads/writes.

VRI has a FIFO in each direction, with each FIFO able to hold four 32-bit-per-vertex values. VPM reads come through the read FIFO and VPM writes go through the write FIFO. The read/write setup values from QPU go through the write FIFO as well, with a sideband signal indicating that they're setup values. Once a read setup reaches the other side of the FIFO, the VPM block will start asynchronously reading vertex attributes and filling the read FIFO – that way hopefully the QPU doesn't have to block on reads later.

VPM read setup can configure 16 32-bit-per-vertex values to be read at a time, which is 4 vec4s. If more than that is being read (since we support 8 vec4 vertex attributes), then multiple read setup writes need to be done.

The existence of the FIFO makes it seem like you should be able to emit both setups for the 5-8 attribute cases and then do all the attribute reads. However, once the setup value makes it to the other end of the write FIFO, it will immediately update the VPM block's setup register. That updated setup register would be used for read FIFO fills from then on, breaking whatever remaining VPM values were supposed to be read into the read FIFO from the previous attribute set.

As a result, we need to emit the read setup, pull every VPM read value from that setup, and only then emit the second setup if applicable.

Definition at line 133 of file vc4_qpu_emit.c.

References MIN2, qpu_load_imm_ui(), qpu_vrsetup(), and queue().

Referenced by vc4_generate_code_block().

◆ swap_file()

static bool swap_file ( struct qpu_reg * src )

Some special registers can be read from either file, which lets us resolve raddr conflicts without extra MOVs.

Definition at line 79 of file vc4_qpu_emit.c.


Referenced by fixup_raddr_conflict().

◆ vc4_dump_program()

static void vc4_dump_program ( struct vc4_compile * c )

Definition at line 32 of file vc4_qpu_emit.c.

References ir_expression_operation::i, qir_get_stage_name(), and vc4_qpu_disasm().

Referenced by vc4_generate_code().

◆ vc4_generate_code()

void vc4_generate_code ( struct vc4_context * vc4,
struct vc4_compile * c )

◆ vc4_generate_code_block()

static void vc4_generate_code_block ( struct vc4_compile * c,
struct qblock * block,
struct qpu_reg * temp_registers )

Definition at line 240 of file vc4_qpu_emit.c.

References A, ADD(), AND, ARRAY_SIZE, assert(), qinst::cond, qinst::dst, FADD(), qreg::file, fixup_raddr_conflict(), FMUL(), FSUB(), handle_r4_qpu_write(), ir_expression_operation::i, qreg::index, last_inst(), M, MAX, MAYBE_UNUSED, MIN, qpu_reg::mux, NOT(), qinst::op, OR, qreg::pack, QFILE_FRAG_REV_FLAG, QFILE_FRAG_X, QFILE_FRAG_Y, QFILE_LOAD_IMM, QFILE_NULL, QFILE_QPU_ELEMENT, QFILE_SMALL_IMM, QFILE_TEMP, QFILE_TEX_B, QFILE_TEX_R, QFILE_TEX_S, QFILE_TEX_S_DIRECT, QFILE_TEX_T, QFILE_TLB_COLOR_WRITE, QFILE_TLB_COLOR_WRITE_MS, QFILE_TLB_STENCIL_SETUP, QFILE_TLB_Z_WRITE, QFILE_UNIF, QFILE_VARY, QFILE_VPM, qir_dump_inst(), qir_for_each_inst, qir_get_non_sideband_nsrc(), qir_get_nsrc(), qir_is_mul(), qir_is_raw_mov(), QOP_BRANCH, QOP_EXP2, QOP_FMOV, QOP_FRAG_W, QOP_FRAG_Z, QOP_LOAD_IMM, QOP_LOAD_IMM_I2, QOP_LOAD_IMM_U2, QOP_LOG2, QOP_MIN_NOIMM, QOP_MMOV, QOP_MOV, QOP_MS_MASK, QOP_RCP, QOP_ROT_MUL, QOP_RSQ, QOP_TEX_RESULT, QOP_THRSW, QOP_TLB_COLOR_READ, QOP_UNIFORMS_RESET, QOP_VARY_ADD_C, qpu_a_alu2(), QPU_A_FMAX, QPU_A_MIN, qpu_a_MOV(), QPU_A_OR, qpu_branch(), QPU_BRANCH_REL, QPU_COND_ALWAYS, qpu_encode_small_immediate(), qpu_load_imm_i2(), qpu_load_imm_u2(), qpu_load_imm_ui(), qpu_m_alu2(), qpu_m_rot(), QPU_M_V8MIN, QPU_MUX_R3, QPU_MUX_R4, QPU_MUX_SMALL_IMM, qpu_NOP(), QPU_PM, qpu_r5(), QPU_R_ELEM_QPU, QPU_R_MS_REV_FLAGS, QPU_R_VPM, QPU_R_XY_PIXEL_COORD, qpu_ra(), qpu_rb(), qpu_rn(), QPU_SET_FIELD, qpu_set_sig(), QPU_SF, QPU_SIG_COLOR_LOAD, QPU_SIG_LOAD_TMU0, QPU_SIG_THREAD_SWITCH, QPU_SMALL_IMM_MUL_ROT, qpu_tlbc(), qpu_tlbc_ms(), qpu_unif(), qpu_vary(), QPU_W_MS_FLAGS, QPU_W_NOP, QPU_W_SFU_EXP, QPU_W_SFU_LOG, QPU_W_SFU_RECIP, QPU_W_SFU_RECIPSQRT, QPU_W_TLB_STENCIL_SETUP, QPU_W_TLB_Z, QPU_W_TMU0_B, QPU_W_TMU0_R, QPU_W_TMU0_S, QPU_W_TMU0_T, QPU_W_UNIFORMS_ADDRESS, QPU_W_VPM, queue(), set_last_cond_add(), set_last_cond_mul(), set_last_dst_pack(), setup_for_vpm_read(), qinst::sf, SHL(), qinst::src, SUB(), unreachable, void, and XOR().

Referenced by vc4_generate_code().