"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/compiler/glsl/lower_shared_reference.cpp" (16 Sep 2020, 17680 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "lower_shared_reference.cpp", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright (c) 2015 Intel Corporation
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   21  * DEALINGS IN THE SOFTWARE.
   22  */
   23 
   24 /**
   25  * \file lower_shared_reference.cpp
   26  *
   27  * IR lower pass to replace dereferences of compute shader shared variables
   28  * with intrinsic function calls.
   29  *
   30  * This relieves drivers of the responsibility of allocating space for the
   31  * shared variables in the shared memory region.
   32  */
   33 
   34 #include "lower_buffer_access.h"
   35 #include "ir_builder.h"
   36 #include "linker.h"
   37 #include "main/macros.h"
   38 #include "util/list.h"
   39 #include "glsl_parser_extras.h"
   40 #include "main/mtypes.h"
   41 
   42 using namespace ir_builder;
   43 
   44 namespace {
   45 
   46 struct var_offset {
   47    struct list_head node;
   48    const ir_variable *var;
   49    unsigned offset;
   50 };
   51 
   52 class lower_shared_reference_visitor :
   53       public lower_buffer_access::lower_buffer_access {
   54 public:
   55 
   56    lower_shared_reference_visitor(struct gl_linked_shader *shader)
   57       : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u)
   58    {
   59       list_inithead(&var_offsets);
   60    }
   61 
   62    ~lower_shared_reference_visitor()
   63    {
   64       ralloc_free(list_ctx);
   65    }
   66 
   67    enum {
   68       shared_load_access,
   69       shared_store_access,
   70       shared_atomic_access,
   71    } buffer_access_type;
   72 
   73    void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
   74                              const glsl_type *type, ir_rvalue *offset,
   75                              unsigned mask, int channel);
   76 
   77    void handle_rvalue(ir_rvalue **rvalue);
   78    ir_visitor_status visit_enter(ir_assignment *ir);
   79    void handle_assignment(ir_assignment *ir);
   80 
   81    ir_call *lower_shared_atomic_intrinsic(ir_call *ir);
   82    ir_call *check_for_shared_atomic_intrinsic(ir_call *ir);
   83    ir_visitor_status visit_enter(ir_call *ir);
   84 
   85    unsigned get_shared_offset(const ir_variable *);
   86 
   87    ir_call *shared_load(void *mem_ctx, const struct glsl_type *type,
   88                         ir_rvalue *offset);
   89    ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
   90                          unsigned write_mask);
   91 
   92    void *list_ctx;
   93    struct gl_linked_shader *shader;
   94    struct list_head var_offsets;
   95    unsigned shared_size;
   96    bool progress;
   97 };
   98 
   99 unsigned
  100 lower_shared_reference_visitor::get_shared_offset(const ir_variable *var)
  101 {
  102    list_for_each_entry(var_offset, var_entry, &var_offsets, node) {
  103       if (var_entry->var == var)
  104          return var_entry->offset;
  105    }
  106 
  107    struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset);
  108    list_add(&new_entry->node, &var_offsets);
  109    new_entry->var = var;
  110 
  111    unsigned var_align = var->type->std430_base_alignment(false);
  112    new_entry->offset = glsl_align(shared_size, var_align);
  113 
  114    unsigned var_size = var->type->std430_size(false);
  115    shared_size = new_entry->offset + var_size;
  116 
  117    return new_entry->offset;
  118 }
  119 
  120 void
  121 lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
  122 {
  123    if (!*rvalue)
  124       return;
  125 
  126    ir_dereference *deref = (*rvalue)->as_dereference();
  127    if (!deref)
  128       return;
  129 
  130    ir_variable *var = deref->variable_referenced();
  131    if (!var || var->data.mode != ir_var_shader_shared)
  132       return;
  133 
  134    buffer_access_type = shared_load_access;
  135 
  136    void *mem_ctx = ralloc_parent(shader->ir);
  137 
  138    ir_rvalue *offset = NULL;
  139    unsigned const_offset = get_shared_offset(var);
  140    bool row_major;
  141    const glsl_type *matrix_type;
  142    assert(var->get_interface_type() == NULL);
  143    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
  144 
  145    setup_buffer_access(mem_ctx, deref,
  146                        &offset, &const_offset,
  147                        &row_major, &matrix_type, NULL, packing);
  148 
  149    /* Now that we've calculated the offset to the start of the
  150     * dereference, walk over the type and emit loads into a temporary.
  151     */
  152    const glsl_type *type = (*rvalue)->type;
  153    ir_variable *load_var = new(mem_ctx) ir_variable(type,
  154                                                     "shared_load_temp",
  155                                                     ir_var_temporary);
  156    base_ir->insert_before(load_var);
  157 
  158    ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
  159                                                        "shared_load_temp_offset",
  160                                                        ir_var_temporary);
  161    base_ir->insert_before(load_offset);
  162    base_ir->insert_before(assign(load_offset, offset));
  163 
  164    deref = new(mem_ctx) ir_dereference_variable(load_var);
  165 
  166    emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major,
  167                matrix_type, packing, 0);
  168 
  169    *rvalue = deref;
  170 
  171    progress = true;
  172 }
  173 
  174 void
  175 lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
  176 {
  177    if (!ir || !ir->lhs)
  178       return;
  179 
  180    ir_rvalue *rvalue = ir->lhs->as_rvalue();
  181    if (!rvalue)
  182       return;
  183 
  184    ir_dereference *deref = ir->lhs->as_dereference();
  185    if (!deref)
  186       return;
  187 
  188    ir_variable *var = ir->lhs->variable_referenced();
  189    if (!var || var->data.mode != ir_var_shader_shared)
  190       return;
  191 
  192    buffer_access_type = shared_store_access;
  193 
  194    /* We have a write to a shared variable, so declare a temporary and rewrite
  195     * the assignment so that the temporary is the LHS.
  196     */
  197    void *mem_ctx = ralloc_parent(shader->ir);
  198 
  199    const glsl_type *type = rvalue->type;
  200    ir_variable *store_var = new(mem_ctx) ir_variable(type,
  201                                                      "shared_store_temp",
  202                                                      ir_var_temporary);
  203    base_ir->insert_before(store_var);
  204    ir->lhs = new(mem_ctx) ir_dereference_variable(store_var);
  205 
  206    ir_rvalue *offset = NULL;
  207    unsigned const_offset = get_shared_offset(var);
  208    bool row_major;
  209    const glsl_type *matrix_type;
  210    assert(var->get_interface_type() == NULL);
  211    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
  212 
  213    setup_buffer_access(mem_ctx, deref,
  214                        &offset, &const_offset,
  215                        &row_major, &matrix_type, NULL, packing);
  216 
  217    deref = new(mem_ctx) ir_dereference_variable(store_var);
  218 
  219    ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
  220                                                         "shared_store_temp_offset",
  221                                                         ir_var_temporary);
  222    base_ir->insert_before(store_offset);
  223    base_ir->insert_before(assign(store_offset, offset));
  224 
  225    /* Now we have to write the value assigned to the temporary back to memory */
  226    emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major,
  227                matrix_type, packing, ir->write_mask);
  228 
  229    progress = true;
  230 }
  231 
  232 ir_visitor_status
  233 lower_shared_reference_visitor::visit_enter(ir_assignment *ir)
  234 {
  235    handle_assignment(ir);
  236    return rvalue_visit(ir);
  237 }
  238 
  239 void
  240 lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx,
  241                                                      ir_dereference *deref,
  242                                                      const glsl_type *type,
  243                                                      ir_rvalue *offset,
  244                                                      unsigned mask,
  245                                                      int /* channel */)
  246 {
  247    if (buffer_access_type == shared_store_access) {
  248       ir_call *store = shared_store(mem_ctx, deref, offset, mask);
  249       base_ir->insert_after(store);
  250    } else {
  251       ir_call *load = shared_load(mem_ctx, type, offset);
  252       base_ir->insert_before(load);
  253       ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL);
  254       base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
  255                                     value));
  256    }
  257 }
  258 
  259 static bool
  260 compute_shader_enabled(const _mesa_glsl_parse_state *state)
  261 {
  262    return state->stage == MESA_SHADER_COMPUTE;
  263 }
  264 
  265 ir_call *
  266 lower_shared_reference_visitor::shared_store(void *mem_ctx,
  267                                              ir_rvalue *deref,
  268                                              ir_rvalue *offset,
  269                                              unsigned write_mask)
  270 {
  271    exec_list sig_params;
  272 
  273    ir_variable *offset_ref = new(mem_ctx)
  274       ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
  275    sig_params.push_tail(offset_ref);
  276 
  277    ir_variable *val_ref = new(mem_ctx)
  278       ir_variable(deref->type, "value" , ir_var_function_in);
  279    sig_params.push_tail(val_ref);
  280 
  281    ir_variable *writemask_ref = new(mem_ctx)
  282       ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
  283    sig_params.push_tail(writemask_ref);
  284 
  285    ir_function_signature *sig = new(mem_ctx)
  286       ir_function_signature(glsl_type::void_type, compute_shader_enabled);
  287    assert(sig);
  288    sig->replace_parameters(&sig_params);
  289    sig->intrinsic_id = ir_intrinsic_shared_store;
  290 
  291    ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared");
  292    f->add_signature(sig);
  293 
  294    exec_list call_params;
  295    call_params.push_tail(offset->clone(mem_ctx, NULL));
  296    call_params.push_tail(deref->clone(mem_ctx, NULL));
  297    call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
  298    return new(mem_ctx) ir_call(sig, NULL, &call_params);
  299 }
  300 
  301 ir_call *
  302 lower_shared_reference_visitor::shared_load(void *mem_ctx,
  303                                             const struct glsl_type *type,
  304                                             ir_rvalue *offset)
  305 {
  306    exec_list sig_params;
  307 
  308    ir_variable *offset_ref = new(mem_ctx)
  309       ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
  310    sig_params.push_tail(offset_ref);
  311 
  312    ir_function_signature *sig =
  313       new(mem_ctx) ir_function_signature(type, compute_shader_enabled);
  314    assert(sig);
  315    sig->replace_parameters(&sig_params);
  316    sig->intrinsic_id = ir_intrinsic_shared_load;
  317 
  318    ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared");
  319    f->add_signature(sig);
  320 
  321    ir_variable *result = new(mem_ctx)
  322       ir_variable(type, "shared_load_result", ir_var_temporary);
  323    base_ir->insert_before(result);
  324    ir_dereference_variable *deref_result = new(mem_ctx)
  325       ir_dereference_variable(result);
  326 
  327    exec_list call_params;
  328    call_params.push_tail(offset->clone(mem_ctx, NULL));
  329 
  330    return new(mem_ctx) ir_call(sig, deref_result, &call_params);
  331 }
  332 
  333 /* Lowers the intrinsic call to a new internal intrinsic that swaps the access
  334  * to the shared variable in the first parameter by an offset. This involves
  335  * creating the new internal intrinsic (i.e. the new function signature).
  336  */
  337 ir_call *
  338 lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
  339 {
  340    /* Shared atomics usually have 2 parameters, the shared variable and an
  341     * integer argument. The exception is CompSwap, that has an additional
  342     * integer parameter.
  343     */
  344    int param_count = ir->actual_parameters.length();
  345    assert(param_count == 2 || param_count == 3);
  346 
  347    /* First argument must be a scalar integer shared variable */
  348    exec_node *param = ir->actual_parameters.get_head();
  349    ir_instruction *inst = (ir_instruction *) param;
  350    assert(inst->ir_type == ir_type_dereference_variable ||
  351           inst->ir_type == ir_type_dereference_array ||
  352           inst->ir_type == ir_type_dereference_record ||
  353           inst->ir_type == ir_type_swizzle);
  354 
  355    ir_rvalue *deref = (ir_rvalue *) inst;
  356    assert(deref->type->is_scalar() &&
  357           (deref->type->is_integer_32() || deref->type->is_float()));
  358 
  359    ir_variable *var = deref->variable_referenced();
  360    assert(var);
  361 
  362    /* Compute the offset to the start if the dereference
  363     */
  364    void *mem_ctx = ralloc_parent(shader->ir);
  365 
  366    ir_rvalue *offset = NULL;
  367    unsigned const_offset = get_shared_offset(var);
  368    bool row_major;
  369    const glsl_type *matrix_type;
  370    assert(var->get_interface_type() == NULL);
  371    const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
  372    buffer_access_type = shared_atomic_access;
  373 
  374    setup_buffer_access(mem_ctx, deref,
  375                        &offset, &const_offset,
  376                        &row_major, &matrix_type, NULL, packing);
  377 
  378    assert(offset);
  379    assert(!row_major);
  380    assert(matrix_type == NULL);
  381 
  382    ir_rvalue *deref_offset =
  383       add(offset, new(mem_ctx) ir_constant(const_offset));
  384 
  385    /* Create the new internal function signature that will take an offset
  386     * instead of a shared variable
  387     */
  388    exec_list sig_params;
  389    ir_variable *sig_param = new(mem_ctx)
  390       ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
  391    sig_params.push_tail(sig_param);
  392 
  393    const glsl_type *type = deref->type->get_scalar_type();
  394    sig_param = new(mem_ctx)
  395          ir_variable(type, "data1", ir_var_function_in);
  396    sig_params.push_tail(sig_param);
  397 
  398    if (param_count == 3) {
  399       sig_param = new(mem_ctx)
  400             ir_variable(type, "data2", ir_var_function_in);
  401       sig_params.push_tail(sig_param);
  402    }
  403 
  404    ir_function_signature *sig =
  405       new(mem_ctx) ir_function_signature(deref->type,
  406                                          compute_shader_enabled);
  407    assert(sig);
  408    sig->replace_parameters(&sig_params);
  409 
  410    assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
  411    assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
  412    sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, shared);
  413 
  414    char func_name[64];
  415    sprintf(func_name, "%s_shared", ir->callee_name());
  416    ir_function *f = new(mem_ctx) ir_function(func_name);
  417    f->add_signature(sig);
  418 
  419    /* Now, create the call to the internal intrinsic */
  420    exec_list call_params;
  421    call_params.push_tail(deref_offset);
  422    param = ir->actual_parameters.get_head()->get_next();
  423    ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
  424    call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
  425    if (param_count == 3) {
  426       param = param->get_next();
  427       param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
  428       call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
  429    }
  430    ir_dereference_variable *return_deref =
  431       ir->return_deref->clone(mem_ctx, NULL);
  432    return new(mem_ctx) ir_call(sig, return_deref, &call_params);
  433 }
  434 
  435 ir_call *
  436 lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir)
  437 {
  438    exec_list& params = ir->actual_parameters;
  439 
  440    if (params.length() < 2 || params.length() > 3)
  441       return ir;
  442 
  443    ir_rvalue *rvalue =
  444       ((ir_instruction *) params.get_head())->as_rvalue();
  445    if (!rvalue)
  446       return ir;
  447 
  448    ir_variable *var = rvalue->variable_referenced();
  449    if (!var || var->data.mode != ir_var_shader_shared)
  450       return ir;
  451 
  452    const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
  453    if (id == ir_intrinsic_generic_atomic_add ||
  454        id == ir_intrinsic_generic_atomic_min ||
  455        id == ir_intrinsic_generic_atomic_max ||
  456        id == ir_intrinsic_generic_atomic_and ||
  457        id == ir_intrinsic_generic_atomic_or ||
  458        id == ir_intrinsic_generic_atomic_xor ||
  459        id == ir_intrinsic_generic_atomic_exchange ||
  460        id == ir_intrinsic_generic_atomic_comp_swap) {
  461       return lower_shared_atomic_intrinsic(ir);
  462    }
  463 
  464    return ir;
  465 }
  466 
  467 ir_visitor_status
  468 lower_shared_reference_visitor::visit_enter(ir_call *ir)
  469 {
  470    ir_call *new_ir = check_for_shared_atomic_intrinsic(ir);
  471    if (new_ir != ir) {
  472       progress = true;
  473       base_ir->replace_with(new_ir);
  474       return visit_continue_with_parent;
  475    }
  476 
  477    return rvalue_visit(ir);
  478 }
  479 
  480 } /* unnamed namespace */
  481 
  482 void
  483 lower_shared_reference(struct gl_context *ctx,
  484                        struct gl_shader_program *prog,
  485                        struct gl_linked_shader *shader)
  486 {
  487    if (shader->Stage != MESA_SHADER_COMPUTE)
  488       return;
  489 
  490    lower_shared_reference_visitor v(shader);
  491 
  492    /* Loop over the instructions lowering references, because we take a deref
  493     * of an shared variable array using a shared variable dereference as the
  494     * index will produce a collection of instructions all of which have cloned
  495     * shared variable dereferences for that array index.
  496     */
  497    do {
  498       v.progress = false;
  499       visit_list_elements(&v, shader->ir);
  500    } while (v.progress);
  501 
  502    prog->Comp.SharedSize = v.shared_size;
  503 
  504    /* Section 19.1 (Compute Shader Variables) of the OpenGL 4.5 (Core Profile)
  505     * specification says:
  506     *
  507     *   "There is a limit to the total size of all variables declared as
  508     *    shared in a single program object. This limit, expressed in units of
  509     *    basic machine units, may be queried as the value of
  510     *    MAX_COMPUTE_SHARED_MEMORY_SIZE."
  511     */
  512    if (prog->Comp.SharedSize > ctx->Const.MaxComputeSharedMemorySize) {
  513       linker_error(prog, "Too much shared memory used (%u/%u)\n",
  514                    prog->Comp.SharedSize,
  515                    ctx->Const.MaxComputeSharedMemorySize);
  516    }
  517 }