"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/compiler/nir/nir_opt_combine_stores.c" (16 Sep 2020, 14580 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "nir_opt_combine_stores.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

#include "util/bitscan.h"
#include "util/list.h"
#include "util/u_math.h"

/* Combine stores of vectors to the same deref into a single store.
 *
 * This per-block pass keeps track of stores of vectors to the same
 * destination and combines them into the last store of the sequence.  Dead
 * stores (or dead parts of a store) found during the process are removed.
 *
 * A pending combination becomes an actual combination in various situations:
 * at the end of the block, when another instruction uses the memory, or due
 * to barriers.
 *
 * Besides vectors, the pass also looks at array derefs of vectors.  For
 * direct array derefs, this works like a write-mask access to the given
 * component.  For indirect accesses there's no way to know beforehand which
 * component will be overlapped, so the combination is finished -- the
 * indirect store remains unmodified.
 */
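
/* For example (an illustrative sketch in NIR-like pseudocode, not exact
 * nir_print output), a sequence of partial stores to the same vec4 such as
 *
 *    store_deref(var, vec4(a, _, _, _), wrmask=x)
 *    store_deref(var, vec4(_, b, _, _), wrmask=y)
 *
 * is rewritten so that only the last store of the sequence survives:
 *
 *    store_deref(var, vec4(a, b, _, _), wrmask=xy)
 *
 * where "_" stands for an undef component.
 */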

/* Keep track of a group of stores that can be combined.  All stores share the
 * same destination.
 */
struct combined_store {
   struct list_head link;

   nir_component_mask_t write_mask;
   nir_deref_instr *dst;

   /* Latest store added.  It is reused when combining. */
   nir_intrinsic_instr *latest;

   /* Original store for each component.  The number of times a store appears
    * in this array is kept in the store's pass_flags.
    */
   nir_intrinsic_instr *stores[NIR_MAX_VEC_COMPONENTS];
};
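
/* Illustrative note: for a pending combination on a vec4 written by two
 * stores, stores[] might hold {A, A, B, NULL} with write_mask == xyz,
 * meaning store A still owns components x and y, store B owns z, and w has
 * not been written yet.  A and B are hypothetical labels for two
 * store_deref instructions.
 */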

struct combine_stores_state {
   nir_variable_mode modes;

   /* Pending store combinations. */
   struct list_head pending;

   /* Per function impl state. */
   nir_builder b;
   bool progress;

   /* Allocator and freelist to reuse structs between functions. */
   void *lin_ctx;
   struct list_head freelist;
};

static struct combined_store *
alloc_combined_store(struct combine_stores_state *state)
{
   struct combined_store *result;
   if (list_is_empty(&state->freelist)) {
      result = linear_zalloc_child(state->lin_ctx, sizeof(*result));
   } else {
      result = list_first_entry(&state->freelist,
                                struct combined_store,
                                link);
      list_del(&result->link);
      memset(result, 0, sizeof(*result));
   }
   return result;
}

static void
free_combined_store(struct combine_stores_state *state,
                    struct combined_store *combo)
{
   list_del(&combo->link);
   combo->write_mask = 0;
   list_add(&combo->link, &state->freelist);
}

static void
combine_stores(struct combine_stores_state *state,
               struct combined_store *combo)
{
   assert(combo->latest);
   assert(combo->latest->intrinsic == nir_intrinsic_store_deref);

   /* If the combined write mask is the same as that of the latest store,
    * there is only one store in the combination, so there is nothing to
    * combine.
    */
   if ((combo->write_mask & nir_intrinsic_write_mask(combo->latest)) ==
       combo->write_mask)
      return;

   state->b.cursor = nir_before_instr(&combo->latest->instr);

   /* Build a new vec, to be used as the source for the combined store.  As
    * it gets built, remove previous stores that are no longer needed.
    */
   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = {0};
   unsigned num_components = glsl_get_vector_elements(combo->dst->type);
   unsigned bit_size = combo->latest->src[1].ssa->bit_size;
   for (unsigned i = 0; i < num_components; i++) {
      nir_intrinsic_instr *store = combo->stores[i];
      if (combo->write_mask & (1 << i)) {
         assert(store);
         assert(store->src[1].is_ssa);

         /* If store->num_components == 1 then we are in the deref-of-vec
          * case and store->src[1] is a scalar.  Otherwise, we have a
          * regular vector store and have to pick off a component.
          */
         comps[i] = store->num_components == 1 ?
            store->src[1].ssa :
            nir_channel(&state->b, store->src[1].ssa, i);

         assert(store->instr.pass_flags > 0);
         if (--store->instr.pass_flags == 0 && store != combo->latest)
            nir_instr_remove(&store->instr);
      } else {
         comps[i] = nir_ssa_undef(&state->b, 1, bit_size);
      }
   }
   assert(combo->latest->instr.pass_flags == 0);
   nir_ssa_def *vec = nir_vec(&state->b, comps, num_components);

   /* Fix up the latest store with the combined information. */
   nir_intrinsic_instr *store = combo->latest;

   /* In this case, our store is an array deref of a vector, so we need to
    * rewrite it to use a deref to the whole vector.
    */
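   /* Illustrative sketch: a store equivalent to "v.z = x", expressed as a
    * store to an array deref of the vec4 "v", becomes a store of
    * (undef, undef, x, undef) directly to "v", carrying wrmask == z.
    */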
   if (store->num_components == 1) {
      store->num_components = num_components;
      nir_instr_rewrite_src(&store->instr, &store->src[0],
                            nir_src_for_ssa(&combo->dst->dest.ssa));
   }

   assert(store->num_components == num_components);
   nir_intrinsic_set_write_mask(store, combo->write_mask);
   nir_instr_rewrite_src(&store->instr, &store->src[1],
                         nir_src_for_ssa(vec));
   state->progress = true;
}

static void
combine_stores_with_deref(struct combine_stores_state *state,
                          nir_deref_instr *deref)
{
   if ((state->modes & deref->mode) == 0)
      return;

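   /* Flush every pending combination whose destination may alias this
    * deref.  The aliasing query is conservative: e.g. an indirect access
    * a[i] may alias a pending store to a[0], so such a combination must be
    * finished before the access takes place.
    */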
   list_for_each_entry_safe(struct combined_store, combo, &state->pending, link) {
      if (nir_compare_derefs(combo->dst, deref) & nir_derefs_may_alias_bit) {
         combine_stores(state, combo);
         free_combined_store(state, combo);
      }
   }
}

static void
combine_stores_with_modes(struct combine_stores_state *state,
                          nir_variable_mode modes)
{
   if ((state->modes & modes) == 0)
      return;

   list_for_each_entry_safe(struct combined_store, combo, &state->pending, link) {
      if (combo->dst->mode & modes) {
         combine_stores(state, combo);
         free_combined_store(state, combo);
      }
   }
}

static struct combined_store *
find_matching_combined_store(struct combine_stores_state *state,
                             nir_deref_instr *deref)
{
   list_for_each_entry(struct combined_store, combo, &state->pending, link) {
      if (nir_compare_derefs(combo->dst, deref) & nir_derefs_equal_bit)
         return combo;
   }
   return NULL;
}

static void
update_combined_store(struct combine_stores_state *state,
                      nir_intrinsic_instr *intrin)
{
   nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
   if ((dst->mode & state->modes) == 0)
      return;

   unsigned vec_mask;
   nir_deref_instr *vec_dst;

   if (glsl_type_is_vector(dst->type)) {
      vec_mask = nir_intrinsic_write_mask(intrin);
      vec_dst = dst;
   } else {
      /* Besides vectors, only direct array derefs of vectors are handled. */
      if (dst->deref_type != nir_deref_type_array ||
          !nir_src_is_const(dst->arr.index) ||
          !glsl_type_is_vector(nir_deref_instr_parent(dst)->type)) {
         combine_stores_with_deref(state, dst);
         return;
      }

      uint64_t index = nir_src_as_uint(dst->arr.index);
      vec_dst = nir_deref_instr_parent(dst);

      if (index >= glsl_get_vector_elements(vec_dst->type)) {
         /* Storing to an invalid index is a no-op. */
         nir_instr_remove(&intrin->instr);
         state->progress = true;
         return;
      }

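      /* A direct array deref of a vector behaves like a write-mask access:
       * e.g. a constant index of 2 on a vec4 selects the z component,
       * giving vec_mask == 0x4.
       */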
      vec_mask = 1 << index;
   }

   struct combined_store *combo = find_matching_combined_store(state, vec_dst);
   if (!combo) {
      combo = alloc_combined_store(state);
      combo->dst = vec_dst;
      list_add(&combo->link, &state->pending);
   }

   /* Use pass_flags to reference count the store based on how many
    * components are still used by the combination.
    */
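   /* For example, a store with wrmask == xy starts at pass_flags == 2; if a
    * later store overwrites its y component, the count drops to 1, and a
    * store whose count reaches 0 is dead and gets removed.
    */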
   intrin->instr.pass_flags = util_bitcount(vec_mask);
   combo->latest = intrin;

   /* Update the combined_store, cleaning up older overlapping references. */
   combo->write_mask |= vec_mask;
   while (vec_mask) {
      unsigned i = u_bit_scan(&vec_mask);
      nir_intrinsic_instr *prev_store = combo->stores[i];

      if (prev_store) {
         if (--prev_store->instr.pass_flags == 0) {
            nir_instr_remove(&prev_store->instr);
         } else {
            assert(glsl_type_is_vector(
                      nir_src_as_deref(prev_store->src[0])->type));
            nir_component_mask_t prev_mask = nir_intrinsic_write_mask(prev_store);
            nir_intrinsic_set_write_mask(prev_store, prev_mask & ~(1 << i));
         }
         state->progress = true;
      }
      combo->stores[i] = combo->latest;
   }
}

static void
combine_stores_block(struct combine_stores_state *state, nir_block *block)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type == nir_instr_type_call) {
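         /* A called function may read or write any memory visible to it,
          * so conservatively flush all pending combinations for modes that
          * can escape to a callee.
          */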
         combine_stores_with_modes(state, nir_var_shader_out |
                                          nir_var_shader_temp |
                                          nir_var_function_temp |
                                          nir_var_mem_ssbo |
                                          nir_var_mem_shared |
                                          nir_var_mem_global);
         continue;
      }

      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_store_deref:
         if (nir_intrinsic_access(intrin) & ACCESS_VOLATILE) {
            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
            /* When we see a volatile store, we go ahead and combine all
             * previous non-volatile stores which touch that address and
             * specifically don't add the volatile store to the list.  This
             * way we guarantee that the volatile store isn't combined with
             * anything and no non-volatile stores are combined across a
             * volatile store.
             */
            combine_stores_with_deref(state, dst);
         } else {
            update_combined_store(state, intrin);
         }
         break;

      case nir_intrinsic_control_barrier:
      case nir_intrinsic_group_memory_barrier:
      case nir_intrinsic_memory_barrier:
         combine_stores_with_modes(state, nir_var_shader_out |
                                          nir_var_mem_ssbo |
                                          nir_var_mem_shared |
                                          nir_var_mem_global);
         break;

      case nir_intrinsic_memory_barrier_buffer:
         combine_stores_with_modes(state, nir_var_mem_ssbo |
                                          nir_var_mem_global);
         break;

      case nir_intrinsic_memory_barrier_shared:
         combine_stores_with_modes(state, nir_var_mem_shared);
         break;

      case nir_intrinsic_memory_barrier_tcs_patch:
         combine_stores_with_modes(state, nir_var_shader_out);
         break;

      case nir_intrinsic_scoped_memory_barrier:
         if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE) {
            combine_stores_with_modes(state,
                                      nir_intrinsic_memory_modes(intrin));
         }
         break;

      case nir_intrinsic_emit_vertex:
      case nir_intrinsic_emit_vertex_with_counter:
         combine_stores_with_modes(state, nir_var_shader_out);
         break;

      case nir_intrinsic_load_deref: {
         nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
         combine_stores_with_deref(state, src);
         break;
      }

      case nir_intrinsic_copy_deref: {
         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
         nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
         combine_stores_with_deref(state, dst);
         combine_stores_with_deref(state, src);
         break;
      }

      case nir_intrinsic_deref_atomic_add:
      case nir_intrinsic_deref_atomic_imin:
      case nir_intrinsic_deref_atomic_umin:
      case nir_intrinsic_deref_atomic_imax:
      case nir_intrinsic_deref_atomic_umax:
      case nir_intrinsic_deref_atomic_and:
      case nir_intrinsic_deref_atomic_or:
      case nir_intrinsic_deref_atomic_xor:
      case nir_intrinsic_deref_atomic_exchange:
      case nir_intrinsic_deref_atomic_comp_swap: {
         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
         combine_stores_with_deref(state, dst);
         break;
      }

      default:
         break;
      }
   }

   /* At the end of the block, try all the remaining combinations. */
   combine_stores_with_modes(state, state->modes);
}

static bool
combine_stores_impl(struct combine_stores_state *state, nir_function_impl *impl)
{
   state->progress = false;
   nir_builder_init(&state->b, impl);

   nir_foreach_block(block, impl)
      combine_stores_block(state, block);

   if (state->progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return state->progress;
}

bool
nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes)
{
   void *mem_ctx = ralloc_context(NULL);
   struct combine_stores_state state = {
      .modes   = modes,
      .lin_ctx = linear_zalloc_parent(mem_ctx, 0),
   };

   list_inithead(&state.pending);
   list_inithead(&state.freelist);

   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;
      progress |= combine_stores_impl(&state, function->impl);
   }

   ralloc_free(mem_ctx);
   return progress;
}
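
/* Typical usage (a minimal sketch; actual call sites live in the drivers'
 * pass pipelines, not in this file):
 *
 *    bool progress = false;
 *    NIR_PASS(progress, shader, nir_opt_combine_stores,
 *             nir_var_function_temp | nir_var_shader_out);
 *
 * NIR_PASS is the standard helper macro for running and validating a NIR
 * pass; the mode mask selects which storage classes the pass may touch.
 */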