"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/compiler/nir/nir_gather_xfb_info.c" (16 Sep 2020, 10605 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "nir_gather_xfb_info.c", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright © 2018 Intel Corporation
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  */
   23 
   24 #include "nir_xfb_info.h"
   25 
   26 #include <util/u_math.h>
   27 
   28 static void
   29 add_var_xfb_varying(nir_xfb_info *xfb,
   30                     nir_xfb_varyings_info *varyings,
   31                     unsigned buffer,
   32                     unsigned offset,
   33                     const struct glsl_type *type)
   34 {
   35    if (varyings == NULL)
   36       return;
   37 
   38    nir_xfb_varying_info *varying = &varyings->varyings[varyings->varying_count++];
   39 
   40    varying->type = type;
   41    varying->buffer = buffer;
   42    varying->offset = offset;
   43    xfb->buffers[buffer].varying_count++;
   44 }
   45 
   46 
   47 static nir_xfb_info *
   48 nir_xfb_info_create(void *mem_ctx, uint16_t output_count)
   49 {
   50    return rzalloc_size(mem_ctx, nir_xfb_info_size(output_count));
   51 }
   52 
   53 static size_t
   54 nir_xfb_varyings_info_size(uint16_t varying_count)
   55 {
   56    return sizeof(nir_xfb_info) + sizeof(nir_xfb_varying_info) * varying_count;
   57 }
   58 
   59 static nir_xfb_varyings_info *
   60 nir_xfb_varyings_info_create(void *mem_ctx, uint16_t varying_count)
   61 {
   62    return rzalloc_size(mem_ctx, nir_xfb_varyings_info_size(varying_count));
   63 }
   64 
   65 static void
   66 add_var_xfb_outputs(nir_xfb_info *xfb,
   67                     nir_xfb_varyings_info *varyings,
   68                     nir_variable *var,
   69                     unsigned buffer,
   70                     unsigned *location,
   71                     unsigned *offset,
   72                     const struct glsl_type *type,
   73                     bool varying_added)
   74 {
   75    /* If this type contains a 64-bit value, align to 8 bytes */
   76    if (glsl_type_contains_64bit(type))
   77       *offset = ALIGN_POT(*offset, 8);
   78 
   79    if (glsl_type_is_array_or_matrix(type) && !var->data.compact) {
   80       unsigned length = glsl_get_length(type);
   81 
   82       const struct glsl_type *child_type = glsl_get_array_element(type);
   83       if (!glsl_type_is_array(child_type) &&
   84           !glsl_type_is_struct(child_type)) {
   85 
   86          add_var_xfb_varying(xfb, varyings, buffer, *offset, type);
   87          varying_added = true;
   88       }
   89 
   90       for (unsigned i = 0; i < length; i++)
   91          add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset,
   92                              child_type, varying_added);
   93    } else if (glsl_type_is_struct_or_ifc(type)) {
   94       unsigned length = glsl_get_length(type);
   95       for (unsigned i = 0; i < length; i++) {
   96          const struct glsl_type *child_type = glsl_get_struct_field(type, i);
   97          add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset,
   98                              child_type, varying_added);
   99       }
  100    } else {
  101       assert(buffer < NIR_MAX_XFB_BUFFERS);
  102       if (xfb->buffers_written & (1 << buffer)) {
  103          assert(xfb->buffers[buffer].stride == var->data.xfb.stride);
  104          assert(xfb->buffer_to_stream[buffer] == var->data.stream);
  105       } else {
  106          xfb->buffers_written |= (1 << buffer);
  107          xfb->buffers[buffer].stride = var->data.xfb.stride;
  108          xfb->buffer_to_stream[buffer] = var->data.stream;
  109       }
  110 
  111       assert(var->data.stream < NIR_MAX_XFB_STREAMS);
  112       xfb->streams_written |= (1 << var->data.stream);
  113 
  114       unsigned comp_slots;
  115       if (var->data.compact) {
  116          /* This only happens for clip/cull which are float arrays */
  117          assert(glsl_without_array(type) == glsl_float_type());
  118          assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
  119                 var->data.location == VARYING_SLOT_CLIP_DIST1);
  120          comp_slots = glsl_get_length(type);
  121       } else {
  122          comp_slots = glsl_get_component_slots(type);
  123 
  124          UNUSED unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
  125          assert(attrib_slots == glsl_count_attribute_slots(type, false));
  126 
  127          /* Ensure that we don't have, for instance, a dvec2 with a
  128           * location_frac of 2 which would make it crass a location boundary
  129           * even though it fits in a single slot.  However, you can have a
  130           * dvec3 which crosses the slot boundary with a location_frac of 2.
  131           */
  132          assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==
  133                 attrib_slots);
  134       }
  135 
  136       assert(var->data.location_frac + comp_slots <= 8);
  137       uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
  138       unsigned comp_offset = var->data.location_frac;
  139 
  140       if (!varying_added) {
  141          add_var_xfb_varying(xfb, varyings, buffer, *offset, type);
  142       }
  143 
  144       while (comp_mask) {
  145          nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
  146 
  147          output->buffer = buffer;
  148          output->offset = *offset;
  149          output->location = *location;
  150          output->component_mask = comp_mask & 0xf;
  151          output->component_offset = comp_offset;
  152 
  153          *offset += util_bitcount(output->component_mask) * 4;
  154          (*location)++;
  155          comp_mask >>= 4;
  156          comp_offset = 0;
  157       }
  158    }
  159 }
  160 
  161 static int
  162 compare_xfb_varying_offsets(const void *_a, const void *_b)
  163 {
  164    const nir_xfb_varying_info *a = _a, *b = _b;
  165 
  166    if (a->buffer != b->buffer)
  167       return a->buffer - b->buffer;
  168 
  169    return a->offset - b->offset;
  170 }
  171 
  172 static int
  173 compare_xfb_output_offsets(const void *_a, const void *_b)
  174 {
  175    const nir_xfb_output_info *a = _a, *b = _b;
  176 
  177    return a->offset - b->offset;
  178 }
  179 
  180 nir_xfb_info *
  181 nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx)
  182 {
  183    return nir_gather_xfb_info_with_varyings(shader, mem_ctx, NULL);
  184 }
  185 
  186 nir_xfb_info *
  187 nir_gather_xfb_info_with_varyings(const nir_shader *shader,
  188                                   void *mem_ctx,
  189                                   nir_xfb_varyings_info **varyings_info_out)
  190 {
  191    assert(shader->info.stage == MESA_SHADER_VERTEX ||
  192           shader->info.stage == MESA_SHADER_TESS_EVAL ||
  193           shader->info.stage == MESA_SHADER_GEOMETRY);
  194 
  195    /* Compute the number of outputs we have.  This is simply the number of
  196     * cumulative locations consumed by all the variables.  If a location is
  197     * represented by multiple variables, then they each count separately in
  198     * number of outputs.  This is only an estimate as some variables may have
  199     * an xfb_buffer but not an output so it may end up larger than we need but
  200     * it should be good enough for allocation.
  201     */
  202    unsigned num_outputs = 0;
  203    unsigned num_varyings = 0;
  204    nir_xfb_varyings_info *varyings_info = NULL;
  205    nir_foreach_variable(var, &shader->outputs) {
  206       if (var->data.explicit_xfb_buffer) {
  207          num_outputs += glsl_count_attribute_slots(var->type, false);
  208          num_varyings += glsl_varying_count(var->type);
  209       }
  210    }
  211    if (num_outputs == 0 || num_varyings == 0)
  212       return NULL;
  213 
  214    nir_xfb_info *xfb = nir_xfb_info_create(mem_ctx, num_outputs);
  215    if (varyings_info_out != NULL) {
  216       *varyings_info_out = nir_xfb_varyings_info_create(mem_ctx, num_varyings);
  217       varyings_info = *varyings_info_out;
  218    }
  219 
  220    /* Walk the list of outputs and add them to the array */
  221    nir_foreach_variable(var, &shader->outputs) {
  222       if (!var->data.explicit_xfb_buffer)
  223          continue;
  224 
  225       unsigned location = var->data.location;
  226 
  227       /* In order to know if we have a array of blocks can't be done just by
  228        * checking if we have an interface type and is an array, because due
  229        * splitting we could end on a case were we received a split struct
  230        * that contains an array.
  231        */
  232       bool is_array_block = var->interface_type != NULL &&
  233          glsl_type_is_array(var->type) &&
  234          glsl_without_array(var->type) == var->interface_type;
  235 
  236       if (var->data.explicit_offset && !is_array_block) {
  237          unsigned offset = var->data.offset;
  238          add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer,
  239                              &location, &offset, var->type, false);
  240       } else if (is_array_block) {
  241          assert(glsl_type_is_struct_or_ifc(var->interface_type));
  242 
  243          unsigned aoa_size = glsl_get_aoa_size(var->type);
  244          const struct glsl_type *itype = var->interface_type;
  245          unsigned nfields = glsl_get_length(itype);
  246          for (unsigned b = 0; b < aoa_size; b++) {
  247             for (unsigned f = 0; f < nfields; f++) {
  248                int foffset = glsl_get_struct_field_offset(itype, f);
  249                const struct glsl_type *ftype = glsl_get_struct_field(itype, f);
  250                if (foffset < 0) {
  251                   location += glsl_count_attribute_slots(ftype, false);
  252                   continue;
  253                }
  254 
  255                unsigned offset = foffset;
  256                add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer + b,
  257                                    &location, &offset, ftype, false);
  258             }
  259          }
  260       }
  261    }
  262 
  263    /* Everything is easier in the state setup code if outputs and varyings are
  264     * sorted in order of output offset (and buffer for varyings).
  265     */
  266    qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]),
  267          compare_xfb_output_offsets);
  268 
  269    if (varyings_info != NULL) {
  270       qsort(varyings_info->varyings, varyings_info->varying_count,
  271             sizeof(varyings_info->varyings[0]),
  272             compare_xfb_varying_offsets);
  273    }
  274 
  275 #ifndef NDEBUG
  276    /* Finally, do a sanity check */
  277    unsigned max_offset[NIR_MAX_XFB_BUFFERS] = {0};
  278    for (unsigned i = 0; i < xfb->output_count; i++) {
  279       assert(xfb->outputs[i].offset >= max_offset[xfb->outputs[i].buffer]);
  280       assert(xfb->outputs[i].component_mask != 0);
  281       unsigned slots = util_bitcount(xfb->outputs[i].component_mask);
  282       max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset + slots * 4;
  283    }
  284 #endif
  285 
  286    return xfb;
  287 }