"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/amd/vulkan/radv_meta_bufimage.c" (16 Sep 2020, 79293 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "radv_meta_bufimage.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright © 2016 Red Hat.
    3  * Copyright © 2016 Bas Nieuwenhuizen
    4  *
    5  * Permission is hereby granted, free of charge, to any person obtaining a
    6  * copy of this software and associated documentation files (the "Software"),
    7  * to deal in the Software without restriction, including without limitation
    8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    9  * and/or sell copies of the Software, and to permit persons to whom the
   10  * Software is furnished to do so, subject to the following conditions:
   11  *
   12  * The above copyright notice and this permission notice (including the next
   13  * paragraph) shall be included in all copies or substantial portions of the
   14  * Software.
   15  *
   16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   22  * IN THE SOFTWARE.
   23  */
   24 #include "radv_meta.h"
   25 #include "nir/nir_builder.h"
   26 
   27 /*
   28  * GFX queue: Compute shader implementation of image->buffer copy
   29  * Compute queue: implementation also of buffer->image, image->image, and image clear.
   30  */
   31 
   32 /* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
   33  * for that.
   34  */
   35 static nir_shader *
   36 build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
   37 {
   38     nir_builder b;
   39     enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
   40     const struct glsl_type *sampler_type = glsl_sampler_type(dim,
   41                                  false,
   42                                  false,
   43                                  GLSL_TYPE_FLOAT);
   44     const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
   45                                  false,
   46                                  false,
   47                                  GLSL_TYPE_FLOAT);
   48     nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
   49     b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
   50     b.shader->info.cs.local_size[0] = 16;
   51     b.shader->info.cs.local_size[1] = 16;
   52     b.shader->info.cs.local_size[2] = 1;
   53     nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
   54                               sampler_type, "s_tex");
   55     input_img->data.descriptor_set = 0;
   56     input_img->data.binding = 0;
   57 
   58     nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
   59                                img_type, "out_img");
   60     output_img->data.descriptor_set = 0;
   61     output_img->data.binding = 1;
   62 
   63     nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
   64     nir_ssa_def *wg_id = nir_load_work_group_id(&b);
   65     nir_ssa_def *block_size = nir_imm_ivec4(&b,
   66                         b.shader->info.cs.local_size[0],
   67                         b.shader->info.cs.local_size[1],
   68                         b.shader->info.cs.local_size[2], 0);
   69 
   70     nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
   71 
   72 
   73 
   74     nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
   75     nir_intrinsic_set_base(offset, 0);
   76     nir_intrinsic_set_range(offset, 16);
   77     offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
   78     offset->num_components = is_3d ? 3 : 2;
   79     nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
   80     nir_builder_instr_insert(&b, &offset->instr);
   81 
   82     nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
   83     nir_intrinsic_set_base(stride, 0);
   84     nir_intrinsic_set_range(stride, 16);
   85     stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
   86     stride->num_components = 1;
   87     nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
   88     nir_builder_instr_insert(&b, &stride->instr);
   89 
   90     nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
   91     nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
   92 
   93     nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
   94     tex->sampler_dim = dim;
   95     tex->op = nir_texop_txf;
   96     tex->src[0].src_type = nir_tex_src_coord;
   97     tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
   98     tex->src[1].src_type = nir_tex_src_lod;
   99     tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
  100     tex->src[2].src_type = nir_tex_src_texture_deref;
  101     tex->src[2].src = nir_src_for_ssa(input_img_deref);
  102     tex->dest_type = nir_type_float;
  103     tex->is_array = false;
  104     tex->coord_components = is_3d ? 3 : 2;
  105 
  106     nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
  107     nir_builder_instr_insert(&b, &tex->instr);
  108 
  109     nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
  110     nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
  111 
  112     nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
  113     tmp = nir_iadd(&b, tmp, pos_x);
  114 
  115     nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
  116 
  117     nir_ssa_def *outval = &tex->dest.ssa;
  118     nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
  119     store->num_components = 4;
  120     store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
  121     store->src[1] = nir_src_for_ssa(coord);
  122     store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
  123     store->src[3] = nir_src_for_ssa(outval);
  124     store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
  125 
  126     nir_builder_instr_insert(&b, &store->instr);
  127     return b.shader;
  128 }
  129 
  130 /* Image to buffer - don't write use image accessors */
  131 static VkResult
  132 radv_device_init_meta_itob_state(struct radv_device *device)
  133 {
  134     VkResult result;
  135     struct radv_shader_module cs = { .nir = NULL };
  136     struct radv_shader_module cs_3d = { .nir = NULL };
  137 
  138     cs.nir = build_nir_itob_compute_shader(device, false);
  139     if (device->physical_device->rad_info.chip_class >= GFX9)
  140         cs_3d.nir = build_nir_itob_compute_shader(device, true);
  141 
  142     /*
  143      * two descriptors one for the image being sampled
  144      * one for the buffer being written.
  145      */
  146     VkDescriptorSetLayoutCreateInfo ds_create_info = {
  147         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
  148         .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
  149         .bindingCount = 2,
  150         .pBindings = (VkDescriptorSetLayoutBinding[]) {
  151             {
  152                 .binding = 0,
  153                 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
  154                 .descriptorCount = 1,
  155                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
  156                 .pImmutableSamplers = NULL
  157             },
  158             {
  159                 .binding = 1,
  160                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
  161                 .descriptorCount = 1,
  162                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
  163                 .pImmutableSamplers = NULL
  164             },
  165         }
  166     };
  167 
  168     result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
  169                         &ds_create_info,
  170                         &device->meta_state.alloc,
  171                         &device->meta_state.itob.img_ds_layout);
  172     if (result != VK_SUCCESS)
  173         goto fail;
  174 
  175 
  176     VkPipelineLayoutCreateInfo pl_create_info = {
  177         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
  178         .setLayoutCount = 1,
  179         .pSetLayouts = &device->meta_state.itob.img_ds_layout,
  180         .pushConstantRangeCount = 1,
  181         .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
  182     };
  183 
  184     result = radv_CreatePipelineLayout(radv_device_to_handle(device),
  185                       &pl_create_info,
  186                       &device->meta_state.alloc,
  187                       &device->meta_state.itob.img_p_layout);
  188     if (result != VK_SUCCESS)
  189         goto fail;
  190 
  191     /* compute shader */
  192 
  193     VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
  194         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
  195         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
  196         .module = radv_shader_module_to_handle(&cs),
  197         .pName = "main",
  198         .pSpecializationInfo = NULL,
  199     };
  200 
  201     VkComputePipelineCreateInfo vk_pipeline_info = {
  202         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
  203         .stage = pipeline_shader_stage,
  204         .flags = 0,
  205         .layout = device->meta_state.itob.img_p_layout,
  206     };
  207 
  208     result = radv_CreateComputePipelines(radv_device_to_handle(device),
  209                          radv_pipeline_cache_to_handle(&device->meta_state.cache),
  210                          1, &vk_pipeline_info, NULL,
  211                          &device->meta_state.itob.pipeline);
  212     if (result != VK_SUCCESS)
  213         goto fail;
  214 
  215     if (device->physical_device->rad_info.chip_class >= GFX9) {
  216         VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
  217             .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
  218             .stage = VK_SHADER_STAGE_COMPUTE_BIT,
  219             .module = radv_shader_module_to_handle(&cs_3d),
  220             .pName = "main",
  221             .pSpecializationInfo = NULL,
  222         };
  223 
  224         VkComputePipelineCreateInfo vk_pipeline_info_3d = {
  225             .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
  226             .stage = pipeline_shader_stage_3d,
  227             .flags = 0,
  228             .layout = device->meta_state.itob.img_p_layout,
  229         };
  230 
  231         result = radv_CreateComputePipelines(radv_device_to_handle(device),
  232                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
  233                              1, &vk_pipeline_info_3d, NULL,
  234                              &device->meta_state.itob.pipeline_3d);
  235         if (result != VK_SUCCESS)
  236             goto fail;
  237         ralloc_free(cs_3d.nir);
  238     }
  239     ralloc_free(cs.nir);
  240 
  241     return VK_SUCCESS;
  242 fail:
  243     ralloc_free(cs.nir);
  244     ralloc_free(cs_3d.nir);
  245     return result;
  246 }
  247 
  248 static void
  249 radv_device_finish_meta_itob_state(struct radv_device *device)
  250 {
  251     struct radv_meta_state *state = &device->meta_state;
  252 
  253     radv_DestroyPipelineLayout(radv_device_to_handle(device),
  254                    state->itob.img_p_layout, &state->alloc);
  255     radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
  256                         state->itob.img_ds_layout,
  257                     &state->alloc);
  258     radv_DestroyPipeline(radv_device_to_handle(device),
  259                  state->itob.pipeline, &state->alloc);
  260     if (device->physical_device->rad_info.chip_class >= GFX9)
  261         radv_DestroyPipeline(radv_device_to_handle(device),
  262                      state->itob.pipeline_3d, &state->alloc);
  263 }
  264 
  265 static nir_shader *
  266 build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
  267 {
  268     nir_builder b;
  269     enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
  270     const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
  271                                  false,
  272                                  false,
  273                                  GLSL_TYPE_FLOAT);
  274     const struct glsl_type *img_type = glsl_sampler_type(dim,
  275                                  false,
  276                                  false,
  277                                  GLSL_TYPE_FLOAT);
  278     nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
  279     b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
  280     b.shader->info.cs.local_size[0] = 16;
  281     b.shader->info.cs.local_size[1] = 16;
  282     b.shader->info.cs.local_size[2] = 1;
  283     nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
  284                               buf_type, "s_tex");
  285     input_img->data.descriptor_set = 0;
  286     input_img->data.binding = 0;
  287 
  288     nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
  289                                img_type, "out_img");
  290     output_img->data.descriptor_set = 0;
  291     output_img->data.binding = 1;
  292 
  293     nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
  294     nir_ssa_def *wg_id = nir_load_work_group_id(&b);
  295     nir_ssa_def *block_size = nir_imm_ivec4(&b,
  296                         b.shader->info.cs.local_size[0],
  297                         b.shader->info.cs.local_size[1],
  298                         b.shader->info.cs.local_size[2], 0);
  299 
  300     nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
  301 
  302     nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
  303     nir_intrinsic_set_base(offset, 0);
  304     nir_intrinsic_set_range(offset, 16);
  305     offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
  306     offset->num_components = is_3d ? 3 : 2;
  307     nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
  308     nir_builder_instr_insert(&b, &offset->instr);
  309 
  310     nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
  311     nir_intrinsic_set_base(stride, 0);
  312     nir_intrinsic_set_range(stride, 16);
  313     stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
  314     stride->num_components = 1;
  315     nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
  316     nir_builder_instr_insert(&b, &stride->instr);
  317 
  318     nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
  319     nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
  320 
  321     nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
  322     tmp = nir_iadd(&b, tmp, pos_x);
  323 
  324     nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
  325 
  326     nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
  327     nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
  328 
  329     nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
  330     tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
  331     tex->op = nir_texop_txf;
  332     tex->src[0].src_type = nir_tex_src_coord;
  333     tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
  334     tex->src[1].src_type = nir_tex_src_lod;
  335     tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
  336     tex->src[2].src_type = nir_tex_src_texture_deref;
  337     tex->src[2].src = nir_src_for_ssa(input_img_deref);
  338     tex->dest_type = nir_type_float;
  339     tex->is_array = false;
  340     tex->coord_components = 1;
  341 
  342     nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
  343     nir_builder_instr_insert(&b, &tex->instr);
  344 
  345     nir_ssa_def *outval = &tex->dest.ssa;
  346     nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
  347     store->num_components = 4;
  348     store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
  349     store->src[1] = nir_src_for_ssa(img_coord);
  350     store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
  351     store->src[3] = nir_src_for_ssa(outval);
  352     store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
  353 
  354     nir_builder_instr_insert(&b, &store->instr);
  355     return b.shader;
  356 }
  357 
  358 /* Buffer to image - don't write use image accessors */
  359 static VkResult
  360 radv_device_init_meta_btoi_state(struct radv_device *device)
  361 {
  362     VkResult result;
  363     struct radv_shader_module cs = { .nir = NULL };
  364     struct radv_shader_module cs_3d = { .nir = NULL };
  365     cs.nir = build_nir_btoi_compute_shader(device, false);
  366     if (device->physical_device->rad_info.chip_class >= GFX9)
  367         cs_3d.nir = build_nir_btoi_compute_shader(device, true);
  368     /*
  369      * two descriptors one for the image being sampled
  370      * one for the buffer being written.
  371      */
  372     VkDescriptorSetLayoutCreateInfo ds_create_info = {
  373         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
  374         .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
  375         .bindingCount = 2,
  376         .pBindings = (VkDescriptorSetLayoutBinding[]) {
  377             {
  378                 .binding = 0,
  379                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
  380                 .descriptorCount = 1,
  381                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
  382                 .pImmutableSamplers = NULL
  383             },
  384             {
  385                 .binding = 1,
  386                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
  387                 .descriptorCount = 1,
  388                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
  389                 .pImmutableSamplers = NULL
  390             },
  391         }
  392     };
  393 
  394     result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
  395                         &ds_create_info,
  396                         &device->meta_state.alloc,
  397                         &device->meta_state.btoi.img_ds_layout);
  398     if (result != VK_SUCCESS)
  399         goto fail;
  400 
  401 
  402     VkPipelineLayoutCreateInfo pl_create_info = {
  403         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
  404         .setLayoutCount = 1,
  405         .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
  406         .pushConstantRangeCount = 1,
  407         .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
  408     };
  409 
  410     result = radv_CreatePipelineLayout(radv_device_to_handle(device),
  411                       &pl_create_info,
  412                       &device->meta_state.alloc,
  413                       &device->meta_state.btoi.img_p_layout);
  414     if (result != VK_SUCCESS)
  415         goto fail;
  416 
  417     /* compute shader */
  418 
  419     VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
  420         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
  421         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
  422         .module = radv_shader_module_to_handle(&cs),
  423         .pName = "main",
  424         .pSpecializationInfo = NULL,
  425     };
  426 
  427     VkComputePipelineCreateInfo vk_pipeline_info = {
  428         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
  429         .stage = pipeline_shader_stage,
  430         .flags = 0,
  431         .layout = device->meta_state.btoi.img_p_layout,
  432     };
  433 
  434     result = radv_CreateComputePipelines(radv_device_to_handle(device),
  435                          radv_pipeline_cache_to_handle(&device->meta_state.cache),
  436                          1, &vk_pipeline_info, NULL,
  437                          &device->meta_state.btoi.pipeline);
  438     if (result != VK_SUCCESS)
  439         goto fail;
  440 
  441     if (device->physical_device->rad_info.chip_class >= GFX9) {
  442         VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
  443             .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
  444             .stage = VK_SHADER_STAGE_COMPUTE_BIT,
  445             .module = radv_shader_module_to_handle(&cs_3d),
  446             .pName = "main",
  447             .pSpecializationInfo = NULL,
  448         };
  449 
  450         VkComputePipelineCreateInfo vk_pipeline_info_3d = {
  451             .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
  452             .stage = pipeline_shader_stage_3d,
  453             .flags = 0,
  454             .layout = device->meta_state.btoi.img_p_layout,
  455         };
  456 
  457         result = radv_CreateComputePipelines(radv_device_to_handle(device),
  458                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
  459                              1, &vk_pipeline_info_3d, NULL,
  460                              &device->meta_state.btoi.pipeline_3d);
  461         ralloc_free(cs_3d.nir);
  462     }
  463     ralloc_free(cs.nir);
  464 
  465     return VK_SUCCESS;
  466 fail:
  467     ralloc_free(cs_3d.nir);
  468     ralloc_free(cs.nir);
  469     return result;
  470 }
  471 
  472 static void
  473 radv_device_finish_meta_btoi_state(struct radv_device *device)
  474 {
  475     struct radv_meta_state *state = &device->meta_state;
  476 
  477     radv_DestroyPipelineLayout(radv_device_to_handle(device),
  478                    state->btoi.img_p_layout, &state->alloc);
  479     radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
  480                         state->btoi.img_ds_layout,
  481                     &state->alloc);
  482     radv_DestroyPipeline(radv_device_to_handle(device),
  483                  state->btoi.pipeline, &state->alloc);
  484     radv_DestroyPipeline(radv_device_to_handle(device),
  485                  state->btoi.pipeline_3d, &state->alloc);
  486 }
  487 
  488 /* Buffer to image - special path for R32G32B32 */
  489 static nir_shader *
  490 build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
  491 {
  492     nir_builder b;
  493     const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
  494                                  false,
  495                                  false,
  496                                  GLSL_TYPE_FLOAT);
  497     const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
  498                                  false,
  499                                  false,
  500                                  GLSL_TYPE_FLOAT);
  501     nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
  502     b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
  503     b.shader->info.cs.local_size[0] = 16;
  504     b.shader->info.cs.local_size[1] = 16;
  505     b.shader->info.cs.local_size[2] = 1;
  506     nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
  507                               buf_type, "s_tex");
  508     input_img->data.descriptor_set = 0;
  509     input_img->data.binding = 0;
  510 
  511     nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
  512                                img_type, "out_img");
  513     output_img->data.descriptor_set = 0;
  514     output_img->data.binding = 1;
  515 
  516     nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
  517     nir_ssa_def *wg_id = nir_load_work_group_id(&b);
  518     nir_ssa_def *block_size = nir_imm_ivec4(&b,
  519                         b.shader->info.cs.local_size[0],
  520                         b.shader->info.cs.local_size[1],
  521                         b.shader->info.cs.local_size[2], 0);
  522 
  523     nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
  524 
  525     nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
  526     nir_intrinsic_set_base(offset, 0);
  527     nir_intrinsic_set_range(offset, 16);
  528     offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
  529     offset->num_components = 2;
  530     nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
  531     nir_builder_instr_insert(&b, &offset->instr);
  532 
  533     nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
  534     nir_intrinsic_set_base(pitch, 0);
  535     nir_intrinsic_set_range(pitch, 16);
  536     pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
  537     pitch->num_components = 1;
  538     nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
  539     nir_builder_instr_insert(&b, &pitch->instr);
  540 
  541     nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
  542     nir_intrinsic_set_base(stride, 0);
  543     nir_intrinsic_set_range(stride, 16);
  544     stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
  545     stride->num_components = 1;
  546     nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
  547     nir_builder_instr_insert(&b, &stride->instr);
  548 
  549     nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
  550     nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
  551 
  552     nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
  553     tmp = nir_iadd(&b, tmp, pos_x);
  554 
  555     nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
  556 
  557     nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
  558 
  559     nir_ssa_def *global_pos =
  560         nir_iadd(&b,
  561              nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
  562              nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
  563 
  564     nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
  565 
  566     nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
  567     tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
  568     tex->op = nir_texop_txf;
  569     tex->src[0].src_type = nir_tex_src_coord;
  570     tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
  571     tex->src[1].src_type = nir_tex_src_lod;
  572     tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
  573     tex->src[2].src_type = nir_tex_src_texture_deref;
  574     tex->src[2].src = nir_src_for_ssa(input_img_deref);
  575     tex->dest_type = nir_type_float;
  576     tex->is_array = false;
  577     tex->coord_components = 1;
  578     nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
  579     nir_builder_instr_insert(&b, &tex->instr);
  580 
  581     nir_ssa_def *outval = &tex->dest.ssa;
  582 
  583     for (int chan = 0; chan < 3; chan++) {
  584         nir_ssa_def *local_pos =
  585                        nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
  586 
  587                nir_ssa_def *coord =
  588                        nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
  589 
  590         nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
  591         store->num_components = 1;
  592         store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
  593         store->src[1] = nir_src_for_ssa(coord);
  594         store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
  595         store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
  596         store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
  597         nir_builder_instr_insert(&b, &store->instr);
  598     }
  599 
  600     return b.shader;
  601 }
  602 
  603 static VkResult
  604 radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
  605 {
  606     VkResult result;
  607     struct radv_shader_module cs = { .nir = NULL };
  608 
  609     cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);
  610 
  611     VkDescriptorSetLayoutCreateInfo ds_create_info = {
  612         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
  613         .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
  614         .bindingCount = 2,
  615         .pBindings = (VkDescriptorSetLayoutBinding[]) {
  616             {
  617                 .binding = 0,
  618                 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
  619                 .descriptorCount = 1,
  620                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
  621                 .pImmutableSamplers = NULL
  622             },
  623             {
  624                 .binding = 1,
  625                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
  626                 .descriptorCount = 1,
  627                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
  628                 .pImmutableSamplers = NULL
  629             },
  630         }
  631     };
  632 
  633     result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
  634                         &ds_create_info,
  635                         &device->meta_state.alloc,
  636                         &device->meta_state.btoi_r32g32b32.img_ds_layout);
  637     if (result != VK_SUCCESS)
  638         goto fail;
  639 
  640 
  641     VkPipelineLayoutCreateInfo pl_create_info = {
  642         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
  643         .setLayoutCount = 1,
  644         .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
  645         .pushConstantRangeCount = 1,
  646         .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
  647     };
  648 
  649     result = radv_CreatePipelineLayout(radv_device_to_handle(device),
  650                       &pl_create_info,
  651                       &device->meta_state.alloc,
  652                       &device->meta_state.btoi_r32g32b32.img_p_layout);
  653     if (result != VK_SUCCESS)
  654         goto fail;
  655 
  656     /* compute shader */
  657 
  658     VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
  659         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
  660         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
  661         .module = radv_shader_module_to_handle(&cs),
  662         .pName = "main",
  663         .pSpecializationInfo = NULL,
  664     };
  665 
  666     VkComputePipelineCreateInfo vk_pipeline_info = {
  667         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
  668         .stage = pipeline_shader_stage,
  669         .flags = 0,
  670         .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
  671     };
  672 
  673     result = radv_CreateComputePipelines(radv_device_to_handle(device),
  674                          radv_pipeline_cache_to_handle(&device->meta_state.cache),
  675                          1, &vk_pipeline_info, NULL,
  676                          &device->meta_state.btoi_r32g32b32.pipeline);
  677 
  678 fail:
  679     ralloc_free(cs.nir);
  680     return result;
  681 }
  682 
  683 static void
  684 radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
  685 {
  686     struct radv_meta_state *state = &device->meta_state;
  687 
  688     radv_DestroyPipelineLayout(radv_device_to_handle(device),
  689                    state->btoi_r32g32b32.img_p_layout, &state->alloc);
  690     radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
  691                         state->btoi_r32g32b32.img_ds_layout,
  692                     &state->alloc);
  693     radv_DestroyPipeline(radv_device_to_handle(device),
  694                  state->btoi_r32g32b32.pipeline, &state->alloc);
  695 }
  696 
  697 static nir_shader *
  698 build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
  699 {
  700     nir_builder b;
  701     enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
  702     const struct glsl_type *buf_type = glsl_sampler_type(dim,
  703                                  false,
  704                                  false,
  705                                  GLSL_TYPE_FLOAT);
  706     const struct glsl_type *img_type = glsl_sampler_type(dim,
  707                                  false,
  708                                  false,
  709                                  GLSL_TYPE_FLOAT);
  710     nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
  711     b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
  712     b.shader->info.cs.local_size[0] = 16;
  713     b.shader->info.cs.local_size[1] = 16;
  714     b.shader->info.cs.local_size[2] = 1;
  715     nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
  716                               buf_type, "s_tex");
  717     input_img->data.descriptor_set = 0;
  718     input_img->data.binding = 0;
  719 
  720     nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
  721                                img_type, "out_img");
  722     output_img->data.descriptor_set = 0;
  723     output_img->data.binding = 1;
  724 
  725     nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
  726     nir_ssa_def *wg_id = nir_load_work_group_id(&b);
  727     nir_ssa_def *block_size = nir_imm_ivec4(&b,
  728                         b.shader->info.cs.local_size[0],
  729                         b.shader->info.cs.local_size[1],
  730                         b.shader->info.cs.local_size[2], 0);
  731 
  732     nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
  733 
  734     nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
  735     nir_intrinsic_set_base(src_offset, 0);
  736     nir_intrinsic_set_range(src_offset, 24);
  737     src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
  738     src_offset->num_components = is_3d ? 3 : 2;
  739     nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
  740     nir_builder_instr_insert(&b, &src_offset->instr);
  741 
  742     nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
  743     nir_intrinsic_set_base(dst_offset, 0);
  744     nir_intrinsic_set_range(dst_offset, 24);
  745     dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
  746     dst_offset->num_components = is_3d ? 3 : 2;
  747     nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
  748     nir_builder_instr_insert(&b, &dst_offset->instr);
  749 
  750     nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
  751     nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
  752 
  753     nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
  754 
  755     nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
  756     tex->sampler_dim = dim;
  757     tex->op = nir_texop_txf;
  758     tex->src[0].src_type = nir_tex_src_coord;
  759     tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
  760     tex->src[1].src_type = nir_tex_src_lod;
  761     tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
  762     tex->src[2].src_type = nir_tex_src_texture_deref;
  763     tex->src[2].src = nir_src_for_ssa(input_img_deref);
  764     tex->dest_type = nir_type_float;
  765     tex->is_array = false;
  766     tex->coord_components = is_3d ? 3 : 2;
  767 
  768     nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
  769     nir_builder_instr_insert(&b, &tex->instr);
  770 
  771     nir_ssa_def *outval = &tex->dest.ssa;
  772     nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
  773     store->num_components = 4;
  774     store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
  775     store->src[1] = nir_src_for_ssa(dst_coord);
  776     store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
  777     store->src[3] = nir_src_for_ssa(outval);
  778     store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
  779 
  780     nir_builder_instr_insert(&b, &store->instr);
  781     return b.shader;
  782 }
  783 
  784 /* image to image - don't write use image accessors */
  785 static VkResult
  786 radv_device_init_meta_itoi_state(struct radv_device *device)
  787 {
  788     VkResult result;
  789     struct radv_shader_module cs = { .nir = NULL };
  790     struct radv_shader_module cs_3d = { .nir = NULL };
  791     cs.nir = build_nir_itoi_compute_shader(device, false);
  792     if (device->physical_device->rad_info.chip_class >= GFX9)
  793         cs_3d.nir = build_nir_itoi_compute_shader(device, true);
  794     /*
  795      * two descriptors one for the image being sampled
  796      * one for the buffer being written.
  797      */
  798     VkDescriptorSetLayoutCreateInfo ds_create_info = {
  799         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
  800         .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
  801         .bindingCount = 2,
  802         .pBindings = (VkDescriptorSetLayoutBinding[]) {
  803             {
  804                 .binding = 0,
  805                 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
  806                 .descriptorCount = 1,
  807                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
  808                 .pImmutableSamplers = NULL
  809             },
  810             {
  811                 .binding = 1,
  812                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
  813                 .descriptorCount = 1,
  814                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
  815                 .pImmutableSamplers = NULL
  816             },
  817         }
  818     };
  819 
  820     result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
  821                         &ds_create_info,
  822                         &device->meta_state.alloc,
  823                         &device->meta_state.itoi.img_ds_layout);
  824     if (result != VK_SUCCESS)
  825         goto fail;
  826 
  827 
  828     VkPipelineLayoutCreateInfo pl_create_info = {
  829         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
  830         .setLayoutCount = 1,
  831         .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
  832         .pushConstantRangeCount = 1,
  833         .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
  834     };
  835 
  836     result = radv_CreatePipelineLayout(radv_device_to_handle(device),
  837                       &pl_create_info,
  838                       &device->meta_state.alloc,
  839                       &device->meta_state.itoi.img_p_layout);
  840     if (result != VK_SUCCESS)
  841         goto fail;
  842 
  843     /* compute shader */
  844 
  845     VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
  846         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
  847         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
  848         .module = radv_shader_module_to_handle(&cs),
  849         .pName = "main",
  850         .pSpecializationInfo = NULL,
  851     };
  852 
  853     VkComputePipelineCreateInfo vk_pipeline_info = {
  854         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
  855         .stage = pipeline_shader_stage,
  856         .flags = 0,
  857         .layout = device->meta_state.itoi.img_p_layout,
  858     };
  859 
  860     result = radv_CreateComputePipelines(radv_device_to_handle(device),
  861                          radv_pipeline_cache_to_handle(&device->meta_state.cache),
  862                          1, &vk_pipeline_info, NULL,
  863                          &device->meta_state.itoi.pipeline);
  864     if (result != VK_SUCCESS)
  865         goto fail;
  866 
  867     if (device->physical_device->rad_info.chip_class >= GFX9) {
  868         VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
  869             .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
  870 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
  871             .module = radv_shader_module_to_handle(&cs_3d),
  872             .pName = "main",
  873             .pSpecializationInfo = NULL,
  874         };
  875 
  876         VkComputePipelineCreateInfo vk_pipeline_info_3d = {
  877             .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
  878             .stage = pipeline_shader_stage_3d,
  879             .flags = 0,
  880             .layout = device->meta_state.itoi.img_p_layout,
  881         };
  882 
  883         result = radv_CreateComputePipelines(radv_device_to_handle(device),
  884                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
  885                              1, &vk_pipeline_info_3d, NULL,
  886                              &device->meta_state.itoi.pipeline_3d);
  887 
  888         ralloc_free(cs_3d.nir);
  889     }
  890     ralloc_free(cs.nir);
  891 
  892     return VK_SUCCESS;
  893 fail:
  894     ralloc_free(cs.nir);
  895     ralloc_free(cs_3d.nir);
  896     return result;
  897 }
  898 
  899 static void
  900 radv_device_finish_meta_itoi_state(struct radv_device *device)
  901 {
  902     struct radv_meta_state *state = &device->meta_state;
  903 
  904     radv_DestroyPipelineLayout(radv_device_to_handle(device),
  905                    state->itoi.img_p_layout, &state->alloc);
  906     radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
  907                         state->itoi.img_ds_layout,
  908                     &state->alloc);
  909     radv_DestroyPipeline(radv_device_to_handle(device),
  910                  state->itoi.pipeline, &state->alloc);
  911     if (device->physical_device->rad_info.chip_class >= GFX9)
  912         radv_DestroyPipeline(radv_device_to_handle(device),
  913                      state->itoi.pipeline_3d, &state->alloc);
  914 }
  915 
  916 static nir_shader *
  917 build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
  918 {
  919     nir_builder b;
  920     const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
  921                              false,
  922                              false,
  923                              GLSL_TYPE_FLOAT);
  924     nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
  925     b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_r32g32b32_cs");
  926     b.shader->info.cs.local_size[0] = 16;
  927     b.shader->info.cs.local_size[1] = 16;
  928     b.shader->info.cs.local_size[2] = 1;
  929     nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
  930                               type, "input_img");
  931     input_img->data.descriptor_set = 0;
  932     input_img->data.binding = 0;
  933 
  934     nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
  935                               type, "output_img");
  936     output_img->data.descriptor_set = 0;
  937     output_img->data.binding = 1;
  938 
  939     nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
  940     nir_ssa_def *wg_id = nir_load_work_group_id(&b);
  941     nir_ssa_def *block_size = nir_imm_ivec4(&b,
  942                         b.shader->info.cs.local_size[0],
  943                         b.shader->info.cs.local_size[1],
  944                         b.shader->info.cs.local_size[2], 0);
  945 
  946     nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
  947 
  948     nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
  949     nir_intrinsic_set_base(src_offset, 0);
  950     nir_intrinsic_set_range(src_offset, 24);
  951     src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
  952     src_offset->num_components = 3;
  953     nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 3, 32, "src_offset");
  954     nir_builder_instr_insert(&b, &src_offset->instr);
  955 
  956     nir_ssa_def *src_stride = nir_channel(&b, &src_offset->dest.ssa, 2);
  957 
  958     nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
  959     nir_intrinsic_set_base(dst_offset, 0);
  960     nir_intrinsic_set_range(dst_offset, 24);
  961     dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
  962     dst_offset->num_components = 3;
  963     nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 3, 32, "dst_offset");
  964     nir_builder_instr_insert(&b, &dst_offset->instr);
  965 
  966     nir_ssa_def *dst_stride = nir_channel(&b, &dst_offset->dest.ssa, 2);
  967 
  968     nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
  969     nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
  970 
  971     nir_ssa_def *src_global_pos =
  972         nir_iadd(&b,
  973              nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
  974              nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));
  975 
  976     nir_ssa_def *dst_global_pos =
  977         nir_iadd(&b,
  978              nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
  979              nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));
  980 
  981     for (int chan = 0; chan < 3; chan++) {
  982         /* src */
  983         nir_ssa_def *src_local_pos =
  984             nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
  985 
  986         nir_ssa_def *src_coord =
  987             nir_vec4(&b, src_local_pos, src_local_pos,
  988                  src_local_pos, src_local_pos);
  989 
  990         nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
  991 
  992         nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
  993         tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
  994         tex->op = nir_texop_txf;
  995         tex->src[0].src_type = nir_tex_src_coord;
  996         tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
  997         tex->src[1].src_type = nir_tex_src_lod;
  998         tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
  999         tex->src[2].src_type = nir_tex_src_texture_deref;
 1000         tex->src[2].src = nir_src_for_ssa(input_img_deref);
 1001         tex->dest_type = nir_type_float;
 1002         tex->is_array = false;
 1003         tex->coord_components = 1;
 1004         nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
 1005         nir_builder_instr_insert(&b, &tex->instr);
 1006 
 1007         nir_ssa_def *outval = &tex->dest.ssa;
 1008 
 1009         /* dst */
 1010         nir_ssa_def *dst_local_pos =
 1011             nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));
 1012 
 1013         nir_ssa_def *dst_coord =
 1014             nir_vec4(&b, dst_local_pos, dst_local_pos,
 1015                  dst_local_pos, dst_local_pos);
 1016 
 1017         nir_intrinsic_instr *store =
 1018             nir_intrinsic_instr_create(b.shader,
 1019                            nir_intrinsic_image_deref_store);
 1020         store->num_components = 1;
 1021         store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
 1022         store->src[1] = nir_src_for_ssa(dst_coord);
 1023         store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
 1024         store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, 0));
 1025         store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
 1026         nir_builder_instr_insert(&b, &store->instr);
 1027     }
 1028 
 1029     return b.shader;
 1030 }
 1031 
 1032 /* Image to image - special path for R32G32B32 */
 1033 static VkResult
 1034 radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
 1035 {
 1036     VkResult result;
 1037     struct radv_shader_module cs = { .nir = NULL };
 1038 
 1039     cs.nir = build_nir_itoi_r32g32b32_compute_shader(device);
 1040 
 1041     VkDescriptorSetLayoutCreateInfo ds_create_info = {
 1042         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
 1043         .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
 1044         .bindingCount = 2,
 1045         .pBindings = (VkDescriptorSetLayoutBinding[]) {
 1046             {
 1047                 .binding = 0,
 1048                 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
 1049                 .descriptorCount = 1,
 1050                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
 1051                 .pImmutableSamplers = NULL
 1052             },
 1053             {
 1054                 .binding = 1,
 1055                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
 1056                 .descriptorCount = 1,
 1057                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
 1058                 .pImmutableSamplers = NULL
 1059             },
 1060         }
 1061     };
 1062 
 1063     result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
 1064                         &ds_create_info,
 1065                         &device->meta_state.alloc,
 1066                         &device->meta_state.itoi_r32g32b32.img_ds_layout);
 1067     if (result != VK_SUCCESS)
 1068         goto fail;
 1069 
 1070 
 1071     VkPipelineLayoutCreateInfo pl_create_info = {
 1072         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
 1073         .setLayoutCount = 1,
 1074         .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
 1075         .pushConstantRangeCount = 1,
 1076         .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
 1077     };
 1078 
 1079     result = radv_CreatePipelineLayout(radv_device_to_handle(device),
 1080                       &pl_create_info,
 1081                       &device->meta_state.alloc,
 1082                       &device->meta_state.itoi_r32g32b32.img_p_layout);
 1083     if (result != VK_SUCCESS)
 1084         goto fail;
 1085 
 1086     /* compute shader */
 1087 
 1088     VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
 1089         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 1090         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
 1091         .module = radv_shader_module_to_handle(&cs),
 1092         .pName = "main",
 1093         .pSpecializationInfo = NULL,
 1094     };
 1095 
 1096     VkComputePipelineCreateInfo vk_pipeline_info = {
 1097         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
 1098         .stage = pipeline_shader_stage,
 1099         .flags = 0,
 1100         .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
 1101     };
 1102 
 1103     result = radv_CreateComputePipelines(radv_device_to_handle(device),
 1104                          radv_pipeline_cache_to_handle(&device->meta_state.cache),
 1105                          1, &vk_pipeline_info, NULL,
 1106                          &device->meta_state.itoi_r32g32b32.pipeline);
 1107 
 1108 fail:
 1109     ralloc_free(cs.nir);
 1110     return result;
 1111 }
 1112 
 1113 static void
 1114 radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
 1115 {
 1116     struct radv_meta_state *state = &device->meta_state;
 1117 
 1118     radv_DestroyPipelineLayout(radv_device_to_handle(device),
 1119                    state->itoi_r32g32b32.img_p_layout, &state->alloc);
 1120     radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
 1121                         state->itoi_r32g32b32.img_ds_layout,
 1122                     &state->alloc);
 1123     radv_DestroyPipeline(radv_device_to_handle(device),
 1124                  state->itoi_r32g32b32.pipeline, &state->alloc);
 1125 }
 1126 
 1127 static nir_shader *
 1128 build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
 1129 {
 1130     nir_builder b;
 1131     enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
 1132     const struct glsl_type *img_type = glsl_sampler_type(dim,
 1133                                  false,
 1134                                  false,
 1135                                  GLSL_TYPE_FLOAT);
 1136     nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
 1137     b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
 1138     b.shader->info.cs.local_size[0] = 16;
 1139     b.shader->info.cs.local_size[1] = 16;
 1140     b.shader->info.cs.local_size[2] = 1;
 1141 
 1142     nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
 1143                                img_type, "out_img");
 1144     output_img->data.descriptor_set = 0;
 1145     output_img->data.binding = 0;
 1146 
 1147     nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
 1148     nir_ssa_def *wg_id = nir_load_work_group_id(&b);
 1149     nir_ssa_def *block_size = nir_imm_ivec4(&b,
 1150                         b.shader->info.cs.local_size[0],
 1151                         b.shader->info.cs.local_size[1],
 1152                         b.shader->info.cs.local_size[2], 0);
 1153 
 1154     nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
 1155 
 1156     nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
 1157     nir_intrinsic_set_base(clear_val, 0);
 1158     nir_intrinsic_set_range(clear_val, 20);
 1159     clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
 1160     clear_val->num_components = 4;
 1161     nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
 1162     nir_builder_instr_insert(&b, &clear_val->instr);
 1163 
 1164     nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
 1165     nir_intrinsic_set_base(layer, 0);
 1166     nir_intrinsic_set_range(layer, 20);
 1167     layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
 1168     layer->num_components = 1;
 1169     nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
 1170     nir_builder_instr_insert(&b, &layer->instr);
 1171 
 1172     nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);
 1173 
 1174     nir_ssa_def *comps[4];
 1175     comps[0] = nir_channel(&b, global_id, 0);
 1176     comps[1] = nir_channel(&b, global_id, 1);
 1177     comps[2] = global_z;
 1178     comps[3] = nir_imm_int(&b, 0);
 1179     global_id = nir_vec(&b, comps, 4);
 1180 
 1181     nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
 1182     store->num_components = 4;
 1183     store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
 1184     store->src[1] = nir_src_for_ssa(global_id);
 1185     store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
 1186     store->src[3] = nir_src_for_ssa(&clear_val->dest.ssa);
 1187     store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
 1188 
 1189     nir_builder_instr_insert(&b, &store->instr);
 1190     return b.shader;
 1191 }
 1192 
 1193 static VkResult
 1194 radv_device_init_meta_cleari_state(struct radv_device *device)
 1195 {
 1196     VkResult result;
 1197     struct radv_shader_module cs = { .nir = NULL };
 1198     struct radv_shader_module cs_3d = { .nir = NULL };
 1199     cs.nir = build_nir_cleari_compute_shader(device, false);
 1200     if (device->physical_device->rad_info.chip_class >= GFX9)
 1201         cs_3d.nir = build_nir_cleari_compute_shader(device, true);
 1202 
 1203     /*
 1204      * two descriptors one for the image being sampled
 1205      * one for the buffer being written.
 1206      */
 1207     VkDescriptorSetLayoutCreateInfo ds_create_info = {
 1208         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
 1209         .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
 1210         .bindingCount = 1,
 1211         .pBindings = (VkDescriptorSetLayoutBinding[]) {
 1212             {
 1213                 .binding = 0,
 1214                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
 1215                 .descriptorCount = 1,
 1216                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
 1217                 .pImmutableSamplers = NULL
 1218             },
 1219         }
 1220     };
 1221 
 1222     result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
 1223                         &ds_create_info,
 1224                         &device->meta_state.alloc,
 1225                         &device->meta_state.cleari.img_ds_layout);
 1226     if (result != VK_SUCCESS)
 1227         goto fail;
 1228 
 1229 
 1230     VkPipelineLayoutCreateInfo pl_create_info = {
 1231         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
 1232         .setLayoutCount = 1,
 1233         .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
 1234         .pushConstantRangeCount = 1,
 1235         .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
 1236     };
 1237 
 1238     result = radv_CreatePipelineLayout(radv_device_to_handle(device),
 1239                       &pl_create_info,
 1240                       &device->meta_state.alloc,
 1241                       &device->meta_state.cleari.img_p_layout);
 1242     if (result != VK_SUCCESS)
 1243         goto fail;
 1244 
 1245     /* compute shader */
 1246 
 1247     VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
 1248         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 1249         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
 1250         .module = radv_shader_module_to_handle(&cs),
 1251         .pName = "main",
 1252         .pSpecializationInfo = NULL,
 1253     };
 1254 
 1255     VkComputePipelineCreateInfo vk_pipeline_info = {
 1256         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
 1257         .stage = pipeline_shader_stage,
 1258         .flags = 0,
 1259         .layout = device->meta_state.cleari.img_p_layout,
 1260     };
 1261 
 1262     result = radv_CreateComputePipelines(radv_device_to_handle(device),
 1263                          radv_pipeline_cache_to_handle(&device->meta_state.cache),
 1264                          1, &vk_pipeline_info, NULL,
 1265                          &device->meta_state.cleari.pipeline);
 1266     if (result != VK_SUCCESS)
 1267         goto fail;
 1268 
 1269 
 1270     if (device->physical_device->rad_info.chip_class >= GFX9) {
 1271         /* compute shader */
 1272         VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
 1273             .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 1274             .stage = VK_SHADER_STAGE_COMPUTE_BIT,
 1275             .module = radv_shader_module_to_handle(&cs_3d),
 1276             .pName = "main",
 1277             .pSpecializationInfo = NULL,
 1278         };
 1279 
 1280         VkComputePipelineCreateInfo vk_pipeline_info_3d = {
 1281             .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
 1282             .stage = pipeline_shader_stage_3d,
 1283             .flags = 0,
 1284             .layout = device->meta_state.cleari.img_p_layout,
 1285         };
 1286 
 1287         result = radv_CreateComputePipelines(radv_device_to_handle(device),
 1288                              radv_pipeline_cache_to_handle(&device->meta_state.cache),
 1289                              1, &vk_pipeline_info_3d, NULL,
 1290                              &device->meta_state.cleari.pipeline_3d);
 1291         if (result != VK_SUCCESS)
 1292             goto fail;
 1293 
 1294         ralloc_free(cs_3d.nir);
 1295     }
 1296     ralloc_free(cs.nir);
 1297     return VK_SUCCESS;
 1298 fail:
 1299     ralloc_free(cs.nir);
 1300     ralloc_free(cs_3d.nir);
 1301     return result;
 1302 }
 1303 
 1304 static void
 1305 radv_device_finish_meta_cleari_state(struct radv_device *device)
 1306 {
 1307     struct radv_meta_state *state = &device->meta_state;
 1308 
 1309     radv_DestroyPipelineLayout(radv_device_to_handle(device),
 1310                    state->cleari.img_p_layout, &state->alloc);
 1311     radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
 1312                         state->cleari.img_ds_layout,
 1313                     &state->alloc);
 1314     radv_DestroyPipeline(radv_device_to_handle(device),
 1315                  state->cleari.pipeline, &state->alloc);
 1316     radv_DestroyPipeline(radv_device_to_handle(device),
 1317                  state->cleari.pipeline_3d, &state->alloc);
 1318 }
 1319 
/*
 * Special path for clearing R32G32B32 images using a compute shader.
 *
 * Builds a compute shader named "meta_cleari_r32g32b32_cs" with a 16x16x1
 * workgroup. The destination is a buffer-dimension image variable bound at
 * set 0, binding 0, and each invocation issues three single-component
 * stores, one per channel of the clear value.
 *
 * Push constants read by the shader (declared range: 16 bytes):
 *   offset  0: three 32-bit components, the clear value
 *   offset 12: one 32-bit component, the stride multiplied by the y coordinate
 *
 * The 'dev' parameter is not referenced by this function.
 *
 * Returns the nir_shader owned by the builder.
 */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
    nir_builder b;
    const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
                                 false,
                                 false,
                                 GLSL_TYPE_FLOAT);
    nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
    b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_r32g32b32_cs");
    b.shader->info.cs.local_size[0] = 16;
    b.shader->info.cs.local_size[1] = 16;
    b.shader->info.cs.local_size[2] = 1;

    /* Destination variable: descriptor set 0, binding 0. */
    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
                               img_type, "out_img");
    output_img->data.descriptor_set = 0;
    output_img->data.binding = 0;

    /* global_id = work_group_id * local_size + local_invocation_id */
    nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
    nir_ssa_def *wg_id = nir_load_work_group_id(&b);
    nir_ssa_def *block_size = nir_imm_ivec4(&b,
                        b.shader->info.cs.local_size[0],
                        b.shader->info.cs.local_size[1],
                        b.shader->info.cs.local_size[2], 0);

    nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

    /* Clear value: three 32-bit components loaded from push-constant offset 0. */
    nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
    nir_intrinsic_set_base(clear_val, 0);
    nir_intrinsic_set_range(clear_val, 16);
    clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
    clear_val->num_components = 3;
    nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 3, 32, "clear_value");
    nir_builder_instr_insert(&b, &clear_val->instr);

    /* Stride: one 32-bit component loaded from push-constant offset 12. */
    nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
    nir_intrinsic_set_base(stride, 0);
    nir_intrinsic_set_range(stride, 16);
    stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
    stride->num_components = 1;
    nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
    nir_builder_instr_insert(&b, &stride->instr);

    nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
    nir_ssa_def *global_y = nir_channel(&b, global_id, 1);

    /* Index of the pixel's first channel: y * stride + x * 3. */
    nir_ssa_def *global_pos =
        nir_iadd(&b,
             nir_imul(&b, global_y, &stride->dest.ssa),
             nir_imul(&b, global_x, nir_imm_int(&b, 3)));

    /* One store per channel, at consecutive positions global_pos + chan. */
    for (unsigned chan = 0; chan < 3; chan++) {
        nir_ssa_def *local_pos =
            nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

        /* The same position is replicated into all four coordinate lanes. */
        nir_ssa_def *coord =
            nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

        /* src[2] is left undefined and src[4] is the constant 0; the
         * value written is channel 'chan' of the clear value.
         */
        nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
        store->num_components = 1;
        store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
        store->src[1] = nir_src_for_ssa(coord);
        store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
        store->src[3] = nir_src_for_ssa(nir_channel(&b, &clear_val->dest.ssa, chan));
        store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
        nir_builder_instr_insert(&b, &store->instr);
    }

    return b.shader;
}
 1392 
 1393 static VkResult
 1394 radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
 1395 {
 1396     VkResult result;
 1397     struct radv_shader_module cs = { .nir = NULL };
 1398 
 1399     cs.nir = build_nir_cleari_r32g32b32_compute_shader(device);
 1400 
 1401     VkDescriptorSetLayoutCreateInfo ds_create_info = {
 1402         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
 1403         .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
 1404         .bindingCount = 1,
 1405         .pBindings = (VkDescriptorSetLayoutBinding[]) {
 1406             {
 1407                 .binding = 0,
 1408                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
 1409                 .descriptorCount = 1,
 1410                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
 1411                 .pImmutableSamplers = NULL
 1412             },
 1413         }
 1414     };
 1415 
 1416     result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
 1417                         &ds_create_info,
 1418                         &device->meta_state.alloc,
 1419                         &device->meta_state.cleari_r32g32b32.img_ds_layout);
 1420     if (result != VK_SUCCESS)
 1421         goto fail;
 1422 
 1423     VkPipelineLayoutCreateInfo pl_create_info = {
 1424         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
 1425         .setLayoutCount = 1,
 1426         .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
 1427         .pushConstantRangeCount = 1,
 1428         .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
 1429     };
 1430 
 1431     result = radv_CreatePipelineLayout(radv_device_to_handle(device),
 1432                        &pl_create_info,
 1433                        &device->meta_state.alloc,
 1434                        &device->meta_state.cleari_r32g32b32.img_p_layout);
 1435     if (result != VK_SUCCESS)
 1436         goto fail;
 1437 
 1438     /* compute shader */
 1439     VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
 1440         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 1441         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
 1442         .module = radv_shader_module_to_handle(&cs),
 1443         .pName = "main",
 1444         .pSpecializationInfo = NULL,
 1445     };
 1446 
 1447     VkComputePipelineCreateInfo vk_pipeline_info = {
 1448         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
 1449         .stage = pipeline_shader_stage,
 1450         .flags = 0,
 1451         .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
 1452     };
 1453 
 1454     result = radv_CreateComputePipelines(radv_device_to_handle(device),
 1455                          radv_pipeline_cache_to_handle(&device->meta_state.cache),
 1456                          1, &vk_pipeline_info, NULL,
 1457                          &device->meta_state.cleari_r32g32b32.pipeline);
 1458 
 1459 fail:
 1460     ralloc_free(cs.nir);
 1461     return result;
 1462 }
 1463 
 1464 static void
 1465 radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
 1466 {
 1467     struct radv_meta_state *state = &device->meta_state;
 1468 
 1469     radv_DestroyPipelineLayout(radv_device_to_handle(device),
 1470                    state->cleari_r32g32b32.img_p_layout,
 1471                    &state->alloc);
 1472     radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
 1473                         state->cleari_r32g32b32.img_ds_layout,
 1474                     &state->alloc);
 1475     radv_DestroyPipeline(radv_device_to_handle(device),
 1476                  state->cleari_r32g32b32.pipeline, &state->alloc);
 1477 }
 1478 
 1479 void
 1480 radv_device_finish_meta_bufimage_state(struct radv_device *device)
 1481 {
 1482     radv_device_finish_meta_itob_state(device);
 1483     radv_device_finish_meta_btoi_state(device);
 1484     radv_device_finish_meta_btoi_r32g32b32_state(device);
 1485     radv_device_finish_meta_itoi_state(device);
 1486     radv_device_finish_meta_itoi_r32g32b32_state(device);
 1487     radv_device_finish_meta_cleari_state(device);
 1488     radv_device_finish_meta_cleari_r32g32b32_state(device);
 1489 }
 1490 
 1491 VkResult
 1492 radv_device_init_meta_bufimage_state(struct radv_device *device)
 1493 {
 1494     VkResult result;
 1495 
 1496     result = radv_device_init_meta_itob_state(device);
 1497     if (result != VK_SUCCESS)
 1498         goto fail_itob;
 1499 
 1500     result = radv_device_init_meta_btoi_state(device);
 1501     if (result != VK_SUCCESS)
 1502         goto fail_btoi;
 1503 
 1504     result = radv_device_init_meta_btoi_r32g32b32_state(device);
 1505     if (result != VK_SUCCESS)
 1506         goto fail_btoi_r32g32b32;
 1507 
 1508     result = radv_device_init_meta_itoi_state(device);
 1509     if (result != VK_SUCCESS)
 1510         goto fail_itoi;
 1511 
 1512     result = radv_device_init_meta_itoi_r32g32b32_state(device);
 1513     if (result != VK_SUCCESS)
 1514         goto fail_itoi_r32g32b32;
 1515 
 1516     result = radv_device_init_meta_cleari_state(device);
 1517     if (result != VK_SUCCESS)
 1518         goto fail_cleari;
 1519 
 1520     result = radv_device_init_meta_cleari_r32g32b32_state(device);
 1521     if (result != VK_SUCCESS)
 1522         goto fail_cleari_r32g32b32;
 1523 
 1524     return VK_SUCCESS;
 1525 fail_cleari_r32g32b32:
 1526     radv_device_finish_meta_cleari_r32g32b32_state(device);
 1527 fail_cleari:
 1528     radv_device_finish_meta_cleari_state(device);
 1529 fail_itoi_r32g32b32:
 1530     radv_device_finish_meta_itoi_r32g32b32_state(device);
 1531 fail_itoi:
 1532     radv_device_finish_meta_itoi_state(device);
 1533 fail_btoi_r32g32b32:
 1534     radv_device_finish_meta_btoi_r32g32b32_state(device);
 1535 fail_btoi:
 1536     radv_device_finish_meta_btoi_state(device);
 1537 fail_itob:
 1538     radv_device_finish_meta_itob_state(device);
 1539     return result;
 1540 }
 1541 
 1542 static void
 1543 create_iview(struct radv_cmd_buffer *cmd_buffer,
 1544              struct radv_meta_blit2d_surf *surf,
 1545              struct radv_image_view *iview)
 1546 {
 1547     VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
 1548         radv_meta_get_view_type(surf->image);
 1549     radv_image_view_init(iview, cmd_buffer->device,
 1550                  &(VkImageViewCreateInfo) {
 1551                      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 1552                          .image = radv_image_to_handle(surf->image),
 1553                          .viewType = view_type,
 1554                          .format = surf->format,
 1555                          .subresourceRange = {
 1556                          .aspectMask = surf->aspect_mask,
 1557                          .baseMipLevel = surf->level,
 1558                          .levelCount = 1,
 1559                          .baseArrayLayer = surf->layer,
 1560                          .layerCount = 1
 1561                      },
 1562                  }, NULL);
 1563 }
 1564 
 1565 static void
 1566 create_bview(struct radv_cmd_buffer *cmd_buffer,
 1567          struct radv_buffer *buffer,
 1568          unsigned offset,
 1569          VkFormat format,
 1570          struct radv_buffer_view *bview)
 1571 {
 1572     radv_buffer_view_init(bview, cmd_buffer->device,
 1573                   &(VkBufferViewCreateInfo) {
 1574                       .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
 1575                       .flags = 0,
 1576                       .buffer = radv_buffer_to_handle(buffer),
 1577                       .format = format,
 1578                       .offset = offset,
 1579                       .range = VK_WHOLE_SIZE,
 1580                   });
 1581 
 1582 }
 1583 
 1584 static void
 1585 create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer,
 1586              struct radv_meta_blit2d_surf *surf,
 1587              VkBufferUsageFlagBits usage,
 1588              VkBuffer *buffer)
 1589 {
 1590     struct radv_device *device = cmd_buffer->device;
 1591     struct radv_device_memory mem = { .bo = surf->image->bo };
 1592 
 1593     radv_CreateBuffer(radv_device_to_handle(device),
 1594               &(VkBufferCreateInfo) {
 1595                 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
 1596                 .flags = 0,
 1597                 .size = surf->image->size,
 1598                 .usage = usage,
 1599                 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
 1600               }, NULL, buffer);
 1601 
 1602     radv_BindBufferMemory2(radv_device_to_handle(device), 1,
 1603                    (VkBindBufferMemoryInfo[]) {
 1604                     {
 1605                     .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
 1606                     .buffer = *buffer,
 1607                     .memory = radv_device_memory_to_handle(&mem),
 1608                     .memoryOffset = surf->image->offset,
 1609                     }
 1610                    });
 1611 }
 1612 
 1613 static void
 1614 create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
 1615                struct radv_buffer *buffer,
 1616                unsigned offset,
 1617                VkFormat src_format,
 1618                struct radv_buffer_view *bview)
 1619 {
 1620     VkFormat format;
 1621 
 1622     switch (src_format) {
 1623     case VK_FORMAT_R32G32B32_UINT:
 1624         format = VK_FORMAT_R32_UINT;
 1625         break;
 1626     case VK_FORMAT_R32G32B32_SINT:
 1627         format = VK_FORMAT_R32_SINT;
 1628         break;
 1629     case VK_FORMAT_R32G32B32_SFLOAT:
 1630         format = VK_FORMAT_R32_SFLOAT;
 1631         break;
 1632     default:
 1633         unreachable("invalid R32G32B32 format");
 1634     }
 1635 
 1636     radv_buffer_view_init(bview, cmd_buffer->device,
 1637                   &(VkBufferViewCreateInfo) {
 1638                       .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
 1639                       .flags = 0,
 1640                       .buffer = radv_buffer_to_handle(buffer),
 1641                       .format = format,
 1642                       .offset = offset,
 1643                       .range = VK_WHOLE_SIZE,
 1644                   });
 1645 }
 1646 
 1647 static unsigned
 1648 get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
 1649                    struct radv_meta_blit2d_surf *surf)
 1650 {
 1651     unsigned stride;
 1652 
 1653     if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
 1654         stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
 1655     } else {
 1656         stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
 1657     }
 1658 
 1659     return stride;
 1660 }
 1661 
 1662 static void
 1663 itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
 1664               struct radv_image_view *src,
 1665               struct radv_buffer_view *dst)
 1666 {
 1667     struct radv_device *device = cmd_buffer->device;
 1668 
 1669     radv_meta_push_descriptor_set(cmd_buffer,
 1670                       VK_PIPELINE_BIND_POINT_COMPUTE,
 1671                       device->meta_state.itob.img_p_layout,
 1672                       0, /* set */
 1673                       2, /* descriptorWriteCount */
 1674                       (VkWriteDescriptorSet[]) {
 1675                               {
 1676                                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 1677                                       .dstBinding = 0,
 1678                                       .dstArrayElement = 0,
 1679                                       .descriptorCount = 1,
 1680                                       .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
 1681                                       .pImageInfo = (VkDescriptorImageInfo[]) {
 1682                                               {
 1683                                                       .sampler = VK_NULL_HANDLE,
 1684                                                       .imageView = radv_image_view_to_handle(src),
 1685                                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 1686                                               },
 1687                                       }
 1688                               },
 1689                               {
 1690                                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 1691                                       .dstBinding = 1,
 1692                                       .dstArrayElement = 0,
 1693                                       .descriptorCount = 1,
 1694                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
 1695                                       .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(dst) },
 1696                               }
 1697                       });
 1698 }
 1699 
 1700 void
 1701 radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 1702               struct radv_meta_blit2d_surf *src,
 1703               struct radv_meta_blit2d_buffer *dst,
 1704               unsigned num_rects,
 1705               struct radv_meta_blit2d_rect *rects)
 1706 {
 1707     VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
 1708     struct radv_device *device = cmd_buffer->device;
 1709     struct radv_image_view src_view;
 1710     struct radv_buffer_view dst_view;
 1711 
 1712     create_iview(cmd_buffer, src, &src_view);
 1713     create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
 1714     itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
 1715 
 1716     if (device->physical_device->rad_info.chip_class >= GFX9 &&
 1717         src->image->type == VK_IMAGE_TYPE_3D)
 1718         pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
 1719 
 1720     radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 1721                  VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
 1722 
 1723     for (unsigned r = 0; r < num_rects; ++r) {
 1724         unsigned push_constants[4] = {
 1725             rects[r].src_x,
 1726             rects[r].src_y,
 1727             src->layer,
 1728             dst->pitch
 1729         };
 1730         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 1731                       device->meta_state.itob.img_p_layout,
 1732                       VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
 1733                       push_constants);
 1734 
 1735         radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
 1736     }
 1737 }
 1738 
 1739 static void
 1740 btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
 1741                 struct radv_buffer_view *src,
 1742                 struct radv_buffer_view *dst)
 1743 {
 1744     struct radv_device *device = cmd_buffer->device;
 1745 
 1746     radv_meta_push_descriptor_set(cmd_buffer,
 1747                       VK_PIPELINE_BIND_POINT_COMPUTE,
 1748                       device->meta_state.btoi_r32g32b32.img_p_layout,
 1749                       0, /* set */
 1750                       2, /* descriptorWriteCount */
 1751                       (VkWriteDescriptorSet[]) {
 1752                               {
 1753                                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 1754                                       .dstBinding = 0,
 1755                                       .dstArrayElement = 0,
 1756                                       .descriptorCount = 1,
 1757                                       .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
 1758                                       .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(src) },
 1759                               },
 1760                               {
 1761                                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 1762                                       .dstBinding = 1,
 1763                                       .dstArrayElement = 0,
 1764                                       .descriptorCount = 1,
 1765                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
 1766                                       .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(dst) },
 1767                               }
 1768                       });
 1769 }
 1770 
 1771 static void
 1772 radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
 1773                        struct radv_meta_blit2d_buffer *src,
 1774                        struct radv_meta_blit2d_surf *dst,
 1775                        unsigned num_rects,
 1776                        struct radv_meta_blit2d_rect *rects)
 1777 {
 1778     VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
 1779     struct radv_device *device = cmd_buffer->device;
 1780     struct radv_buffer_view src_view, dst_view;
 1781     unsigned dst_offset = 0;
 1782     unsigned stride;
 1783     VkBuffer buffer;
 1784 
 1785     /* This special btoi path for R32G32B32 formats will write the linear
 1786      * image as a buffer with the same underlying memory. The compute
 1787      * shader will copy all components separately using a R32 format.
 1788      */
 1789     create_buffer_from_image(cmd_buffer, dst,
 1790                  VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
 1791                  &buffer);
 1792 
 1793     create_bview(cmd_buffer, src->buffer, src->offset,
 1794              src->format, &src_view);
 1795     create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
 1796                    dst_offset, dst->format, &dst_view);
 1797     btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
 1798 
 1799     radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 1800                  VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
 1801 
 1802     stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
 1803 
 1804     for (unsigned r = 0; r < num_rects; ++r) {
 1805         unsigned push_constants[4] = {
 1806             rects[r].dst_x,
 1807             rects[r].dst_y,
 1808             stride,
 1809             src->pitch,
 1810         };
 1811 
 1812         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 1813                       device->meta_state.btoi_r32g32b32.img_p_layout,
 1814                       VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
 1815                       push_constants);
 1816 
 1817         radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
 1818     }
 1819 
 1820     radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
 1821 }
 1822 
 1823 static void
 1824 btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
 1825               struct radv_buffer_view *src,
 1826               struct radv_image_view *dst)
 1827 {
 1828     struct radv_device *device = cmd_buffer->device;
 1829 
 1830     radv_meta_push_descriptor_set(cmd_buffer,
 1831                       VK_PIPELINE_BIND_POINT_COMPUTE,
 1832                       device->meta_state.btoi.img_p_layout,
 1833                       0, /* set */
 1834                       2, /* descriptorWriteCount */
 1835                       (VkWriteDescriptorSet[]) {
 1836                               {
 1837                                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 1838                                       .dstBinding = 0,
 1839                                       .dstArrayElement = 0,
 1840                                       .descriptorCount = 1,
 1841                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
 1842                                       .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(src) },
 1843                               },
 1844                               {
 1845                                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 1846                                       .dstBinding = 1,
 1847                                       .dstArrayElement = 0,
 1848                                       .descriptorCount = 1,
 1849                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
 1850                                       .pImageInfo = (VkDescriptorImageInfo[]) {
 1851                                               {
 1852                                                       .sampler = VK_NULL_HANDLE,
 1853                                                       .imageView = radv_image_view_to_handle(dst),
 1854                                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 1855                                               },
 1856                                       }
 1857                               }
 1858                       });
 1859 }
 1860 
 1861 void
 1862 radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
 1863                  struct radv_meta_blit2d_buffer *src,
 1864                  struct radv_meta_blit2d_surf *dst,
 1865                  unsigned num_rects,
 1866                  struct radv_meta_blit2d_rect *rects)
 1867 {
 1868     VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
 1869     struct radv_device *device = cmd_buffer->device;
 1870     struct radv_buffer_view src_view;
 1871     struct radv_image_view dst_view;
 1872 
 1873     if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
 1874         dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
 1875         dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
 1876         radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
 1877                                num_rects, rects);
 1878         return;
 1879     }
 1880 
 1881     create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
 1882     create_iview(cmd_buffer, dst, &dst_view);
 1883     btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
 1884 
 1885     if (device->physical_device->rad_info.chip_class >= GFX9 &&
 1886         dst->image->type == VK_IMAGE_TYPE_3D)
 1887         pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
 1888     radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 1889                  VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
 1890 
 1891     for (unsigned r = 0; r < num_rects; ++r) {
 1892         unsigned push_constants[4] = {
 1893             rects[r].dst_x,
 1894             rects[r].dst_y,
 1895             dst->layer,
 1896             src->pitch,
 1897         };
 1898         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 1899                       device->meta_state.btoi.img_p_layout,
 1900                       VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
 1901                       push_constants);
 1902 
 1903         radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
 1904     }
 1905 }
 1906 
 1907 static void
 1908 itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
 1909                 struct radv_buffer_view *src,
 1910                 struct radv_buffer_view *dst)
 1911 {
 1912     struct radv_device *device = cmd_buffer->device;
 1913 
 1914     radv_meta_push_descriptor_set(cmd_buffer,
 1915                       VK_PIPELINE_BIND_POINT_COMPUTE,
 1916                       device->meta_state.itoi_r32g32b32.img_p_layout,
 1917                       0, /* set */
 1918                       2, /* descriptorWriteCount */
 1919                       (VkWriteDescriptorSet[]) {
 1920                               {
 1921                                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 1922                                       .dstBinding = 0,
 1923                                       .dstArrayElement = 0,
 1924                                       .descriptorCount = 1,
 1925                                       .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
 1926                                       .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(src) },
 1927                               },
 1928                               {
 1929                                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 1930                                       .dstBinding = 1,
 1931                                       .dstArrayElement = 0,
 1932                                       .descriptorCount = 1,
 1933                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
 1934                                       .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(dst) },
 1935                               }
 1936                       });
 1937 }
 1938 
 1939 static void
 1940 radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
 1941                       struct radv_meta_blit2d_surf *src,
 1942                       struct radv_meta_blit2d_surf *dst,
 1943                       unsigned num_rects,
 1944                       struct radv_meta_blit2d_rect *rects)
 1945 {
 1946     VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
 1947     struct radv_device *device = cmd_buffer->device;
 1948     struct radv_buffer_view src_view, dst_view;
 1949     unsigned src_offset = 0, dst_offset = 0;
 1950     unsigned src_stride, dst_stride;
 1951     VkBuffer src_buffer, dst_buffer;
 1952 
 1953     /* 96-bit formats are only compatible to themselves. */
 1954     assert(dst->format == VK_FORMAT_R32G32B32_UINT ||
 1955            dst->format == VK_FORMAT_R32G32B32_SINT ||
 1956            dst->format == VK_FORMAT_R32G32B32_SFLOAT);
 1957 
 1958     /* This special itoi path for R32G32B32 formats will write the linear
 1959      * image as a buffer with the same underlying memory. The compute
 1960      * shader will copy all components separately using a R32 format.
 1961      */
 1962     create_buffer_from_image(cmd_buffer, src,
 1963                  VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
 1964                  &src_buffer);
 1965     create_buffer_from_image(cmd_buffer, dst,
 1966                  VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
 1967                  &dst_buffer);
 1968 
 1969     create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer),
 1970                    src_offset, src->format, &src_view);
 1971     create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer),
 1972                    dst_offset, dst->format, &dst_view);
 1973     itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
 1974 
 1975     radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 1976                  VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
 1977 
 1978     src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
 1979     dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
 1980 
 1981     for (unsigned r = 0; r < num_rects; ++r) {
 1982         unsigned push_constants[6] = {
 1983             rects[r].src_x,
 1984             rects[r].src_y,
 1985             src_stride,
 1986             rects[r].dst_x,
 1987             rects[r].dst_y,
 1988             dst_stride,
 1989         };
 1990         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 1991                       device->meta_state.itoi_r32g32b32.img_p_layout,
 1992                       VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
 1993                       push_constants);
 1994 
 1995         radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
 1996     }
 1997 
 1998     radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
 1999     radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
 2000 }
 2001 
 2002 static void
 2003 itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
 2004               struct radv_image_view *src,
 2005               struct radv_image_view *dst)
 2006 {
 2007     struct radv_device *device = cmd_buffer->device;
 2008 
 2009     radv_meta_push_descriptor_set(cmd_buffer,
 2010                       VK_PIPELINE_BIND_POINT_COMPUTE,
 2011                       device->meta_state.itoi.img_p_layout,
 2012                       0, /* set */
 2013                       2, /* descriptorWriteCount */
 2014                       (VkWriteDescriptorSet[]) {
 2015                               {
 2016                                        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 2017                                        .dstBinding = 0,
 2018                                        .dstArrayElement = 0,
 2019                                        .descriptorCount = 1,
 2020                                        .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
 2021                                        .pImageInfo = (VkDescriptorImageInfo[]) {
 2022                                                {
 2023                                                        .sampler = VK_NULL_HANDLE,
 2024                                                        .imageView = radv_image_view_to_handle(src),
 2025                                                        .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 2026                                                },
 2027                                        }
 2028                               },
 2029                               {
 2030                                        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 2031                                        .dstBinding = 1,
 2032                                        .dstArrayElement = 0,
 2033                                        .descriptorCount = 1,
 2034                                        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
 2035                                        .pImageInfo = (VkDescriptorImageInfo[]) {
 2036                                                {
 2037                                                        .sampler = VK_NULL_HANDLE,
 2038                                                        .imageView = radv_image_view_to_handle(dst),
 2039                                                        .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 2040                                                },
 2041                                        }
 2042                               }
 2043                       });
 2044 }
 2045 
 2046 void
 2047 radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
 2048                 struct radv_meta_blit2d_surf *src,
 2049                 struct radv_meta_blit2d_surf *dst,
 2050                 unsigned num_rects,
 2051                 struct radv_meta_blit2d_rect *rects)
 2052 {
 2053     VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline;
 2054     struct radv_device *device = cmd_buffer->device;
 2055     struct radv_image_view src_view, dst_view;
 2056 
 2057     if (src->format == VK_FORMAT_R32G32B32_UINT ||
 2058         src->format == VK_FORMAT_R32G32B32_SINT ||
 2059         src->format == VK_FORMAT_R32G32B32_SFLOAT) {
 2060         radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst,
 2061                               num_rects, rects);
 2062         return;
 2063     }
 2064 
 2065     create_iview(cmd_buffer, src, &src_view);
 2066     create_iview(cmd_buffer, dst, &dst_view);
 2067 
 2068     itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
 2069 
 2070     if (device->physical_device->rad_info.chip_class >= GFX9 &&
 2071         (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
 2072         pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
 2073     radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 2074                  VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
 2075 
 2076     for (unsigned r = 0; r < num_rects; ++r) {
 2077         unsigned push_constants[6] = {
 2078             rects[r].src_x,
 2079             rects[r].src_y,
 2080             src->layer,
 2081             rects[r].dst_x,
 2082             rects[r].dst_y,
 2083             dst->layer,
 2084         };
 2085         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 2086                       device->meta_state.itoi.img_p_layout,
 2087                       VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
 2088                       push_constants);
 2089 
 2090         radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
 2091     }
 2092 }
 2093 
 2094 static void
 2095 cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
 2096                   struct radv_buffer_view *view)
 2097 {
 2098     struct radv_device *device = cmd_buffer->device;
 2099 
 2100     radv_meta_push_descriptor_set(cmd_buffer,
 2101                       VK_PIPELINE_BIND_POINT_COMPUTE,
 2102                       device->meta_state.cleari_r32g32b32.img_p_layout,
 2103                       0, /* set */
 2104                       1, /* descriptorWriteCount */
 2105                       (VkWriteDescriptorSet[]) {
 2106                               {
 2107                                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 2108                                       .dstBinding = 0,
 2109                                       .dstArrayElement = 0,
 2110                                       .descriptorCount = 1,
 2111                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
 2112                                       .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(view) },
 2113                               }
 2114                       });
 2115 }
 2116 
 2117 static void
 2118 radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
 2119                    struct radv_meta_blit2d_surf *dst,
 2120                    const VkClearColorValue *clear_color)
 2121 {
 2122     VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
 2123     struct radv_device *device = cmd_buffer->device;
 2124     struct radv_buffer_view dst_view;
 2125     unsigned stride;
 2126     VkBuffer buffer;
 2127 
 2128     /* This special clear path for R32G32B32 formats will write the linear
 2129      * image as a buffer with the same underlying memory. The compute
 2130      * shader will clear all components separately using a R32 format.
 2131      */
 2132     create_buffer_from_image(cmd_buffer, dst,
 2133                  VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
 2134                  &buffer);
 2135 
 2136     create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
 2137                    0, dst->format, &dst_view);
 2138     cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
 2139 
 2140     radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 2141                  VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
 2142 
 2143     stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
 2144 
 2145     unsigned push_constants[4] = {
 2146         clear_color->uint32[0],
 2147         clear_color->uint32[1],
 2148         clear_color->uint32[2],
 2149         stride,
 2150     };
 2151 
 2152     radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 2153                   device->meta_state.cleari_r32g32b32.img_p_layout,
 2154                   VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
 2155                   push_constants);
 2156 
 2157     radv_unaligned_dispatch(cmd_buffer, dst->image->info.width,
 2158                 dst->image->info.height, 1);
 2159 
 2160     radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
 2161 }
 2162 
 2163 static void
 2164 cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
 2165                     struct radv_image_view *dst_iview)
 2166 {
 2167     struct radv_device *device = cmd_buffer->device;
 2168 
 2169     radv_meta_push_descriptor_set(cmd_buffer,
 2170                       VK_PIPELINE_BIND_POINT_COMPUTE,
 2171                       device->meta_state.cleari.img_p_layout,
 2172                       0, /* set */
 2173                       1, /* descriptorWriteCount */
 2174                       (VkWriteDescriptorSet[]) {
 2175                               {
 2176                                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 2177                                       .dstBinding = 0,
 2178                                       .dstArrayElement = 0,
 2179                                       .descriptorCount = 1,
 2180                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
 2181                                       .pImageInfo = (VkDescriptorImageInfo[]) {
 2182                                                {
 2183                                                       .sampler = VK_NULL_HANDLE,
 2184                                                       .imageView = radv_image_view_to_handle(dst_iview),
 2185                                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 2186                                                },
 2187                                       }
 2188                                },
 2189                       });
 2190 }
 2191 
 2192 void
 2193 radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
 2194              struct radv_meta_blit2d_surf *dst,
 2195              const VkClearColorValue *clear_color)
 2196 {
 2197     VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
 2198     struct radv_device *device = cmd_buffer->device;
 2199     struct radv_image_view dst_iview;
 2200 
 2201     if (dst->format == VK_FORMAT_R32G32B32_UINT ||
 2202         dst->format == VK_FORMAT_R32G32B32_SINT ||
 2203         dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
 2204         radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
 2205         return;
 2206     }
 2207 
 2208     create_iview(cmd_buffer, dst, &dst_iview);
 2209     cleari_bind_descriptors(cmd_buffer, &dst_iview);
 2210 
 2211     if (device->physical_device->rad_info.chip_class >= GFX9 &&
 2212         dst->image->type == VK_IMAGE_TYPE_3D)
 2213         pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
 2214 
 2215     radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 2216                  VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
 2217 
 2218     unsigned push_constants[5] = {
 2219         clear_color->uint32[0],
 2220         clear_color->uint32[1],
 2221         clear_color->uint32[2],
 2222         clear_color->uint32[3],
 2223         dst->layer,
 2224     };
 2225 
 2226     radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 2227                   device->meta_state.cleari.img_p_layout,
 2228                   VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
 2229                   push_constants);
 2230 
 2231     radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
 2232 }