"Fossies" - the Fresh Open Source Software Archive

Member "darktable-2.6.3/src/common/bilateralcl.c" (20 Oct 2019, 16735 Bytes) of package /linux/misc/darktable-2.6.3.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "bilateralcl.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.0.0.rc0_vs_3.0.0.rc1.

    1 /*
    2     This file is part of darktable,
    3     copyright (c) 2012 johannes hanika.
    4 
    5     darktable is free software: you can redistribute it and/or modify
    6     it under the terms of the GNU General Public License as published by
    7     the Free Software Foundation, either version 3 of the License, or
    8     (at your option) any later version.
    9 
   10     darktable is distributed in the hope that it will be useful,
   11     but WITHOUT ANY WARRANTY; without even the implied warranty of
   12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13     GNU General Public License for more details.
   14 
   15     You should have received a copy of the GNU General Public License
   16     along with darktable.  If not, see <http://www.gnu.org/licenses/>.
   17 */
   18 
   19 #ifdef HAVE_OPENCL
   20 
   21 #include "common/bilateralcl.h"
   22 #include "CL/cl.h"            // for _cl_mem, cl_mem, CL_SUCCESS
   23 #include "CL/cl_platform.h"   // for cl_int
   24 #include "common/darktable.h" // for CLAMPS, dt_print, darktable, darktable_t
   25 #include "common/opencl.h"    // for dt_opencl_set_kernel_arg, dt_opencl_cr...
   26 #include <glib.h>             // for MAX
   27 #include <math.h>             // for roundf
   28 #include <stdlib.h>           // for free, malloc
   29 
   30 dt_bilateral_cl_global_t *dt_bilateral_init_cl_global()
   31 {
   32   dt_bilateral_cl_global_t *b = (dt_bilateral_cl_global_t *)malloc(sizeof(dt_bilateral_cl_global_t));
   33 
   34   const int program = 10; // bilateral.cl, from programs.conf
   35   b->kernel_zero = dt_opencl_create_kernel(program, "zero");
   36   b->kernel_splat = dt_opencl_create_kernel(program, "splat");
   37   b->kernel_blur_line = dt_opencl_create_kernel(program, "blur_line");
   38   b->kernel_blur_line_z = dt_opencl_create_kernel(program, "blur_line_z");
   39   b->kernel_slice = dt_opencl_create_kernel(program, "slice");
   40   b->kernel_slice2 = dt_opencl_create_kernel(program, "slice_to_output");
   41   return b;
   42 }
   43 
   44 void dt_bilateral_free_cl(dt_bilateral_cl_t *b)
   45 {
   46   if(!b) return;
   47   // be sure we're done with the memory:
   48   dt_opencl_finish(b->devid);
   49   // free device mem
   50   dt_opencl_release_mem_object(b->dev_grid);
   51   dt_opencl_release_mem_object(b->dev_grid_tmp);
   52   free(b);
   53 }
   54 
   55 
   56 size_t dt_bilateral_memory_use(const int width,     // width of input image
   57                                const int height,    // height of input image
   58                                const float sigma_s, // spatial sigma (blur pixel coords)
   59                                const float sigma_r) // range sigma (blur luma values)
   60 {
   61   float _x = roundf(width / sigma_s);
   62   float _y = roundf(height / sigma_s);
   63   float _z = roundf(100.0f / sigma_r);
   64   size_t size_x = CLAMPS((int)_x, 4, 900) + 1;
   65   size_t size_y = CLAMPS((int)_y, 4, 900) + 1;
   66   size_t size_z = CLAMPS((int)_z, 4, 50) + 1;
   67 
   68   return size_x * size_y * size_z * sizeof(float) * 2;
   69 }
   70 
   71 // modules that want to use dt_bilateral_slice_to_output_cl() ought to take this one;
   72 // takes account of an additional temp buffer needed in the OpenCL code path
   73 size_t dt_bilateral_memory_use2(const int width,
   74                                 const int height,
   75                                 const float sigma_s,
   76                                 const float sigma_r)
   77 {
   78   return dt_bilateral_memory_use(width, height, sigma_s, sigma_r) + (size_t)width * height * 4 * sizeof(float);
   79 }
   80 
   81 
   82 size_t dt_bilateral_singlebuffer_size(const int width,     // width of input image
   83                                       const int height,    // height of input image
   84                                       const float sigma_s, // spatial sigma (blur pixel coords)
   85                                       const float sigma_r) // range sigma (blur luma values)
   86 {
   87   float _x = roundf(width / sigma_s);
   88   float _y = roundf(height / sigma_s);
   89   float _z = roundf(100.0f / sigma_r);
   90   size_t size_x = CLAMPS((int)_x, 4, 900) + 1;
   91   size_t size_y = CLAMPS((int)_y, 4, 900) + 1;
   92   size_t size_z = CLAMPS((int)_z, 4, 50) + 1;
   93 
   94   return size_x * size_y * size_z * sizeof(float);
   95 }
   96 
   97 // modules that want to use dt_bilateral_slice_to_output_cl() ought to take this one;
   98 // takes account of an additional temp buffer needed in the OpenCL code path
   99 size_t dt_bilateral_singlebuffer_size2(const int width,
  100                                        const int height,
  101                                        const float sigma_s,
  102                                        const float sigma_r)
  103 {
  104   return MAX(dt_bilateral_singlebuffer_size(width, height, sigma_s, sigma_r), (size_t)width * height * 4 * sizeof(float));
  105 }
  106 
  107 
  108 dt_bilateral_cl_t *dt_bilateral_init_cl(const int devid,
  109                                         const int width,     // width of input image
  110                                         const int height,    // height of input image
  111                                         const float sigma_s, // spatial sigma (blur pixel coords)
  112                                         const float sigma_r) // range sigma (blur luma values)
  113 {
  114   dt_opencl_local_buffer_t locopt
  115     = (dt_opencl_local_buffer_t){ .xoffset = 0, .xfactor = 1, .yoffset = 0, .yfactor = 1,
  116                                   .cellsize = 8 * sizeof(float) + sizeof(int), .overhead = 0,
  117                                   .sizex = 1 << 6, .sizey = 1 << 6 };
  118 
  119   if(!dt_opencl_local_buffer_opt(devid, darktable.opencl->bilateral->kernel_splat, &locopt))
  120   {
  121     dt_print(DT_DEBUG_OPENCL,
  122              "[opencl_bilateral] can not identify resource limits for device %d in bilateral grid\n", devid);
  123     return NULL;
  124   }
  125 
  126   if(locopt.sizex * locopt.sizey < 16 * 16)
  127   {
  128     dt_print(DT_DEBUG_OPENCL,
  129              "[opencl_bilateral] device %d does not offer sufficient resources to run bilateral grid\n",
  130              devid);
  131     return NULL;
  132   }
  133 
  134   dt_bilateral_cl_t *b = (dt_bilateral_cl_t *)malloc(sizeof(dt_bilateral_cl_t));
  135   if(!b) return NULL;
  136 
  137   b->global = darktable.opencl->bilateral;
  138   float _x = roundf(width / sigma_s);
  139   float _y = roundf(height / sigma_s);
  140   float _z = roundf(100.0f / sigma_r);
  141   b->size_x = CLAMPS((int)_x, 4, 900) + 1;
  142   b->size_y = CLAMPS((int)_y, 4, 900) + 1;
  143   b->size_z = CLAMPS((int)_z, 4, 50) + 1;
  144   b->width = width;
  145   b->height = height;
  146   b->blocksizex = locopt.sizex;
  147   b->blocksizey = locopt.sizey;
  148   b->sigma_s = MAX(height / (b->size_y - 1.0f), width / (b->size_x - 1.0f));
  149   b->sigma_r = 100.0f / (b->size_z - 1.0f);
  150   b->devid = devid;
  151   b->dev_grid = NULL;
  152   b->dev_grid_tmp = NULL;
  153 
  154   // alloc grid buffer:
  155   b->dev_grid
  156       = dt_opencl_alloc_device_buffer(b->devid, (size_t)b->size_x * b->size_y * b->size_z * sizeof(float));
  157   if(!b->dev_grid)
  158   {
  159     dt_bilateral_free_cl(b);
  160     return NULL;
  161   }
  162 
  163   // alloc temporary grid buffer
  164   b->dev_grid_tmp
  165       = dt_opencl_alloc_device_buffer(b->devid, (size_t)b->size_x * b->size_y * b->size_z * sizeof(float));
  166   if(!b->dev_grid_tmp)
  167   {
  168     dt_bilateral_free_cl(b);
  169     return NULL;
  170   }
  171 
  172   // zero out grid
  173   int wd = b->size_x, ht = b->size_y * b->size_z;
  174   size_t sizes[] = { ROUNDUPWD(wd), ROUNDUPHT(ht), 1 };
  175   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 0, sizeof(cl_mem), (void *)&b->dev_grid);
  176   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 1, sizeof(int), (void *)&wd);
  177   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_zero, 2, sizeof(int), (void *)&ht);
  178   cl_int err = -666;
  179   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_zero, sizes);
  180   if(err != CL_SUCCESS)
  181   {
  182     dt_bilateral_free_cl(b);
  183     return NULL;
  184   }
  185 
  186 #if 0
  187   fprintf(stderr, "[bilateral] created grid [%d %d %d]"
  188           " with sigma (%f %f) (%f %f)\n", b->size_x, b->size_y, b->size_z,
  189           b->sigma_s, sigma_s, b->sigma_r, sigma_r);
  190 #endif
  191   return b;
  192 }
  193 
  194 cl_int dt_bilateral_splat_cl(dt_bilateral_cl_t *b, cl_mem in)
  195 {
  196   cl_int err = -666;
  197   size_t sizes[] = { ROUNDUP(b->width, b->blocksizex), ROUNDUP(b->height, b->blocksizey), 1 };
  198   size_t local[] = { b->blocksizex, b->blocksizey, 1 };
  199   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 0, sizeof(cl_mem), (void *)&in);
  200   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 1, sizeof(cl_mem), (void *)&b->dev_grid);
  201   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 2, sizeof(int), (void *)&b->width);
  202   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 3, sizeof(int), (void *)&b->height);
  203   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 4, sizeof(int), (void *)&b->size_x);
  204   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 5, sizeof(int), (void *)&b->size_y);
  205   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 6, sizeof(int), (void *)&b->size_z);
  206   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 7, sizeof(float), (void *)&b->sigma_s);
  207   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 8, sizeof(float), (void *)&b->sigma_r);
  208   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 9, b->blocksizex * b->blocksizey * sizeof(int),
  209                            NULL);
  210   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_splat, 10,
  211                            b->blocksizex * b->blocksizey * 8 * sizeof(float), NULL);
  212   err = dt_opencl_enqueue_kernel_2d_with_local(b->devid, b->global->kernel_splat, sizes, local);
  213   return err;
  214 }
  215 
  216 cl_int dt_bilateral_blur_cl(dt_bilateral_cl_t *b)
  217 {
  218   cl_int err = -666;
  219   size_t sizes[3] = { 0, 0, 1 };
  220 
  221   err = dt_opencl_enqueue_copy_buffer_to_buffer(b->devid, b->dev_grid, b->dev_grid_tmp, 0, 0,
  222                                                 b->size_x * b->size_y * b->size_z * sizeof(float));
  223   if(err != CL_SUCCESS) return err;
  224 
  225   sizes[0] = ROUNDUPWD(b->size_z);
  226   sizes[1] = ROUNDUPHT(b->size_y);
  227   int stride1, stride2, stride3;
  228   stride1 = b->size_x * b->size_y;
  229   stride2 = b->size_x;
  230   stride3 = 1;
  231   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 0, sizeof(cl_mem), (void *)&b->dev_grid_tmp);
  232   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 1, sizeof(cl_mem), (void *)&b->dev_grid);
  233   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 2, sizeof(int), (void *)&stride1);
  234   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 3, sizeof(int), (void *)&stride2);
  235   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 4, sizeof(int), (void *)&stride3);
  236   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 5, sizeof(int), (void *)&b->size_z);
  237   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 6, sizeof(int), (void *)&b->size_y);
  238   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 7, sizeof(int), (void *)&b->size_x);
  239   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line, sizes);
  240   if(err != CL_SUCCESS) return err;
  241 
  242   stride1 = b->size_x * b->size_y;
  243   stride2 = 1;
  244   stride3 = b->size_x;
  245   sizes[0] = ROUNDUPWD(b->size_z);
  246   sizes[1] = ROUNDUPHT(b->size_x);
  247   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 0, sizeof(cl_mem), (void *)&b->dev_grid);
  248   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 1, sizeof(cl_mem), (void *)&b->dev_grid_tmp);
  249   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 2, sizeof(int), (void *)&stride1);
  250   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 3, sizeof(int), (void *)&stride2);
  251   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 4, sizeof(int), (void *)&stride3);
  252   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 5, sizeof(int), (void *)&b->size_z);
  253   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 6, sizeof(int), (void *)&b->size_x);
  254   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line, 7, sizeof(int), (void *)&b->size_y);
  255   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line, sizes);
  256   if(err != CL_SUCCESS) return err;
  257 
  258   stride1 = 1;
  259   stride2 = b->size_x;
  260   stride3 = b->size_x * b->size_y;
  261   sizes[0] = ROUNDUPWD(b->size_x);
  262   sizes[1] = ROUNDUPHT(b->size_y);
  263   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 0, sizeof(cl_mem),
  264                            (void *)&b->dev_grid_tmp);
  265   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 1, sizeof(cl_mem), (void *)&b->dev_grid);
  266   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 2, sizeof(int), (void *)&stride1);
  267   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 3, sizeof(int), (void *)&stride2);
  268   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 4, sizeof(int), (void *)&stride3);
  269   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 5, sizeof(int), (void *)&b->size_x);
  270   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 6, sizeof(int), (void *)&b->size_y);
  271   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_blur_line_z, 7, sizeof(int), (void *)&b->size_z);
  272   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_blur_line_z, sizes);
  273   return err;
  274 }
  275 
  276 cl_int dt_bilateral_slice_to_output_cl(dt_bilateral_cl_t *b, cl_mem in, cl_mem out, const float detail)
  277 {
  278   cl_int err = -666;
  279   cl_mem tmp = NULL;
  280 
  281   tmp = dt_opencl_alloc_device(b->devid, b->width, b->height, 4 * sizeof(float));
  282   if(tmp == NULL) goto error;
  283 
  284   size_t origin[] = { 0, 0, 0 };
  285   size_t region[] = { b->width, b->height, 1 };
  286   err = dt_opencl_enqueue_copy_image(b->devid, out, tmp, origin, origin, region);
  287   if(err != CL_SUCCESS) goto error;
  288 
  289   size_t sizes[] = { ROUNDUPWD(b->width), ROUNDUPHT(b->height), 1 };
  290   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 0, sizeof(cl_mem), (void *)&in);
  291   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 1, sizeof(cl_mem), (void *)&tmp);
  292   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 2, sizeof(cl_mem), (void *)&out);
  293   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 3, sizeof(cl_mem), (void *)&b->dev_grid);
  294   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 4, sizeof(int), (void *)&b->width);
  295   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 5, sizeof(int), (void *)&b->height);
  296   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 6, sizeof(int), (void *)&b->size_x);
  297   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 7, sizeof(int), (void *)&b->size_y);
  298   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 8, sizeof(int), (void *)&b->size_z);
  299   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 9, sizeof(float), (void *)&b->sigma_s);
  300   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 10, sizeof(float), (void *)&b->sigma_r);
  301   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice2, 11, sizeof(float), (void *)&detail);
  302   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_slice2, sizes);
  303 
  304   dt_opencl_release_mem_object(tmp);
  305   return err;
  306 
  307 error:
  308   dt_opencl_release_mem_object(tmp);
  309   return err;
  310 }
  311 
  312 cl_int dt_bilateral_slice_cl(dt_bilateral_cl_t *b, cl_mem in, cl_mem out, const float detail)
  313 {
  314   cl_int err = -666;
  315   size_t sizes[] = { ROUNDUPWD(b->width), ROUNDUPHT(b->height), 1 };
  316   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 0, sizeof(cl_mem), (void *)&in);
  317   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 1, sizeof(cl_mem), (void *)&out);
  318   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 2, sizeof(cl_mem), (void *)&b->dev_grid);
  319   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 3, sizeof(int), (void *)&b->width);
  320   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 4, sizeof(int), (void *)&b->height);
  321   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 5, sizeof(int), (void *)&b->size_x);
  322   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 6, sizeof(int), (void *)&b->size_y);
  323   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 7, sizeof(int), (void *)&b->size_z);
  324   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 8, sizeof(float), (void *)&b->sigma_s);
  325   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 9, sizeof(float), (void *)&b->sigma_r);
  326   dt_opencl_set_kernel_arg(b->devid, b->global->kernel_slice, 10, sizeof(float), (void *)&detail);
  327   err = dt_opencl_enqueue_kernel_2d(b->devid, b->global->kernel_slice, sizes);
  328   return err;
  329 }
  330 
  331 void dt_bilateral_free_cl_global(dt_bilateral_cl_global_t *b)
  332 {
  333   if(!b) return;
  334   // destroy kernels
  335   dt_opencl_free_kernel(b->kernel_zero);
  336   dt_opencl_free_kernel(b->kernel_splat);
  337   dt_opencl_free_kernel(b->kernel_blur_line);
  338   dt_opencl_free_kernel(b->kernel_blur_line_z);
  339   dt_opencl_free_kernel(b->kernel_slice);
  340   dt_opencl_free_kernel(b->kernel_slice2);
  341   free(b);
  342 }
  343 
  344 #endif
  345 
  346 // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh
  347 // vim: shiftwidth=2 expandtab tabstop=2 cindent
  348 // kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;