"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/state_trackers/clover/api/transfer.cpp" (16 Sep 2020, 35326 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "transfer.cpp" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 //
    2 // Copyright 2012 Francisco Jerez
    3 //
    4 // Permission is hereby granted, free of charge, to any person obtaining a
    5 // copy of this software and associated documentation files (the "Software"),
    6 // to deal in the Software without restriction, including without limitation
    7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8 // and/or sell copies of the Software, and to permit persons to whom the
    9 // Software is furnished to do so, subject to the following conditions:
   10 //
   11 // The above copyright notice and this permission notice shall be included in
   12 // all copies or substantial portions of the Software.
   13 //
   14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
   18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   20 // OTHER DEALINGS IN THE SOFTWARE.
   21 //
   22 
   23 #include <cstring>
   24 
   25 #include "util/bitscan.h"
   26 
   27 #include "api/dispatch.hpp"
   28 #include "api/util.hpp"
   29 #include "core/event.hpp"
   30 #include "core/memory.hpp"
   31 
   32 using namespace clover;
   33 
   34 namespace {
   // Shorthand for the 3-component size/offset vector type used by all
   // transfer commands in this file.
   typedef resource::vector vector_t;

   ///
   /// Build a 3-component vector from the array of three size_t values
   /// (origin/region) that the CL API passes in.
   ///
   vector_t
   vector(const size_t *p) {
      return range(p, 3);
   }
   41 
   ///
   /// Normalize a pitch vector: any row/slice pitch given as zero is
   /// replaced with the natural (tightly packed) pitch of the previous
   /// dimension, as the CL spec requires.
   ///
   /// \param region Extent of the transfer in each dimension.
   /// \param pitch  {element size, row pitch, slice pitch}, possibly
   ///               containing zeros.  Taken by value; returned with
   ///               the zeros filled in.
   ///
   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      // Walk {row pitch, slice pitch} alongside the packed size of the
      // previous dimension ({elem * width, row * height, ...}).
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }
   54 
   55    ///
   56    /// Size of a region in bytes.
   57    ///
   58    size_t
   59    size(const vector_t &pitch, const vector_t &region) {
   60       if (any_of(is_zero(), region))
   61          return 0;
   62       else
   63          return dot(pitch, region - vector_t{ 0, 1, 1 });
   64    }
   65 
   66    ///
   67    /// Common argument checking shared by memory transfer commands.
   68    ///
   69    void
   70    validate_common(command_queue &q,
   71                    const ref_vector<event> &deps) {
   72       if (any_of([&](const event &ev) {
   73                return ev.context() != q.context();
   74             }, deps))
   75          throw error(CL_INVALID_CONTEXT);
   76    }
   77 
   ///
   /// Common error checking for a buffer object argument.
   ///
   /// Verifies that \a mem belongs to the queue's context and that the
   /// region described by \a origin, \a pitch and \a region lies
   /// entirely within the buffer.  Throws CL_INVALID_CONTEXT or
   /// CL_INVALID_VALUE accordingly.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      // A degenerate (zero-extent) region is invalid as well.
      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }
   98 
   ///
   /// Common error checking for an image argument.
   ///
   /// Checks device image support, context membership and that the
   /// region starting at \a orig fits inside the image extents.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      vector_t size = { img.width(), img.height(), img.depth() };

      if (!q.device().image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must lie entirely within the image...
      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      // ...and must be non-degenerate.
      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }
  119 
   ///
   /// Common error checking for a host pointer argument.
   ///
   /// \a orig is accepted for symmetry with the other overloads but is
   /// not bounds-checked here, since host memory has no known size.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }
  133 
   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   /// Throws CL_MEM_COPY_OVERLAP when source and destination are the
   /// same buffer and the two transfers address overlapping byte
   /// ranges.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         // Compare the linearized byte intervals of both regions.
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }
  153 
   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      // Image copies are only defined between identical formats.
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
         // Two boxes overlap only if their intervals intersect along
         // every axis, hence all_of rather than any_of.
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }
  172 
  173    ///
  174    /// Checks that the host access flags of the memory object are
  175    /// within the allowed set \a flags.
  176    ///
  177    void
  178    validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
  179       if (mem.flags() & ~flags &
  180           (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
  181            CL_MEM_HOST_NO_ACCESS))
  182          throw error(CL_INVALID_OPERATION);
  183    }
  184 
  185    ///
  186    /// Checks that the mapping flags are correct.
  187    ///
  188    void
  189    validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
  190       if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
  191           (flags & CL_MAP_WRITE_INVALIDATE_REGION))
  192          throw error(CL_INVALID_VALUE);
  193 
  194       if (flags & CL_MAP_READ)
  195          validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
  196 
  197       if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
  198          validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
  199    }
  200 
   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  The return value of get() should be implicitly
   /// convertible to \a void *.
   ///
   /// The generic version maps a memory object's resource; the
   /// specializations below handle plain host pointers, for which no
   /// mapping is needed.
   ///
   template<typename T>
   struct _map {
      static mapping
      get(command_queue &q, T obj, cl_map_flags flags,
          size_t offset, size_t size) {
         // Map a 1-D byte range of the object's resource.
         // NOTE(review): the "true" argument presumably requests a
         // blocking map -- confirm against mapping's constructor.
         return { q, obj->resource(q), flags, true,
                  {{ offset }}, {{ size, 1, 1 }} };
      }
   };
  215 
  216    template<>
  217    struct _map<void *> {
  218       static void *
  219       get(command_queue &q, void *obj, cl_map_flags flags,
  220           size_t offset, size_t size) {
  221          return (char *)obj + offset;
  222       }
  223    };
  224 
  225    template<>
  226    struct _map<const void *> {
  227       static const void *
  228       get(command_queue &q, const void *obj, cl_map_flags flags,
  229           size_t offset, size_t size) {
  230          return (const char *)obj + offset;
  231       }
  232    };
  233 
   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
   ///
   /// Returns an action suitable for attaching to an event: when
   /// executed it maps both objects (a no-op for host pointers, see
   /// _map above) and copies the region row by row with memcpy.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         auto dst = _map<T>::get(q, dst_obj, CL_MAP_WRITE,
                                 dot(dst_pitch, dst_orig),
                                 size(dst_pitch, region));
         auto src = _map<S>::get(q, src_obj, CL_MAP_READ,
                                 dot(src_pitch, src_orig),
                                 size(src_pitch, region));
         vector_t v = {};

         // Iterate over slices and rows; each innermost row is
         // contiguous (pitch[0] is the element size at every call
         // site), so a whole row is one memcpy.
         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst_pitch, v),
                  static_cast<const char *>(src) + dot(src_pitch, v),
                  src_pitch[0] * region[0]);
            }
         }
      };
   }
  263 
  264    ///
  265    /// Hardware copy from \a src_obj to \a dst_obj.
  266    ///
  267    template<typename T, typename S>
  268    std::function<void (event &)>
  269    hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
  270                 S src_obj, const vector_t &src_orig, const vector_t &region) {
  271       return [=, &q](event &) {
  272          dst_obj->resource(q).copy(q, dst_orig, region,
  273                                    src_obj->resource(q), src_orig);
  274       };
  275    }
  276 }
  277 
///
/// Enqueue a read of "size" bytes at byte "offset" of a buffer into
/// host memory at "ptr" (clEnqueueReadBuffer).
///
CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   // Express the linear transfer as a degenerate 3-D copy so the
   // common rect helpers can be reused.
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   // A blocking read may not return until the copy has finished.
   if (blocking)
       hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  310 
///
/// Enqueue a write of "size" bytes from host memory at "ptr" into a
/// buffer at byte "offset" (clEnqueueWriteBuffer).
///
CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   // Express the linear transfer as a degenerate 3-D copy.
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   // A blocking write may not return until the host data has been
   // consumed.
   if (blocking)
       hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  343 
///
/// Enqueue a rectangular (up to 3-D) read from a buffer into host
/// memory (clEnqueueReadBufferRect).  Zero pitches are replaced with
/// the natural packed pitch by pitch().
///
CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   // A blocking read may not return until the copy has finished.
   if (blocking)
       hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  383 
///
/// Enqueue a rectangular (up to 3-D) write from host memory into a
/// buffer (clEnqueueWriteBufferRect).  Zero pitches are replaced with
/// the natural packed pitch by pitch().
///
CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   // A blocking write may not return until the host data has been
   // consumed.
   if (blocking)
       hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  423 
///
/// Enqueue a device-side copy of "size" bytes between two buffers
/// (clEnqueueCopyBuffer).  Uses hard_copy_op, i.e. the copy is done
/// by the hardware rather than via a host mapping.
///
CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   // Express the linear transfer as a degenerate 3-D copy.
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   // Same-buffer copies must not overlap (CL_MEM_COPY_OVERLAP).
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  456 
///
/// Enqueue a rectangular copy between two buffers
/// (clEnqueueCopyBufferRect).  Performed as a software copy through
/// host mappings (soft_copy_op).
///
CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   // Same-buffer copies must not overlap (CL_MEM_COPY_OVERLAP).
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  494 
///
/// Enqueue a read of an image region into host memory
/// (clEnqueueReadImage).  The source pitch comes from the image's own
/// layout, the destination pitch from the caller (zeros meaning
/// tightly packed).
///
CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   // A blocking read may not return until the copy has finished.
   if (blocking)
       hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  531 
///
/// Enqueue a write of host memory into an image region
/// (clEnqueueWriteImage).  The destination pitch comes from the
/// image's own layout, the source pitch from the caller (zeros
/// meaning tightly packed).
///
CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   // A blocking write may not return until the host data has been
   // consumed.
   if (blocking)
       hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  568 
///
/// Enqueue a device-side copy between two images of identical format
/// (clEnqueueCopyImage).  Uses hard_copy_op, i.e. the copy is done by
/// the hardware rather than via host mappings.
///
CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   // Same-image copies must not overlap; formats must match.
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  600 
///
/// Enqueue a copy of an image region into a buffer
/// (clEnqueueCopyImageToBuffer).  The buffer side is treated as a
/// tightly packed array of pixels, hence its pitch is derived from
/// the source image's pixel size.
///
CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  636 
///
/// Enqueue a copy of buffer contents into an image region
/// (clEnqueueCopyBufferToImage).  The buffer side is treated as a
/// tightly packed array of pixels, hence its pitch is derived from
/// the destination image's pixel size.
///
CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  672 
///
/// Map a byte range of a buffer into host address space
/// (clEnqueueMapBuffer).  The mapping itself is created synchronously
/// by add_map(); the returned event only tracks ordering in the
/// queue.
///
CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   // Express the mapped range as a degenerate 3-D region.
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
       hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}
  703 
///
/// Map a region of an image into host address space
/// (clEnqueueMapImage).  The mapping itself is created synchronously
/// by add_map(); the returned event only tracks ordering.
///
/// NOTE(review): the row_pitch/slice_pitch out-parameters are never
/// written here, although the CL spec requires clEnqueueMapImage to
/// fill in the pitches of the mapped region -- verify against the
/// resource/mapping implementation.
///
CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   void *map = img.resource(q).add_map(q, flags, blocking, origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
       hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}
  735 
  736 CLOVER_API cl_int
  737 clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
  738                         cl_uint num_deps, const cl_event *d_deps,
  739                         cl_event *rd_ev) try {
  740    auto &q = obj(d_q);
  741    auto &mem = obj(d_mem);
  742    auto deps = objs<wait_list_tag>(d_deps, num_deps);
  743 
  744    validate_common(q, deps);
  745 
  746    auto hev = create<hard_event>(
  747       q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
  748       [=, &q, &mem](event &) {
  749          mem.resource(q).del_map(ptr);
  750       });
  751 
  752    ret_object(rd_ev, hev);
  753    return CL_SUCCESS;
  754 
  755 } catch (error &e) {
  756    return e.get();
  757 }
  758 
///
/// Stub for the CL 1.2 memory migration entry point: not implemented,
/// always fails with CL_INVALID_VALUE after logging via
/// CLOVER_NOT_SUPPORTED_UNTIL.
///
CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue command_queue,
                           cl_uint num_mem_objects,
                           const cl_mem *mem_objects,
                           cl_mem_migration_flags flags,
                           cl_uint num_events_in_wait_list,
                           const cl_event *event_wait_list,
                           cl_event *event) {
   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
   return CL_INVALID_VALUE;
}
  770 
///
/// Shared implementation behind clEnqueueSVMFree (and, judging by the
/// "cmd" parameter, other command types): enqueues an event that
/// calls \a pfn_free_func on the given SVM pointers.
///
/// If no callback is supplied, the pointers are freed with the system
/// allocator -- only valid when the device has system SVM support;
/// otherwise the call fails as unsupported before CL 2.0.
///
cl_int
clover::EnqueueSVMFree(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       void *svm_pointers[],
                       void (CL_CALLBACK *pfn_free_func) (
                           cl_command_queue queue, cl_uint num_svm_pointers,
                           void *svm_pointers[], void *user_data),
                       void *user_data,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event,
                       cl_int cmd) try {

   // The pointer array and its count must be both present or both
   // absent.
   if (bool(num_svm_pointers) != bool(svm_pointers))
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);
   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   // Copy the pointer array: the caller's array need not outlive this
   // call, but the deferred event action does.
   std::vector<void *> svm_pointers_cpy(svm_pointers,
                                        svm_pointers + num_svm_pointers);
   if (!pfn_free_func) {
      if (!can_emulate) {
         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
         return CL_INVALID_VALUE;
      }
      // System SVM: plain free() releases the allocations.
      pfn_free_func = [](cl_command_queue, cl_uint num_svm_pointers,
                         void *svm_pointers[], void *) {
         for (void *p : range(svm_pointers, num_svm_pointers))
            free(p);
      };
   }

   // "mutable" because the callback receives a non-const pointer
   // array (svm_pointers_cpy.data()).
   auto hev = create<hard_event>(q, cmd, deps,
      [=](clover::event &) mutable {
         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
                       user_data);
      });

   ret_object(event, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
  819 
  820 CLOVER_API cl_int
  821 clEnqueueSVMFree(cl_command_queue d_q,
  822                  cl_uint num_svm_pointers,
  823                  void *svm_pointers[],
  824                  void (CL_CALLBACK *pfn_free_func) (
  825                     cl_command_queue queue, cl_uint num_svm_pointers,
  826                     void *svm_pointers[], void *user_data),
  827                  void *user_data,
  828                  cl_uint num_events_in_wait_list,
  829                  const cl_event *event_wait_list,
  830                  cl_event *event) {
  831 
  832    return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
  833                          pfn_free_func, user_data, num_events_in_wait_list,
  834                          event_wait_list, event, CL_COMMAND_SVM_FREE);
  835 }
  836 
  837 cl_int
  838 clover::EnqueueSVMMemcpy(cl_command_queue d_q,
  839                          cl_bool blocking_copy,
  840                          void *dst_ptr,
  841                          const void *src_ptr,
  842                          size_t size,
  843                          cl_uint num_events_in_wait_list,
  844                          const cl_event *event_wait_list,
  845                          cl_event *event,
  846                          cl_int cmd) try {
  847 
  848    if (dst_ptr == nullptr || src_ptr == nullptr)
  849       return CL_INVALID_VALUE;
  850 
  851    if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
  852                                reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
  853       return CL_MEM_COPY_OVERLAP;
  854 
  855    auto &q = obj(d_q);
  856    bool can_emulate = q.device().has_system_svm();
  857    auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
  858 
  859    validate_common(q, deps);
  860 
  861    if (can_emulate) {
  862       auto hev = create<hard_event>(q, cmd, deps,
  863          [=](clover::event &) {
  864             memcpy(dst_ptr, src_ptr, size);
  865          });
  866 
  867       if (blocking_copy)
  868          hev().wait();
  869       ret_object(event, hev);
  870       return CL_SUCCESS;
  871    }
  872 
  873    CLOVER_NOT_SUPPORTED_UNTIL("2.0");
  874    return CL_INVALID_VALUE;
  875 
  876 } catch (error &e) {
  877    return e.get();
  878 }
  879 
  880 CLOVER_API cl_int
  881 clEnqueueSVMMemcpy(cl_command_queue d_q,
  882                    cl_bool blocking_copy,
  883                    void *dst_ptr,
  884                    const void *src_ptr,
  885                    size_t size,
  886                    cl_uint num_events_in_wait_list,
  887                    const cl_event *event_wait_list,
  888                    cl_event *event) {
  889 
  890    return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
  891                            size, num_events_in_wait_list, event_wait_list,
  892                            event, CL_COMMAND_SVM_MEMCPY);
  893 }
  894 
  895 cl_int
  896 clover::EnqueueSVMMemFill(cl_command_queue d_q,
  897                           void *svm_ptr,
  898                           const void *pattern,
  899                           size_t pattern_size,
  900                           size_t size,
  901                           cl_uint num_events_in_wait_list,
  902                           const cl_event *event_wait_list,
  903                           cl_event *event,
  904                           cl_int cmd) try {
  905 
  906    if (svm_ptr == nullptr || pattern == nullptr ||
  907        !util_is_power_of_two_nonzero(pattern_size) ||
  908        pattern_size > 128 ||
  909        !ptr_is_aligned(svm_ptr, pattern_size) ||
  910        size % pattern_size)
  911       return CL_INVALID_VALUE;
  912 
  913    auto &q = obj(d_q);
  914    bool can_emulate = q.device().has_system_svm();
  915    auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
  916 
  917    validate_common(q, deps);
  918 
  919    if (can_emulate) {
  920       auto hev = create<hard_event>(q, cmd, deps,
  921          [=](clover::event &) {
  922             void *ptr = svm_ptr;
  923             for (size_t s = size; s; s -= pattern_size) {
  924                memcpy(ptr, pattern, pattern_size);
  925                ptr = static_cast<uint8_t*>(ptr) + pattern_size;
  926             }
  927          });
  928 
  929       ret_object(event, hev);
  930       return CL_SUCCESS;
  931    }
  932 
  933    CLOVER_NOT_SUPPORTED_UNTIL("2.0");
  934    return CL_INVALID_VALUE;
  935 
  936 } catch (error &e) {
  937    return e.get();
  938 }
  939 
  940 CLOVER_API cl_int
  941 clEnqueueSVMMemFill(cl_command_queue d_q,
  942                     void *svm_ptr,
  943                     const void *pattern,
  944                     size_t pattern_size,
  945                     size_t size,
  946                     cl_uint num_events_in_wait_list,
  947                     const cl_event *event_wait_list,
  948                     cl_event *event) {
  949 
  950    return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
  951                             size, num_events_in_wait_list, event_wait_list,
  952                             event, CL_COMMAND_SVM_MEMFILL);
  953 }
  954 
  955 cl_int
  956 clover::EnqueueSVMMap(cl_command_queue d_q,
  957                       cl_bool blocking_map,
  958                       cl_map_flags map_flags,
  959                       void *svm_ptr,
  960                       size_t size,
  961                       cl_uint num_events_in_wait_list,
  962                       const cl_event *event_wait_list,
  963                       cl_event *event,
  964                       cl_int cmd) try {
  965 
  966    if (svm_ptr == nullptr || size == 0)
  967       return CL_INVALID_VALUE;
  968 
  969    auto &q = obj(d_q);
  970    bool can_emulate = q.device().has_system_svm();
  971    auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
  972 
  973    validate_common(q, deps);
  974 
  975    if (can_emulate) {
  976       auto hev = create<hard_event>(q, cmd, deps,
  977          [](clover::event &) { });
  978 
  979       ret_object(event, hev);
  980       return CL_SUCCESS;
  981    }
  982 
  983    CLOVER_NOT_SUPPORTED_UNTIL("2.0");
  984    return CL_INVALID_VALUE;
  985 
  986 } catch (error &e) {
  987    return e.get();
  988 }
  989 
  990 CLOVER_API cl_int
  991 clEnqueueSVMMap(cl_command_queue d_q,
  992                 cl_bool blocking_map,
  993                 cl_map_flags map_flags,
  994                 void *svm_ptr,
  995                 size_t size,
  996                 cl_uint num_events_in_wait_list,
  997                 const cl_event *event_wait_list,
  998                 cl_event *event) {
  999 
 1000    return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
 1001                         num_events_in_wait_list, event_wait_list, event,
 1002                         CL_COMMAND_SVM_MAP);
 1003 }
 1004 
 1005 cl_int
 1006 clover::EnqueueSVMUnmap(cl_command_queue d_q,
 1007                         void *svm_ptr,
 1008                         cl_uint num_events_in_wait_list,
 1009                         const cl_event *event_wait_list,
 1010                         cl_event *event,
 1011                         cl_int cmd) try {
 1012 
 1013    if (svm_ptr == nullptr)
 1014       return CL_INVALID_VALUE;
 1015 
 1016    auto &q = obj(d_q);
 1017    bool can_emulate = q.device().has_system_svm();
 1018    auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
 1019 
 1020    validate_common(q, deps);
 1021 
 1022    if (can_emulate) {
 1023       auto hev = create<hard_event>(q, cmd, deps,
 1024          [](clover::event &) { });
 1025 
 1026       ret_object(event, hev);
 1027       return CL_SUCCESS;
 1028    }
 1029 
 1030    CLOVER_NOT_SUPPORTED_UNTIL("2.0");
 1031    return CL_INVALID_VALUE;
 1032 
 1033 } catch (error &e) {
 1034    return e.get();
 1035 }
 1036 
 1037 CLOVER_API cl_int
 1038 clEnqueueSVMUnmap(cl_command_queue d_q,
 1039                   void *svm_ptr,
 1040                   cl_uint num_events_in_wait_list,
 1041                   const cl_event *event_wait_list,
 1042                   cl_event *event) {
 1043 
 1044    return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
 1045                           event_wait_list, event, CL_COMMAND_SVM_UNMAP);
 1046 }
 1047 
CLOVER_API cl_int
clEnqueueSVMMigrateMem(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       const void **svm_pointers,
                       const size_t *sizes,
                       const cl_mem_migration_flags flags,
                       cl_uint  num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event) {
   // Unimplemented stub: SVM migration is a CL 2.1 feature clover does
   // not support; report that and fail.
   CLOVER_NOT_SUPPORTED_UNTIL("2.1");
   return CL_INVALID_VALUE;
}