"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/compiler/glsl/lower_int64.cpp" (16 Sep 2020, 11894 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "lower_int64.cpp" see the Fossies "Dox" file reference documentation.

/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_int64.cpp
 *
 * Lower 64-bit operations to 32-bit operations.  Each 64-bit value is lowered
 * to a uvec2.  For each operation that can be lowered, there is a function
 * called __builtin_foo with the same number of parameters that takes uvec2
 * sources and produces uvec2 results.  An operation like
 *
 *     uint64_t(x) * uint64_t(y)
 *
 * becomes
 *
 *     packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
 */
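
/* As an illustration of the 32-bit arithmetic such a builtin has to perform
 * (a sketch only; this is illustrative GLSL, not necessarily the exact IR
 * that Mesa's generator emits), a 64-bit unsigned multiply of two
 * uvec2-encoded values, with .x holding the low 32 bits and .y the high 32
 * bits, can be written as
 *
 *     uvec2 umul64(uvec2 a, uvec2 b)
 *     {
 *        uvec2 result;
 *        umulExtended(a.x, b.x, result.y, result.x);
 *        result.y += a.x * b.y + a.y * b.x;
 *        return result;
 *     }
 *
 * umulExtended() is the standard GLSL 4.00 built-in that produces the full
 * 64-bit product of two 32-bit values as separate high and low words.
 */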

#include "main/macros.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_rvalue_visitor.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "util/hash_table.h"
#include "builtin_functions.h"

typedef ir_function_signature *(*function_generator)(void *mem_ctx,
                                                     builtin_available_predicate avail);

using namespace ir_builder;

namespace lower_64bit {
void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);

ir_dereference_variable *compact_destination(ir_factory &,
                                             const glsl_type *type,
                                             ir_variable *result[4]);

ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
                                     ir_expression *ir,
                                     ir_function_signature *callee);
} /* namespace lower_64bit */

using namespace lower_64bit;

namespace {

class lower_64bit_visitor : public ir_rvalue_visitor {
public:
   lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
      : progress(false), lower(lower),
        function_list(), added_functions(&function_list, mem_ctx)
   {
      functions = _mesa_hash_table_create(mem_ctx,
                                          _mesa_hash_string,
                                          _mesa_key_string_equal);

      foreach_in_list(ir_instruction, node, instructions) {
         ir_function *const f = node->as_function();

         if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
            continue;

         add_function(f);
      }
   }

   ~lower_64bit_visitor()
   {
      _mesa_hash_table_destroy(functions, NULL);
   }

   void handle_rvalue(ir_rvalue **rvalue);

   void add_function(ir_function *f)
   {
      _mesa_hash_table_insert(functions, f->name, f);
   }

   ir_function *find_function(const char *name)
   {
      struct hash_entry *const entry =
         _mesa_hash_table_search(functions, name);

      return entry != NULL ? (ir_function *) entry->data : NULL;
   }

   bool progress;

private:
   unsigned lower; /**< Bitfield of which operations to lower */

   /** Hashtable containing all of the known functions in the IR */
   struct hash_table *functions;

public:
   exec_list function_list;

private:
   ir_factory added_functions;

   ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
                        function_generator generator);
};

} /* anonymous namespace */

/**
 * Determine if a particular type of lowering should occur
 */
#define lowering(x) (this->lower & x)

bool
lower_64bit_integer_instructions(exec_list *instructions,
                                 unsigned what_to_lower)
{
   if (instructions->is_empty())
      return false;

   ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
   void *const mem_ctx = ralloc_parent(first_inst);
   lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);

   visit_list_elements(&v, instructions);

   if (v.progress && !v.function_list.is_empty()) {
      /* Move all of the nodes from function_list to the head of the incoming
       * instruction list: splice the generated functions in between the head
       * sentinel and the first original instruction.
       */
      exec_node *const before = &instructions->head_sentinel;
      exec_node *const after = instructions->head_sentinel.next;
      exec_node *const head = v.function_list.head_sentinel.next;
      exec_node *const tail = v.function_list.tail_sentinel.prev;

      before->next = head;
      head->prev = before;

      after->prev = tail;
      tail->next = after;
   }

   return v.progress;
}
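
/* A typical caller runs this pass from the GLSL compiler's lowering loop,
 * selecting operations with the MUL64, DIV64, MOD64, and SIGN64 bits that
 * handle_rvalue() below checks.  A minimal caller sketch (the 'options'
 * field name here is hypothetical):
 *
 *     if (options->LowerInt64)
 *        progress |= lower_64bit_integer_instructions(
 *           ir, MUL64 | DIV64 | MOD64 | SIGN64);
 */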

/**
 * Expand individual 64-bit values to uvec2 values
 *
 * Each operation is in one of a few forms.
 *
 *     vector op vector
 *     vector op scalar
 *     scalar op vector
 *     scalar op scalar
 *
 * In the 'vector op vector' case, the two vectors must have the same size.
 * In a way, the 'scalar op scalar' form is a special case of the 'vector op
 * vector' form.
 *
 * This function generates a new set of uvec2 values for each element of a
 * single operand.  If the operand is a scalar, the uvec2 is replicated
 * multiple times.  A value like
 *
 *     u64vec3(a) + u64vec3(b)
 *
 * becomes
 *
 *     u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
 *     uvec2 tmp1 = unpackUint2x32(tmp0.x);
 *     uvec2 tmp2 = unpackUint2x32(tmp0.y);
 *     uvec2 tmp3 = unpackUint2x32(tmp0.z);
 *
 * and the returned operands array contains ir_variable pointers to
 *
 *     { tmp1, tmp2, tmp3, tmp1 }
 */
void
lower_64bit::expand_source(ir_factory &body,
                           ir_rvalue *val,
                           ir_variable **expanded_src)
{
   assert(val->type->is_integer_64());

   ir_variable *const temp = body.make_temp(val->type, "tmp");

   body.emit(assign(temp, val));

   const ir_expression_operation unpack_opcode =
      val->type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;

   const glsl_type *const type =
      val->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   unsigned i;
   for (i = 0; i < val->type->vector_elements; i++) {
      expanded_src[i] = body.make_temp(type, "expanded_64bit_source");

      body.emit(assign(expanded_src[i],
                       expr(unpack_opcode, swizzle(temp, i, 1))));
   }

   for (/* empty */; i < 4; i++)
      expanded_src[i] = expanded_src[0];
}
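
/* For example, expanding a single uint64_t scalar "a" emits (in pseudo-GLSL)
 *
 *     uint64_t tmp = a;
 *     uvec2 expanded_64bit_source = unpackUint2x32(tmp);
 *
 * and, because a scalar has only one vector element, the trailing loop above
 * replicates slot 0 so that expanded_src holds four pointers to the same
 * uvec2 temporary.
 */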

/**
 * Convert a series of uvec2 results into a single 64-bit integer vector
 */
ir_dereference_variable *
lower_64bit::compact_destination(ir_factory &body,
                                 const glsl_type *type,
                                 ir_variable *result[4])
{
   const ir_expression_operation pack_opcode =
      type->base_type == GLSL_TYPE_UINT64
      ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;

   ir_variable *const compacted_result =
      body.make_temp(type, "compacted_64bit_result");

   for (unsigned i = 0; i < type->vector_elements; i++) {
      body.emit(assign(compacted_result,
                       expr(pack_opcode, result[i]),
                       1U << i));
   }

   void *const mem_ctx = ralloc_parent(compacted_result);
   return new(mem_ctx) ir_dereference_variable(compacted_result);
}
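
/* For a u64vec3 result this emits three masked assignments (pseudo-GLSL; the
 * 1U << i argument to assign() is a single-component write mask):
 *
 *     compacted_64bit_result.x = packUint2x32(result[0]);
 *     compacted_64bit_result.y = packUint2x32(result[1]);
 *     compacted_64bit_result.z = packUint2x32(result[2]);
 */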

ir_rvalue *
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
                                       ir_expression *ir,
                                       ir_function_signature *callee)
{
   const unsigned num_operands = ir->num_operands;
   ir_variable *src[4][4];
   ir_variable *dst[4];
   void *const mem_ctx = ralloc_parent(ir);
   exec_list instructions;
   unsigned source_components = 0;
   const glsl_type *const result_type =
      ir->type->base_type == GLSL_TYPE_UINT64
      ? glsl_type::uvec2_type : glsl_type::ivec2_type;

   ir_factory body(&instructions, mem_ctx);

   for (unsigned i = 0; i < num_operands; i++) {
      expand_source(body, ir->operands[i], src[i]);

      if (ir->operands[i]->type->vector_elements > source_components)
         source_components = ir->operands[i]->type->vector_elements;
   }

   for (unsigned i = 0; i < source_components; i++) {
      dst[i] = body.make_temp(result_type, "expanded_64bit_result");

      exec_list parameters;

      for (unsigned j = 0; j < num_operands; j++)
         parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));

      ir_dereference_variable *const return_deref =
         new(mem_ctx) ir_dereference_variable(dst[i]);

      ir_call *const c = new(mem_ctx) ir_call(callee,
                                              return_deref,
                                              &parameters);

      body.emit(c);
   }

   ir_rvalue *const rv = compact_destination(body, ir->type, dst);

   /* Splice all of the generated nodes into the IR stream between base_ir
    * and the instruction before it.
    */
   exec_node *const after = base_ir;
   exec_node *const before = after->prev;
   exec_node *const head = instructions.head_sentinel.next;
   exec_node *const tail = instructions.tail_sentinel.prev;

   before->next = head;
   head->prev = before;

   after->prev = tail;
   tail->next = after;

   return rv;
}
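
/* Putting the helpers together, lowering a u64vec2 division
 *
 *     u64vec2 z = x / y;
 *
 * conceptually produces (pseudo-GLSL, one builtin call per component):
 *
 *     uvec2 x0 = unpackUint2x32(x.x), x1 = unpackUint2x32(x.y);
 *     uvec2 y0 = unpackUint2x32(y.x), y1 = unpackUint2x32(y.y);
 *     uvec2 d0 = __builtin_udiv64(x0, y0);
 *     uvec2 d1 = __builtin_udiv64(x1, y1);
 *     u64vec2 z = u64vec2(packUint2x32(d0), packUint2x32(d1));
 */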

ir_rvalue *
lower_64bit_visitor::handle_op(ir_expression *ir,
                               const char *function_name,
                               function_generator generator)
{
   for (unsigned i = 0; i < ir->num_operands; i++)
      if (!ir->operands[i]->type->is_integer_64())
         return ir;

   /* Get a handle to the correct ir_function_signature for the core
    * operation.
    */
   ir_function_signature *callee = NULL;
   ir_function *f = find_function(function_name);

   if (f != NULL) {
      callee = (ir_function_signature *) f->signatures.get_head();
      assert(callee != NULL && callee->ir_type == ir_type_function_signature);
   } else {
      f = new(base_ir) ir_function(function_name);
      callee = generator(base_ir, NULL);

      f->add_signature(callee);

      add_function(f);
   }

   this->progress = true;
   return lower_op_to_function_call(this->base_ir, ir, callee);
}

void
lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
      return;

   ir_expression *const ir = (*rvalue)->as_expression();
   assert(ir != NULL);

   switch (ir->operation) {
   case ir_unop_sign:
      if (lowering(SIGN64)) {
         *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
      }
      break;

   case ir_binop_div:
      if (lowering(DIV64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
         } else {
            *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
         }
      }
      break;

   case ir_binop_mod:
      if (lowering(MOD64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
         } else {
            *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
         }
      }
      break;

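   /* Unlike division and modulus, multiplication needs no signed variant: in
    * two's complement arithmetic the low 64 bits of the product are the same
    * for signed and unsigned operands, so __builtin_umul64 serves both
    * int64_t and uint64_t.
    */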
   case ir_binop_mul:
      if (lowering(MUL64)) {
         *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
      }
      break;

   default:
      break;
   }
}