"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c" (16 Sep 2020, 21668 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "radeon_pair_regalloc.c" see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright (C) 2009 Nicolai Haehnle.
    3  * Copyright 2011 Tom Stellard <tstellar@gmail.com>
    4  *
    5  * All Rights Reserved.
    6  *
    7  * Permission is hereby granted, free of charge, to any person obtaining
    8  * a copy of this software and associated documentation files (the
    9  * "Software"), to deal in the Software without restriction, including
   10  * without limitation the rights to use, copy, modify, merge, publish,
   11  * distribute, sublicense, and/or sell copies of the Software, and to
   12  * permit persons to whom the Software is furnished to do so, subject to
   13  * the following conditions:
   14  *
   15  * The above copyright notice and this permission notice (including the
   16  * next paragraph) shall be included in all copies or substantial
   17  * portions of the Software.
   18  *
   19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
   23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
   24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
   25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   26  *
   27  */
   28 
   29 #include "radeon_program_pair.h"
   30 
   31 #include <stdio.h>
   32 
   33 #include "main/glheader.h"
   34 #include "util/register_allocate.h"
   35 #include "util/u_memory.h"
   36 #include "util/ralloc.h"
   37 
   38 #include "r300_fragprog_swizzle.h"
   39 #include "radeon_compiler.h"
   40 #include "radeon_compiler_util.h"
   41 #include "radeon_dataflow.h"
   42 #include "radeon_list.h"
   43 #include "radeon_regalloc.h"
   44 #include "radeon_variable.h"
   45 
   46 #define VERBOSE 0
   47 
   48 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
   49 
   50 
   51 
   52 struct register_info {
   53     struct live_intervals Live[4];
   54 
   55     unsigned int Used:1;
   56     unsigned int Allocated:1;
   57     unsigned int File:3;
   58     unsigned int Index:RC_REGISTER_INDEX_BITS;
   59     unsigned int Writemask;
   60 };
   61 
   62 struct regalloc_state {
   63     struct radeon_compiler * C;
   64 
   65     struct register_info * Input;
   66     unsigned int NumInputs;
   67 
   68     struct register_info * Temporary;
   69     unsigned int NumTemporaries;
   70 
   71     unsigned int Simple;
   72     int LoopEnd;
   73 };
   74 
   75 struct rc_class {
   76     enum rc_reg_class ID;
   77 
   78     unsigned int WritemaskCount;
   79 
   80     /** List of writemasks that belong to this class */
   81     unsigned int Writemasks[3];
   82 
   83 
   84 };
   85 
   86 static const struct rc_class rc_class_list [] = {
   87     {RC_REG_CLASS_SINGLE, 3,
   88         {RC_MASK_X,
   89          RC_MASK_Y,
   90          RC_MASK_Z}},
   91     {RC_REG_CLASS_DOUBLE, 3,
   92         {RC_MASK_X | RC_MASK_Y,
   93          RC_MASK_X | RC_MASK_Z,
   94          RC_MASK_Y | RC_MASK_Z}},
   95     {RC_REG_CLASS_TRIPLE, 1,
   96         {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
   97          RC_MASK_NONE,
   98          RC_MASK_NONE}},
   99     {RC_REG_CLASS_ALPHA, 1,
  100         {RC_MASK_W,
  101          RC_MASK_NONE,
  102          RC_MASK_NONE}},
  103     {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3,
  104         {RC_MASK_X | RC_MASK_W,
  105          RC_MASK_Y | RC_MASK_W,
  106          RC_MASK_Z | RC_MASK_W}},
  107     {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3,
  108         {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
  109          RC_MASK_X | RC_MASK_Z | RC_MASK_W,
  110          RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
  111     {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1,
  112         {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
  113         RC_MASK_NONE,
  114         RC_MASK_NONE}},
  115     {RC_REG_CLASS_X, 1,
  116         {RC_MASK_X,
  117         RC_MASK_NONE,
  118         RC_MASK_NONE}},
  119     {RC_REG_CLASS_Y, 1,
  120         {RC_MASK_Y,
  121         RC_MASK_NONE,
  122         RC_MASK_NONE}},
  123     {RC_REG_CLASS_Z, 1,
  124         {RC_MASK_Z,
  125         RC_MASK_NONE,
  126         RC_MASK_NONE}},
  127     {RC_REG_CLASS_XY, 1,
  128         {RC_MASK_X | RC_MASK_Y,
  129         RC_MASK_NONE,
  130         RC_MASK_NONE}},
  131     {RC_REG_CLASS_YZ, 1,
  132         {RC_MASK_Y | RC_MASK_Z,
  133         RC_MASK_NONE,
  134         RC_MASK_NONE}},
  135     {RC_REG_CLASS_XZ, 1,
  136         {RC_MASK_X | RC_MASK_Z,
  137         RC_MASK_NONE,
  138         RC_MASK_NONE}},
  139     {RC_REG_CLASS_XW, 1,
  140         {RC_MASK_X | RC_MASK_W,
  141         RC_MASK_NONE,
  142         RC_MASK_NONE}},
  143     {RC_REG_CLASS_YW, 1,
  144         {RC_MASK_Y | RC_MASK_W,
  145         RC_MASK_NONE,
  146         RC_MASK_NONE}},
  147     {RC_REG_CLASS_ZW, 1,
  148         {RC_MASK_Z | RC_MASK_W,
  149         RC_MASK_NONE,
  150         RC_MASK_NONE}},
  151     {RC_REG_CLASS_XYW, 1,
  152         {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
  153         RC_MASK_NONE,
  154         RC_MASK_NONE}},
  155     {RC_REG_CLASS_YZW, 1,
  156         {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
  157         RC_MASK_NONE,
  158         RC_MASK_NONE}},
  159     {RC_REG_CLASS_XZW, 1,
  160         {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
  161         RC_MASK_NONE,
  162         RC_MASK_NONE}}
  163 };
  164 
  165 static void print_live_intervals(struct live_intervals * src)
  166 {
  167     if (!src || !src->Used) {
  168         DBG("(null)");
  169         return;
  170     }
  171 
  172     DBG("(%i,%i)", src->Start, src->End);
  173 }
  174 
  175 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
  176 {
  177     if (VERBOSE) {
  178         DBG("overlap_live_intervals: ");
  179         print_live_intervals(a);
  180         DBG(" to ");
  181         print_live_intervals(b);
  182         DBG("\n");
  183     }
  184 
  185     if (!a->Used || !b->Used) {
  186         DBG("    unused interval\n");
  187         return 0;
  188     }
  189 
  190     if (a->Start > b->Start) {
  191         if (a->Start < b->End) {
  192             DBG("    overlap\n");
  193             return 1;
  194         }
  195     } else if (b->Start > a->Start) {
  196         if (b->Start < a->End) {
  197             DBG("    overlap\n");
  198             return 1;
  199         }
  200     } else { /* a->Start == b->Start */
  201         if (a->Start != a->End && b->Start != b->End) {
  202             DBG("    overlap\n");
  203             return 1;
  204         }
  205     }
  206 
  207     DBG("    no overlap\n");
  208 
  209     return 0;
  210 }
  211 
  212 static void scan_read_callback(void * data, struct rc_instruction * inst,
  213         rc_register_file file, unsigned int index, unsigned int mask)
  214 {
  215     struct regalloc_state * s = data;
  216     struct register_info * reg;
  217     unsigned int i;
  218 
  219     if (file != RC_FILE_INPUT)
  220         return;
  221 
  222     s->Input[index].Used = 1;
  223     reg = &s->Input[index];
  224 
  225     for (i = 0; i < 4; i++) {
  226         if (!((mask >> i) & 0x1)) {
  227             continue;
  228         }
  229         reg->Live[i].Used = 1;
  230         reg->Live[i].Start = 0;
  231         reg->Live[i].End =
  232             s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
  233     }
  234 }
  235 
  236 static void remap_register(void * data, struct rc_instruction * inst,
  237         rc_register_file * file, unsigned int * index)
  238 {
  239     struct regalloc_state * s = data;
  240     const struct register_info * reg;
  241 
  242     if (*file == RC_FILE_TEMPORARY && s->Simple)
  243         reg = &s->Temporary[*index];
  244     else if (*file == RC_FILE_INPUT)
  245         reg = &s->Input[*index];
  246     else
  247         return;
  248 
  249     if (reg->Allocated) {
  250         *index = reg->Index;
  251     }
  252 }
  253 
  254 static void alloc_input_simple(void * data, unsigned int input,
  255                             unsigned int hwreg)
  256 {
  257     struct regalloc_state * s = data;
  258 
  259     if (input >= s->NumInputs)
  260         return;
  261 
  262     s->Input[input].Allocated = 1;
  263     s->Input[input].File = RC_FILE_TEMPORARY;
  264     s->Input[input].Index = hwreg;
  265 }
  266 
  267 /* This functions offsets the temporary register indices by the number
  268  * of input registers, because input registers are actually temporaries and
  269  * should not occupy the same space.
  270  *
  271  * This pass is supposed to be used to maintain correct allocation of inputs
  272  * if the standard register allocation is disabled. */
  273 static void do_regalloc_inputs_only(struct regalloc_state * s)
  274 {
  275     for (unsigned i = 0; i < s->NumTemporaries; i++) {
  276         s->Temporary[i].Allocated = 1;
  277         s->Temporary[i].File = RC_FILE_TEMPORARY;
  278         s->Temporary[i].Index = i + s->NumInputs;
  279     }
  280 }
  281 
  282 static unsigned int is_derivative(rc_opcode op)
  283 {
  284     return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
  285 }
  286 
  287 static int find_class(
  288     const struct rc_class * classes,
  289     unsigned int writemask,
  290     unsigned int max_writemask_count)
  291 {
  292     unsigned int i;
  293     for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
  294         unsigned int j;
  295         if (classes[i].WritemaskCount > max_writemask_count) {
  296             continue;
  297         }
  298         for (j = 0; j < 3; j++) {
  299             if (classes[i].Writemasks[j] == writemask) {
  300                 return i;
  301             }
  302         }
  303     }
  304     return -1;
  305 }
  306 
  307 struct variable_get_class_cb_data {
  308     unsigned int * can_change_writemask;
  309     unsigned int conversion_swizzle;
  310 };
  311 
  312 static void variable_get_class_read_cb(
  313     void * userdata,
  314     struct rc_instruction * inst,
  315     struct rc_pair_instruction_arg * arg,
  316     struct rc_pair_instruction_source * src)
  317 {
  318     struct variable_get_class_cb_data * d = userdata;
  319     unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
  320                             d->conversion_swizzle);
  321     if (!r300_swizzle_is_native_basic(new_swizzle)) {
  322         *d->can_change_writemask = 0;
  323     }
  324 }
  325 
  326 static enum rc_reg_class variable_get_class(
  327     struct rc_variable * variable,
  328     const struct rc_class * classes)
  329 {
  330     unsigned int i;
  331     unsigned int can_change_writemask= 1;
  332     unsigned int writemask = rc_variable_writemask_sum(variable);
  333     struct rc_list * readers = rc_variable_readers_union(variable);
  334     int class_index;
  335 
  336     if (!variable->C->is_r500) {
  337         struct rc_class c;
  338         struct rc_variable * var_ptr;
  339         /* The assumption here is that if an instruction has type
  340          * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
  341          * r300 and r400 can't swizzle the result of a TEX lookup. */
  342         for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
  343             if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
  344                 writemask = RC_MASK_XYZW;
  345             }
  346         }
  347 
  348         /* Check if it is possible to do swizzle packing for r300/r400
  349          * without creating non-native swizzles. */
  350         class_index = find_class(classes, writemask, 3);
  351         if (class_index < 0) {
  352             goto error;
  353         }
  354         c = classes[class_index];
  355         if (c.WritemaskCount == 1) {
  356             goto done;
  357         }
  358         for (i = 0; i < c.WritemaskCount; i++) {
  359             struct rc_variable * var_ptr;
  360             for (var_ptr = variable; var_ptr;
  361                         var_ptr = var_ptr->Friend) {
  362                 int j;
  363                 unsigned int conversion_swizzle =
  364                         rc_make_conversion_swizzle(
  365                         writemask, c.Writemasks[i]);
  366                 struct variable_get_class_cb_data d;
  367                 d.can_change_writemask = &can_change_writemask;
  368                 d.conversion_swizzle = conversion_swizzle;
  369                 /* If we get this far var_ptr->Inst has to
  370                  * be a pair instruction.  If variable or any
  371                  * of its friends are normal instructions,
  372                  * then the writemask will be set to RC_MASK_XYZW
  373                  * and the function will return before it gets
  374                  * here. */
  375                 rc_pair_for_all_reads_arg(var_ptr->Inst,
  376                     variable_get_class_read_cb, &d);
  377 
  378                 for (j = 0; j < var_ptr->ReaderCount; j++) {
  379                     unsigned int old_swizzle;
  380                     unsigned int new_swizzle;
  381                     struct rc_reader r = var_ptr->Readers[j];
  382                     if (r.Inst->Type ==
  383                             RC_INSTRUCTION_PAIR ) {
  384                         old_swizzle = r.U.P.Arg->Swizzle;
  385                     } else {
  386                         /* Source operands of TEX
  387                          * instructions can't be
  388                          * swizzle on r300/r400 GPUs.
  389                          */
  390                         can_change_writemask = 0;
  391                         break;
  392                     }
  393                     new_swizzle = rc_adjust_channels(
  394                         old_swizzle, conversion_swizzle);
  395                     if (!r300_swizzle_is_native_basic(
  396                                 new_swizzle)) {
  397                         can_change_writemask = 0;
  398                         break;
  399                     }
  400                 }
  401                 if (!can_change_writemask) {
  402                     break;
  403                 }
  404             }
  405             if (!can_change_writemask) {
  406                 break;
  407             }
  408         }
  409     }
  410 
  411     if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
  412         /* DDX/DDY seem to always fail when their writemasks are
  413          * changed.*/
  414         if (is_derivative(variable->Inst->U.P.RGB.Opcode)
  415             || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
  416             can_change_writemask = 0;
  417         }
  418     }
  419     for ( ; readers; readers = readers->Next) {
  420         struct rc_reader * r = readers->Item;
  421         if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
  422             if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
  423                 can_change_writemask = 0;
  424                 break;
  425             }
  426             /* DDX/DDY also fail when their swizzles are changed. */
  427             if (is_derivative(r->Inst->U.P.RGB.Opcode)
  428                 || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
  429                 can_change_writemask = 0;
  430                 break;
  431             }
  432         }
  433     }
  434 
  435     class_index = find_class(classes, writemask,
  436                         can_change_writemask ? 3 : 1);
  437 done:
  438     if (class_index > -1) {
  439         return classes[class_index].ID;
  440     } else {
  441 error:
  442         rc_error(variable->C,
  443                 "Could not find class for index=%u mask=%u\n",
  444                 variable->Dst.Index, writemask);
  445         return 0;
  446     }
  447 }
  448 
  449 static unsigned int overlap_live_intervals_array(
  450     struct live_intervals * a,
  451     struct live_intervals * b)
  452 {
  453     unsigned int a_chan, b_chan;
  454     for (a_chan = 0; a_chan < 4; a_chan++) {
  455         for (b_chan = 0; b_chan < 4; b_chan++) {
  456             if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
  457                     return 1;
  458             }
  459         }
  460     }
  461     return 0;
  462 }
  463 
  464 static unsigned int reg_get_index(int reg)
  465 {
  466     return reg / RC_MASK_XYZW;
  467 }
  468 
  469 static unsigned int reg_get_writemask(int reg)
  470 {
  471     return (reg % RC_MASK_XYZW) + 1;
  472 }
  473 
  474 static int get_reg_id(unsigned int index, unsigned int writemask)
  475 {
  476     assert(writemask);
  477     if (writemask == 0) {
  478         return 0;
  479     }
  480     return (index * RC_MASK_XYZW) + (writemask - 1);
  481 }
  482 
  483 #if VERBOSE
  484 static void print_reg(int reg)
  485 {
  486     unsigned int index = reg_get_index(reg);
  487     unsigned int mask = reg_get_writemask(reg);
  488     fprintf(stderr, "Temp[%u].%c%c%c%c", index,
  489         mask & RC_MASK_X ? 'x' : '_',
  490         mask & RC_MASK_Y ? 'y' : '_',
  491         mask & RC_MASK_Z ? 'z' : '_',
  492         mask & RC_MASK_W ? 'w' : '_');
  493 }
  494 #endif
  495 
  496 static void add_register_conflicts(
  497     struct ra_regs * regs,
  498     unsigned int max_temp_regs)
  499 {
  500     unsigned int index, a_mask, b_mask;
  501     for (index = 0; index < max_temp_regs; index++) {
  502         for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
  503             for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
  504                                 b_mask++) {
  505                 if (a_mask & b_mask) {
  506                     ra_add_reg_conflict(regs,
  507                         get_reg_id(index, a_mask),
  508                         get_reg_id(index, b_mask));
  509                 }
  510             }
  511         }
  512     }
  513 }
  514 
  515 static void do_advanced_regalloc(struct regalloc_state * s)
  516 {
  517 
  518     unsigned int i, input_node, node_count, node_index;
  519     unsigned int * node_classes;
  520     struct rc_instruction * inst;
  521     struct rc_list * var_ptr;
  522     struct rc_list * variables;
  523     struct ra_graph * graph;
  524     const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
  525 
  526     /* Get list of program variables */
  527     variables = rc_get_variables(s->C);
  528     node_count = rc_list_count(variables);
  529     node_classes = memory_pool_malloc(&s->C->Pool,
  530             node_count * sizeof(unsigned int));
  531 
  532     for (var_ptr = variables, node_index = 0; var_ptr;
  533                     var_ptr = var_ptr->Next, node_index++) {
  534         unsigned int class_index;
  535         /* Compute the live intervals */
  536         rc_variable_compute_live_intervals(var_ptr->Item);
  537 
  538         class_index = variable_get_class(var_ptr->Item, rc_class_list);
  539         node_classes[node_index] = ra_state->class_ids[class_index];
  540     }
  541 
  542 
  543     /* Calculate live intervals for input registers */
  544     for (inst = s->C->Program.Instructions.Next;
  545                     inst != &s->C->Program.Instructions;
  546                     inst = inst->Next) {
  547         rc_opcode op = rc_get_flow_control_inst(inst);
  548         if (op == RC_OPCODE_BGNLOOP) {
  549             struct rc_instruction * endloop =
  550                             rc_match_bgnloop(inst);
  551             if (endloop->IP > s->LoopEnd) {
  552                 s->LoopEnd = endloop->IP;
  553             }
  554         }
  555         rc_for_all_reads_mask(inst, scan_read_callback, s);
  556     }
  557 
  558     /* Compute the writemask for inputs. */
  559     for (i = 0; i < s->NumInputs; i++) {
  560         unsigned int chan, writemask = 0;
  561         for (chan = 0; chan < 4; chan++) {
  562             if (s->Input[i].Live[chan].Used) {
  563                 writemask |= (1 << chan);
  564             }
  565         }
  566         s->Input[i].Writemask = writemask;
  567     }
  568 
  569     graph = ra_alloc_interference_graph(ra_state->regs,
  570                         node_count + s->NumInputs);
  571 
  572     for (node_index = 0; node_index < node_count; node_index++) {
  573         ra_set_node_class(graph, node_index, node_classes[node_index]);
  574     }
  575 
  576     /* Build the interference graph */
  577     for (var_ptr = variables, node_index = 0; var_ptr;
  578                     var_ptr = var_ptr->Next,node_index++) {
  579         struct rc_list * a, * b;
  580         unsigned int b_index;
  581 
  582         for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
  583                         b; b = b->Next, b_index++) {
  584             struct rc_variable * var_a = a->Item;
  585             while (var_a) {
  586                 struct rc_variable * var_b = b->Item;
  587                 while (var_b) {
  588                     if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
  589                         ra_add_node_interference(graph,
  590                             node_index, b_index);
  591                     }
  592                     var_b = var_b->Friend;
  593                 }
  594                 var_a = var_a->Friend;
  595             }
  596         }
  597     }
  598 
  599     /* Add input registers to the interference graph */
  600     for (i = 0, input_node = 0; i< s->NumInputs; i++) {
  601         if (!s->Input[i].Writemask) {
  602             continue;
  603         }
  604         for (var_ptr = variables, node_index = 0;
  605                 var_ptr; var_ptr = var_ptr->Next, node_index++) {
  606             struct rc_variable * var = var_ptr->Item;
  607             if (overlap_live_intervals_array(s->Input[i].Live,
  608                                 var->Live)) {
  609                 ra_add_node_interference(graph, node_index,
  610                         node_count + input_node);
  611             }
  612         }
  613         /* Manually allocate a register for this input */
  614         ra_set_node_reg(graph, node_count + input_node, get_reg_id(
  615                 s->Input[i].Index, s->Input[i].Writemask));
  616         input_node++;
  617     }
  618 
  619     if (!ra_allocate(graph)) {
  620         rc_error(s->C, "Ran out of hardware temporaries\n");
  621         return;
  622     }
  623 
  624     /* Rewrite the registers */
  625     for (var_ptr = variables, node_index = 0; var_ptr;
  626                 var_ptr = var_ptr->Next, node_index++) {
  627         int reg = ra_get_node_reg(graph, node_index);
  628         unsigned int writemask = reg_get_writemask(reg);
  629         unsigned int index = reg_get_index(reg);
  630         struct rc_variable * var = var_ptr->Item;
  631 
  632         if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
  633             writemask = rc_variable_writemask_sum(var);
  634         }
  635 
  636         if (var->Dst.File == RC_FILE_INPUT) {
  637             continue;
  638         }
  639         rc_variable_change_dst(var, index, writemask);
  640     }
  641 
  642     ralloc_free(graph);
  643 }
  644 
  645 void rc_init_regalloc_state(struct rc_regalloc_state *s)
  646 {
  647     unsigned i, j, index;
  648     unsigned **ra_q_values;
  649 
  650     /* Pre-computed q values.  This array describes the maximum number of
  651      * a class's [row] registers that are in conflict with a single
  652      * register from another class [column].
  653      *
  654      * For example:
  655      * q_values[0][2] is 3, because a register from class 2
  656      * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from
  657      * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
  658      * and T0.z.
  659      *
  660      * q_values[2][0] is 1, because a register from class 0
  661      * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from
  662      * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz
  663      *
  664      * The q values for each register class [row] will never be greater
  665      * than the maximum number of writemask combinations for that class.
  666      *
  667      * For example:
  668      *
  669      * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination,
  670      * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater
  671      * than 1.
  672      */
  673     const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = {
  674     {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
  675     {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
  676     {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
  677     {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
  678     {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
  679     {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
  680     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
  681     {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
  682     {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
  683     {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
  684     {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
  685     {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
  686     {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
  687     {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
  688     {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
  689     {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
  690     {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
  691     {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
  692     {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
  693     };
  694 
  695     /* Allocate the main ra data structure */
  696     s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW,
  697                                    true);
  698 
  699     /* Create the register classes */
  700     for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
  701         const struct rc_class *class = &rc_class_list[i];
  702         s->class_ids[class->ID] = ra_alloc_reg_class(s->regs);
  703 
  704         /* Assign registers to the classes */
  705         for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) {
  706             for (j = 0; j < class->WritemaskCount; j++) {
  707                 int reg_id = get_reg_id(index,
  708                         class->Writemasks[j]);
  709                 ra_class_add_reg(s->regs,
  710                     s->class_ids[class->ID], reg_id);
  711             }
  712         }
  713     }
  714 
  715     /* Set the q values.  The q_values array is indexed based on
  716      * the rc_reg_class ID (RC_REG_CLASS_*) which might be
  717      * different than the ID assigned to that class by ra.
  718      * This why we need to manually construct this list.
  719      */
  720     ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *));
  721 
  722     for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
  723         ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned));
  724         for (j = 0; j < RC_REG_CLASS_COUNT; j++) {
  725             ra_q_values[s->class_ids[i]][s->class_ids[j]] =
  726                             q_values[i][j];
  727         }
  728     }
  729 
  730     /* Add register conflicts */
  731     add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS);
  732 
  733     ra_set_finalize(s->regs, ra_q_values);
  734 
  735     for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
  736         FREE(ra_q_values[i]);
  737     }
  738     FREE(ra_q_values);
  739 }
  740 
  741 void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
  742 {
  743     ralloc_free(s->regs);
  744 }
  745 
  746 /**
  747  * @param user This parameter should be a pointer to an integer value.  If this
  748  * integer value is zero, then a simple register allocator will be used that
  749  * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
  750  * user is non-zero, then the regular register allocator will be used
  751  * (\sa do_regalloc).
  752   */
  753 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
  754 {
  755     struct r300_fragment_program_compiler *c =
  756                 (struct r300_fragment_program_compiler*)cc;
  757     struct regalloc_state s;
  758     int * do_full_regalloc = (int*)user;
  759 
  760     memset(&s, 0, sizeof(s));
  761     s.C = cc;
  762     s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
  763     s.Input = memory_pool_malloc(&cc->Pool,
  764             s.NumInputs * sizeof(struct register_info));
  765     memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
  766 
  767     s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
  768     s.Temporary = memory_pool_malloc(&cc->Pool,
  769             s.NumTemporaries * sizeof(struct register_info));
  770     memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
  771 
  772     rc_recompute_ips(s.C);
  773 
  774     c->AllocateHwInputs(c, &alloc_input_simple, &s);
  775     if (*do_full_regalloc) {
  776         do_advanced_regalloc(&s);
  777     } else {
  778         s.Simple = 1;
  779         do_regalloc_inputs_only(&s);
  780     }
  781 
  782     /* Rewrite inputs and if we are doing the simple allocation, rewrite
  783      * temporaries too. */
  784     for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
  785                     inst != &s.C->Program.Instructions;
  786                     inst = inst->Next) {
  787         rc_remap_registers(inst, &remap_register, &s);
  788     }
  789 }