"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c" (16 Sep 2020, 20866 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "r500_fragprog_emit.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright (C) 2005 Ben Skeggs.
    3  *
    4  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
    5  * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
    6  *
    7  * All Rights Reserved.
    8  *
    9  * Permission is hereby granted, free of charge, to any person obtaining
   10  * a copy of this software and associated documentation files (the
   11  * "Software"), to deal in the Software without restriction, including
   12  * without limitation the rights to use, copy, modify, merge, publish,
   13  * distribute, sublicense, and/or sell copies of the Software, and to
   14  * permit persons to whom the Software is furnished to do so, subject to
   15  * the following conditions:
   16  *
   17  * The above copyright notice and this permission notice (including the
   18  * next paragraph) shall be included in all copies or substantial
   19  * portions of the Software.
   20  *
   21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   24  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
   25  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
   26  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
   27  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   28  *
   29  */
   30 
   31 /**
   32  * \file
   33  *
   34  * \author Ben Skeggs <darktama@iinet.net.au>
   35  *
   36  * \author Jerome Glisse <j.glisse@gmail.com>
   37  *
   38  * \author Corbin Simpson <MostAwesomeDude@gmail.com>
   39  *
   40  */
   41 
   42 #include "r500_fragprog.h"
   43 
   44 #include "r300_reg.h"
   45 
   46 #include "radeon_program_pair.h"
   47 
/* Declares a local pointer `code` to the R500 hardware program stored in the
 * compiler. Expands to a declaration, so it must appear where declarations
 * are allowed, and it requires a variable named `c` to be in scope. */
#define PROG_CODE \
    struct r500_fragment_program_code *code = &c->code->code.r500

/* Reports an error through rc_error(), prefixing the message with the current
 * file and function name and appending a newline. Like PROG_CODE, it relies
 * on a variable named `c` being in scope at the point of use. */
#define error(fmt, args...) do {            \
        rc_error(&c->Base, "%s::%s(): " fmt "\n",   \
            __FILE__, __FUNCTION__, ##args);    \
    } while(0)
   55 
   56 
/* Instruction slots belonging to one IF/ELSE/ENDIF construct. The flow
 * control words for all three are only written once ENDIF is reached. */
struct branch_info {
    int If;     /* slot of the IF instruction */
    int Else;   /* slot of the ELSE instruction, or -1 if there is none */
    int Endif;  /* slot of the ENDIF instruction, -1 until it is emitted */
};
   62 
/* Bookkeeping for one open loop. BRK and CONT instructions are recorded here
 * so that their jump addresses can be patched when ENDLOOP is emitted. */
struct r500_loop_info {
    int BgnLoop;        /* slot of the BGNLOOP instruction */

    int BranchDepth;    /* branch depth at the time the loop was opened */
    int * Brks;         /* slots of the BRK instructions seen so far */
    int BrkCount;       /* number of entries used in Brks */
    int BrkReserved;    /* bookkeeping for memory_pool_array_reserve;
                         * presumably the allocated capacity of Brks */

    int * Conts;        /* slots of the CONT instructions seen so far */
    int ContCount;      /* number of entries used in Conts */
    int ContReserved;   /* bookkeeping for memory_pool_array_reserve;
                         * presumably the allocated capacity of Conts */
};
   75 
/* State carried across flow control instructions while emitting a program. */
struct emit_state {
    struct radeon_compiler * C;                 /* used for error reporting and pool allocation */
    struct r500_fragment_program_code * Code;   /* hardware program being written */

    struct branch_info * Branches;              /* stack of currently open IF constructs */
    unsigned int CurrentBranchDepth;            /* number of open IF constructs */
    unsigned int BranchesReserved;              /* bookkeeping for memory_pool_array_reserve;
                                                 * presumably the capacity of Branches */

    struct r500_loop_info * Loops;              /* stack of currently open loops */
    unsigned int CurrentLoopDepth;              /* number of open loops */
    unsigned int LoopsReserved;                 /* bookkeeping for memory_pool_array_reserve;
                                                 * non-zero is used as "program contains loops" */

    unsigned int MaxBranchDepth;                /* largest CurrentBranchDepth reached so far */

};
   91 
   92 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
   93 {
   94     switch(opcode) {
   95     case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
   96     case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
   97     case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
   98     case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
   99     case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
  100     case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
  101     case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
  102     default:
  103         error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
  104         /* fall through */
  105     case RC_OPCODE_NOP:
  106         /* fall through */
  107     case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
  108     case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
  109     case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
  110     case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
  111     }
  112 }
  113 
  114 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
  115 {
  116     switch(opcode) {
  117     case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
  118     case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
  119     case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
  120     case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
  121     case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
  122     case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
  123     case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
  124     case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
  125     case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
  126     case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
  127     default:
  128         error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
  129         /* fall through */
  130     case RC_OPCODE_NOP:
  131         /* fall through */
  132     case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
  133     case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
  134     case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
  135     case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
  136     case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
  137     case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
  138     }
  139 }
  140 
  141 static unsigned int fix_hw_swizzle(unsigned int swz)
  142 {
  143     switch (swz) {
  144         case RC_SWIZZLE_ZERO:
  145         case RC_SWIZZLE_UNUSED:
  146             swz = 4;
  147             break;
  148         case RC_SWIZZLE_HALF:
  149             swz = 5;
  150             break;
  151         case RC_SWIZZLE_ONE:
  152             swz = 6;
  153             break;
  154     }
  155 
  156     return swz;
  157 }
  158 
  159 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
  160 {
  161     unsigned int t = inst->RGB.Arg[arg].Source;
  162     int comp;
  163     t |= inst->RGB.Arg[arg].Negate << 11;
  164     t |= inst->RGB.Arg[arg].Abs << 12;
  165 
  166     for(comp = 0; comp < 3; ++comp)
  167         t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
  168 
  169     return t;
  170 }
  171 
  172 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
  173 {
  174     unsigned int t = inst->Alpha.Arg[i].Source;
  175     t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
  176     t |= inst->Alpha.Arg[i].Negate << 5;
  177     t |= inst->Alpha.Arg[i].Abs << 6;
  178     return t;
  179 }
  180 
  181 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
  182 {
  183     switch(func) {
  184     case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
  185     case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
  186     case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
  187     case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
  188     default:
  189         rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
  190         return 0;
  191     }
  192 }
  193 
  194 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
  195 {
  196     if (index > code->max_temp_idx)
  197         code->max_temp_idx = index;
  198 }
  199 
  200 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
  201 {
  202     /* From docs:
  203      *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
  204      * MSB = 1 << 7 */
  205     if (!src.Used)
  206         return 1 << 7;
  207 
  208     if (src.File == RC_FILE_CONSTANT) {
  209         return src.Index | R500_RGB_ADDR0_CONST;
  210     } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
  211         use_temporary(code, src.Index);
  212         return src.Index;
  213     } else if (src.File == RC_FILE_INLINE) {
  214         return src.Index | (1 << 7);
  215     }
  216 
  217     return 0;
  218 }
  219 
  220 /**
  221  * NOP the specified instruction if it is not a texture lookup.
  222  */
  223 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
  224 {
  225     PROG_CODE;
  226 
  227     if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
  228         code->inst[ip].inst0 |= R500_INST_NOP;
  229     }
  230 }
  231 
  232 /**
  233  * Emit a paired ALU instruction.
  234  */
  235 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
  236 {
  237     int ip;
  238     PROG_CODE;
  239 
  240     if (code->inst_end >= c->Base.max_alu_insts-1) {
  241         error("emit_alu: Too many instructions");
  242         return;
  243     }
  244 
  245     ip = ++code->inst_end;
  246 
  247     /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
  248     if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
  249         inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
  250         if (ip > 0) {
  251             alu_nop(c, ip - 1);
  252         }
  253     }
  254 
  255     code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
  256     code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
  257 
  258     if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
  259         code->inst[ip].inst0 = R500_INST_TYPE_OUT;
  260         if (inst->WriteALUResult) {
  261             error("Cannot write output and ALU result at the same time");
  262             return;
  263         }
  264     } else {
  265         code->inst[ip].inst0 = R500_INST_TYPE_ALU;
  266     }
  267     code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
  268 
  269     code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
  270     code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
  271     code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
  272     if (inst->Nop) {
  273         code->inst[ip].inst0 |= R500_INST_NOP;
  274     }
  275     if (inst->Alpha.DepthWriteMask) {
  276         code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
  277         c->code->writes_depth = 1;
  278     }
  279 
  280     code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
  281     code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
  282     use_temporary(code, inst->Alpha.DestIndex);
  283     use_temporary(code, inst->RGB.DestIndex);
  284 
  285     if (inst->RGB.Saturate)
  286         code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
  287     if (inst->Alpha.Saturate)
  288         code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
  289 
  290     /* Set the presubtract operation. */
  291     switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
  292         case RC_PRESUB_BIAS:
  293             code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
  294             break;
  295         case RC_PRESUB_SUB:
  296             code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
  297             break;
  298         case RC_PRESUB_ADD:
  299             code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
  300             break;
  301         case RC_PRESUB_INV:
  302             code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
  303             break;
  304         default:
  305             break;
  306     }
  307     switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
  308         case RC_PRESUB_BIAS:
  309             code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
  310             break;
  311         case RC_PRESUB_SUB:
  312             code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
  313             break;
  314         case RC_PRESUB_ADD:
  315             code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
  316             break;
  317         case RC_PRESUB_INV:
  318             code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
  319             break;
  320         default:
  321             break;
  322     }
  323 
  324     /* Set the output modifier */
  325     code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
  326     code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
  327 
  328     code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
  329     code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
  330     code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
  331 
  332     code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
  333     code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
  334     code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
  335 
  336     code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
  337     code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
  338     code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
  339 
  340     code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
  341     code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
  342     code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
  343 
  344     code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
  345     code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
  346 
  347     if (inst->WriteALUResult) {
  348         code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
  349 
  350         if (inst->WriteALUResult == RC_ALURESULT_X)
  351             code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
  352         else
  353             code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
  354 
  355         code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
  356     }
  357 }
  358 
  359 static unsigned int translate_strq_swizzle(unsigned int swizzle)
  360 {
  361     unsigned int swiz = 0;
  362     int i;
  363     for (i = 0; i < 4; i++)
  364         swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
  365     return swiz;
  366 }
  367 
  368 /**
  369  * Emit a single TEX instruction
  370  */
  371 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
  372 {
  373     int ip;
  374     PROG_CODE;
  375 
  376     if (code->inst_end >= c->Base.max_alu_insts-1) {
  377         error("emit_tex: Too many instructions");
  378         return 0;
  379     }
  380 
  381     ip = ++code->inst_end;
  382 
  383     code->inst[ip].inst0 = R500_INST_TYPE_TEX
  384         | (inst->DstReg.WriteMask << 11)
  385         | (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
  386     code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
  387         | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
  388 
  389     if (inst->TexSrcTarget == RC_TEXTURE_RECT)
  390         code->inst[ip].inst1 |= R500_TEX_UNSCALED;
  391 
  392     switch (inst->Opcode) {
  393     case RC_OPCODE_KIL:
  394         code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
  395         break;
  396     case RC_OPCODE_TEX:
  397         code->inst[ip].inst1 |= R500_TEX_INST_LD;
  398         break;
  399     case RC_OPCODE_TXB:
  400         code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
  401         break;
  402     case RC_OPCODE_TXP:
  403         code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
  404         break;
  405     case RC_OPCODE_TXD:
  406         code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
  407         break;
  408     case RC_OPCODE_TXL:
  409         code->inst[ip].inst1 |= R500_TEX_INST_LOD;
  410         break;
  411     default:
  412         error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
  413     }
  414 
  415     use_temporary(code, inst->SrcReg[0].Index);
  416     if (inst->Opcode != RC_OPCODE_KIL)
  417         use_temporary(code, inst->DstReg.Index);
  418 
  419     code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
  420         | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
  421         | R500_TEX_DST_ADDR(inst->DstReg.Index)
  422         | (GET_SWZ(inst->TexSwizzle, 0) << 24)
  423         | (GET_SWZ(inst->TexSwizzle, 1) << 26)
  424         | (GET_SWZ(inst->TexSwizzle, 2) << 28)
  425         | (GET_SWZ(inst->TexSwizzle, 3) << 30)
  426         ;
  427 
  428     if (inst->Opcode == RC_OPCODE_TXD) {
  429         use_temporary(code, inst->SrcReg[1].Index);
  430         use_temporary(code, inst->SrcReg[2].Index);
  431 
  432         /* DX and DY parameters are specified in a separate register. */
  433         code->inst[ip].inst3 =
  434             R500_DX_ADDR(inst->SrcReg[1].Index) |
  435             (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
  436             R500_DY_ADDR(inst->SrcReg[2].Index) |
  437             (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
  438     }
  439 
  440     return 1;
  441 }
  442 
  443 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
  444 {
  445     unsigned int newip;
  446 
  447     if (s->Code->inst_end >= s->C->max_alu_insts-1) {
  448         rc_error(s->C, "emit_tex: Too many instructions");
  449         return;
  450     }
  451 
  452     newip = ++s->Code->inst_end;
  453 
  454     /* Currently all loops use the same integer constant to intialize
  455      * the loop variables. */
  456     if(!s->Code->int_constants[0]) {
  457         s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
  458         s->Code->int_constant_count = 1;
  459     }
  460     s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
  461 
  462     switch(inst->U.I.Opcode){
  463     struct branch_info * branch;
  464     struct r500_loop_info * loop;
  465     case RC_OPCODE_BGNLOOP:
  466         memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
  467             s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
  468 
  469         loop = &s->Loops[s->CurrentLoopDepth++];
  470         memset(loop, 0, sizeof(struct r500_loop_info));
  471         loop->BranchDepth = s->CurrentBranchDepth;
  472         loop->BgnLoop = newip;
  473 
  474         s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
  475             | R500_FC_JUMP_FUNC(0x00)
  476             | R500_FC_IGNORE_UNCOVERED
  477             ;
  478         break;
  479     case RC_OPCODE_BRK:
  480         loop = &s->Loops[s->CurrentLoopDepth - 1];
  481         memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
  482                     loop->BrkCount, loop->BrkReserved, 1);
  483 
  484         loop->Brks[loop->BrkCount++] = newip;
  485         s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
  486             | R500_FC_JUMP_FUNC(0xff)
  487             | R500_FC_B_OP1_DECR
  488             | R500_FC_B_POP_CNT(
  489                 s->CurrentBranchDepth - loop->BranchDepth)
  490             | R500_FC_IGNORE_UNCOVERED
  491             ;
  492         break;
  493 
  494     case RC_OPCODE_CONT:
  495         loop = &s->Loops[s->CurrentLoopDepth - 1];
  496         memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
  497                     loop->ContCount, loop->ContReserved, 1);
  498         loop->Conts[loop->ContCount++] = newip;
  499         s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
  500             | R500_FC_JUMP_FUNC(0xff)
  501             | R500_FC_B_OP1_DECR
  502             | R500_FC_B_POP_CNT(
  503                 s->CurrentBranchDepth - loop->BranchDepth)
  504             | R500_FC_IGNORE_UNCOVERED
  505             ;
  506         break;
  507 
  508     case RC_OPCODE_ENDLOOP:
  509     {
  510         loop = &s->Loops[s->CurrentLoopDepth - 1];
  511         /* Emit ENDLOOP */
  512         s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
  513             | R500_FC_JUMP_FUNC(0xff)
  514             | R500_FC_JUMP_ANY
  515             | R500_FC_IGNORE_UNCOVERED
  516             ;
  517         /* The constant integer at index 0 is used by all loops. */
  518         s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
  519             | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
  520             ;
  521 
  522         /* Set jump address and int constant for BGNLOOP */
  523         s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
  524             | R500_FC_JUMP_ADDR(newip)
  525             ;
  526 
  527         /* Set jump address for the BRK instructions. */
  528         while(loop->BrkCount--) {
  529             s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
  530                         R500_FC_JUMP_ADDR(newip + 1);
  531         }
  532 
  533         /* Set jump address for CONT instructions. */
  534         while(loop->ContCount--) {
  535             s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
  536                         R500_FC_JUMP_ADDR(newip);
  537         }
  538         s->CurrentLoopDepth--;
  539         break;
  540     }
  541     case RC_OPCODE_IF:
  542         if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
  543             rc_error(s->C, "Branch depth exceeds hardware limit");
  544             return;
  545         }
  546         memory_pool_array_reserve(&s->C->Pool, struct branch_info,
  547                 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
  548 
  549         branch = &s->Branches[s->CurrentBranchDepth++];
  550         branch->If = newip;
  551         branch->Else = -1;
  552         branch->Endif = -1;
  553 
  554         if (s->CurrentBranchDepth > s->MaxBranchDepth)
  555             s->MaxBranchDepth = s->CurrentBranchDepth;
  556 
  557         /* actual instruction is filled in at ENDIF time */
  558         break;
  559     
  560     case RC_OPCODE_ELSE:
  561         if (!s->CurrentBranchDepth) {
  562             rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
  563             return;
  564         }
  565 
  566         branch = &s->Branches[s->CurrentBranchDepth - 1];
  567         branch->Else = newip;
  568 
  569         /* actual instruction is filled in at ENDIF time */
  570         break;
  571 
  572     case RC_OPCODE_ENDIF:
  573         if (!s->CurrentBranchDepth) {
  574             rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
  575             return;
  576         }
  577 
  578         branch = &s->Branches[s->CurrentBranchDepth - 1];
  579         branch->Endif = newip;
  580 
  581         s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
  582             | R500_FC_A_OP_NONE /* no address stack */
  583             | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
  584             | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
  585             | R500_FC_B_OP1_NONE /* no branch counter if stay */
  586             | R500_FC_B_POP_CNT(1)
  587             ;
  588         s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
  589         s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
  590             | R500_FC_A_OP_NONE /* no address stack */
  591             | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
  592             | R500_FC_B_OP0_INCR /* increment branch counter if stay */
  593             | R500_FC_IGNORE_UNCOVERED
  594         ;
  595 
  596         if (branch->Else >= 0) {
  597             /* increment branch counter also if jump */
  598             s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
  599             s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
  600 
  601             s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
  602                 | R500_FC_A_OP_NONE /* no address stack */
  603                 | R500_FC_B_ELSE /* all active pixels want to jump */
  604                 | R500_FC_B_OP0_NONE /* no counter op if stay */
  605                 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
  606                 | R500_FC_B_POP_CNT(1)
  607             ;
  608             s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
  609         } else {
  610             /* don't touch branch counter on jump */
  611             s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
  612             s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
  613         }
  614 
  615 
  616         s->CurrentBranchDepth--;
  617         break;
  618     default:
  619         rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
  620     }
  621 }
  622 
  623 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
  624 {
  625     struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
  626     struct emit_state s;
  627     struct r500_fragment_program_code *code = &compiler->code->code.r500;
  628 
  629     memset(&s, 0, sizeof(s));
  630     s.C = &compiler->Base;
  631     s.Code = code;
  632 
  633     memset(code, 0, sizeof(*code));
  634     code->max_temp_idx = 1;
  635     code->inst_end = -1;
  636 
  637     for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
  638         inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
  639         inst = inst->Next) {
  640         if (inst->Type == RC_INSTRUCTION_NORMAL) {
  641             const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
  642 
  643             if (opcode->IsFlowControl) {
  644                 emit_flowcontrol(&s, inst);
  645             } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
  646                 continue;
  647             } else {
  648                 emit_tex(compiler, &inst->U.I);
  649             }
  650         } else {
  651             emit_paired(compiler, &inst->U.P);
  652         }
  653     }
  654 
  655     if (code->max_temp_idx >= compiler->Base.max_temp_regs)
  656         rc_error(&compiler->Base, "Too many hardware temporaries used");
  657 
  658     if (compiler->Base.Error)
  659         return;
  660 
  661     if (code->inst_end == -1 ||
  662         (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
  663         int ip;
  664 
  665         /* This may happen when dead-code elimination is disabled or
  666          * when most of the fragment program logic is leading to a KIL */
  667         if (code->inst_end >= compiler->Base.max_alu_insts-1) {
  668             rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
  669             return;
  670         }
  671 
  672         ip = ++code->inst_end;
  673         code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
  674     }
  675 
  676     /* Make sure TEX_SEM_WAIT is set on the last instruction */
  677     code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
  678 
  679     /* Enable full flow control mode if we are using loops or have if
  680      * statements nested at least four deep. */
  681     if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
  682         if (code->max_temp_idx < 1)
  683             code->max_temp_idx = 1;
  684 
  685         code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
  686     }
  687 }