diff options
Diffstat (limited to 'mesalib/src/mesa/program/prog_optimize.c')
-rw-r--r-- | mesalib/src/mesa/program/prog_optimize.c | 2706 |
1 files changed, 1353 insertions, 1353 deletions
diff --git a/mesalib/src/mesa/program/prog_optimize.c b/mesalib/src/mesa/program/prog_optimize.c index 8a40fa69e..0c5129ba9 100644 --- a/mesalib/src/mesa/program/prog_optimize.c +++ b/mesalib/src/mesa/program/prog_optimize.c @@ -1,1353 +1,1353 @@ -/* - * Mesa 3-D graphics library - * Version: 7.5 - * - * Copyright (C) 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - - -#include "main/glheader.h" -#include "main/context.h" -#include "main/macros.h" -#include "program.h" -#include "prog_instruction.h" -#include "prog_optimize.h" -#include "prog_print.h" - - -#define MAX_LOOP_NESTING 50 -/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to - * register allocate many temporary values into that small number of - * temps. So allow large temporary indices coming into the register - * allocator. - */ -#define REG_ALLOCATE_MAX_PROGRAM_TEMPS ((1 << INST_INDEX_BITS) - 1) - -static GLboolean dbg = GL_FALSE; - -#define NO_MASK 0xf - -/** - * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which - * are read from the given src in this instruction, We also provide - * one optional masks which may mask other components in the dst - * register - */ -static GLuint -get_src_arg_mask(const struct prog_instruction *inst, - GLuint arg, GLuint dst_mask) -{ - GLuint read_mask, channel_mask; - GLuint comp; - - ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode)); - - /* Form the dst register, find the written channels */ - if (inst->CondUpdate) { - channel_mask = WRITEMASK_XYZW; - } - else { - switch (inst->Opcode) { - case OPCODE_MOV: - case OPCODE_MIN: - case OPCODE_MAX: - case OPCODE_ABS: - case OPCODE_ADD: - case OPCODE_MAD: - case OPCODE_MUL: - case OPCODE_SUB: - case OPCODE_CMP: - case OPCODE_FLR: - case OPCODE_FRC: - case OPCODE_LRP: - case OPCODE_SEQ: - case OPCODE_SGE: - case OPCODE_SGT: - case OPCODE_SLE: - case OPCODE_SLT: - case OPCODE_SNE: - case OPCODE_SSG: - channel_mask = inst->DstReg.WriteMask & dst_mask; - break; - case OPCODE_RCP: - case OPCODE_SIN: - case OPCODE_COS: - case OPCODE_RSQ: - case OPCODE_POW: - case OPCODE_EX2: - case OPCODE_LOG: - channel_mask = WRITEMASK_X; - break; - case OPCODE_DP2: - channel_mask = WRITEMASK_XY; - break; - case OPCODE_DP3: - case OPCODE_XPD: - channel_mask = WRITEMASK_XYZ; - break; - default: - channel_mask = WRITEMASK_XYZW; - break; - } - } - - /* Now, given the src swizzle and the written channels, find which - * components are actually read - */ - read_mask = 0x0; - for (comp = 0; comp < 4; ++comp) { - const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp); - ASSERT(coord < 4); - if (channel_mask & (1 << comp) && coord <= SWIZZLE_W) - read_mask |= 1 << coord; - } - - return read_mask; -} - - -/** - * For a MOV instruction, compute a write mask when src register also has - * a mask - */ -static GLuint -get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask) -{ - const GLuint mask = mov->DstReg.WriteMask; - GLuint comp; - GLuint updated_mask = 0x0; - - ASSERT(mov->Opcode == OPCODE_MOV); - - for (comp = 0; comp < 4; ++comp) { - GLuint src_comp; - if ((mask & (1 << comp)) == 0) - continue; - src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp); - if ((src_mask & (1 << src_comp)) == 0) - continue; - updated_mask |= 1 << comp; - } - - return updated_mask; -} - - -/** - * Ensure that the swizzle is regular. That is, all of the swizzle - * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE. - */ -static GLboolean -is_swizzle_regular(GLuint swz) -{ - return GET_SWZ(swz,0) <= SWIZZLE_W && - GET_SWZ(swz,1) <= SWIZZLE_W && - GET_SWZ(swz,2) <= SWIZZLE_W && - GET_SWZ(swz,3) <= SWIZZLE_W; -} - - -/** - * In 'prog' remove instruction[i] if removeFlags[i] == TRUE. - * \return number of instructions removed - */ -static GLuint -remove_instructions(struct gl_program *prog, const GLboolean *removeFlags) -{ - GLint i, removeEnd = 0, removeCount = 0; - GLuint totalRemoved = 0; - - /* go backward */ - for (i = prog->NumInstructions - 1; i >= 0; i--) { - if (removeFlags[i]) { - totalRemoved++; - if (removeCount == 0) { - /* begin a run of instructions to remove */ - removeEnd = i; - removeCount = 1; - } - else { - /* extend the run of instructions to remove */ - removeCount++; - } - } - else { - /* don't remove this instruction, but check if the preceeding - * instructions are to be removed. - */ - if (removeCount > 0) { - GLint removeStart = removeEnd - removeCount + 1; - _mesa_delete_instructions(prog, removeStart, removeCount); - removeStart = removeCount = 0; /* reset removal info */ - } - } - } - /* Finish removing if the first instruction was to be removed. */ - if (removeCount > 0) { - GLint removeStart = removeEnd - removeCount + 1; - _mesa_delete_instructions(prog, removeStart, removeCount); - } - return totalRemoved; -} - - -/** - * Remap register indexes according to map. - * \param prog the program to search/replace - * \param file the type of register file to search/replace - * \param map maps old register indexes to new indexes - */ -static void -replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) -{ - GLuint i; - - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = prog->Instructions + i; - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == file) { - GLuint index = inst->SrcReg[j].Index; - ASSERT(map[index] >= 0); - inst->SrcReg[j].Index = map[index]; - } - } - if (inst->DstReg.File == file) { - const GLuint index = inst->DstReg.Index; - ASSERT(map[index] >= 0); - inst->DstReg.Index = map[index]; - } - } -} - - -/** - * Remove dead instructions from the given program. - * This is very primitive for now. Basically look for temp registers - * that are written to but never read. Remove any instructions that - * write to such registers. Be careful with condition code setters. - */ -static GLboolean -_mesa_remove_dead_code_global(struct gl_program *prog) -{ - GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4]; - GLboolean *removeInst; /* per-instruction removal flag */ - GLuint i, rem = 0, comp; - - memset(tempRead, 0, sizeof(tempRead)); - - if (dbg) { - printf("Optimize: Begin dead code removal\n"); - /*_mesa_print_program(prog);*/ - } - - removeInst = (GLboolean *) - calloc(1, prog->NumInstructions * sizeof(GLboolean)); - - /* Determine which temps are read and written */ - for (i = 0; i < prog->NumInstructions; i++) { - const struct prog_instruction *inst = prog->Instructions + i; - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); - GLuint j; - - /* check src regs */ - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - const GLuint index = inst->SrcReg[j].Index; - GLuint read_mask; - ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); - read_mask = get_src_arg_mask(inst, j, NO_MASK); - - if (inst->SrcReg[j].RelAddr) { - if (dbg) - printf("abort remove dead code (indirect temp)\n"); - goto done; - } - - for (comp = 0; comp < 4; comp++) { - const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp); - ASSERT(swz < 4); - if ((read_mask & (1 << swz)) == 0) - continue; - if (swz <= SWIZZLE_W) - tempRead[index][swz] = GL_TRUE; - } - } - } - - /* check dst reg */ - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - const GLuint index = inst->DstReg.Index; - ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); - - if (inst->DstReg.RelAddr) { - if (dbg) - printf("abort remove dead code (indirect temp)\n"); - goto done; - } - - if (inst->CondUpdate) { - /* If we're writing to this register and setting condition - * codes we cannot remove the instruction. Prevent removal - * by setting the 'read' flag. - */ - tempRead[index][0] = GL_TRUE; - tempRead[index][1] = GL_TRUE; - tempRead[index][2] = GL_TRUE; - tempRead[index][3] = GL_TRUE; - } - } - } - - /* find instructions that write to dead registers, flag for removal */ - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = prog->Instructions + i; - const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode); - - if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) { - GLint chan, index = inst->DstReg.Index; - - for (chan = 0; chan < 4; chan++) { - if (!tempRead[index][chan] && - inst->DstReg.WriteMask & (1 << chan)) { - if (dbg) { - printf("Remove writemask on %u.%c\n", i, - chan == 3 ? 'w' : 'x' + chan); - } - inst->DstReg.WriteMask &= ~(1 << chan); - rem++; - } - } - - if (inst->DstReg.WriteMask == 0) { - /* If we cleared all writes, the instruction can be removed. */ - if (dbg) - printf("Remove instruction %u: \n", i); - removeInst[i] = GL_TRUE; - } - } - } - - /* now remove the instructions which aren't needed */ - rem = remove_instructions(prog, removeInst); - - if (dbg) { - printf("Optimize: End dead code removal.\n"); - printf(" %u channel writes removed\n", rem); - printf(" %u instructions removed\n", rem); - /*_mesa_print_program(prog);*/ - } - -done: - free(removeInst); - return rem != 0; -} - - -enum inst_use -{ - READ, - WRITE, - FLOW, - END -}; - - -/** - * Scan forward in program from 'start' for the next occurances of TEMP[index]. - * We look if an instruction reads the component given by the masks and if they - * are overwritten. - * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator - * that we can't look further. - */ -static enum inst_use -find_next_use(const struct gl_program *prog, - GLuint start, - GLuint index, - GLuint mask) -{ - GLuint i; - - for (i = start; i < prog->NumInstructions; i++) { - const struct prog_instruction *inst = prog->Instructions + i; - switch (inst->Opcode) { - case OPCODE_BGNLOOP: - case OPCODE_BGNSUB: - case OPCODE_BRA: - case OPCODE_CAL: - case OPCODE_CONT: - case OPCODE_IF: - case OPCODE_ELSE: - case OPCODE_ENDIF: - case OPCODE_ENDLOOP: - case OPCODE_ENDSUB: - case OPCODE_RET: - return FLOW; - case OPCODE_END: - return END; - default: - { - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].RelAddr || - (inst->SrcReg[j].File == PROGRAM_TEMPORARY && - inst->SrcReg[j].Index == index && - (get_src_arg_mask(inst,j,NO_MASK) & mask))) - return READ; - } - if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 && - inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == index) { - mask &= ~inst->DstReg.WriteMask; - if (mask == 0) - return WRITE; - } - } - } - } - return END; -} - - -/** - * Is the given instruction opcode a flow-control opcode? - * XXX maybe move this into prog_instruction.[ch] - */ -static GLboolean -_mesa_is_flow_control_opcode(enum prog_opcode opcode) -{ - switch (opcode) { - case OPCODE_BGNLOOP: - case OPCODE_BGNSUB: - case OPCODE_BRA: - case OPCODE_CAL: - case OPCODE_CONT: - case OPCODE_IF: - case OPCODE_ELSE: - case OPCODE_END: - case OPCODE_ENDIF: - case OPCODE_ENDLOOP: - case OPCODE_ENDSUB: - case OPCODE_RET: - return GL_TRUE; - default: - return GL_FALSE; - } -} - - -/** - * Test if the given instruction is a simple MOV (no conditional updating, - * not relative addressing, no negation/abs, etc). - */ -static GLboolean -can_downward_mov_be_modifed(const struct prog_instruction *mov) -{ - return - mov->Opcode == OPCODE_MOV && - mov->CondUpdate == GL_FALSE && - mov->SrcReg[0].RelAddr == 0 && - mov->SrcReg[0].Negate == 0 && - mov->SrcReg[0].Abs == 0 && - mov->SrcReg[0].HasIndex2 == 0 && - mov->SrcReg[0].RelAddr2 == 0 && - mov->DstReg.RelAddr == 0 && - mov->DstReg.CondMask == COND_TR && - mov->SaturateMode == SATURATE_OFF; -} - - -static GLboolean -can_upward_mov_be_modifed(const struct prog_instruction *mov) -{ - return - can_downward_mov_be_modifed(mov) && - mov->DstReg.File == PROGRAM_TEMPORARY; -} - - -/** - * Try to remove use of extraneous MOV instructions, to free them up for dead - * code removal. - */ -static void -_mesa_remove_extra_move_use(struct gl_program *prog) -{ - GLuint i, j; - - if (dbg) { - printf("Optimize: Begin remove extra move use\n"); - _mesa_print_program(prog); - } - - /* - * Look for sequences such as this: - * MOV tmpX, arg0; - * ... - * FOO tmpY, tmpX, arg1; - * and convert into: - * MOV tmpX, arg0; - * ... - * FOO tmpY, arg0, arg1; - */ - - for (i = 0; i + 1 < prog->NumInstructions; i++) { - const struct prog_instruction *mov = prog->Instructions + i; - GLuint dst_mask, src_mask; - if (can_upward_mov_be_modifed(mov) == GL_FALSE) - continue; - - /* Scanning the code, we maintain the components which are still active in - * these two masks - */ - dst_mask = mov->DstReg.WriteMask; - src_mask = get_src_arg_mask(mov, 0, NO_MASK); - - /* Walk through remaining instructions until the or src reg gets - * rewritten or we get into some flow-control, eliminating the use of - * this MOV. - */ - for (j = i + 1; j < prog->NumInstructions; j++) { - struct prog_instruction *inst2 = prog->Instructions + j; - GLuint arg; - - if (_mesa_is_flow_control_opcode(inst2->Opcode)) - break; - - /* First rewrite this instruction's args if appropriate. */ - for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) { - GLuint comp, read_mask; - - if (inst2->SrcReg[arg].File != mov->DstReg.File || - inst2->SrcReg[arg].Index != mov->DstReg.Index || - inst2->SrcReg[arg].RelAddr || - inst2->SrcReg[arg].Abs) - continue; - read_mask = get_src_arg_mask(inst2, arg, NO_MASK); - - /* Adjust the swizzles of inst2 to point at MOV's source if ALL the - * components read still come from the mov instructions - */ - if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) && - (read_mask & dst_mask) == read_mask) { - for (comp = 0; comp < 4; comp++) { - const GLuint inst2_swz = - GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); - const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); - inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); - inst2->SrcReg[arg].Swizzle |= s << (3 * comp); - inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> - inst2_swz) & 0x1) << comp); - } - inst2->SrcReg[arg].File = mov->SrcReg[0].File; - inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; - } - } - - /* The source of MOV is written. This potentially deactivates some - * components from the src and dst of the MOV instruction - */ - if (inst2->DstReg.File == mov->DstReg.File && - (inst2->DstReg.RelAddr || - inst2->DstReg.Index == mov->DstReg.Index)) { - dst_mask &= ~inst2->DstReg.WriteMask; - src_mask = get_src_arg_mask(mov, 0, dst_mask); - } - - /* Idem when the destination of mov is written */ - if (inst2->DstReg.File == mov->SrcReg[0].File && - (inst2->DstReg.RelAddr || - inst2->DstReg.Index == mov->SrcReg[0].Index)) { - src_mask &= ~inst2->DstReg.WriteMask; - dst_mask &= get_dst_mask_for_mov(mov, src_mask); - } - if (dst_mask == 0) - break; - } - } - - if (dbg) { - printf("Optimize: End remove extra move use.\n"); - /*_mesa_print_program(prog);*/ - } -} - - -/** - * Complements dead_code_global. Try to remove code in block of code by - * carefully monitoring the swizzles. Both functions should be merged into one - * with a proper control flow graph - */ -static GLboolean -_mesa_remove_dead_code_local(struct gl_program *prog) -{ - GLboolean *removeInst; - GLuint i, arg, rem = 0; - - removeInst = (GLboolean *) - calloc(1, prog->NumInstructions * sizeof(GLboolean)); - - for (i = 0; i < prog->NumInstructions; i++) { - const struct prog_instruction *inst = prog->Instructions + i; - const GLuint index = inst->DstReg.Index; - const GLuint mask = inst->DstReg.WriteMask; - enum inst_use use; - - /* We must deactivate the pass as soon as some indirection is used */ - if (inst->DstReg.RelAddr) - goto done; - for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) - if (inst->SrcReg[arg].RelAddr) - goto done; - - if (_mesa_is_flow_control_opcode(inst->Opcode) || - _mesa_num_inst_dst_regs(inst->Opcode) == 0 || - inst->DstReg.File != PROGRAM_TEMPORARY || - inst->DstReg.RelAddr) - continue; - - use = find_next_use(prog, i+1, index, mask); - if (use == WRITE || use == END) - removeInst[i] = GL_TRUE; - } - - rem = remove_instructions(prog, removeInst); - -done: - free(removeInst); - return rem != 0; -} - - -/** - * Try to inject the destination of mov as the destination of inst and recompute - * the swizzles operators for the sources of inst if required. Return GL_TRUE - * of the substitution was possible, GL_FALSE otherwise - */ -static GLboolean -_mesa_merge_mov_into_inst(struct prog_instruction *inst, - const struct prog_instruction *mov) -{ - /* Indirection table which associates destination and source components for - * the mov instruction - */ - const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK); - - /* Some components are not written by inst. We cannot remove the mov */ - if (mask != (inst->DstReg.WriteMask & mask)) - return GL_FALSE; - - /* Depending on the instruction, we may need to recompute the swizzles. - * Also, some other instructions (like TEX) are not linear. We will only - * consider completely active sources and destinations - */ - switch (inst->Opcode) { - - /* Carstesian instructions: we compute the swizzle */ - case OPCODE_MOV: - case OPCODE_MIN: - case OPCODE_MAX: - case OPCODE_ABS: - case OPCODE_ADD: - case OPCODE_MAD: - case OPCODE_MUL: - case OPCODE_SUB: - { - GLuint dst_to_src_comp[4] = {0,0,0,0}; - GLuint dst_comp, arg; - for (dst_comp = 0; dst_comp < 4; ++dst_comp) { - if (mov->DstReg.WriteMask & (1 << dst_comp)) { - const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp); - ASSERT(src_comp < 4); - dst_to_src_comp[dst_comp] = src_comp; - } - } - - /* Patch each source of the instruction */ - for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) { - const GLuint arg_swz = inst->SrcReg[arg].Swizzle; - inst->SrcReg[arg].Swizzle = 0; - - /* Reset each active component of the swizzle */ - for (dst_comp = 0; dst_comp < 4; ++dst_comp) { - GLuint src_comp, arg_comp; - if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0) - continue; - src_comp = dst_to_src_comp[dst_comp]; - ASSERT(src_comp < 4); - arg_comp = GET_SWZ(arg_swz, src_comp); - ASSERT(arg_comp < 4); - inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp); - } - } - inst->DstReg = mov->DstReg; - return GL_TRUE; - } - - /* Dot products and scalar instructions: we only change the destination */ - case OPCODE_RCP: - case OPCODE_SIN: - case OPCODE_COS: - case OPCODE_RSQ: - case OPCODE_POW: - case OPCODE_EX2: - case OPCODE_LOG: - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - inst->DstReg = mov->DstReg; - return GL_TRUE; - - /* All other instructions require fully active components with no swizzle */ - default: - if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW || - inst->DstReg.WriteMask != WRITEMASK_XYZW) - return GL_FALSE; - inst->DstReg = mov->DstReg; - return GL_TRUE; - } -} - - -/** - * Try to remove extraneous MOV instructions from the given program. - */ -static GLboolean -_mesa_remove_extra_moves(struct gl_program *prog) -{ - GLboolean *removeInst; /* per-instruction removal flag */ - GLuint i, rem = 0, nesting = 0; - - if (dbg) { - printf("Optimize: Begin remove extra moves\n"); - _mesa_print_program(prog); - } - - removeInst = (GLboolean *) - calloc(1, prog->NumInstructions * sizeof(GLboolean)); - - /* - * Look for sequences such as this: - * FOO tmpX, arg0, arg1; - * MOV tmpY, tmpX; - * and convert into: - * FOO tmpY, arg0, arg1; - */ - - for (i = 0; i < prog->NumInstructions; i++) { - const struct prog_instruction *mov = prog->Instructions + i; - - switch (mov->Opcode) { - case OPCODE_BGNLOOP: - case OPCODE_BGNSUB: - case OPCODE_IF: - nesting++; - break; - case OPCODE_ENDLOOP: - case OPCODE_ENDSUB: - case OPCODE_ENDIF: - nesting--; - break; - case OPCODE_MOV: - if (i > 0 && - can_downward_mov_be_modifed(mov) && - mov->SrcReg[0].File == PROGRAM_TEMPORARY && - nesting == 0) - { - - /* see if this MOV can be removed */ - const GLuint id = mov->SrcReg[0].Index; - struct prog_instruction *prevInst; - GLuint prevI; - - /* get pointer to previous instruction */ - prevI = i - 1; - while (prevI > 0 && removeInst[prevI]) - prevI--; - prevInst = prog->Instructions + prevI; - - if (prevInst->DstReg.File == PROGRAM_TEMPORARY && - prevInst->DstReg.Index == id && - prevInst->DstReg.RelAddr == 0 && - prevInst->DstReg.CondSrc == 0 && - prevInst->DstReg.CondMask == COND_TR) { - - const GLuint dst_mask = prevInst->DstReg.WriteMask; - enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask); - - if (next_use == WRITE || next_use == END) { - /* OK, we can safely remove this MOV instruction. - * Transform: - * prevI: FOO tempIndex, x, y; - * i: MOV z, tempIndex; - * Into: - * prevI: FOO z, x, y; - */ - if (_mesa_merge_mov_into_inst(prevInst, mov)) { - removeInst[i] = GL_TRUE; - if (dbg) { - printf("Remove MOV at %u\n", i); - printf("new prev inst %u: ", prevI); - _mesa_print_instruction(prevInst); - } - } - } - } - } - break; - default: - ; /* nothing */ - } - } - - /* now remove the instructions which aren't needed */ - rem = remove_instructions(prog, removeInst); - - free(removeInst); - - if (dbg) { - printf("Optimize: End remove extra moves. %u instructions removed\n", rem); - /*_mesa_print_program(prog);*/ - } - - return rem != 0; -} - - -/** A live register interval */ -struct interval -{ - GLuint Reg; /** The temporary register index */ - GLuint Start, End; /** Start/end instruction numbers */ -}; - - -/** A list of register intervals */ -struct interval_list -{ - GLuint Num; - struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; -}; - - -static void -append_interval(struct interval_list *list, const struct interval *inv) -{ - list->Intervals[list->Num++] = *inv; -} - - -/** Insert interval inv into list, sorted by interval end */ -static void -insert_interval_by_end(struct interval_list *list, const struct interval *inv) -{ - /* XXX we could do a binary search insertion here since list is sorted */ - GLint i = list->Num - 1; - while (i >= 0 && list->Intervals[i].End > inv->End) { - list->Intervals[i + 1] = list->Intervals[i]; - i--; - } - list->Intervals[i + 1] = *inv; - list->Num++; - -#ifdef DEBUG - { - GLuint i; - for (i = 0; i + 1 < list->Num; i++) { - ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End); - } - } -#endif -} - - -/** Remove the given interval from the interval list */ -static void -remove_interval(struct interval_list *list, const struct interval *inv) -{ - /* XXX we could binary search since list is sorted */ - GLuint k; - for (k = 0; k < list->Num; k++) { - if (list->Intervals[k].Reg == inv->Reg) { - /* found, remove it */ - ASSERT(list->Intervals[k].Start == inv->Start); - ASSERT(list->Intervals[k].End == inv->End); - while (k < list->Num - 1) { - list->Intervals[k] = list->Intervals[k + 1]; - k++; - } - list->Num--; - return; - } - } -} - - -/** called by qsort() */ -static int -compare_start(const void *a, const void *b) -{ - const struct interval *ia = (const struct interval *) a; - const struct interval *ib = (const struct interval *) b; - if (ia->Start < ib->Start) - return -1; - else if (ia->Start > ib->Start) - return +1; - else - return 0; -} - - -/** sort the interval list according to interval starts */ -static void -sort_interval_list_by_start(struct interval_list *list) -{ - qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); -#ifdef DEBUG - { - GLuint i; - for (i = 0; i + 1 < list->Num; i++) { - ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start); - } - } -#endif -} - -struct loop_info -{ - GLuint Start, End; /**< Start, end instructions of loop */ -}; - -/** - * Update the intermediate interval info for register 'index' and - * instruction 'ic'. - */ -static void -update_interval(GLint intBegin[], GLint intEnd[], - struct loop_info *loopStack, GLuint loopStackDepth, - GLuint index, GLuint ic) -{ - int i; - GLuint begin = ic; - GLuint end = ic; - - /* If the register is used in a loop, extend its lifetime through the end - * of the outermost loop that doesn't contain its definition. - */ - for (i = 0; i < loopStackDepth; i++) { - if (intBegin[index] < loopStack[i].Start) { - end = loopStack[i].End; - break; - } - } - - /* Variables that are live at the end of a loop will also be live at the - * beginning, so an instruction inside of a loop should have its live - * interval begin at the start of the outermost loop. - */ - if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) { - begin = loopStack[0].Start; - } - - ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); - if (intBegin[index] == -1) { - ASSERT(intEnd[index] == -1); - intBegin[index] = begin; - intEnd[index] = end; - } - else { - intEnd[index] = end; - } -} - - -/** - * Find first/last instruction that references each temporary register. - */ -GLboolean -_mesa_find_temp_intervals(const struct prog_instruction *instructions, - GLuint numInstructions, - GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS], - GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS]) -{ - struct loop_info loopStack[MAX_LOOP_NESTING]; - GLuint loopStackDepth = 0; - GLuint i; - - for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){ - intBegin[i] = intEnd[i] = -1; - } - - /* Scan instructions looking for temporary registers */ - for (i = 0; i < numInstructions; i++) { - const struct prog_instruction *inst = instructions + i; - if (inst->Opcode == OPCODE_BGNLOOP) { - loopStack[loopStackDepth].Start = i; - loopStack[loopStackDepth].End = inst->BranchTarget; - loopStackDepth++; - } - else if (inst->Opcode == OPCODE_ENDLOOP) { - loopStackDepth--; - } - else if (inst->Opcode == OPCODE_CAL) { - return GL_FALSE; - } - else { - const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/ - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - const GLuint index = inst->SrcReg[j].Index; - if (inst->SrcReg[j].RelAddr) - return GL_FALSE; - update_interval(intBegin, intEnd, loopStack, loopStackDepth, - index, i); - } - } - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - const GLuint index = inst->DstReg.Index; - if (inst->DstReg.RelAddr) - return GL_FALSE; - update_interval(intBegin, intEnd, loopStack, loopStackDepth, - index, i); - } - } - } - - return GL_TRUE; -} - - -/** - * Find the live intervals for each temporary register in the program. - * For register R, the interval [A,B] indicates that R is referenced - * from instruction A through instruction B. - * Special consideration is needed for loops and subroutines. - * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason - */ -static GLboolean -find_live_intervals(struct gl_program *prog, - struct interval_list *liveIntervals) -{ - GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; - GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; - GLuint i; - - /* - * Note: we'll return GL_FALSE below if we find relative indexing - * into the TEMP register file. We can't handle that yet. - * We also give up on subroutines for now. - */ - - if (dbg) { - printf("Optimize: Begin find intervals\n"); - } - - /* build intermediate arrays */ - if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions, - intBegin, intEnd)) - return GL_FALSE; - - /* Build live intervals list from intermediate arrays */ - liveIntervals->Num = 0; - for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) { - if (intBegin[i] >= 0) { - struct interval inv; - inv.Reg = i; - inv.Start = intBegin[i]; - inv.End = intEnd[i]; - append_interval(liveIntervals, &inv); - } - } - - /* Sort the list according to interval starts */ - sort_interval_list_by_start(liveIntervals); - - if (dbg) { - /* print interval info */ - for (i = 0; i < liveIntervals->Num; i++) { - const struct interval *inv = liveIntervals->Intervals + i; - printf("Reg[%d] live [%d, %d]:", - inv->Reg, inv->Start, inv->End); - if (1) { - GLuint j; - for (j = 0; j < inv->Start; j++) - printf(" "); - for (j = inv->Start; j <= inv->End; j++) - printf("x"); - } - printf("\n"); - } - } - - return GL_TRUE; -} - - -/** Scan the array of used register flags to find free entry */ -static GLint -alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS]) -{ - GLuint k; - for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) { - if (!usedRegs[k]) { - usedRegs[k] = GL_TRUE; - return k; - } - } - return -1; -} - - -/** - * This function implements "Linear Scan Register Allocation" to reduce - * the number of temporary registers used by the program. - * - * We compute the "live interval" for all temporary registers then - * examine the overlap of the intervals to allocate new registers. - * Basically, if two intervals do not overlap, they can use the same register. - */ -static void -_mesa_reallocate_registers(struct gl_program *prog) -{ - struct interval_list liveIntervals; - GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; - GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; - GLuint i; - GLint maxTemp = -1; - - if (dbg) { - printf("Optimize: Begin live-interval register reallocation\n"); - _mesa_print_program(prog); - } - - for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){ - registerMap[i] = -1; - usedRegs[i] = GL_FALSE; - } - - if (!find_live_intervals(prog, &liveIntervals)) { - if (dbg) - printf("Aborting register reallocation\n"); - return; - } - - { - struct interval_list activeIntervals; - activeIntervals.Num = 0; - - /* loop over live intervals, allocating a new register for each */ - for (i = 0; i < liveIntervals.Num; i++) { - const struct interval *live = liveIntervals.Intervals + i; - - if (dbg) - printf("Consider register %u\n", live->Reg); - - /* Expire old intervals. Intervals which have ended with respect - * to the live interval can have their remapped registers freed. - */ - { - GLint j; - for (j = 0; j < (GLint) activeIntervals.Num; j++) { - const struct interval *inv = activeIntervals.Intervals + j; - if (inv->End >= live->Start) { - /* Stop now. Since the activeInterval list is sorted - * we know we don't have to go further. - */ - break; - } - else { - /* Interval 'inv' has expired */ - const GLint regNew = registerMap[inv->Reg]; - ASSERT(regNew >= 0); - - if (dbg) - printf(" expire interval for reg %u\n", inv->Reg); - - /* remove interval j from active list */ - remove_interval(&activeIntervals, inv); - j--; /* counter-act j++ in for-loop above */ - - /* return register regNew to the free pool */ - if (dbg) - printf(" free reg %d\n", regNew); - ASSERT(usedRegs[regNew] == GL_TRUE); - usedRegs[regNew] = GL_FALSE; - } - } - } - - /* find a free register for this live interval */ - { - const GLint k = alloc_register(usedRegs); - if (k < 0) { - /* out of registers, give up */ - return; - } - registerMap[live->Reg] = k; - maxTemp = MAX2(maxTemp, k); - if (dbg) - printf(" remap register %u -> %d\n", live->Reg, k); - } - - /* Insert this live interval into the active list which is sorted - * by increasing end points. - */ - insert_interval_by_end(&activeIntervals, live); - } - } - - if (maxTemp + 1 < (GLint) liveIntervals.Num) { - /* OK, we've reduced the number of registers needed. - * Scan the program and replace all the old temporary register - * indexes with the new indexes. - */ - replace_regs(prog, PROGRAM_TEMPORARY, registerMap); - - prog->NumTemporaries = maxTemp + 1; - } - - if (dbg) { - printf("Optimize: End live-interval register reallocation\n"); - printf("Num temp regs before: %u after: %u\n", - liveIntervals.Num, maxTemp + 1); - _mesa_print_program(prog); - } -} - - -#if 0 -static void -print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) { - fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions); - _mesa_print_program(program); - _mesa_print_program_parameters(ctx, program); - fprintf(stderr, "\n\n"); -} -#endif - -/** - * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP - * instruction is the first instruction to write to register T0. The are - * several lowering passes done in GLSL IR (e.g. branches and - * relative addressing) that create a large number of conditional assignments - * that ir_to_mesa converts to CMP instructions like the one mentioned above. - * - * Here is why this conversion is safe: - * CMP T0, T1 T2 T0 can be expanded to: - * if (T1 < 0.0) - * MOV T0, T2; - * else - * MOV T0, T0; - * - * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same - * as the original program. If (T1 < 0.0) evaluates to false, executing - * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. - * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 - * because any instruction that was going to read from T0 after this was going - * to read a garbage value anyway. - */ -static void -_mesa_simplify_cmp(struct gl_program * program) -{ - GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; - GLuint outputWrites[MAX_PROGRAM_OUTPUTS]; - GLuint i; - - if (dbg) { - printf("Optimize: Begin reads without writes\n"); - _mesa_print_program(program); - } - - for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) { - tempWrites[i] = 0; - } - - for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) { - outputWrites[i] = 0; - } - - for (i = 0; i < program->NumInstructions; i++) { - struct prog_instruction *inst = program->Instructions + i; - GLuint prevWriteMask; - - /* Give up if we encounter relative addressing or flow control. */ - if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) { - return; - } - - if (inst->DstReg.File == PROGRAM_OUTPUT) { - assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS); - prevWriteMask = outputWrites[inst->DstReg.Index]; - outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask; - } else if (inst->DstReg.File == PROGRAM_TEMPORARY) { - assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); - prevWriteMask = tempWrites[inst->DstReg.Index]; - tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask; - } else { - /* No other register type can be a destination register. */ - continue; - } - - /* For a CMP to be considered a conditional write, the destination - * register and source register two must be the same. */ - if (inst->Opcode == OPCODE_CMP - && !(inst->DstReg.WriteMask & prevWriteMask) - && inst->SrcReg[2].File == inst->DstReg.File - && inst->SrcReg[2].Index == inst->DstReg.Index - && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) { - - inst->Opcode = OPCODE_MOV; - inst->SrcReg[0] = inst->SrcReg[1]; - } - } - if (dbg) { - printf("Optimize: End reads without writes\n"); - _mesa_print_program(program); - } -} - -/** - * Apply optimizations to the given program to eliminate unnecessary - * instructions, temp regs, etc. - */ -void -_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program) -{ - GLboolean any_change; - - _mesa_simplify_cmp(program); - /* Stop when no modifications were output */ - do { - any_change = GL_FALSE; - _mesa_remove_extra_move_use(program); - if (_mesa_remove_dead_code_global(program)) - any_change = GL_TRUE; - if (_mesa_remove_extra_moves(program)) - any_change = GL_TRUE; - if (_mesa_remove_dead_code_local(program)) - any_change = GL_TRUE; - _mesa_reallocate_registers(program); - } while (any_change); -} - +/*
+ * Mesa 3-D graphics library
+ * Version: 7.5
+ *
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "program.h"
+#include "prog_instruction.h"
+#include "prog_optimize.h"
+#include "prog_print.h"
+
+
+#define MAX_LOOP_NESTING 50
+/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
+ * register allocate many temporary values into that small number of
+ * temps. So allow large temporary indices coming into the register
+ * allocator.
+ */
+#define REG_ALLOCATE_MAX_PROGRAM_TEMPS ((1 << INST_INDEX_BITS) - 1)
+
+static GLboolean dbg = GL_FALSE;
+
+#define NO_MASK 0xf
+
+/**
+ * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
+ * are read from the given src in this instruction, We also provide
+ * one optional masks which may mask other components in the dst
+ * register
+ */
+static GLuint
+get_src_arg_mask(const struct prog_instruction *inst,
+ GLuint arg, GLuint dst_mask)
+{
+ GLuint read_mask, channel_mask;
+ GLuint comp;
+
+ ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
+
+ /* Form the dst register, find the written channels */
+ if (inst->CondUpdate) {
+ channel_mask = WRITEMASK_XYZW;
+ }
+ else {
+ switch (inst->Opcode) {
+ case OPCODE_MOV:
+ case OPCODE_MIN:
+ case OPCODE_MAX:
+ case OPCODE_ABS:
+ case OPCODE_ADD:
+ case OPCODE_MAD:
+ case OPCODE_MUL:
+ case OPCODE_SUB:
+ case OPCODE_CMP:
+ case OPCODE_FLR:
+ case OPCODE_FRC:
+ case OPCODE_LRP:
+ case OPCODE_SEQ:
+ case OPCODE_SGE:
+ case OPCODE_SGT:
+ case OPCODE_SLE:
+ case OPCODE_SLT:
+ case OPCODE_SNE:
+ case OPCODE_SSG:
+ channel_mask = inst->DstReg.WriteMask & dst_mask;
+ break;
+ case OPCODE_RCP:
+ case OPCODE_SIN:
+ case OPCODE_COS:
+ case OPCODE_RSQ:
+ case OPCODE_POW:
+ case OPCODE_EX2:
+ case OPCODE_LOG:
+ channel_mask = WRITEMASK_X;
+ break;
+ case OPCODE_DP2:
+ channel_mask = WRITEMASK_XY;
+ break;
+ case OPCODE_DP3:
+ case OPCODE_XPD:
+ channel_mask = WRITEMASK_XYZ;
+ break;
+ default:
+ channel_mask = WRITEMASK_XYZW;
+ break;
+ }
+ }
+
+ /* Now, given the src swizzle and the written channels, find which
+ * components are actually read
+ */
+ read_mask = 0x0;
+ for (comp = 0; comp < 4; ++comp) {
+ const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
+ ASSERT(coord < 4);
+ if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
+ read_mask |= 1 << coord;
+ }
+
+ return read_mask;
+}
+
+
+/**
+ * For a MOV instruction, compute a write mask when src register also has
+ * a mask
+ */
+static GLuint
+get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
+{
+ const GLuint mask = mov->DstReg.WriteMask;
+ GLuint comp;
+ GLuint updated_mask = 0x0;
+
+ ASSERT(mov->Opcode == OPCODE_MOV);
+
+ for (comp = 0; comp < 4; ++comp) {
+ GLuint src_comp;
+ if ((mask & (1 << comp)) == 0)
+ continue;
+ src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
+ if ((src_mask & (1 << src_comp)) == 0)
+ continue;
+ updated_mask |= 1 << comp;
+ }
+
+ return updated_mask;
+}
+
+
+/**
+ * Ensure that the swizzle is regular. That is, all of the swizzle
+ * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
+ */
+static GLboolean
+is_swizzle_regular(GLuint swz)
+{
+ return GET_SWZ(swz,0) <= SWIZZLE_W &&
+ GET_SWZ(swz,1) <= SWIZZLE_W &&
+ GET_SWZ(swz,2) <= SWIZZLE_W &&
+ GET_SWZ(swz,3) <= SWIZZLE_W;
+}
+
+
+/**
+ * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
+ * \return number of instructions removed
+ */
+static GLuint
+remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
+{
+ GLint i, removeEnd = 0, removeCount = 0;
+ GLuint totalRemoved = 0;
+
+ /* go backward */
+ for (i = prog->NumInstructions - 1; i >= 0; i--) {
+ if (removeFlags[i]) {
+ totalRemoved++;
+ if (removeCount == 0) {
+ /* begin a run of instructions to remove */
+ removeEnd = i;
+ removeCount = 1;
+ }
+ else {
+ /* extend the run of instructions to remove */
+ removeCount++;
+ }
+ }
+ else {
+ /* don't remove this instruction, but check if the preceeding
+ * instructions are to be removed.
+ */
+ if (removeCount > 0) {
+ GLint removeStart = removeEnd - removeCount + 1;
+ _mesa_delete_instructions(prog, removeStart, removeCount);
+ removeStart = removeCount = 0; /* reset removal info */
+ }
+ }
+ }
+ /* Finish removing if the first instruction was to be removed. */
+ if (removeCount > 0) {
+ GLint removeStart = removeEnd - removeCount + 1;
+ _mesa_delete_instructions(prog, removeStart, removeCount);
+ }
+ return totalRemoved;
+}
+
+
+/**
+ * Remap register indexes according to map.
+ * \param prog the program to search/replace
+ * \param file the type of register file to search/replace
+ * \param map maps old register indexes to new indexes
+ */
+static void
+replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
+{
+ GLuint i;
+
+ for (i = 0; i < prog->NumInstructions; i++) {
+ struct prog_instruction *inst = prog->Instructions + i;
+ const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint j;
+ for (j = 0; j < numSrc; j++) {
+ if (inst->SrcReg[j].File == file) {
+ GLuint index = inst->SrcReg[j].Index;
+ ASSERT(map[index] >= 0);
+ inst->SrcReg[j].Index = map[index];
+ }
+ }
+ if (inst->DstReg.File == file) {
+ const GLuint index = inst->DstReg.Index;
+ ASSERT(map[index] >= 0);
+ inst->DstReg.Index = map[index];
+ }
+ }
+}
+
+
+/**
+ * Remove dead instructions from the given program.
+ * This is very primitive for now. Basically look for temp registers
+ * that are written to but never read. Remove any instructions that
+ * write to such registers. Be careful with condition code setters.
+ */
+static GLboolean
+_mesa_remove_dead_code_global(struct gl_program *prog)
+{
+ GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
+ GLboolean *removeInst; /* per-instruction removal flag */
+ GLuint i, rem = 0, comp;
+
+ memset(tempRead, 0, sizeof(tempRead));
+
+ if (dbg) {
+ printf("Optimize: Begin dead code removal\n");
+ /*_mesa_print_program(prog);*/
+ }
+
+ removeInst = (GLboolean *)
+ calloc(1, prog->NumInstructions * sizeof(GLboolean));
+
+ /* Determine which temps are read and written */
+ for (i = 0; i < prog->NumInstructions; i++) {
+ const struct prog_instruction *inst = prog->Instructions + i;
+ const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint j;
+
+ /* check src regs */
+ for (j = 0; j < numSrc; j++) {
+ if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+ const GLuint index = inst->SrcReg[j].Index;
+ GLuint read_mask;
+ ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+ read_mask = get_src_arg_mask(inst, j, NO_MASK);
+
+ if (inst->SrcReg[j].RelAddr) {
+ if (dbg)
+ printf("abort remove dead code (indirect temp)\n");
+ goto done;
+ }
+
+ for (comp = 0; comp < 4; comp++) {
+ const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
+ ASSERT(swz < 4);
+ if ((read_mask & (1 << swz)) == 0)
+ continue;
+ if (swz <= SWIZZLE_W)
+ tempRead[index][swz] = GL_TRUE;
+ }
+ }
+ }
+
+ /* check dst reg */
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ const GLuint index = inst->DstReg.Index;
+ ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+
+ if (inst->DstReg.RelAddr) {
+ if (dbg)
+ printf("abort remove dead code (indirect temp)\n");
+ goto done;
+ }
+
+ if (inst->CondUpdate) {
+ /* If we're writing to this register and setting condition
+ * codes we cannot remove the instruction. Prevent removal
+ * by setting the 'read' flag.
+ */
+ tempRead[index][0] = GL_TRUE;
+ tempRead[index][1] = GL_TRUE;
+ tempRead[index][2] = GL_TRUE;
+ tempRead[index][3] = GL_TRUE;
+ }
+ }
+ }
+
+ /* find instructions that write to dead registers, flag for removal */
+ for (i = 0; i < prog->NumInstructions; i++) {
+ struct prog_instruction *inst = prog->Instructions + i;
+ const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
+
+ if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
+ GLint chan, index = inst->DstReg.Index;
+
+ for (chan = 0; chan < 4; chan++) {
+ if (!tempRead[index][chan] &&
+ inst->DstReg.WriteMask & (1 << chan)) {
+ if (dbg) {
+ printf("Remove writemask on %u.%c\n", i,
+ chan == 3 ? 'w' : 'x' + chan);
+ }
+ inst->DstReg.WriteMask &= ~(1 << chan);
+ rem++;
+ }
+ }
+
+ if (inst->DstReg.WriteMask == 0) {
+ /* If we cleared all writes, the instruction can be removed. */
+ if (dbg)
+ printf("Remove instruction %u: \n", i);
+ removeInst[i] = GL_TRUE;
+ }
+ }
+ }
+
+ /* now remove the instructions which aren't needed */
+ rem = remove_instructions(prog, removeInst);
+
+ if (dbg) {
+ printf("Optimize: End dead code removal.\n");
+ printf(" %u channel writes removed\n", rem);
+ printf(" %u instructions removed\n", rem);
+ /*_mesa_print_program(prog);*/
+ }
+
+done:
+ free(removeInst);
+ return rem != 0;
+}
+
+
+enum inst_use
+{
+ READ,
+ WRITE,
+ FLOW,
+ END
+};
+
+
+/**
+ * Scan forward in program from 'start' for the next occurances of TEMP[index].
+ * We look if an instruction reads the component given by the masks and if they
+ * are overwritten.
+ * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
+ * that we can't look further.
+ */
+static enum inst_use
+find_next_use(const struct gl_program *prog,
+ GLuint start,
+ GLuint index,
+ GLuint mask)
+{
+ GLuint i;
+
+ for (i = start; i < prog->NumInstructions; i++) {
+ const struct prog_instruction *inst = prog->Instructions + i;
+ switch (inst->Opcode) {
+ case OPCODE_BGNLOOP:
+ case OPCODE_BGNSUB:
+ case OPCODE_BRA:
+ case OPCODE_CAL:
+ case OPCODE_CONT:
+ case OPCODE_IF:
+ case OPCODE_ELSE:
+ case OPCODE_ENDIF:
+ case OPCODE_ENDLOOP:
+ case OPCODE_ENDSUB:
+ case OPCODE_RET:
+ return FLOW;
+ case OPCODE_END:
+ return END;
+ default:
+ {
+ const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint j;
+ for (j = 0; j < numSrc; j++) {
+ if (inst->SrcReg[j].RelAddr ||
+ (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
+ inst->SrcReg[j].Index == index &&
+ (get_src_arg_mask(inst,j,NO_MASK) & mask)))
+ return READ;
+ }
+ if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
+ inst->DstReg.File == PROGRAM_TEMPORARY &&
+ inst->DstReg.Index == index) {
+ mask &= ~inst->DstReg.WriteMask;
+ if (mask == 0)
+ return WRITE;
+ }
+ }
+ }
+ }
+ return END;
+}
+
+
+/**
+ * Is the given instruction opcode a flow-control opcode?
+ * XXX maybe move this into prog_instruction.[ch]
+ */
+static GLboolean
+_mesa_is_flow_control_opcode(enum prog_opcode opcode)
+{
+ switch (opcode) {
+ case OPCODE_BGNLOOP:
+ case OPCODE_BGNSUB:
+ case OPCODE_BRA:
+ case OPCODE_CAL:
+ case OPCODE_CONT:
+ case OPCODE_IF:
+ case OPCODE_ELSE:
+ case OPCODE_END:
+ case OPCODE_ENDIF:
+ case OPCODE_ENDLOOP:
+ case OPCODE_ENDSUB:
+ case OPCODE_RET:
+ return GL_TRUE;
+ default:
+ return GL_FALSE;
+ }
+}
+
+
+/**
+ * Test if the given instruction is a simple MOV (no conditional updating,
+ * not relative addressing, no negation/abs, etc).
+ */
+static GLboolean
+can_downward_mov_be_modifed(const struct prog_instruction *mov)
+{
+ return
+ mov->Opcode == OPCODE_MOV &&
+ mov->CondUpdate == GL_FALSE &&
+ mov->SrcReg[0].RelAddr == 0 &&
+ mov->SrcReg[0].Negate == 0 &&
+ mov->SrcReg[0].Abs == 0 &&
+ mov->SrcReg[0].HasIndex2 == 0 &&
+ mov->SrcReg[0].RelAddr2 == 0 &&
+ mov->DstReg.RelAddr == 0 &&
+ mov->DstReg.CondMask == COND_TR &&
+ mov->SaturateMode == SATURATE_OFF;
+}
+
+
+static GLboolean
+can_upward_mov_be_modifed(const struct prog_instruction *mov)
+{
+ return
+ can_downward_mov_be_modifed(mov) &&
+ mov->DstReg.File == PROGRAM_TEMPORARY;
+}
+
+
+/**
+ * Try to remove use of extraneous MOV instructions, to free them up for dead
+ * code removal.
+ */
+static void
+_mesa_remove_extra_move_use(struct gl_program *prog)
+{
+ GLuint i, j;
+
+ if (dbg) {
+ printf("Optimize: Begin remove extra move use\n");
+ _mesa_print_program(prog);
+ }
+
+ /*
+ * Look for sequences such as this:
+ * MOV tmpX, arg0;
+ * ...
+ * FOO tmpY, tmpX, arg1;
+ * and convert into:
+ * MOV tmpX, arg0;
+ * ...
+ * FOO tmpY, arg0, arg1;
+ */
+
+ for (i = 0; i + 1 < prog->NumInstructions; i++) {
+ const struct prog_instruction *mov = prog->Instructions + i;
+ GLuint dst_mask, src_mask;
+ if (can_upward_mov_be_modifed(mov) == GL_FALSE)
+ continue;
+
+ /* Scanning the code, we maintain the components which are still active in
+ * these two masks
+ */
+ dst_mask = mov->DstReg.WriteMask;
+ src_mask = get_src_arg_mask(mov, 0, NO_MASK);
+
+ /* Walk through remaining instructions until the or src reg gets
+ * rewritten or we get into some flow-control, eliminating the use of
+ * this MOV.
+ */
+ for (j = i + 1; j < prog->NumInstructions; j++) {
+ struct prog_instruction *inst2 = prog->Instructions + j;
+ GLuint arg;
+
+ if (_mesa_is_flow_control_opcode(inst2->Opcode))
+ break;
+
+ /* First rewrite this instruction's args if appropriate. */
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
+ GLuint comp, read_mask;
+
+ if (inst2->SrcReg[arg].File != mov->DstReg.File ||
+ inst2->SrcReg[arg].Index != mov->DstReg.Index ||
+ inst2->SrcReg[arg].RelAddr ||
+ inst2->SrcReg[arg].Abs)
+ continue;
+ read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
+
+ /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
+ * components read still come from the mov instructions
+ */
+ if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
+ (read_mask & dst_mask) == read_mask) {
+ for (comp = 0; comp < 4; comp++) {
+ const GLuint inst2_swz =
+ GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
+ const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
+ inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
+ inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
+ inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
+ inst2_swz) & 0x1) << comp);
+ }
+ inst2->SrcReg[arg].File = mov->SrcReg[0].File;
+ inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
+ }
+ }
+
+ /* The source of MOV is written. This potentially deactivates some
+ * components from the src and dst of the MOV instruction
+ */
+ if (inst2->DstReg.File == mov->DstReg.File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->DstReg.Index)) {
+ dst_mask &= ~inst2->DstReg.WriteMask;
+ src_mask = get_src_arg_mask(mov, 0, dst_mask);
+ }
+
+ /* Idem when the destination of mov is written */
+ if (inst2->DstReg.File == mov->SrcReg[0].File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->SrcReg[0].Index)) {
+ src_mask &= ~inst2->DstReg.WriteMask;
+ dst_mask &= get_dst_mask_for_mov(mov, src_mask);
+ }
+ if (dst_mask == 0)
+ break;
+ }
+ }
+
+ if (dbg) {
+ printf("Optimize: End remove extra move use.\n");
+ /*_mesa_print_program(prog);*/
+ }
+}
+
+
+/**
+ * Complements dead_code_global. Try to remove code in block of code by
+ * carefully monitoring the swizzles. Both functions should be merged into one
+ * with a proper control flow graph
+ */
+static GLboolean
+_mesa_remove_dead_code_local(struct gl_program *prog)
+{
+ GLboolean *removeInst;
+ GLuint i, arg, rem = 0;
+
+ removeInst = (GLboolean *)
+ calloc(1, prog->NumInstructions * sizeof(GLboolean));
+
+ for (i = 0; i < prog->NumInstructions; i++) {
+ const struct prog_instruction *inst = prog->Instructions + i;
+ const GLuint index = inst->DstReg.Index;
+ const GLuint mask = inst->DstReg.WriteMask;
+ enum inst_use use;
+
+ /* We must deactivate the pass as soon as some indirection is used */
+ if (inst->DstReg.RelAddr)
+ goto done;
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
+ if (inst->SrcReg[arg].RelAddr)
+ goto done;
+
+ if (_mesa_is_flow_control_opcode(inst->Opcode) ||
+ _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
+ inst->DstReg.File != PROGRAM_TEMPORARY ||
+ inst->DstReg.RelAddr)
+ continue;
+
+ use = find_next_use(prog, i+1, index, mask);
+ if (use == WRITE || use == END)
+ removeInst[i] = GL_TRUE;
+ }
+
+ rem = remove_instructions(prog, removeInst);
+
+done:
+ free(removeInst);
+ return rem != 0;
+}
+
+
+/**
+ * Try to inject the destination of mov as the destination of inst and recompute
+ * the swizzles operators for the sources of inst if required. Return GL_TRUE
+ * of the substitution was possible, GL_FALSE otherwise
+ */
+static GLboolean
+_mesa_merge_mov_into_inst(struct prog_instruction *inst,
+ const struct prog_instruction *mov)
+{
+ /* Indirection table which associates destination and source components for
+ * the mov instruction
+ */
+ const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
+
+ /* Some components are not written by inst. We cannot remove the mov */
+ if (mask != (inst->DstReg.WriteMask & mask))
+ return GL_FALSE;
+
+ /* Depending on the instruction, we may need to recompute the swizzles.
+ * Also, some other instructions (like TEX) are not linear. We will only
+ * consider completely active sources and destinations
+ */
+ switch (inst->Opcode) {
+
+ /* Carstesian instructions: we compute the swizzle */
+ case OPCODE_MOV:
+ case OPCODE_MIN:
+ case OPCODE_MAX:
+ case OPCODE_ABS:
+ case OPCODE_ADD:
+ case OPCODE_MAD:
+ case OPCODE_MUL:
+ case OPCODE_SUB:
+ {
+ GLuint dst_to_src_comp[4] = {0,0,0,0};
+ GLuint dst_comp, arg;
+ for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
+ if (mov->DstReg.WriteMask & (1 << dst_comp)) {
+ const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
+ ASSERT(src_comp < 4);
+ dst_to_src_comp[dst_comp] = src_comp;
+ }
+ }
+
+ /* Patch each source of the instruction */
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
+ const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
+ inst->SrcReg[arg].Swizzle = 0;
+
+ /* Reset each active component of the swizzle */
+ for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
+ GLuint src_comp, arg_comp;
+ if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
+ continue;
+ src_comp = dst_to_src_comp[dst_comp];
+ ASSERT(src_comp < 4);
+ arg_comp = GET_SWZ(arg_swz, src_comp);
+ ASSERT(arg_comp < 4);
+ inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
+ }
+ }
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+ }
+
+ /* Dot products and scalar instructions: we only change the destination */
+ case OPCODE_RCP:
+ case OPCODE_SIN:
+ case OPCODE_COS:
+ case OPCODE_RSQ:
+ case OPCODE_POW:
+ case OPCODE_EX2:
+ case OPCODE_LOG:
+ case OPCODE_DP2:
+ case OPCODE_DP3:
+ case OPCODE_DP4:
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+
+ /* All other instructions require fully active components with no swizzle */
+ default:
+ if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
+ inst->DstReg.WriteMask != WRITEMASK_XYZW)
+ return GL_FALSE;
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+ }
+}
+
+
+/**
+ * Try to remove extraneous MOV instructions from the given program.
+ */
+static GLboolean
+_mesa_remove_extra_moves(struct gl_program *prog)
+{
+ GLboolean *removeInst; /* per-instruction removal flag */
+ GLuint i, rem = 0, nesting = 0;
+
+ if (dbg) {
+ printf("Optimize: Begin remove extra moves\n");
+ _mesa_print_program(prog);
+ }
+
+ removeInst = (GLboolean *)
+ calloc(1, prog->NumInstructions * sizeof(GLboolean));
+
+ /*
+ * Look for sequences such as this:
+ * FOO tmpX, arg0, arg1;
+ * MOV tmpY, tmpX;
+ * and convert into:
+ * FOO tmpY, arg0, arg1;
+ */
+
+ for (i = 0; i < prog->NumInstructions; i++) {
+ const struct prog_instruction *mov = prog->Instructions + i;
+
+ switch (mov->Opcode) {
+ case OPCODE_BGNLOOP:
+ case OPCODE_BGNSUB:
+ case OPCODE_IF:
+ nesting++;
+ break;
+ case OPCODE_ENDLOOP:
+ case OPCODE_ENDSUB:
+ case OPCODE_ENDIF:
+ nesting--;
+ break;
+ case OPCODE_MOV:
+ if (i > 0 &&
+ can_downward_mov_be_modifed(mov) &&
+ mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
+ nesting == 0)
+ {
+
+ /* see if this MOV can be removed */
+ const GLuint id = mov->SrcReg[0].Index;
+ struct prog_instruction *prevInst;
+ GLuint prevI;
+
+ /* get pointer to previous instruction */
+ prevI = i - 1;
+ while (prevI > 0 && removeInst[prevI])
+ prevI--;
+ prevInst = prog->Instructions + prevI;
+
+ if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
+ prevInst->DstReg.Index == id &&
+ prevInst->DstReg.RelAddr == 0 &&
+ prevInst->DstReg.CondSrc == 0 &&
+ prevInst->DstReg.CondMask == COND_TR) {
+
+ const GLuint dst_mask = prevInst->DstReg.WriteMask;
+ enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
+
+ if (next_use == WRITE || next_use == END) {
+ /* OK, we can safely remove this MOV instruction.
+ * Transform:
+ * prevI: FOO tempIndex, x, y;
+ * i: MOV z, tempIndex;
+ * Into:
+ * prevI: FOO z, x, y;
+ */
+ if (_mesa_merge_mov_into_inst(prevInst, mov)) {
+ removeInst[i] = GL_TRUE;
+ if (dbg) {
+ printf("Remove MOV at %u\n", i);
+ printf("new prev inst %u: ", prevI);
+ _mesa_print_instruction(prevInst);
+ }
+ }
+ }
+ }
+ }
+ break;
+ default:
+ ; /* nothing */
+ }
+ }
+
+ /* now remove the instructions which aren't needed */
+ rem = remove_instructions(prog, removeInst);
+
+ free(removeInst);
+
+ if (dbg) {
+ printf("Optimize: End remove extra moves. %u instructions removed\n", rem);
+ /*_mesa_print_program(prog);*/
+ }
+
+ return rem != 0;
+}
+
+
+/** A live register interval */
+struct interval
+{
+ GLuint Reg; /** The temporary register index */
+ GLuint Start, End; /** Start/end instruction numbers */
+};
+
+
+/** A list of register intervals */
+struct interval_list
+{
+ GLuint Num;
+ struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+};
+
+
+static void
+append_interval(struct interval_list *list, const struct interval *inv)
+{
+ list->Intervals[list->Num++] = *inv;
+}
+
+
+/** Insert interval inv into list, sorted by interval end */
+static void
+insert_interval_by_end(struct interval_list *list, const struct interval *inv)
+{
+ /* XXX we could do a binary search insertion here since list is sorted */
+ GLint i = list->Num - 1;
+ while (i >= 0 && list->Intervals[i].End > inv->End) {
+ list->Intervals[i + 1] = list->Intervals[i];
+ i--;
+ }
+ list->Intervals[i + 1] = *inv;
+ list->Num++;
+
+#ifdef DEBUG
+ {
+ GLuint i;
+ for (i = 0; i + 1 < list->Num; i++) {
+ ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
+ }
+ }
+#endif
+}
+
+
+/** Remove the given interval from the interval list */
+static void
+remove_interval(struct interval_list *list, const struct interval *inv)
+{
+ /* XXX we could binary search since list is sorted */
+ GLuint k;
+ for (k = 0; k < list->Num; k++) {
+ if (list->Intervals[k].Reg == inv->Reg) {
+ /* found, remove it */
+ ASSERT(list->Intervals[k].Start == inv->Start);
+ ASSERT(list->Intervals[k].End == inv->End);
+ while (k < list->Num - 1) {
+ list->Intervals[k] = list->Intervals[k + 1];
+ k++;
+ }
+ list->Num--;
+ return;
+ }
+ }
+}
+
+
+/** called by qsort() */
+static int
+compare_start(const void *a, const void *b)
+{
+ const struct interval *ia = (const struct interval *) a;
+ const struct interval *ib = (const struct interval *) b;
+ if (ia->Start < ib->Start)
+ return -1;
+ else if (ia->Start > ib->Start)
+ return +1;
+ else
+ return 0;
+}
+
+
+/** sort the interval list according to interval starts */
+static void
+sort_interval_list_by_start(struct interval_list *list)
+{
+ qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
+#ifdef DEBUG
+ {
+ GLuint i;
+ for (i = 0; i + 1 < list->Num; i++) {
+ ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
+ }
+ }
+#endif
+}
+
+struct loop_info
+{
+ GLuint Start, End; /**< Start, end instructions of loop */
+};
+
+/**
+ * Update the intermediate interval info for register 'index' and
+ * instruction 'ic'.
+ */
+static void
+update_interval(GLint intBegin[], GLint intEnd[],
+ struct loop_info *loopStack, GLuint loopStackDepth,
+ GLuint index, GLuint ic)
+{
+ int i;
+ GLuint begin = ic;
+ GLuint end = ic;
+
+ /* If the register is used in a loop, extend its lifetime through the end
+ * of the outermost loop that doesn't contain its definition.
+ */
+ for (i = 0; i < loopStackDepth; i++) {
+ if (intBegin[index] < loopStack[i].Start) {
+ end = loopStack[i].End;
+ break;
+ }
+ }
+
+ /* Variables that are live at the end of a loop will also be live at the
+ * beginning, so an instruction inside of a loop should have its live
+ * interval begin at the start of the outermost loop.
+ */
+ if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
+ begin = loopStack[0].Start;
+ }
+
+ ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+ if (intBegin[index] == -1) {
+ ASSERT(intEnd[index] == -1);
+ intBegin[index] = begin;
+ intEnd[index] = end;
+ }
+ else {
+ intEnd[index] = end;
+ }
+}
+
+
+/**
+ * Find first/last instruction that references each temporary register.
+ */
+GLboolean
+_mesa_find_temp_intervals(const struct prog_instruction *instructions,
+ GLuint numInstructions,
+ GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
+ GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
+{
+ struct loop_info loopStack[MAX_LOOP_NESTING];
+ GLuint loopStackDepth = 0;
+ GLuint i;
+
+ for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
+ intBegin[i] = intEnd[i] = -1;
+ }
+
+ /* Scan instructions looking for temporary registers */
+ for (i = 0; i < numInstructions; i++) {
+ const struct prog_instruction *inst = instructions + i;
+ if (inst->Opcode == OPCODE_BGNLOOP) {
+ loopStack[loopStackDepth].Start = i;
+ loopStack[loopStackDepth].End = inst->BranchTarget;
+ loopStackDepth++;
+ }
+ else if (inst->Opcode == OPCODE_ENDLOOP) {
+ loopStackDepth--;
+ }
+ else if (inst->Opcode == OPCODE_CAL) {
+ return GL_FALSE;
+ }
+ else {
+ const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
+ GLuint j;
+ for (j = 0; j < numSrc; j++) {
+ if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+ const GLuint index = inst->SrcReg[j].Index;
+ if (inst->SrcReg[j].RelAddr)
+ return GL_FALSE;
+ update_interval(intBegin, intEnd, loopStack, loopStackDepth,
+ index, i);
+ }
+ }
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ const GLuint index = inst->DstReg.Index;
+ if (inst->DstReg.RelAddr)
+ return GL_FALSE;
+ update_interval(intBegin, intEnd, loopStack, loopStackDepth,
+ index, i);
+ }
+ }
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Find the live intervals for each temporary register in the program.
+ * For register R, the interval [A,B] indicates that R is referenced
+ * from instruction A through instruction B.
+ * Special consideration is needed for loops and subroutines.
+ * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
+ */
+static GLboolean
+find_live_intervals(struct gl_program *prog,
+ struct interval_list *liveIntervals)
+{
+ GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+ GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+ GLuint i;
+
+ /*
+ * Note: we'll return GL_FALSE below if we find relative indexing
+ * into the TEMP register file. We can't handle that yet.
+ * We also give up on subroutines for now.
+ */
+
+ if (dbg) {
+ printf("Optimize: Begin find intervals\n");
+ }
+
+ /* build intermediate arrays */
+ if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
+ intBegin, intEnd))
+ return GL_FALSE;
+
+ /* Build live intervals list from intermediate arrays */
+ liveIntervals->Num = 0;
+ for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
+ if (intBegin[i] >= 0) {
+ struct interval inv;
+ inv.Reg = i;
+ inv.Start = intBegin[i];
+ inv.End = intEnd[i];
+ append_interval(liveIntervals, &inv);
+ }
+ }
+
+ /* Sort the list according to interval starts */
+ sort_interval_list_by_start(liveIntervals);
+
+ if (dbg) {
+ /* print interval info */
+ for (i = 0; i < liveIntervals->Num; i++) {
+ const struct interval *inv = liveIntervals->Intervals + i;
+ printf("Reg[%d] live [%d, %d]:",
+ inv->Reg, inv->Start, inv->End);
+ if (1) {
+ GLuint j;
+ for (j = 0; j < inv->Start; j++)
+ printf(" ");
+ for (j = inv->Start; j <= inv->End; j++)
+ printf("x");
+ }
+ printf("\n");
+ }
+ }
+
+ return GL_TRUE;
+}
+
+
+/** Scan the array of used register flags to find free entry */
+static GLint
+alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
+{
+ GLuint k;
+ for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
+ if (!usedRegs[k]) {
+ usedRegs[k] = GL_TRUE;
+ return k;
+ }
+ }
+ return -1;
+}
+
+
+/**
+ * This function implements "Linear Scan Register Allocation" to reduce
+ * the number of temporary registers used by the program.
+ *
+ * We compute the "live interval" for all temporary registers then
+ * examine the overlap of the intervals to allocate new registers.
+ * Basically, if two intervals do not overlap, they can use the same register.
+ */
+static void
+_mesa_reallocate_registers(struct gl_program *prog)
+{
+ struct interval_list liveIntervals;
+ GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+ GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+ GLuint i;
+ GLint maxTemp = -1;
+
+ if (dbg) {
+ printf("Optimize: Begin live-interval register reallocation\n");
+ _mesa_print_program(prog);
+ }
+
+ for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
+ registerMap[i] = -1;
+ usedRegs[i] = GL_FALSE;
+ }
+
+ if (!find_live_intervals(prog, &liveIntervals)) {
+ if (dbg)
+ printf("Aborting register reallocation\n");
+ return;
+ }
+
+ {
+ struct interval_list activeIntervals;
+ activeIntervals.Num = 0;
+
+ /* loop over live intervals, allocating a new register for each */
+ for (i = 0; i < liveIntervals.Num; i++) {
+ const struct interval *live = liveIntervals.Intervals + i;
+
+ if (dbg)
+ printf("Consider register %u\n", live->Reg);
+
+ /* Expire old intervals. Intervals which have ended with respect
+ * to the live interval can have their remapped registers freed.
+ */
+ {
+ GLint j;
+ for (j = 0; j < (GLint) activeIntervals.Num; j++) {
+ const struct interval *inv = activeIntervals.Intervals + j;
+ if (inv->End >= live->Start) {
+ /* Stop now. Since the activeInterval list is sorted
+ * we know we don't have to go further.
+ */
+ break;
+ }
+ else {
+ /* Interval 'inv' has expired */
+ const GLint regNew = registerMap[inv->Reg];
+ ASSERT(regNew >= 0);
+
+ if (dbg)
+ printf(" expire interval for reg %u\n", inv->Reg);
+
+ /* remove interval j from active list */
+ remove_interval(&activeIntervals, inv);
+ j--; /* counter-act j++ in for-loop above */
+
+ /* return register regNew to the free pool */
+ if (dbg)
+ printf(" free reg %d\n", regNew);
+ ASSERT(usedRegs[regNew] == GL_TRUE);
+ usedRegs[regNew] = GL_FALSE;
+ }
+ }
+ }
+
+ /* find a free register for this live interval */
+ {
+ const GLint k = alloc_register(usedRegs);
+ if (k < 0) {
+ /* out of registers, give up */
+ return;
+ }
+ registerMap[live->Reg] = k;
+ maxTemp = MAX2(maxTemp, k);
+ if (dbg)
+ printf(" remap register %u -> %d\n", live->Reg, k);
+ }
+
+ /* Insert this live interval into the active list which is sorted
+ * by increasing end points.
+ */
+ insert_interval_by_end(&activeIntervals, live);
+ }
+ }
+
+ if (maxTemp + 1 < (GLint) liveIntervals.Num) {
+ /* OK, we've reduced the number of registers needed.
+ * Scan the program and replace all the old temporary register
+ * indexes with the new indexes.
+ */
+ replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
+
+ prog->NumTemporaries = maxTemp + 1;
+ }
+
+ if (dbg) {
+ printf("Optimize: End live-interval register reallocation\n");
+ printf("Num temp regs before: %u after: %u\n",
+ liveIntervals.Num, maxTemp + 1);
+ _mesa_print_program(prog);
+ }
+}
+
+
+#if 0
+static void
+print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
+ fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
+ _mesa_print_program(program);
+ _mesa_print_program_parameters(ctx, program);
+ fprintf(stderr, "\n\n");
+}
+#endif
+
+/**
+ * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
+ * instruction is the first instruction to write to register T0. The are
+ * several lowering passes done in GLSL IR (e.g. branches and
+ * relative addressing) that create a large number of conditional assignments
+ * that ir_to_mesa converts to CMP instructions like the one mentioned above.
+ *
+ * Here is why this conversion is safe:
+ * CMP T0, T1 T2 T0 can be expanded to:
+ * if (T1 < 0.0)
+ * MOV T0, T2;
+ * else
+ * MOV T0, T0;
+ *
+ * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
+ * as the original program. If (T1 < 0.0) evaluates to false, executing
+ * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
+ * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
+ * because any instruction that was going to read from T0 after this was going
+ * to read a garbage value anyway.
+ */
+static void
+_mesa_simplify_cmp(struct gl_program * program)
+{
+ GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+ GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
+ GLuint i;
+
+ if (dbg) {
+ printf("Optimize: Begin reads without writes\n");
+ _mesa_print_program(program);
+ }
+
+ for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
+ tempWrites[i] = 0;
+ }
+
+ for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
+ outputWrites[i] = 0;
+ }
+
+ for (i = 0; i < program->NumInstructions; i++) {
+ struct prog_instruction *inst = program->Instructions + i;
+ GLuint prevWriteMask;
+
+ /* Give up if we encounter relative addressing or flow control. */
+ if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
+ return;
+ }
+
+ if (inst->DstReg.File == PROGRAM_OUTPUT) {
+ assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
+ prevWriteMask = outputWrites[inst->DstReg.Index];
+ outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
+ } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+ prevWriteMask = tempWrites[inst->DstReg.Index];
+ tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
+ } else {
+ /* No other register type can be a destination register. */
+ continue;
+ }
+
+ /* For a CMP to be considered a conditional write, the destination
+ * register and source register two must be the same. */
+ if (inst->Opcode == OPCODE_CMP
+ && !(inst->DstReg.WriteMask & prevWriteMask)
+ && inst->SrcReg[2].File == inst->DstReg.File
+ && inst->SrcReg[2].Index == inst->DstReg.Index
+ && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = inst->SrcReg[1];
+ }
+ }
+ if (dbg) {
+ printf("Optimize: End reads without writes\n");
+ _mesa_print_program(program);
+ }
+}
+
+/**
+ * Apply optimizations to the given program to eliminate unnecessary
+ * instructions, temp regs, etc.
+ */
+void
+_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
+{
+ GLboolean any_change;
+
+ _mesa_simplify_cmp(program);
+ /* Stop when no modifications were output */
+ do {
+ any_change = GL_FALSE;
+ _mesa_remove_extra_move_use(program);
+ if (_mesa_remove_dead_code_global(program))
+ any_change = GL_TRUE;
+ if (_mesa_remove_extra_moves(program))
+ any_change = GL_TRUE;
+ if (_mesa_remove_dead_code_local(program))
+ any_change = GL_TRUE;
+ _mesa_reallocate_registers(program);
+ } while (any_change);
+}
+
|