diff options
| author | marha <marha@users.sourceforge.net> | 2011-08-29 08:51:20 +0200 | 
|---|---|---|
| committer | marha <marha@users.sourceforge.net> | 2011-08-29 08:51:20 +0200 | 
| commit | 01df5d59e56a1b060568f8cad2e89f7eea22fc70 (patch) | |
| tree | 9db83037fd85d0974b60fc1a05e0665083f26000 /mesalib/src/mesa/program | |
| parent | fd1f4d9fe3ea67fa6def8ee4927a8f71e0440f12 (diff) | |
| download | vcxsrv-01df5d59e56a1b060568f8cad2e89f7eea22fc70.tar.gz vcxsrv-01df5d59e56a1b060568f8cad2e89f7eea22fc70.tar.bz2 vcxsrv-01df5d59e56a1b060568f8cad2e89f7eea22fc70.zip | |
xwininfo libX11 libXmu libxcb mesa xserver xkeyboard-config git update 29 Aug 2011
Diffstat (limited to 'mesalib/src/mesa/program')
| -rw-r--r-- | mesalib/src/mesa/program/ir_to_mesa.cpp | 179 | ||||
| -rw-r--r-- | mesalib/src/mesa/program/prog_execute.c | 10 | ||||
| -rw-r--r-- | mesalib/src/mesa/program/prog_opt_constant_fold.c | 451 | ||||
| -rw-r--r-- | mesalib/src/mesa/program/prog_optimize.c | 2 | ||||
| -rw-r--r-- | mesalib/src/mesa/program/prog_optimize.h | 97 | ||||
| -rw-r--r-- | mesalib/src/mesa/program/register_allocate.c | 21 | ||||
| -rw-r--r-- | mesalib/src/mesa/program/register_allocate.h | 2 | 
7 files changed, 688 insertions, 74 deletions
| diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp index 1ef609fe1..6820e4c6b 100644 --- a/mesalib/src/mesa/program/ir_to_mesa.cpp +++ b/mesalib/src/mesa/program/ir_to_mesa.cpp @@ -297,11 +297,11 @@ public:     /**      * Emit the correct dot-product instruction for the type of arguments      */ -   void emit_dp(ir_instruction *ir, -	        dst_reg dst, -	        src_reg src0, -	        src_reg src1, -	        unsigned elements); +   ir_to_mesa_instruction * emit_dp(ir_instruction *ir, +				    dst_reg dst, +				    src_reg src0, +				    src_reg src1, +				    unsigned elements);     void emit_scalar(ir_instruction *ir, enum prog_opcode op,  		    dst_reg dst, src_reg src0); @@ -312,9 +312,11 @@ public:     void emit_scs(ir_instruction *ir, enum prog_opcode op,  		 dst_reg dst, const src_reg &src); -   GLboolean try_emit_mad(ir_expression *ir, +   bool try_emit_mad(ir_expression *ir,  			  int mul_operand); -   GLboolean try_emit_sat(ir_expression *ir); +   bool try_emit_mad_for_and_not(ir_expression *ir, +				 int mul_operand); +   bool try_emit_sat(ir_expression *ir);     void emit_swz(ir_expression *ir); @@ -408,7 +410,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)     return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);  } -void +ir_to_mesa_instruction *  ir_to_mesa_visitor::emit_dp(ir_instruction *ir,  			    dst_reg dst, src_reg src0, src_reg src1,  			    unsigned elements) @@ -417,7 +419,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir,        OPCODE_DP2, OPCODE_DP3, OPCODE_DP4     }; -   emit(ir, dot_opcodes[elements - 2], dst, src0, src1); +   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);  }  /** @@ -579,7 +581,7 @@ ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,     }  } -struct src_reg +src_reg  ir_to_mesa_visitor::src_reg_for_float(float val)  {     src_reg src(PROGRAM_CONSTANT, -1, NULL); @@ -723,7 +725,7 @@ 
ir_to_mesa_visitor::visit(ir_variable *ir)  	 }        } -      struct variable_storage *storage; +      variable_storage *storage;        dst_reg dst;        if (i == ir->num_state_slots) {  	 /* We'll set the index later. */ @@ -869,7 +871,7 @@ ir_to_mesa_visitor::visit(ir_function *ir)     }  } -GLboolean +bool  ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)  {     int nonmul_operand = 1 - mul_operand; @@ -892,7 +894,47 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)     return true;  } -GLboolean +/** + * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) + * + * The logic values are 1.0 for true and 0.0 for false.  Logical-and is + * implemented using multiplication, and logical-or is implemented using + * addition.  Logical-not can be implemented as (true - x), or (1.0 - x). + * As result, the logical expression (a & !b) can be rewritten as: + * + *     - a * !b + *     - a * (1 - b) + *     - (a * 1) - (a * b) + *     - a + -(a * b) + *     - a + (a * -b) + * + * This final expression can be implemented as a single MAD(a, -b, a) + * instruction. 
+ */ +bool +ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) +{ +   const int other_operand = 1 - try_operand; +   src_reg a, b; + +   ir_expression *expr = ir->operands[try_operand]->as_expression(); +   if (!expr || expr->operation != ir_unop_logic_not) +      return false; + +   ir->operands[other_operand]->accept(this); +   a = this->result; +   expr->operands[0]->accept(this); +   b = this->result; + +   b.negate = ~b.negate; + +   this->result = get_temp(ir->type); +   emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); + +   return true; +} + +bool  ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)  {     /* Saturates were only introduced to vertex programs in @@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir)        if (try_emit_mad(ir, 0))  	 return;     } + +   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) +    */ +   if (ir->operation == ir_binop_logic_and) { +      if (try_emit_mad_for_and_not(ir, 1)) +	 return; +      if (try_emit_mad_for_and_not(ir, 0)) +	 return; +   } +     if (try_emit_sat(ir))        return; @@ -1135,7 +1187,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir)     switch (ir->operation) {     case ir_unop_logic_not: -      emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0)); +      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many +       * older GPUs implement SEQ using multiple instructions (i915 uses two +       * SGE instructions and a MUL instruction).  Since our logic values are +       * 0.0 and 1.0, 1-x also implements !x. 
+       */ +      op[0].negate = ~op[0].negate; +      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));        break;     case ir_unop_neg:        op[0].negate = ~op[0].negate; @@ -1231,8 +1289,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir)  	  ir->operands[1]->type->is_vector()) {  	 src_reg temp = get_temp(glsl_type::vec4_type);  	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]); + +	 /* After the dot-product, the value will be an integer on the +	  * range [0,4].  Zero becomes 1.0, and positive values become zero. +	  */  	 emit_dp(ir, result_dst, temp, temp, vector_elements); -	 emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0)); + +	 /* Negating the result of the dot-product gives values on the range +	  * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This +	  * achieved using SGE. +	  */ +	 src_reg sge_src = result_src; +	 sge_src.negate = ~sge_src.negate; +	 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));        } else {  	 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);        } @@ -1243,29 +1312,83 @@ ir_to_mesa_visitor::visit(ir_expression *ir)  	  ir->operands[1]->type->is_vector()) {  	 src_reg temp = get_temp(glsl_type::vec4_type);  	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]); -	 emit_dp(ir, result_dst, temp, temp, vector_elements); -	 emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + +	 /* After the dot-product, the value will be an integer on the +	  * range [0,4].  Zero stays zero, and positive values become 1.0. +	  */ +	 ir_to_mesa_instruction *const dp = +	    emit_dp(ir, result_dst, temp, temp, vector_elements); +	 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { +	    /* The clamping to [0,1] can be done for free in the fragment +	     * shader with a saturate. +	     */ +	    dp->saturate = true; +	 } else { +	    /* Negating the result of the dot-product gives values on the range +	     * [-4, 0].  
Zero stays zero, and negative values become 1.0.  This +	     * achieved using SLT. +	     */ +	    src_reg slt_src = result_src; +	    slt_src.negate = ~slt_src.negate; +	    emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); +	 }        } else {  	 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);        }        break; -   case ir_unop_any: +   case ir_unop_any: {        assert(ir->operands[0]->type->is_vector()); -      emit_dp(ir, result_dst, op[0], op[0], -	      ir->operands[0]->type->vector_elements); -      emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); + +      /* After the dot-product, the value will be an integer on the +       * range [0,4].  Zero stays zero, and positive values become 1.0. +       */ +      ir_to_mesa_instruction *const dp = +	 emit_dp(ir, result_dst, op[0], op[0], +		 ir->operands[0]->type->vector_elements); +      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { +	 /* The clamping to [0,1] can be done for free in the fragment +	  * shader with a saturate. +	  */ +	 dp->saturate = true; +      } else { +	 /* Negating the result of the dot-product gives values on the range +	  * [-4, 0].  Zero stays zero, and negative values become 1.0.  This +	  * is achieved using SLT. +	  */ +	 src_reg slt_src = result_src; +	 slt_src.negate = ~slt_src.negate; +	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); +      }        break; +   }     case ir_binop_logic_xor:        emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);        break; -   case ir_binop_logic_or: -      /* This could be a saturated add and skip the SNE. */ -      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); -      emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0)); +   case ir_binop_logic_or: { +      /* After the addition, the value will be an integer on the +       * range [0,2].  Zero stays zero, and positive values become 1.0. 
+       */ +      ir_to_mesa_instruction *add = +	 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); +      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { +	 /* The clamping to [0,1] can be done for free in the fragment +	  * shader with a saturate. +	  */ +	 add->saturate = true; +      } else { +	 /* Negating the result of the addition gives values on the range +	  * [-2, 0].  Zero stays zero, and negative values become 1.0.  This +	  * is achieved using SLT. +	  */ +	 src_reg slt_src = result_src; +	 slt_src.negate = ~slt_src.negate; +	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); +      }        break; +   }     case ir_binop_logic_and:        /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ @@ -1981,7 +2104,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir)     ir_to_mesa_instruction *inst = NULL;     prog_opcode opcode = OPCODE_NOP; -   ir->coordinate->accept(this); +   if (ir->op == ir_txs) +      this->result = src_reg_for_float(0.0); +   else +      ir->coordinate->accept(this);     /* Put our coords in a temp.  
We'll need to modify them for shadow,      * projection, or LOD, so the only case we'd use it as is is if @@ -2005,6 +2131,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir)     switch (ir->op) {     case ir_tex: +   case ir_txs:        opcode = OPCODE_TEX;        break;     case ir_txb: diff --git a/mesalib/src/mesa/program/prog_execute.c b/mesalib/src/mesa/program/prog_execute.c index dbfd1b918..77f842a16 100644 --- a/mesalib/src/mesa/program/prog_execute.c +++ b/mesalib/src/mesa/program/prog_execute.c @@ -639,7 +639,7 @@ _mesa_execute_program(struct gl_context * ctx,                        struct gl_program_machine *machine)  {     const GLuint numInst = program->NumInstructions; -   const GLuint maxExec = 10000; +   const GLuint maxExec = 65536;     GLuint pc, numExec = 0;     machine->CurProgram = program; @@ -1651,6 +1651,14 @@ _mesa_execute_program(struct gl_context * ctx,              GLfloat texcoord[4], color[4];              fetch_vector4(&inst->SrcReg[0], machine, texcoord); +            /* For TEX, texcoord.Q should not be used and its value should not +             * matter (at most, we pass coord.xyz to texture3D() in GLSL). 
+             * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value +             * which is effectively what happens when the texcoord swizzle +             * is .xyzz +             */ +            texcoord[3] = 1.0f; +              fetch_texel(ctx, machine, inst, texcoord, 0.0, color);              if (DEBUG_PROG) { diff --git a/mesalib/src/mesa/program/prog_opt_constant_fold.c b/mesalib/src/mesa/program/prog_opt_constant_fold.c new file mode 100644 index 000000000..e2418b554 --- /dev/null +++ b/mesalib/src/mesa/program/prog_opt_constant_fold.c @@ -0,0 +1,451 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "program.h" +#include "prog_instruction.h" +#include "prog_optimize.h" +#include "prog_parameter.h" +#include <stdbool.h> + +static bool +src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs) +{ +   unsigned i; + +   for (i = 0; i < num_srcs; i++) { +      if (inst->SrcReg[i].File != PROGRAM_CONSTANT) +	 return false; +   } + +   return true; +} + +static struct prog_src_register +src_reg_for_float(struct gl_program *prog, float val) +{ +   struct prog_src_register src; +   unsigned swiz; + +   memset(&src, 0, sizeof(src)); + +   src.File = PROGRAM_CONSTANT; +   src.Index = _mesa_add_unnamed_constant(prog->Parameters, +					  (gl_constant_value *) &val, 1, &swiz); +   src.Swizzle = swiz; +   return src; +} + +static struct prog_src_register +src_reg_for_vec4(struct gl_program *prog, const float *val) +{ +   struct prog_src_register src; +   unsigned swiz; + +   memset(&src, 0, sizeof(src)); + +   src.File = PROGRAM_CONSTANT; +   src.Index = _mesa_add_unnamed_constant(prog->Parameters, +					  (gl_constant_value *) val, 4, &swiz); +   src.Swizzle = swiz; +   return src; +} + +static bool +src_regs_are_same(const struct prog_src_register *a, +		  const struct prog_src_register *b) +{ +   return (a->File == b->File) +      && (a->Index == b->Index) +      && (a->Swizzle == b->Swizzle) +      && (a->Abs == b->Abs) +      && (a->Negate == b->Negate) +      && (a->RelAddr == 0) +      && (b->RelAddr == 0); +} + +static void +get_value(struct gl_program *prog, struct prog_src_register *r, float *data) +{ +   const gl_constant_value *const value = +      prog->Parameters->ParameterValues[r->Index]; + +   data[0] = value[GET_SWZ(r->Swizzle, 0)].f; +   data[1] = value[GET_SWZ(r->Swizzle, 1)].f; +   data[2] = value[GET_SWZ(r->Swizzle, 2)].f; +   data[3] = value[GET_SWZ(r->Swizzle, 3)].f; + +   if (r->Abs) { +      data[0] = fabsf(data[0]); +      data[1] = 
fabsf(data[1]); +      data[2] = fabsf(data[2]); +      data[3] = fabsf(data[3]); +   } + +   if (r->Negate & 0x01) { +      data[0] = -data[0]; +   } + +   if (r->Negate & 0x02) { +      data[1] = -data[1]; +   } + +   if (r->Negate & 0x04) { +      data[2] = -data[2]; +   } + +   if (r->Negate & 0x08) { +      data[3] = -data[3]; +   } +} + +/** + * Try to replace instructions that produce a constant result with simple moves + * + * The hope is that a following copy propagation pass will eliminate the + * unnecessary move instructions. + */ +GLboolean +_mesa_constant_fold(struct gl_program *prog) +{ +   bool progress = false; +   unsigned i; + +   for (i = 0; i < prog->NumInstructions; i++) { +      struct prog_instruction *const inst = &prog->Instructions[i]; + +      switch (inst->Opcode) { +      case OPCODE_ADD: +	 if (src_regs_are_constant(inst, 2)) { +	    float a[4]; +	    float b[4]; +	    float result[4]; + +	    get_value(prog, &inst->SrcReg[0], a); +	    get_value(prog, &inst->SrcReg[1], b); + +	    result[0] = a[0] + b[0]; +	    result[1] = a[1] + b[1]; +	    result[2] = a[2] + b[2]; +	    result[3] = a[3] + b[3]; + +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_vec4(prog, result); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } +	 break; + +      case OPCODE_CMP: +	 /* FINISHME: We could also optimize CMP instructions where the first +	  * FINISHME: source is a constant that is either all < 0.0 or all +	  * FINISHME: >= 0.0. +	  */ +	 if (src_regs_are_constant(inst, 3)) { +	    float a[4]; +	    float b[4]; +	    float c[4]; +	    float result[4]; + +	    get_value(prog, &inst->SrcReg[0], a); +	    get_value(prog, &inst->SrcReg[1], b); +	    get_value(prog, &inst->SrcReg[2], c); + +            result[0] = a[0] < 0.0f ? b[0] : c[0]; +            result[1] = a[1] < 0.0f ? b[1] : c[1]; +            result[2] = a[2] < 0.0f ? 
b[2] : c[2]; +            result[3] = a[3] < 0.0f ? b[3] : c[3]; + +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_vec4(prog, result); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; +	    inst->SrcReg[2].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } +	 break; + +      case OPCODE_DP2: +      case OPCODE_DP3: +      case OPCODE_DP4: +	 if (src_regs_are_constant(inst, 2)) { +	    float a[4]; +	    float b[4]; +	    float result; + +	    get_value(prog, &inst->SrcReg[0], a); +	    get_value(prog, &inst->SrcReg[1], b); + +	    /* It seems like a loop could be used here, but we cleverly put +	     * DP2A between DP2 and DP3.  Subtracting DP2 (or similar) from +	     * the opcode results in various failures of the loop control. +	     */ +	    result = (a[0] * b[0]) + (a[1] * b[1]); + +	    if (inst->Opcode >= OPCODE_DP3) +	       result += a[2] * b[2]; + +	    if (inst->Opcode == OPCODE_DP4) +	       result += a[3] * b[3]; + +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_float(prog, result); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } +	 break; + +      case OPCODE_MUL: +	 if (src_regs_are_constant(inst, 2)) { +	    float a[4]; +	    float b[4]; +	    float result[4]; + +	    get_value(prog, &inst->SrcReg[0], a); +	    get_value(prog, &inst->SrcReg[1], b); + +	    result[0] = a[0] * b[0]; +	    result[1] = a[1] * b[1]; +	    result[2] = a[2] * b[2]; +	    result[3] = a[3] * b[3]; + +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_vec4(prog, result); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } +	 break; + +      case OPCODE_SEQ: +	 if (src_regs_are_constant(inst, 2)) { +	    float a[4]; +	    float b[4]; +	    float result[4]; + +	    
get_value(prog, &inst->SrcReg[0], a); +	    get_value(prog, &inst->SrcReg[1], b); + +	    result[0] = (a[0] == b[0]) ? 1.0f : 0.0f; +	    result[1] = (a[1] == b[1]) ? 1.0f : 0.0f; +	    result[2] = (a[2] == b[2]) ? 1.0f : 0.0f; +	    result[3] = (a[3] == b[3]) ? 1.0f : 0.0f; + +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_vec4(prog, result); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } +	 break; + +      case OPCODE_SGE: +	 if (src_regs_are_constant(inst, 2)) { +	    float a[4]; +	    float b[4]; +	    float result[4]; + +	    get_value(prog, &inst->SrcReg[0], a); +	    get_value(prog, &inst->SrcReg[1], b); + +	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f; +	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f; +	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f; +	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f; + +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_vec4(prog, result); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } +	 break; + +      case OPCODE_SGT: +	 if (src_regs_are_constant(inst, 2)) { +	    float a[4]; +	    float b[4]; +	    float result[4]; + +	    get_value(prog, &inst->SrcReg[0], a); +	    get_value(prog, &inst->SrcReg[1], b); + +	    result[0] = (a[0] > b[0]) ? 1.0f : 0.0f; +	    result[1] = (a[1] > b[1]) ? 
1.0f : 0.0f; +	    result[2] = (a[2] > b[2]) ? 1.0f : 0.0f; +	    result[3] = (a[3] > b[3]) ? 1.0f : 0.0f; + +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_vec4(prog, result); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } +	 break; + +      case OPCODE_SLE: +	 if (src_regs_are_constant(inst, 2)) { +	    float a[4]; +	    float b[4]; +	    float result[4]; + +	    get_value(prog, &inst->SrcReg[0], a); +	    get_value(prog, &inst->SrcReg[1], b); + +	    result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f; +	    result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f; +	    result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f; +	    result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f; + +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_vec4(prog, result); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } +	 break; + +      case OPCODE_SLT: +	 if (src_regs_are_constant(inst, 2)) { +	    float a[4]; +	    float b[4]; +	    float result[4]; + +	    get_value(prog, &inst->SrcReg[0], a); +	    get_value(prog, &inst->SrcReg[1], b); + +	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f; +	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f; +	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f; +	    result[3] = (a[3] < b[3]) ? 
1.0f : 0.0f; + +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_vec4(prog, result); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } +	 break; + +      case OPCODE_SNE: +	 if (src_regs_are_constant(inst, 2)) { +	    float a[4]; +	    float b[4]; +	    float result[4]; + +	    get_value(prog, &inst->SrcReg[0], a); +	    get_value(prog, &inst->SrcReg[1], b); + +	    result[0] = (a[0] != b[0]) ? 1.0f : 0.0f; +	    result[1] = (a[1] != b[1]) ? 1.0f : 0.0f; +	    result[2] = (a[2] != b[2]) ? 1.0f : 0.0f; +	    result[3] = (a[3] != b[3]) ? 1.0f : 0.0f; + +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_vec4(prog, result); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { +	    inst->Opcode = OPCODE_MOV; +	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + +	    inst->SrcReg[1].File = PROGRAM_UNDEFINED; +	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + +	    progress = true; +	 } +	 break; + +      default: +	 break; +      } +   } + +   return progress; +} diff --git a/mesalib/src/mesa/program/prog_optimize.c b/mesalib/src/mesa/program/prog_optimize.c index 3340ce049..25d9684b1 100644 --- a/mesalib/src/mesa/program/prog_optimize.c +++ b/mesalib/src/mesa/program/prog_optimize.c @@ -1358,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)           any_change = GL_TRUE;        if (_mesa_remove_dead_code_local(program))           any_change = GL_TRUE; + +      any_change = _mesa_constant_fold(program) || 
any_change;        _mesa_reallocate_registers(program);     } while (any_change);  } diff --git a/mesalib/src/mesa/program/prog_optimize.h b/mesalib/src/mesa/program/prog_optimize.h index 8dc58ee52..9854fb7a4 100644 --- a/mesalib/src/mesa/program/prog_optimize.h +++ b/mesalib/src/mesa/program/prog_optimize.h @@ -1,47 +1,50 @@ -/*
 - * Mesa 3-D graphics library
 - * Version:  7.5
 - *
 - * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
 - *
 - * Permission is hereby granted, free of charge, to any person obtaining a
 - * copy of this software and associated documentation files (the "Software"),
 - * to deal in the Software without restriction, including without limitation
 - * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 - * and/or sell copies of the Software, and to permit persons to whom the
 - * Software is furnished to do so, subject to the following conditions:
 - *
 - * The above copyright notice and this permission notice shall be included
 - * in all copies or substantial portions of the Software.
 - *
 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 - * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 - */
 -
 -#ifndef PROG_OPT_H
 -#define PROG_OPT_H
 -
 -
 -#include "main/config.h"
 -#include "main/glheader.h"
 -
 -
 -struct gl_context;
 -struct gl_program;
 -struct prog_instruction;
 -
 -
 -extern GLboolean
 -_mesa_find_temp_intervals(const struct prog_instruction *instructions,
 -                          GLuint numInstructions,
 -                          GLint intBegin[MAX_PROGRAM_TEMPS],
 -                          GLint intEnd[MAX_PROGRAM_TEMPS]);
 -
 -extern void
 -_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program);
 -
 -#endif
 +/* + * Mesa 3-D graphics library + * Version:  7.5 + * + * Copyright (C) 2009  VMware, Inc.  All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef PROG_OPT_H +#define PROG_OPT_H + + +#include "main/config.h" +#include "main/glheader.h" + + +struct gl_context; +struct gl_program; +struct prog_instruction; + + +extern GLboolean +_mesa_find_temp_intervals(const struct prog_instruction *instructions, +                          GLuint numInstructions, +                          GLint intBegin[MAX_PROGRAM_TEMPS], +                          GLint intEnd[MAX_PROGRAM_TEMPS]); + +extern void +_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program); + +extern GLboolean +_mesa_constant_fold(struct gl_program *prog); + +#endif diff --git a/mesalib/src/mesa/program/register_allocate.c b/mesalib/src/mesa/program/register_allocate.c index de96eb42c..f5b5174fc 100644 --- a/mesalib/src/mesa/program/register_allocate.c +++ b/mesalib/src/mesa/program/register_allocate.c @@ -200,6 +200,27 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)     }  } +/** + * Adds a conflict between base_reg and reg, and also between reg and + * anything that base_reg conflicts with. + * + * This can simplify code for setting up multiple register classes + * which are aggregates of some base hardware registers, compared to + * explicitly using ra_add_reg_conflict. 
+ */ +void +ra_add_transitive_reg_conflict(struct ra_regs *regs, +			       unsigned int base_reg, unsigned int reg) +{ +   int i; + +   ra_add_reg_conflict(regs, reg, base_reg); + +   for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) { +      ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]); +   } +} +  unsigned int  ra_alloc_reg_class(struct ra_regs *regs)  { diff --git a/mesalib/src/mesa/program/register_allocate.h b/mesalib/src/mesa/program/register_allocate.h index 5b95833f3..ee2e58a47 100644 --- a/mesalib/src/mesa/program/register_allocate.h +++ b/mesalib/src/mesa/program/register_allocate.h @@ -40,6 +40,8 @@ struct ra_regs *ra_alloc_reg_set(unsigned int count);  unsigned int ra_alloc_reg_class(struct ra_regs *regs);  void ra_add_reg_conflict(struct ra_regs *regs,  			 unsigned int r1, unsigned int r2); +void ra_add_transitive_reg_conflict(struct ra_regs *regs, +				    unsigned int base_reg, unsigned int reg);  void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);  void ra_set_finalize(struct ra_regs *regs);  /** @} */ | 
