aboutsummaryrefslogtreecommitdiff
path: root/mesalib/src/mesa/program
diff options
context:
space:
mode:
authormarha <marha@users.sourceforge.net>2011-08-29 08:51:20 +0200
committermarha <marha@users.sourceforge.net>2011-08-29 08:51:20 +0200
commit01df5d59e56a1b060568f8cad2e89f7eea22fc70 (patch)
tree9db83037fd85d0974b60fc1a05e0665083f26000 /mesalib/src/mesa/program
parentfd1f4d9fe3ea67fa6def8ee4927a8f71e0440f12 (diff)
downloadvcxsrv-01df5d59e56a1b060568f8cad2e89f7eea22fc70.tar.gz
vcxsrv-01df5d59e56a1b060568f8cad2e89f7eea22fc70.tar.bz2
vcxsrv-01df5d59e56a1b060568f8cad2e89f7eea22fc70.zip
xwininfo libX11 libXmu libxcb mesa xserver xkeyboard-config git update 29
aug 2011
Diffstat (limited to 'mesalib/src/mesa/program')
-rw-r--r--mesalib/src/mesa/program/ir_to_mesa.cpp179
-rw-r--r--mesalib/src/mesa/program/prog_execute.c10
-rw-r--r--mesalib/src/mesa/program/prog_opt_constant_fold.c451
-rw-r--r--mesalib/src/mesa/program/prog_optimize.c2
-rw-r--r--mesalib/src/mesa/program/prog_optimize.h97
-rw-r--r--mesalib/src/mesa/program/register_allocate.c21
-rw-r--r--mesalib/src/mesa/program/register_allocate.h2
7 files changed, 688 insertions, 74 deletions
diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp
index 1ef609fe1..6820e4c6b 100644
--- a/mesalib/src/mesa/program/ir_to_mesa.cpp
+++ b/mesalib/src/mesa/program/ir_to_mesa.cpp
@@ -297,11 +297,11 @@ public:
/**
* Emit the correct dot-product instruction for the type of arguments
*/
- void emit_dp(ir_instruction *ir,
- dst_reg dst,
- src_reg src0,
- src_reg src1,
- unsigned elements);
+ ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
+ dst_reg dst,
+ src_reg src0,
+ src_reg src1,
+ unsigned elements);
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, src_reg src0);
@@ -312,9 +312,11 @@ public:
void emit_scs(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, const src_reg &src);
- GLboolean try_emit_mad(ir_expression *ir,
+ bool try_emit_mad(ir_expression *ir,
int mul_operand);
- GLboolean try_emit_sat(ir_expression *ir);
+ bool try_emit_mad_for_and_not(ir_expression *ir,
+ int mul_operand);
+ bool try_emit_sat(ir_expression *ir);
void emit_swz(ir_expression *ir);
@@ -408,7 +410,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
}
-void
+ir_to_mesa_instruction *
ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
dst_reg dst, src_reg src0, src_reg src1,
unsigned elements)
@@ -417,7 +419,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
};
- emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+ return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
}
/**
@@ -579,7 +581,7 @@ ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
}
}
-struct src_reg
+src_reg
ir_to_mesa_visitor::src_reg_for_float(float val)
{
src_reg src(PROGRAM_CONSTANT, -1, NULL);
@@ -723,7 +725,7 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
}
}
- struct variable_storage *storage;
+ variable_storage *storage;
dst_reg dst;
if (i == ir->num_state_slots) {
/* We'll set the index later. */
@@ -869,7 +871,7 @@ ir_to_mesa_visitor::visit(ir_function *ir)
}
}
-GLboolean
+bool
ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
{
int nonmul_operand = 1 - mul_operand;
@@ -892,7 +894,47 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
return true;
}
-GLboolean
+/**
+ * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false. Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition. Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As result, the logical expression (a & !b) can be rewritten as:
+ *
+ * - a * !b
+ * - a * (1 - b)
+ * - (a * 1) - (a * b)
+ * - a + -(a * b)
+ * - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+ const int other_operand = 1 - try_operand;
+ src_reg a, b;
+
+ ir_expression *expr = ir->operands[try_operand]->as_expression();
+ if (!expr || expr->operation != ir_unop_logic_not)
+ return false;
+
+ ir->operands[other_operand]->accept(this);
+ a = this->result;
+ expr->operands[0]->accept(this);
+ b = this->result;
+
+ b.negate = ~b.negate;
+
+ this->result = get_temp(ir->type);
+ emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
+
+ return true;
+}
+
+bool
ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
{
/* Saturates were only introduced to vertex programs in
@@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
if (try_emit_mad(ir, 0))
return;
}
+
+ /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
+ */
+ if (ir->operation == ir_binop_logic_and) {
+ if (try_emit_mad_for_and_not(ir, 1))
+ return;
+ if (try_emit_mad_for_and_not(ir, 0))
+ return;
+ }
+
if (try_emit_sat(ir))
return;
@@ -1135,7 +1187,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
switch (ir->operation) {
case ir_unop_logic_not:
- emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0));
+ /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
+ * older GPUs implement SEQ using multiple instructions (i915 uses two
+ * SGE instructions and a MUL instruction). Since our logic values are
+ * 0.0 and 1.0, 1-x also implements !x.
+ */
+ op[0].negate = ~op[0].negate;
+ emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
break;
case ir_unop_neg:
op[0].negate = ~op[0].negate;
@@ -1231,8 +1289,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
ir->operands[1]->type->is_vector()) {
src_reg temp = get_temp(glsl_type::vec4_type);
emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero becomes 1.0, and positive values become zero.
+ */
emit_dp(ir, result_dst, temp, temp, vector_elements);
- emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0));
+
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero becomes 1.0, and negative values become zero. This
+ * achieved using SGE.
+ */
+ src_reg sge_src = result_src;
+ sge_src.negate = ~sge_src.negate;
+ emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
} else {
emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
}
@@ -1243,29 +1312,83 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
ir->operands[1]->type->is_vector()) {
src_reg temp = get_temp(glsl_type::vec4_type);
emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
- emit_dp(ir, result_dst, temp, temp, vector_elements);
- emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ ir_to_mesa_instruction *const dp =
+ emit_dp(ir, result_dst, temp, temp, vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * achieved using SLT.
+ */
+ src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+ }
} else {
emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
}
break;
- case ir_unop_any:
+ case ir_unop_any: {
assert(ir->operands[0]->type->is_vector());
- emit_dp(ir, result_dst, op[0], op[0],
- ir->operands[0]->type->vector_elements);
- emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ ir_to_mesa_instruction *const dp =
+ emit_dp(ir, result_dst, op[0], op[0],
+ ir->operands[0]->type->vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+ }
break;
+ }
case ir_binop_logic_xor:
emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
break;
- case ir_binop_logic_or:
- /* This could be a saturated add and skip the SNE. */
- emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
- emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+ case ir_binop_logic_or: {
+ /* After the addition, the value will be an integer on the
+ * range [0,2]. Zero stays zero, and positive values become 1.0.
+ */
+ ir_to_mesa_instruction *add =
+ emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ add->saturate = true;
+ } else {
+ /* Negating the result of the addition gives values on the range
+ * [-2, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+ }
break;
+ }
case ir_binop_logic_and:
/* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
@@ -1981,7 +2104,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
ir_to_mesa_instruction *inst = NULL;
prog_opcode opcode = OPCODE_NOP;
- ir->coordinate->accept(this);
+ if (ir->op == ir_txs)
+ this->result = src_reg_for_float(0.0);
+ else
+ ir->coordinate->accept(this);
/* Put our coords in a temp. We'll need to modify them for shadow,
* projection, or LOD, so the only case we'd use it as is is if
@@ -2005,6 +2131,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
switch (ir->op) {
case ir_tex:
+ case ir_txs:
opcode = OPCODE_TEX;
break;
case ir_txb:
diff --git a/mesalib/src/mesa/program/prog_execute.c b/mesalib/src/mesa/program/prog_execute.c
index dbfd1b918..77f842a16 100644
--- a/mesalib/src/mesa/program/prog_execute.c
+++ b/mesalib/src/mesa/program/prog_execute.c
@@ -639,7 +639,7 @@ _mesa_execute_program(struct gl_context * ctx,
struct gl_program_machine *machine)
{
const GLuint numInst = program->NumInstructions;
- const GLuint maxExec = 10000;
+ const GLuint maxExec = 65536;
GLuint pc, numExec = 0;
machine->CurProgram = program;
@@ -1651,6 +1651,14 @@ _mesa_execute_program(struct gl_context * ctx,
GLfloat texcoord[4], color[4];
fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+ /* For TEX, texcoord.Q should not be used and its value should not
+ * matter (at most, we pass coord.xyz to texture3D() in GLSL).
+ * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
+ * which is effectively what happens when the texcoord swizzle
+ * is .xyzz
+ */
+ texcoord[3] = 1.0f;
+
fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
if (DEBUG_PROG) {
diff --git a/mesalib/src/mesa/program/prog_opt_constant_fold.c b/mesalib/src/mesa/program/prog_opt_constant_fold.c
new file mode 100644
index 000000000..e2418b554
--- /dev/null
+++ b/mesalib/src/mesa/program/prog_opt_constant_fold.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "program.h"
+#include "prog_instruction.h"
+#include "prog_optimize.h"
+#include "prog_parameter.h"
+#include <stdbool.h>
+
+static bool
+src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
+{
+ unsigned i;
+
+ for (i = 0; i < num_srcs; i++) {
+ if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
+ return false;
+ }
+
+ return true;
+}
+
+static struct prog_src_register
+src_reg_for_float(struct gl_program *prog, float val)
+{
+ struct prog_src_register src;
+ unsigned swiz;
+
+ memset(&src, 0, sizeof(src));
+
+ src.File = PROGRAM_CONSTANT;
+ src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+ (gl_constant_value *) &val, 1, &swiz);
+ src.Swizzle = swiz;
+ return src;
+}
+
+static struct prog_src_register
+src_reg_for_vec4(struct gl_program *prog, const float *val)
+{
+ struct prog_src_register src;
+ unsigned swiz;
+
+ memset(&src, 0, sizeof(src));
+
+ src.File = PROGRAM_CONSTANT;
+ src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+ (gl_constant_value *) val, 4, &swiz);
+ src.Swizzle = swiz;
+ return src;
+}
+
+static bool
+src_regs_are_same(const struct prog_src_register *a,
+ const struct prog_src_register *b)
+{
+ return (a->File == b->File)
+ && (a->Index == b->Index)
+ && (a->Swizzle == b->Swizzle)
+ && (a->Abs == b->Abs)
+ && (a->Negate == b->Negate)
+ && (a->RelAddr == 0)
+ && (b->RelAddr == 0);
+}
+
+static void
+get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
+{
+ const gl_constant_value *const value =
+ prog->Parameters->ParameterValues[r->Index];
+
+ data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
+ data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
+ data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
+ data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
+
+ if (r->Abs) {
+ data[0] = fabsf(data[0]);
+ data[1] = fabsf(data[1]);
+ data[2] = fabsf(data[2]);
+ data[3] = fabsf(data[3]);
+ }
+
+ if (r->Negate & 0x01) {
+ data[0] = -data[0];
+ }
+
+ if (r->Negate & 0x02) {
+ data[1] = -data[1];
+ }
+
+ if (r->Negate & 0x04) {
+ data[2] = -data[2];
+ }
+
+ if (r->Negate & 0x08) {
+ data[3] = -data[3];
+ }
+}
+
+/**
+ * Try to replace instructions that produce a constant result with simple moves
+ *
+ * The hope is that a following copy propagation pass will eliminate the
+ * unnecessary move instructions.
+ */
+GLboolean
+_mesa_constant_fold(struct gl_program *prog)
+{
+ bool progress = false;
+ unsigned i;
+
+ for (i = 0; i < prog->NumInstructions; i++) {
+ struct prog_instruction *const inst = &prog->Instructions[i];
+
+ switch (inst->Opcode) {
+ case OPCODE_ADD:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = a[0] + b[0];
+ result[1] = a[1] + b[1];
+ result[2] = a[2] + b[2];
+ result[3] = a[3] + b[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_CMP:
+ /* FINISHME: We could also optimize CMP instructions where the first
+ * FINISHME: source is a constant that is either all < 0.0 or all
+ * FINISHME: >= 0.0.
+ */
+ if (src_regs_are_constant(inst, 3)) {
+ float a[4];
+ float b[4];
+ float c[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+ get_value(prog, &inst->SrcReg[2], c);
+
+ result[0] = a[0] < 0.0f ? b[0] : c[0];
+ result[1] = a[1] < 0.0f ? b[1] : c[1];
+ result[2] = a[2] < 0.0f ? b[2] : c[2];
+ result[3] = a[3] < 0.0f ? b[3] : c[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+ inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_DP2:
+ case OPCODE_DP3:
+ case OPCODE_DP4:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result;
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ /* It seems like a loop could be used here, but we cleverly put
+ * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from
+ * the opcode results in various failures of the loop control.
+ */
+ result = (a[0] * b[0]) + (a[1] * b[1]);
+
+ if (inst->Opcode >= OPCODE_DP3)
+ result += a[2] * b[2];
+
+ if (inst->Opcode == OPCODE_DP4)
+ result += a[3] * b[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_MUL:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = a[0] * b[0];
+ result[1] = a[1] * b[1];
+ result[2] = a[2] * b[2];
+ result[3] = a[3] * b[3];
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SEQ:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SGE:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SGT:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SLE:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SLT:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ case OPCODE_SNE:
+ if (src_regs_are_constant(inst, 2)) {
+ float a[4];
+ float b[4];
+ float result[4];
+
+ get_value(prog, &inst->SrcReg[0], a);
+ get_value(prog, &inst->SrcReg[1], b);
+
+ result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
+ result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
+ result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
+ result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+ progress = true;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return progress;
+}
diff --git a/mesalib/src/mesa/program/prog_optimize.c b/mesalib/src/mesa/program/prog_optimize.c
index 3340ce049..25d9684b1 100644
--- a/mesalib/src/mesa/program/prog_optimize.c
+++ b/mesalib/src/mesa/program/prog_optimize.c
@@ -1358,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
any_change = GL_TRUE;
if (_mesa_remove_dead_code_local(program))
any_change = GL_TRUE;
+
+ any_change = _mesa_constant_fold(program) || any_change;
_mesa_reallocate_registers(program);
} while (any_change);
}
diff --git a/mesalib/src/mesa/program/prog_optimize.h b/mesalib/src/mesa/program/prog_optimize.h
index 8dc58ee52..9854fb7a4 100644
--- a/mesalib/src/mesa/program/prog_optimize.h
+++ b/mesalib/src/mesa/program/prog_optimize.h
@@ -1,47 +1,50 @@
-/*
- * Mesa 3-D graphics library
- * Version: 7.5
- *
- * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef PROG_OPT_H
-#define PROG_OPT_H
-
-
-#include "main/config.h"
-#include "main/glheader.h"
-
-
-struct gl_context;
-struct gl_program;
-struct prog_instruction;
-
-
-extern GLboolean
-_mesa_find_temp_intervals(const struct prog_instruction *instructions,
- GLuint numInstructions,
- GLint intBegin[MAX_PROGRAM_TEMPS],
- GLint intEnd[MAX_PROGRAM_TEMPS]);
-
-extern void
-_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program);
-
-#endif
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.5
+ *
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef PROG_OPT_H
+#define PROG_OPT_H
+
+
+#include "main/config.h"
+#include "main/glheader.h"
+
+
+struct gl_context;
+struct gl_program;
+struct prog_instruction;
+
+
+extern GLboolean
+_mesa_find_temp_intervals(const struct prog_instruction *instructions,
+ GLuint numInstructions,
+ GLint intBegin[MAX_PROGRAM_TEMPS],
+ GLint intEnd[MAX_PROGRAM_TEMPS]);
+
+extern void
+_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program);
+
+extern GLboolean
+_mesa_constant_fold(struct gl_program *prog);
+
+#endif
diff --git a/mesalib/src/mesa/program/register_allocate.c b/mesalib/src/mesa/program/register_allocate.c
index de96eb42c..f5b5174fc 100644
--- a/mesalib/src/mesa/program/register_allocate.c
+++ b/mesalib/src/mesa/program/register_allocate.c
@@ -200,6 +200,27 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
}
}
+/**
+ * Adds a conflict between base_reg and reg, and also between reg and
+ * anything that base_reg conflicts with.
+ *
+ * This can simplify code for setting up multiple register classes
+ * which are aggregates of some base hardware registers, compared to
+ * explicitly using ra_add_reg_conflict.
+ */
+void
+ra_add_transitive_reg_conflict(struct ra_regs *regs,
+ unsigned int base_reg, unsigned int reg)
+{
+ int i;
+
+ ra_add_reg_conflict(regs, reg, base_reg);
+
+ for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) {
+ ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]);
+ }
+}
+
unsigned int
ra_alloc_reg_class(struct ra_regs *regs)
{
diff --git a/mesalib/src/mesa/program/register_allocate.h b/mesalib/src/mesa/program/register_allocate.h
index 5b95833f3..ee2e58a47 100644
--- a/mesalib/src/mesa/program/register_allocate.h
+++ b/mesalib/src/mesa/program/register_allocate.h
@@ -40,6 +40,8 @@ struct ra_regs *ra_alloc_reg_set(unsigned int count);
unsigned int ra_alloc_reg_class(struct ra_regs *regs);
void ra_add_reg_conflict(struct ra_regs *regs,
unsigned int r1, unsigned int r2);
+void ra_add_transitive_reg_conflict(struct ra_regs *regs,
+ unsigned int base_reg, unsigned int reg);
void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);
void ra_set_finalize(struct ra_regs *regs);
/** @} */