aboutsummaryrefslogtreecommitdiff
path: root/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp')
-rw-r--r--mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp114
1 files changed, 44 insertions, 70 deletions
diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 84bdc4f06..dd9c84f1a 100644
--- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -75,14 +75,6 @@ extern "C" {
(1 << PROGRAM_UNIFORM))
/**
- * Maximum number of temporary registers.
- *
- * It is too big for stack allocated arrays -- it will cause stack overflow on
- * Windows and likely Mac OS X.
- */
-#define MAX_TEMPS 4096
-
-/**
* Maximum number of arrays
*/
#define MAX_ARRAYS 256
@@ -446,7 +438,6 @@ public:
int mul_operand);
bool try_emit_mad_for_and_not(ir_expression *ir,
int mul_operand);
- bool try_emit_sat(ir_expression *ir);
void emit_swz(ir_expression *ir);
@@ -1270,53 +1261,6 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
return true;
}
-bool
-glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
-{
- /* Emit saturates in the vertex shader only if SM 3.0 is supported.
- */
- if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
- !st_context(this->ctx)->has_shader_model3) {
- return false;
- }
-
- ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
- if (!sat_src)
- return false;
-
- sat_src->accept(this);
- st_src_reg src = this->result;
-
- /* If we generated an expression instruction into a temporary in
- * processing the saturate's operand, apply the saturate to that
- * instruction. Otherwise, generate a MOV to do the saturate.
- *
- * Note that we have to be careful to only do this optimization if
- * the instruction in question was what generated src->result. For
- * example, ir_dereference_array might generate a MUL instruction
- * to create the reladdr, and return us a src reg using that
- * reladdr. That MUL result is not the value we're trying to
- * saturate.
- */
- ir_expression *sat_src_expr = sat_src->as_expression();
- if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
- sat_src_expr->operation == ir_binop_add ||
- sat_src_expr->operation == ir_binop_dot)) {
- glsl_to_tgsi_instruction *new_inst;
- new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
- new_inst->saturate = true;
- } else {
- this->result = get_temp(ir->type);
- st_dst_reg result_dst = st_dst_reg(this->result);
- result_dst.writemask = (1 << ir->type->vector_elements) - 1;
- glsl_to_tgsi_instruction *inst;
- inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
- inst->saturate = true;
- }
-
- return true;
-}
-
void
glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
st_src_reg *reg, int *num_reladdr)
@@ -1363,9 +1307,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
return;
}
- if (try_emit_sat(ir))
- return;
-
if (ir->operation == ir_quadop_vector)
assert(!"ir_quadop_vector should have been lowered");
@@ -1460,6 +1401,12 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
case ir_unop_cos_reduced:
emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
break;
+ case ir_unop_saturate: {
+ glsl_to_tgsi_instruction *inst;
+ inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+ inst->saturate = true;
+ break;
+ }
case ir_unop_dFdx:
case ir_unop_dFdx_coarse:
@@ -3301,14 +3248,10 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src)
void
glsl_to_tgsi_visitor::simplify_cmp(void)
{
- unsigned *tempWrites;
+ int tempWritesSize = 0;
+ unsigned *tempWrites = NULL;
unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
- tempWrites = new unsigned[MAX_TEMPS];
- if (!tempWrites) {
- return;
- }
- memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
memset(outputWrites, 0, sizeof(outputWrites));
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
@@ -3330,7 +3273,19 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
prevWriteMask = outputWrites[inst->dst.index];
outputWrites[inst->dst.index] |= inst->dst.writemask;
} else if (inst->dst.file == PROGRAM_TEMPORARY) {
- assert(inst->dst.index < MAX_TEMPS);
+ if (inst->dst.index >= tempWritesSize) {
+ const int inc = 4096;
+
+ tempWrites = (unsigned*)
+ realloc(tempWrites,
+ (tempWritesSize + inc) * sizeof(unsigned));
+ if (!tempWrites)
+ return;
+
+ memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
+ tempWritesSize += inc;
+ }
+
prevWriteMask = tempWrites[inst->dst.index];
tempWrites[inst->dst.index] |= inst->dst.writemask;
} else
@@ -3349,7 +3304,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
}
}
- delete [] tempWrites;
+ free(tempWrites);
}
/* Replaces all references to a temporary register index with another index. */
@@ -4158,7 +4113,9 @@ struct label {
struct st_translate {
struct ureg_program *ureg;
- struct ureg_dst temps[MAX_TEMPS];
+ unsigned temps_size;
+ struct ureg_dst *temps;
+
struct ureg_dst arrays[MAX_ARRAYS];
struct ureg_src *constants;
struct ureg_src *immediates;
@@ -4299,7 +4256,19 @@ dst_register(struct st_translate *t,
return ureg_dst_undef();
case PROGRAM_TEMPORARY:
- assert(index < Elements(t->temps));
+ /* Allocate space for temporaries on demand. */
+ if (index >= t->temps_size) {
+ const int inc = 4096;
+
+ t->temps = (struct ureg_dst*)
+ realloc(t->temps,
+ (t->temps_size + inc) * sizeof(struct ureg_dst));
+ if (!t->temps)
+ return ureg_dst_undef();
+
+ memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
+ t->temps_size += inc;
+ }
if (ureg_dst_is_undef(t->temps[index]))
t->temps[index] = ureg_DECL_local_temporary(t->ureg);
@@ -5158,6 +5127,7 @@ st_translate_program(
out:
if (t) {
+ free(t->temps);
free(t->insn);
free(t->labels);
free(t->constants);
@@ -5429,6 +5399,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
lower_offset_arrays(ir);
do_mat_op_to_vec(ir);
+ /* Emit saturates in the vertex shader only if SM 3.0 is supported. */
+ bool vs_sm3 = (_mesa_shader_stage_to_program(prog->_LinkedShaders[i]->Stage) ==
+ GL_VERTEX_PROGRAM_ARB) && st_context(ctx)->has_shader_model3;
lower_instructions(ir,
MOD_TO_FRACT |
DIV_TO_MUL_RCP |
@@ -5438,7 +5411,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
CARRY_TO_ARITH |
BORROW_TO_ARITH |
(options->EmitNoPow ? POW_TO_EXP2 : 0) |
- (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));
+ (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
+ (vs_sm3 ? SAT_TO_CLAMP : 0));
lower_ubo_reference(prog->_LinkedShaders[i], ir);
do_vec_index_to_cond_assign(ir);