diff options
Diffstat (limited to 'mesalib/src/mesa/state_tracker')
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_context.h | 2 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 114 |
2 files changed, 45 insertions, 71 deletions
diff --git a/mesalib/src/mesa/state_tracker/st_context.h b/mesalib/src/mesa/state_tracker/st_context.h index 6d572bd49..58f14f954 100644 --- a/mesalib/src/mesa/state_tracker/st_context.h +++ b/mesalib/src/mesa/state_tracker/st_context.h @@ -56,7 +56,7 @@ struct u_upload_mgr; struct st_state_flags { GLuint mesa; - GLuint st; + uint64_t st; }; struct st_tracked_state { diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 84bdc4f06..dd9c84f1a 100644 --- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -75,14 +75,6 @@ extern "C" { (1 << PROGRAM_UNIFORM)) /** - * Maximum number of temporary registers. - * - * It is too big for stack allocated arrays -- it will cause stack overflow on - * Windows and likely Mac OS X. - */ -#define MAX_TEMPS 4096 - -/** * Maximum number of arrays */ #define MAX_ARRAYS 256 @@ -446,7 +438,6 @@ public: int mul_operand); bool try_emit_mad_for_and_not(ir_expression *ir, int mul_operand); - bool try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -1270,53 +1261,6 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan return true; } -bool -glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) -{ - /* Emit saturates in the vertex shader only if SM 3.0 is supported. - */ - if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && - !st_context(this->ctx)->has_shader_model3) { - return false; - } - - ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); - if (!sat_src) - return false; - - sat_src->accept(this); - st_src_reg src = this->result; - - /* If we generated an expression instruction into a temporary in - * processing the saturate's operand, apply the saturate to that - * instruction. Otherwise, generate a MOV to do the saturate. - * - * Note that we have to be careful to only do this optimization if - * the instruction in question was what generated src->result. For - * example, ir_dereference_array might generate a MUL instruction - * to create the reladdr, and return us a src reg using that - * reladdr. That MUL result is not the value we're trying to - * saturate. - */ - ir_expression *sat_src_expr = sat_src->as_expression(); - if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || - sat_src_expr->operation == ir_binop_add || - sat_src_expr->operation == ir_binop_dot)) { - glsl_to_tgsi_instruction *new_inst; - new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - new_inst->saturate = true; - } else { - this->result = get_temp(ir->type); - st_dst_reg result_dst = st_dst_reg(this->result); - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); - inst->saturate = true; - } - - return true; -} - void glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr) @@ -1363,9 +1307,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) return; } - if (try_emit_sat(ir)) - return; - if (ir->operation == ir_quadop_vector) assert(!"ir_quadop_vector should have been lowered"); @@ -1460,6 +1401,12 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_cos_reduced: emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; + case ir_unop_saturate: { + glsl_to_tgsi_instruction *inst; + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + inst->saturate = true; + break; + } case ir_unop_dFdx: case ir_unop_dFdx_coarse: @@ -3301,14 +3248,10 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src) void glsl_to_tgsi_visitor::simplify_cmp(void) { - unsigned *tempWrites; + int tempWritesSize = 0; + unsigned *tempWrites = NULL; unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; - tempWrites = new unsigned[MAX_TEMPS]; - if (!tempWrites) { - return; - } - memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS); memset(outputWrites, 0, sizeof(outputWrites)); foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { @@ -3330,7 +3273,19 @@ glsl_to_tgsi_visitor::simplify_cmp(void) prevWriteMask = outputWrites[inst->dst.index]; outputWrites[inst->dst.index] |= inst->dst.writemask; } else if (inst->dst.file == PROGRAM_TEMPORARY) { - assert(inst->dst.index < MAX_TEMPS); + if (inst->dst.index >= tempWritesSize) { + const int inc = 4096; + + tempWrites = (unsigned*) + realloc(tempWrites, + (tempWritesSize + inc) * sizeof(unsigned)); + if (!tempWrites) + return; + + memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned)); + tempWritesSize += inc; + } + prevWriteMask = tempWrites[inst->dst.index]; tempWrites[inst->dst.index] |= inst->dst.writemask; } else @@ -3349,7 +3304,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) } } - delete [] tempWrites; + free(tempWrites); } /* Replaces all references to a temporary register index with another index. */ @@ -4158,7 +4113,9 @@ struct label { struct st_translate { struct ureg_program *ureg; - struct ureg_dst temps[MAX_TEMPS]; + unsigned temps_size; + struct ureg_dst *temps; + struct ureg_dst arrays[MAX_ARRAYS]; struct ureg_src *constants; struct ureg_src *immediates; @@ -4299,7 +4256,19 @@ dst_register(struct st_translate *t, return ureg_dst_undef(); case PROGRAM_TEMPORARY: - assert(index < Elements(t->temps)); + /* Allocate space for temporaries on demand. */ + if (index >= t->temps_size) { + const int inc = 4096; + + t->temps = (struct ureg_dst*) + realloc(t->temps, + (t->temps_size + inc) * sizeof(struct ureg_dst)); + if (!t->temps) + return ureg_dst_undef(); + + memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst)); + t->temps_size += inc; + } if (ureg_dst_is_undef(t->temps[index])) t->temps[index] = ureg_DECL_local_temporary(t->ureg); @@ -5158,6 +5127,7 @@ st_translate_program( out: if (t) { + free(t->temps); free(t->insn); free(t->labels); free(t->constants); @@ -5429,6 +5399,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS)) lower_offset_arrays(ir); do_mat_op_to_vec(ir); + /* Emit saturates in the vertex shader only if SM 3.0 is supported. */ + bool vs_sm3 = (_mesa_shader_stage_to_program(prog->_LinkedShaders[i]->Stage) == + GL_VERTEX_PROGRAM_ARB) && st_context(ctx)->has_shader_model3; lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | @@ -5438,7 +5411,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) CARRY_TO_ARITH | BORROW_TO_ARITH | (options->EmitNoPow ? POW_TO_EXP2 : 0) | - (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0)); + (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) | + (vs_sm3 ? SAT_TO_CLAMP : 0)); lower_ubo_reference(prog->_LinkedShaders[i], ir); do_vec_index_to_cond_assign(ir); |