From 4aef26e36bc9aba4cd67c158e17f5fb93c9e018c Mon Sep 17 00:00:00 2001 From: marha Date: Tue, 2 Sep 2014 18:56:18 +0200 Subject: mesa git update 2 Sep 2014 mesa commit 5a4e0f3873657d874eb5ff52819f42ea38d54b91 --- mesalib/Makefile.am | 1 - mesalib/docs/GL3.txt | 6 +- mesalib/src/gallium/auxiliary/util/u_blitter.c | 147 +++++++++++++++------ mesalib/src/glsl/ir.cpp | 2 + mesalib/src/glsl/ir.h | 1 + mesalib/src/glsl/ir_builder.cpp | 6 +- mesalib/src/glsl/ir_constant_expression.cpp | 6 + mesalib/src/glsl/ir_optimization.h | 1 + mesalib/src/glsl/ir_validate.cpp | 1 + mesalib/src/glsl/linker.cpp | 4 +- mesalib/src/glsl/lower_instructions.cpp | 29 ++++ mesalib/src/glsl/opt_algebraic.cpp | 98 ++++++++++++++ mesalib/src/mesa/main/mtypes.h | 31 +++-- mesalib/src/mesa/main/textureview.c | 3 + mesalib/src/mesa/program/ir_to_mesa.cpp | 59 ++------- mesalib/src/mesa/program/prog_cache.c | 2 +- mesalib/src/mesa/state_tracker/st_context.h | 2 +- mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 114 ++++++---------- 18 files changed, 331 insertions(+), 182 deletions(-) diff --git a/mesalib/Makefile.am b/mesalib/Makefile.am index d2916da43..c9aadc827 100644 --- a/mesalib/Makefile.am +++ b/mesalib/Makefile.am @@ -64,7 +64,6 @@ IGNORE_FILES = \ parsers: configure $(MAKE) -C src/glsl glsl_parser.cpp glsl_parser.h glsl_lexer.cpp glcpp/glcpp-lex.c glcpp/glcpp-parse.c glcpp/glcpp-parse.h - $(MAKE) -C src/mesa program/lex.yy.c program/program_parse.tab.c program/program_parse.tab.h # Everything for new a Mesa release: ARCHIVES = $(PACKAGE_NAME).tar.gz \ diff --git a/mesalib/docs/GL3.txt b/mesalib/docs/GL3.txt index 56c4994e5..f5d5e72c9 100644 --- a/mesalib/docs/GL3.txt +++ b/mesalib/docs/GL3.txt @@ -104,7 +104,7 @@ GL 4.0, GLSL 4.00: - Fused multiply-add DONE () - Packing/bitfield/conversion functions DONE (r600) - Enhanced textureGather DONE (r600, radeonsi) - - Geometry shader instancing DONE () + - Geometry shader instancing DONE (r600) - Geometry shader multiple streams DONE () - Enhanced per-sample shading DONE (r600) - Interpolation functions DONE () @@ -151,7 +151,7 @@ GL 4.3, GLSL 4.30: GL_ARB_arrays_of_arrays started (Timothy) GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30) GL_ARB_clear_buffer_object DONE (all drivers) - GL_ARB_compute_shader started (currently stalled) + GL_ARB_compute_shader in progress (jljusten) GL_ARB_copy_image DONE (i965) GL_KHR_debug DONE (all drivers) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) @@ -202,7 +202,7 @@ GL 4.5, GLSL 4.50: These are the extensions cherry-picked to make GLES 3.1 GLES3.1, GLSL ES 3.1 GL_ARB_arrays_of_arrays started (Timothy) - GL_ARB_compute_shader started (currently stalled) + GL_ARB_compute_shader in progress (jljusten) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) GL_ARB_framebuffer_no_attachments not started GL_ARB_program_interface_query not started diff --git a/mesalib/src/gallium/auxiliary/util/u_blitter.c b/mesalib/src/gallium/auxiliary/util/u_blitter.c index 609e02fd1..f3fe949c2 100644 --- a/mesalib/src/gallium/auxiliary/util/u_blitter.c +++ b/mesalib/src/gallium/auxiliary/util/u_blitter.c @@ -130,6 +130,7 @@ struct blitter_context_priv unsigned dst_height; boolean has_geometry_shader; + boolean has_layered; boolean has_stream_out; boolean has_stencil_export; boolean has_texture_multisample; @@ -288,28 +289,24 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) } } - /* Fragment shaders are created on-demand, except these. - * The interpolation must be constant for integer texture clearing to work. - */ - ctx->fs_empty = util_make_empty_fragment_shader(pipe); - ctx->fs_write_one_cbuf = - util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC, - TGSI_INTERPOLATE_CONSTANT, FALSE); - ctx->fs_write_all_cbufs = - util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC, - TGSI_INTERPOLATE_CONSTANT, TRUE); - - /* vertex shaders */ - { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC }; - const uint semantic_indices[] = { 0, 0 }; - ctx->vs = - util_make_vertex_passthrough_shader(pipe, 2, semantic_names, - semantic_indices); - } + ctx->has_layered = + pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_INSTANCEID) && + pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT); - if (ctx->has_stream_out) { + /* set invariant vertex coordinates */ + for (i = 0; i < 4; i++) + ctx->vertices[i][0][3] = 1; /*v.w*/ + + ctx->upload = u_upload_create(pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER); + + return &ctx->base; +} + +static void bind_vs_pos_only(struct blitter_context_priv *ctx) +{ + struct pipe_context *pipe = ctx->base.pipe; + + if (!ctx->vs_pos_only) { struct pipe_stream_output_info so; const uint semantic_names[] = { TGSI_SEMANTIC_POSITION }; const uint semantic_indices[] = { 0 }; @@ -324,18 +321,71 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) semantic_indices, &so); } - if (pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_INSTANCEID) && - pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) { + pipe->bind_vs_state(pipe, ctx->vs_pos_only); +} + +static void bind_vs_passthrough(struct blitter_context_priv *ctx) +{ + struct pipe_context *pipe = ctx->base.pipe; + + if (!ctx->vs) { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indices[] = { 0, 0 }; + ctx->vs = + util_make_vertex_passthrough_shader(pipe, 2, semantic_names, + semantic_indices); + } + + pipe->bind_vs_state(pipe, ctx->vs); +} + +static void bind_vs_layered(struct blitter_context_priv *ctx) +{ + struct pipe_context *pipe = ctx->base.pipe; + + if (!ctx->vs_layered) { ctx->vs_layered = util_make_layered_clear_vertex_shader(pipe); } - /* set invariant vertex coordinates */ - for (i = 0; i < 4; i++) - ctx->vertices[i][0][3] = 1; /*v.w*/ + pipe->bind_vs_state(pipe, ctx->vs_layered); +} - ctx->upload = u_upload_create(pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER); +static void bind_fs_empty(struct blitter_context_priv *ctx) +{ + struct pipe_context *pipe = ctx->base.pipe; - return &ctx->base; + if (!ctx->fs_empty) { + ctx->fs_empty = util_make_empty_fragment_shader(pipe); + } + + pipe->bind_fs_state(pipe, ctx->fs_empty); +} + +static void bind_fs_write_one_cbuf(struct blitter_context_priv *ctx) +{ + struct pipe_context *pipe = ctx->base.pipe; + + if (!ctx->fs_write_one_cbuf) { + ctx->fs_write_one_cbuf = + util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC, + TGSI_INTERPOLATE_CONSTANT, FALSE); + } + + pipe->bind_fs_state(pipe, ctx->fs_write_one_cbuf); +} + +static void bind_fs_write_all_cbufs(struct blitter_context_priv *ctx) +{ + struct pipe_context *pipe = ctx->base.pipe; + + if (!ctx->fs_write_all_cbufs) { + ctx->fs_write_all_cbufs = + util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC, + TGSI_INTERPOLATE_CONSTANT, TRUE); + } + + pipe->bind_fs_state(pipe, ctx->fs_write_all_cbufs); } void util_blitter_destroy(struct blitter_context *blitter) @@ -361,7 +411,8 @@ void util_blitter_destroy(struct blitter_context *blitter) pipe->delete_rasterizer_state(pipe, ctx->rs_state_scissor); if (ctx->rs_discard_state) pipe->delete_rasterizer_state(pipe, ctx->rs_discard_state); - pipe->delete_vs_state(pipe, ctx->vs); + if (ctx->vs) + pipe->delete_vs_state(pipe, ctx->vs); if (ctx->vs_pos_only) pipe->delete_vs_state(pipe, ctx->vs_pos_only); if (ctx->vs_layered) @@ -408,9 +459,12 @@ void util_blitter_destroy(struct blitter_context *blitter) ctx->delete_fs_state(pipe, ctx->fs_resolve_uint[i][j][f]); } - ctx->delete_fs_state(pipe, ctx->fs_empty); - ctx->delete_fs_state(pipe, ctx->fs_write_one_cbuf); - ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs); + if (ctx->fs_empty) + ctx->delete_fs_state(pipe, ctx->fs_empty); + if (ctx->fs_write_one_cbuf) + ctx->delete_fs_state(pipe, ctx->fs_write_one_cbuf); + if (ctx->fs_write_all_cbufs) + ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs); pipe->delete_sampler_state(pipe, ctx->sampler_state_rect_linear); pipe->delete_sampler_state(pipe, ctx->sampler_state_rect); @@ -1021,7 +1075,11 @@ static void blitter_set_common_draw_rect_state(struct blitter_context_priv *ctx, pipe->bind_rasterizer_state(pipe, scissor ? ctx->rs_state_scissor : ctx->rs_state); - pipe->bind_vs_state(pipe, vs_layered ? ctx->vs_layered : ctx->vs); + if (vs_layered) + bind_vs_layered(ctx); + else + bind_vs_passthrough(ctx); + if (ctx->has_geometry_shader) pipe->bind_gs_state(pipe, NULL); if (ctx->has_stream_out) @@ -1118,7 +1176,7 @@ static void util_blitter_clear_custom(struct blitter_context *blitter, struct pipe_context *pipe = ctx->base.pipe; struct pipe_stencil_ref sr = { { 0 } }; - assert(ctx->vs_layered || num_layers <= 1); + assert(ctx->has_layered || num_layers <= 1); blitter_set_running_flag(ctx); blitter_check_saved_vertex_states(ctx); @@ -1148,12 +1206,12 @@ static void util_blitter_clear_custom(struct blitter_context *blitter, pipe->set_stencil_ref(pipe, &sr); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); - ctx->bind_fs_state(pipe, ctx->fs_write_all_cbufs); + bind_fs_write_all_cbufs(ctx); pipe->set_sample_mask(pipe, ~0); blitter_set_dst_dimensions(ctx, width, height); - if (num_layers > 1 && ctx->vs_layered) { + if (num_layers > 1 && ctx->has_layered) { blitter_set_common_draw_rect_state(ctx, FALSE, TRUE); blitter_set_clear_color(ctx, color); blitter_draw(ctx, 0, 0, width, height, depth, num_layers); @@ -1680,7 +1738,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, /* bind states */ pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]); pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); - ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf); + bind_fs_write_one_cbuf(ctx); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); /* set a framebuffer state */ @@ -1748,7 +1806,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, /* hmm that should be illegal probably, or make it a no-op somewhere */ pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); - ctx->bind_fs_state(pipe, ctx->fs_empty); + bind_fs_empty(ctx); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); /* set a framebuffer state */ @@ -1799,7 +1857,10 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter, pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] : ctx->blend[0]); pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage); - ctx->bind_fs_state(pipe, cbsurf ? ctx->fs_write_one_cbuf : ctx->fs_empty); + if (cbsurf) + bind_fs_write_one_cbuf(ctx); + else + bind_fs_empty(ctx); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); /* set a framebuffer state */ @@ -1876,7 +1937,7 @@ void util_blitter_copy_buffer(struct blitter_context *blitter, pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb); pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf[0]); - pipe->bind_vs_state(pipe, ctx->vs_pos_only); + bind_vs_pos_only(ctx); if (ctx->has_geometry_shader) pipe->bind_gs_state(pipe, NULL); pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state); @@ -1936,7 +1997,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter, pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb); pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf[num_channels-1]); - pipe->bind_vs_state(pipe, ctx->vs_pos_only); + bind_vs_pos_only(ctx); if (ctx->has_geometry_shader) pipe->bind_gs_state(pipe, NULL); pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state); @@ -1978,7 +2039,7 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter, pipe->bind_blend_state(pipe, custom_blend); pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); - ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf); + bind_fs_write_one_cbuf(ctx); pipe->set_sample_mask(pipe, sample_mask); memset(&surf_tmpl, 0, sizeof(surf_tmpl)); @@ -2041,7 +2102,7 @@ void util_blitter_custom_color(struct blitter_context *blitter, pipe->bind_blend_state(pipe, custom_blend ? custom_blend : ctx->blend[PIPE_MASK_RGBA]); pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); - ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf); + bind_fs_write_one_cbuf(ctx); pipe->bind_vertex_elements_state(pipe, ctx->velem_state); pipe->set_sample_mask(pipe, (1ull << MAX2(1, dstsurf->texture->nr_samples)) - 1); diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp index b289c2975..739a9f412 100644 --- a/mesalib/src/glsl/ir.cpp +++ b/mesalib/src/glsl/ir.cpp @@ -255,6 +255,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) case ir_unop_dFdy_fine: case ir_unop_bitfield_reverse: case ir_unop_interpolate_at_centroid: + case ir_unop_saturate: this->type = op0->type; break; @@ -534,6 +535,7 @@ static const char *const operator_strs[] = { "bit_count", "find_msb", "find_lsb", + "sat", "noise", "interpolate_at_centroid", "+", diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h index e9051732b..8003f88ce 100644 --- a/mesalib/src/glsl/ir.h +++ b/mesalib/src/glsl/ir.h @@ -1250,6 +1250,7 @@ enum ir_expression_operation { ir_unop_find_lsb, /*@}*/ + ir_unop_saturate, ir_unop_noise, /** diff --git a/mesalib/src/glsl/ir_builder.cpp b/mesalib/src/glsl/ir_builder.cpp index f03941443..a2f6f2967 100644 --- a/mesalib/src/glsl/ir_builder.cpp +++ b/mesalib/src/glsl/ir_builder.cpp @@ -271,11 +271,7 @@ clamp(operand a, operand b, operand c) ir_expression * saturate(operand a) { - void *mem_ctx = ralloc_parent(a.val); - - return expr(ir_binop_max, - expr(ir_binop_min, a, new(mem_ctx) ir_constant(1.0f)), - new(mem_ctx) ir_constant(0.0f)); + return expr(ir_unop_saturate, a); } ir_expression * diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp index 96060217c..1e8b3a3cc 100644 --- a/mesalib/src/glsl/ir_constant_expression.cpp +++ b/mesalib/src/glsl/ir_constant_expression.cpp @@ -1469,6 +1469,12 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) } break; + case ir_unop_saturate: + for (unsigned c = 0; c < components; c++) { + data.f[c] = CLAMP(op[0]->value.f[c], 0.0f, 1.0f); + } + break; + case ir_triop_bitfield_extract: { int offset = op[1]->value.i[0]; int bits = op[2]->value.i[0]; diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h index b83c22592..1c6f72b54 100644 --- a/mesalib/src/glsl/ir_optimization.h +++ b/mesalib/src/glsl/ir_optimization.h @@ -40,6 +40,7 @@ #define LDEXP_TO_ARITH 0x100 #define CARRY_TO_ARITH 0x200 #define BORROW_TO_ARITH 0x400 +#define SAT_TO_CLAMP 0x800 /** * \see class lower_packing_builtins_visitor diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp index 5b2067782..97a581dc2 100644 --- a/mesalib/src/glsl/ir_validate.cpp +++ b/mesalib/src/glsl/ir_validate.cpp @@ -241,6 +241,7 @@ ir_validate::visit_leave(ir_expression *ir) case ir_unop_log: case ir_unop_exp2: case ir_unop_log2: + case ir_unop_saturate: assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); assert(ir->type == ir->operands[0]->type); break; diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp index d5473adc3..57be4931d 100644 --- a/mesalib/src/glsl/linker.cpp +++ b/mesalib/src/glsl/linker.cpp @@ -2439,8 +2439,10 @@ check_explicit_uniform_locations(struct gl_context *ctx, ir_variable *var = node->as_variable(); if ((var && var->data.mode == ir_var_uniform) && var->data.explicit_location) { - if (!reserve_explicit_locations(prog, uniform_map, var)) + if (!reserve_explicit_locations(prog, uniform_map, var)) { + delete uniform_map; return; + } } } } diff --git a/mesalib/src/glsl/lower_instructions.cpp b/mesalib/src/glsl/lower_instructions.cpp index 176070c87..684285350 100644 --- a/mesalib/src/glsl/lower_instructions.cpp +++ b/mesalib/src/glsl/lower_instructions.cpp @@ -41,6 +41,7 @@ * - BITFIELD_INSERT_TO_BFM_BFI * - CARRY_TO_ARITH * - BORROW_TO_ARITH + * - SAT_TO_CLAMP * * SUB_TO_ADD_NEG: * --------------- @@ -104,6 +105,10 @@ * ---------------- * Converts ir_borrow into (x < y). * + * SAT_TO_CLAMP: + * ------------- + * Converts ir_unop_saturate into min(max(x, 0.0), 1.0) + * */ #include "main/core.h" /* for M_LOG2E */ @@ -139,6 +144,7 @@ private: void ldexp_to_arith(ir_expression *); void carry_to_arith(ir_expression *); void borrow_to_arith(ir_expression *); + void sat_to_clamp(ir_expression *); }; } /* anonymous namespace */ @@ -484,6 +490,24 @@ lower_instructions_visitor::borrow_to_arith(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::sat_to_clamp(ir_expression *ir) +{ + /* Translates + * ir_unop_saturate x + * into + * ir_binop_min (ir_binop_max(x, 0.0), 1.0) + */ + + ir->operation = ir_binop_min; + ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type, + ir->operands[0], + new(ir) ir_constant(0.0f)); + ir->operands[1] = new(ir) ir_constant(1.0f); + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -540,6 +564,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) borrow_to_arith(ir); break; + case ir_unop_saturate: + if (lowering(SAT_TO_CLAMP)) + sat_to_clamp(ir); + break; + default: return visit_continue; } diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp index ac7514acf..447618f9e 100644 --- a/mesalib/src/glsl/opt_algebraic.cpp +++ b/mesalib/src/glsl/opt_algebraic.cpp @@ -110,6 +110,48 @@ is_vec_basis(ir_constant *ir) return (ir == NULL) ? false : ir->is_basis(); } +static inline bool +is_valid_vec_const(ir_constant *ir) +{ + if (ir == NULL) + return false; + + if (!ir->type->is_scalar() && !ir->type->is_vector()) + return false; + + return true; +} + +static inline bool +is_less_than_one(ir_constant *ir) +{ + if (!is_valid_vec_const(ir)) + return false; + + unsigned component = 0; + for (int c = 0; c < ir->type->vector_elements; c++) { + if (ir->get_float_component(c) < 1.0f) + component++; + } + + return (component == ir->type->vector_elements); +} + +static inline bool +is_greater_than_zero(ir_constant *ir) +{ + if (!is_valid_vec_const(ir)) + return false; + + unsigned component = 0; + for (int c = 0; c < ir->type->vector_elements; c++) { + if (ir->get_float_component(c) > 0.0f) + component++; + } + + return (component == ir->type->vector_elements); +} + static void update_type(ir_expression *ir) { @@ -614,6 +656,62 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) break; + case ir_binop_min: + case ir_binop_max: + if (ir->type->base_type != GLSL_TYPE_FLOAT) + break; + + /* Replace min(max) operations and its commutative combinations with + * a saturate operation + */ + for (int op = 0; op < 2; op++) { + ir_expression *minmax = op_expr[op]; + ir_constant *outer_const = op_const[1 - op]; + ir_expression_operation op_cond = (ir->operation == ir_binop_max) ? + ir_binop_min : ir_binop_max; + + if (!minmax || !outer_const || (minmax->operation != op_cond)) + continue; + + /* Found a min(max) combination. Now try to see if its operands + * meet our conditions that we can do just a single saturate operation + */ + for (int minmax_op = 0; minmax_op < 2; minmax_op++) { + ir_rvalue *inner_val_a = minmax->operands[minmax_op]; + ir_rvalue *inner_val_b = minmax->operands[1 - minmax_op]; + + if (!inner_val_a || !inner_val_b) + continue; + + /* Found a {min|max} ({max|min} (x, 0.0), 1.0) operation and its variations */ + if ((outer_const->is_one() && inner_val_a->is_zero()) || + (inner_val_a->is_one() && outer_const->is_zero())) + return saturate(inner_val_b); + + /* Found a {min|max} ({max|min} (x, 0.0), b) where b < 1.0 + * and its variations + */ + if (is_less_than_one(outer_const) && inner_val_b->is_zero()) + return expr(ir_binop_min, saturate(inner_val_a), outer_const); + + if (!inner_val_b->as_constant()) + continue; + + if (is_less_than_one(inner_val_b->as_constant()) && outer_const->is_zero()) + return expr(ir_binop_min, saturate(inner_val_a), inner_val_b); + + /* Found a {min|max} ({max|min} (x, b), 1.0), where b > 0.0 + * and its variations + */ + if (outer_const->is_one() && is_greater_than_zero(inner_val_b->as_constant())) + return expr(ir_binop_max, saturate(inner_val_a), inner_val_b); + if (inner_val_b->as_constant()->is_one() && is_greater_than_zero(outer_const)) + return expr(ir_binop_max, saturate(inner_val_a), outer_const); + } + } + + break; + case ir_unop_rcp: if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp) return op_expr[0]->operands[0]; diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h index dc25ea467..4fb30ffba 100644 --- a/mesalib/src/mesa/main/mtypes.h +++ b/mesalib/src/mesa/main/mtypes.h @@ -2347,6 +2347,20 @@ struct gl_fragment_program_state }; +/** + * Context state for compute programs. + */ +struct gl_compute_program_state +{ + struct gl_compute_program *Current; /**< user-bound compute program */ + + /** Currently enabled and valid program (including internal programs + * and compiled shader programs). + */ + struct gl_compute_program *_Current; +}; + + /** * ATI_fragment_shader runtime state */ @@ -3908,32 +3922,32 @@ typedef enum struct gl_driver_flags { /** gl_context::Array::_DrawArrays (vertex array state) */ - GLbitfield NewArray; + uint64_t NewArray; /** gl_context::TransformFeedback::CurrentObject */ - GLbitfield NewTransformFeedback; + uint64_t NewTransformFeedback; /** gl_context::TransformFeedback::CurrentObject::shader_program */ - GLbitfield NewTransformFeedbackProg; + uint64_t NewTransformFeedbackProg; /** gl_context::RasterDiscard */ - GLbitfield NewRasterizerDiscard; + uint64_t NewRasterizerDiscard; /** * gl_context::UniformBufferBindings * gl_shader_program::UniformBlocks */ - GLbitfield NewUniformBuffer; + uint64_t NewUniformBuffer; /** * gl_context::AtomicBufferBindings */ - GLbitfield NewAtomicBuffer; + uint64_t NewAtomicBuffer; /** * gl_context::ImageUnits */ - GLbitfield NewImageUnits; + uint64_t NewImageUnits; }; struct gl_uniform_buffer_binding @@ -4153,6 +4167,7 @@ struct gl_context struct gl_vertex_program_state VertexProgram; struct gl_fragment_program_state FragmentProgram; struct gl_geometry_program_state GeometryProgram; + struct gl_compute_program_state ComputeProgram; struct gl_ati_fragment_shader_state ATIFragmentShader; struct gl_pipeline_shader_state Pipeline; /**< GLSL pipeline shader object state */ @@ -4240,7 +4255,7 @@ struct gl_context GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */ GLbitfield NewState; /**< bitwise-or of _NEW_* flags */ - GLbitfield NewDriverState;/**< bitwise-or of flags from DriverFlags */ + uint64_t NewDriverState; /**< bitwise-or of flags from DriverFlags */ struct gl_driver_flags DriverFlags; diff --git a/mesalib/src/mesa/main/textureview.c b/mesalib/src/mesa/main/textureview.c index b3521e219..6e86a9a44 100644 --- a/mesalib/src/mesa/main/textureview.c +++ b/mesalib/src/mesa/main/textureview.c @@ -536,6 +536,9 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, /* Adjust width, height, depth to be appropriate for new target */ switch (target) { case GL_TEXTURE_1D: + height = 1; + break; + case GL_TEXTURE_3D: break; diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp index e5844c3c4..49e4a7a40 100644 --- a/mesalib/src/mesa/program/ir_to_mesa.cpp +++ b/mesalib/src/mesa/program/ir_to_mesa.cpp @@ -311,7 +311,6 @@ public: int mul_operand); bool try_emit_mad_for_and_not(ir_expression *ir, int mul_operand); - bool try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -866,50 +865,6 @@ ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) return true; } -bool -ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) -{ - /* Saturates were only introduced to vertex programs in - * NV_vertex_program3, so don't give them to drivers in the VP. - */ - if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) - return false; - - ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); - if (!sat_src) - return false; - - sat_src->accept(this); - src_reg src = this->result; - - /* If we generated an expression instruction into a temporary in - * processing the saturate's operand, apply the saturate to that - * instruction. Otherwise, generate a MOV to do the saturate. - * - * Note that we have to be careful to only do this optimization if - * the instruction in question was what generated src->result. For - * example, ir_dereference_array might generate a MUL instruction - * to create the reladdr, and return us a src reg using that - * reladdr. That MUL result is not the value we're trying to - * saturate. - */ - ir_expression *sat_src_expr = sat_src->as_expression(); - ir_to_mesa_instruction *new_inst; - new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); - if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || - sat_src_expr->operation == ir_binop_add || - sat_src_expr->operation == ir_binop_dot)) { - new_inst->saturate = true; - } else { - this->result = get_temp(ir->type); - ir_to_mesa_instruction *inst; - inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src); - inst->saturate = true; - } - - return true; -} - void ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr) @@ -1072,9 +1027,6 @@ ir_to_mesa_visitor::visit(ir_expression *ir) return; } - if (try_emit_sat(ir)) - return; - if (ir->operation == ir_quadop_vector) { this->emit_swz(ir); return; @@ -1171,6 +1123,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir) emit(ir, OPCODE_DDY, result_dst, op[0]); break; + case ir_unop_saturate: { + ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV, + result_dst, op[0]); + inst->saturate = true; + break; + } case ir_unop_noise: { const enum prog_opcode opcode = prog_opcode(OPCODE_NOISE1 @@ -2990,9 +2948,12 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) /* Lowering */ do_mat_op_to_vec(ir); + GLenum target = _mesa_shader_stage_to_program(prog->_LinkedShaders[i]->Stage); lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP - | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); + | ((options->EmitNoPow) ? POW_TO_EXP2 : 0) + | ((target == GL_VERTEX_PROGRAM_ARB) ? SAT_TO_CLAMP + : 0))); progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; diff --git a/mesalib/src/mesa/program/prog_cache.c b/mesalib/src/mesa/program/prog_cache.c index 07192a96a..34609f056 100644 --- a/mesalib/src/mesa/program/prog_cache.c +++ b/mesalib/src/mesa/program/prog_cache.c @@ -143,7 +143,7 @@ _mesa_new_program_cache(void) if (cache) { cache->size = 17; cache->items = - calloc(1, cache->size * sizeof(struct cache_item)); + calloc(cache->size, sizeof(struct cache_item *)); if (!cache->items) { free(cache); return NULL; diff --git a/mesalib/src/mesa/state_tracker/st_context.h b/mesalib/src/mesa/state_tracker/st_context.h index 6d572bd49..58f14f954 100644 --- a/mesalib/src/mesa/state_tracker/st_context.h +++ b/mesalib/src/mesa/state_tracker/st_context.h @@ -56,7 +56,7 @@ struct u_upload_mgr; struct st_state_flags { GLuint mesa; - GLuint st; + uint64_t st; }; struct st_tracked_state { diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 84bdc4f06..dd9c84f1a 100644 --- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -74,14 +74,6 @@ extern "C" { (1 << PROGRAM_CONSTANT) | \ (1 << PROGRAM_UNIFORM)) -/** - * Maximum number of temporary registers. - * - * It is too big for stack allocated arrays -- it will cause stack overflow on - * Windows and likely Mac OS X. - */ -#define MAX_TEMPS 4096 - /** * Maximum number of arrays */ @@ -446,7 +438,6 @@ public: int mul_operand); bool try_emit_mad_for_and_not(ir_expression *ir, int mul_operand); - bool try_emit_sat(ir_expression *ir); void emit_swz(ir_expression *ir); @@ -1270,53 +1261,6 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan return true; } -bool -glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) -{ - /* Emit saturates in the vertex shader only if SM 3.0 is supported. - */ - if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && - !st_context(this->ctx)->has_shader_model3) { - return false; - } - - ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); - if (!sat_src) - return false; - - sat_src->accept(this); - st_src_reg src = this->result; - - /* If we generated an expression instruction into a temporary in - * processing the saturate's operand, apply the saturate to that - * instruction. Otherwise, generate a MOV to do the saturate. - * - * Note that we have to be careful to only do this optimization if - * the instruction in question was what generated src->result. For - * example, ir_dereference_array might generate a MUL instruction - * to create the reladdr, and return us a src reg using that - * reladdr. That MUL result is not the value we're trying to - * saturate. - */ - ir_expression *sat_src_expr = sat_src->as_expression(); - if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || - sat_src_expr->operation == ir_binop_add || - sat_src_expr->operation == ir_binop_dot)) { - glsl_to_tgsi_instruction *new_inst; - new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); - new_inst->saturate = true; - } else { - this->result = get_temp(ir->type); - st_dst_reg result_dst = st_dst_reg(this->result); - result_dst.writemask = (1 << ir->type->vector_elements) - 1; - glsl_to_tgsi_instruction *inst; - inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src); - inst->saturate = true; - } - - return true; -} - void glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr) @@ -1363,9 +1307,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) return; } - if (try_emit_sat(ir)) - return; - if (ir->operation == ir_quadop_vector) assert(!"ir_quadop_vector should have been lowered"); @@ -1460,6 +1401,12 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_cos_reduced: emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; + case ir_unop_saturate: { + glsl_to_tgsi_instruction *inst; + inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]); + inst->saturate = true; + break; + } case ir_unop_dFdx: case ir_unop_dFdx_coarse: @@ -3301,14 +3248,10 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src) void glsl_to_tgsi_visitor::simplify_cmp(void) { - unsigned *tempWrites; + int tempWritesSize = 0; + unsigned *tempWrites = NULL; unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; - tempWrites = new unsigned[MAX_TEMPS]; - if (!tempWrites) { - return; - } - memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS); memset(outputWrites, 0, sizeof(outputWrites)); foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { @@ -3330,7 +3273,19 @@ glsl_to_tgsi_visitor::simplify_cmp(void) prevWriteMask = outputWrites[inst->dst.index]; outputWrites[inst->dst.index] |= inst->dst.writemask; } else if (inst->dst.file == PROGRAM_TEMPORARY) { - assert(inst->dst.index < MAX_TEMPS); + if (inst->dst.index >= tempWritesSize) { + const int inc = 4096; + + tempWrites = (unsigned*) + realloc(tempWrites, + (tempWritesSize + inc) * sizeof(unsigned)); + if (!tempWrites) + return; + + memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned)); + tempWritesSize += inc; + } + prevWriteMask = tempWrites[inst->dst.index]; tempWrites[inst->dst.index] |= inst->dst.writemask; } else @@ -3349,7 +3304,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void) } } - delete [] tempWrites; + free(tempWrites); } /* Replaces all references to a temporary register index with another index. */ @@ -4158,7 +4113,9 @@ struct label { struct st_translate { struct ureg_program *ureg; - struct ureg_dst temps[MAX_TEMPS]; + unsigned temps_size; + struct ureg_dst *temps; + struct ureg_dst arrays[MAX_ARRAYS]; struct ureg_src *constants; struct ureg_src *immediates; @@ -4299,7 +4256,19 @@ dst_register(struct st_translate *t, return ureg_dst_undef(); case PROGRAM_TEMPORARY: - assert(index < Elements(t->temps)); + /* Allocate space for temporaries on demand. */ + if (index >= t->temps_size) { + const int inc = 4096; + + t->temps = (struct ureg_dst*) + realloc(t->temps, + (t->temps_size + inc) * sizeof(struct ureg_dst)); + if (!t->temps) + return ureg_dst_undef(); + + memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst)); + t->temps_size += inc; + } if (ureg_dst_is_undef(t->temps[index])) t->temps[index] = ureg_DECL_local_temporary(t->ureg); @@ -5158,6 +5127,7 @@ st_translate_program( out: if (t) { + free(t->temps); free(t->insn); free(t->labels); free(t->constants); @@ -5429,6 +5399,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS)) lower_offset_arrays(ir); do_mat_op_to_vec(ir); + /* Emit saturates in the vertex shader only if SM 3.0 is supported. */ + bool vs_sm3 = (_mesa_shader_stage_to_program(prog->_LinkedShaders[i]->Stage) == + GL_VERTEX_PROGRAM_ARB) && st_context(ctx)->has_shader_model3; lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | @@ -5438,7 +5411,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) CARRY_TO_ARITH | BORROW_TO_ARITH | (options->EmitNoPow ? POW_TO_EXP2 : 0) | - (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0)); + (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) | + (vs_sm3 ? SAT_TO_CLAMP : 0)); lower_ubo_reference(prog->_LinkedShaders[i], ir); do_vec_index_to_cond_assign(ir); -- cgit v1.2.3