aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mesalib/Makefile.am1
-rw-r--r--mesalib/docs/GL3.txt6
-rw-r--r--mesalib/src/gallium/auxiliary/util/u_blitter.c147
-rw-r--r--mesalib/src/glsl/ir.cpp2
-rw-r--r--mesalib/src/glsl/ir.h1
-rw-r--r--mesalib/src/glsl/ir_builder.cpp6
-rw-r--r--mesalib/src/glsl/ir_constant_expression.cpp6
-rw-r--r--mesalib/src/glsl/ir_optimization.h1
-rw-r--r--mesalib/src/glsl/ir_validate.cpp1
-rw-r--r--mesalib/src/glsl/linker.cpp4
-rw-r--r--mesalib/src/glsl/lower_instructions.cpp29
-rw-r--r--mesalib/src/glsl/opt_algebraic.cpp98
-rw-r--r--mesalib/src/mesa/main/mtypes.h31
-rw-r--r--mesalib/src/mesa/main/textureview.c3
-rw-r--r--mesalib/src/mesa/program/ir_to_mesa.cpp59
-rw-r--r--mesalib/src/mesa/program/prog_cache.c2
-rw-r--r--mesalib/src/mesa/state_tracker/st_context.h2
-rw-r--r--mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp114
18 files changed, 331 insertions, 182 deletions
diff --git a/mesalib/Makefile.am b/mesalib/Makefile.am
index d2916da43..c9aadc827 100644
--- a/mesalib/Makefile.am
+++ b/mesalib/Makefile.am
@@ -64,7 +64,6 @@ IGNORE_FILES = \
parsers: configure
$(MAKE) -C src/glsl glsl_parser.cpp glsl_parser.h glsl_lexer.cpp glcpp/glcpp-lex.c glcpp/glcpp-parse.c glcpp/glcpp-parse.h
- $(MAKE) -C src/mesa program/lex.yy.c program/program_parse.tab.c program/program_parse.tab.h
# Everything for new a Mesa release:
ARCHIVES = $(PACKAGE_NAME).tar.gz \
diff --git a/mesalib/docs/GL3.txt b/mesalib/docs/GL3.txt
index 56c4994e5..f5d5e72c9 100644
--- a/mesalib/docs/GL3.txt
+++ b/mesalib/docs/GL3.txt
@@ -104,7 +104,7 @@ GL 4.0, GLSL 4.00:
- Fused multiply-add DONE ()
- Packing/bitfield/conversion functions DONE (r600)
- Enhanced textureGather DONE (r600, radeonsi)
- - Geometry shader instancing DONE ()
+ - Geometry shader instancing DONE (r600)
- Geometry shader multiple streams DONE ()
- Enhanced per-sample shading DONE (r600)
- Interpolation functions DONE ()
@@ -151,7 +151,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_arrays_of_arrays started (Timothy)
GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
GL_ARB_clear_buffer_object DONE (all drivers)
- GL_ARB_compute_shader started (currently stalled)
+ GL_ARB_compute_shader in progress (jljusten)
GL_ARB_copy_image DONE (i965)
GL_KHR_debug DONE (all drivers)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
@@ -202,7 +202,7 @@ GL 4.5, GLSL 4.50:
These are the extensions cherry-picked to make GLES 3.1
GLES3.1, GLSL ES 3.1
GL_ARB_arrays_of_arrays started (Timothy)
- GL_ARB_compute_shader started (currently stalled)
+ GL_ARB_compute_shader in progress (jljusten)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
GL_ARB_framebuffer_no_attachments not started
GL_ARB_program_interface_query not started
diff --git a/mesalib/src/gallium/auxiliary/util/u_blitter.c b/mesalib/src/gallium/auxiliary/util/u_blitter.c
index 609e02fd1..f3fe949c2 100644
--- a/mesalib/src/gallium/auxiliary/util/u_blitter.c
+++ b/mesalib/src/gallium/auxiliary/util/u_blitter.c
@@ -130,6 +130,7 @@ struct blitter_context_priv
unsigned dst_height;
boolean has_geometry_shader;
+ boolean has_layered;
boolean has_stream_out;
boolean has_stencil_export;
boolean has_texture_multisample;
@@ -288,28 +289,24 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
}
}
- /* Fragment shaders are created on-demand, except these.
- * The interpolation must be constant for integer texture clearing to work.
- */
- ctx->fs_empty = util_make_empty_fragment_shader(pipe);
- ctx->fs_write_one_cbuf =
- util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
- TGSI_INTERPOLATE_CONSTANT, FALSE);
- ctx->fs_write_all_cbufs =
- util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
- TGSI_INTERPOLATE_CONSTANT, TRUE);
-
- /* vertex shaders */
- {
- const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
- TGSI_SEMANTIC_GENERIC };
- const uint semantic_indices[] = { 0, 0 };
- ctx->vs =
- util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
- semantic_indices);
- }
+ ctx->has_layered =
+ pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_INSTANCEID) &&
+ pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT);
- if (ctx->has_stream_out) {
+ /* set invariant vertex coordinates */
+ for (i = 0; i < 4; i++)
+ ctx->vertices[i][0][3] = 1; /*v.w*/
+
+ ctx->upload = u_upload_create(pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER);
+
+ return &ctx->base;
+}
+
+static void bind_vs_pos_only(struct blitter_context_priv *ctx)
+{
+ struct pipe_context *pipe = ctx->base.pipe;
+
+ if (!ctx->vs_pos_only) {
struct pipe_stream_output_info so;
const uint semantic_names[] = { TGSI_SEMANTIC_POSITION };
const uint semantic_indices[] = { 0 };
@@ -324,18 +321,71 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
semantic_indices, &so);
}
- if (pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_INSTANCEID) &&
- pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) {
+ pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+}
+
+static void bind_vs_passthrough(struct blitter_context_priv *ctx)
+{
+ struct pipe_context *pipe = ctx->base.pipe;
+
+ if (!ctx->vs) {
+ const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_GENERIC };
+ const uint semantic_indices[] = { 0, 0 };
+ ctx->vs =
+ util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
+ semantic_indices);
+ }
+
+ pipe->bind_vs_state(pipe, ctx->vs);
+}
+
+static void bind_vs_layered(struct blitter_context_priv *ctx)
+{
+ struct pipe_context *pipe = ctx->base.pipe;
+
+ if (!ctx->vs_layered) {
ctx->vs_layered = util_make_layered_clear_vertex_shader(pipe);
}
- /* set invariant vertex coordinates */
- for (i = 0; i < 4; i++)
- ctx->vertices[i][0][3] = 1; /*v.w*/
+ pipe->bind_vs_state(pipe, ctx->vs_layered);
+}
- ctx->upload = u_upload_create(pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER);
+static void bind_fs_empty(struct blitter_context_priv *ctx)
+{
+ struct pipe_context *pipe = ctx->base.pipe;
- return &ctx->base;
+ if (!ctx->fs_empty) {
+ ctx->fs_empty = util_make_empty_fragment_shader(pipe);
+ }
+
+ pipe->bind_fs_state(pipe, ctx->fs_empty);
+}
+
+static void bind_fs_write_one_cbuf(struct blitter_context_priv *ctx)
+{
+ struct pipe_context *pipe = ctx->base.pipe;
+
+ if (!ctx->fs_write_one_cbuf) {
+ ctx->fs_write_one_cbuf =
+ util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
+ TGSI_INTERPOLATE_CONSTANT, FALSE);
+ }
+
+ pipe->bind_fs_state(pipe, ctx->fs_write_one_cbuf);
+}
+
+static void bind_fs_write_all_cbufs(struct blitter_context_priv *ctx)
+{
+ struct pipe_context *pipe = ctx->base.pipe;
+
+ if (!ctx->fs_write_all_cbufs) {
+ ctx->fs_write_all_cbufs =
+ util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
+ TGSI_INTERPOLATE_CONSTANT, TRUE);
+ }
+
+ pipe->bind_fs_state(pipe, ctx->fs_write_all_cbufs);
}
void util_blitter_destroy(struct blitter_context *blitter)
@@ -361,7 +411,8 @@ void util_blitter_destroy(struct blitter_context *blitter)
pipe->delete_rasterizer_state(pipe, ctx->rs_state_scissor);
if (ctx->rs_discard_state)
pipe->delete_rasterizer_state(pipe, ctx->rs_discard_state);
- pipe->delete_vs_state(pipe, ctx->vs);
+ if (ctx->vs)
+ pipe->delete_vs_state(pipe, ctx->vs);
if (ctx->vs_pos_only)
pipe->delete_vs_state(pipe, ctx->vs_pos_only);
if (ctx->vs_layered)
@@ -408,9 +459,12 @@ void util_blitter_destroy(struct blitter_context *blitter)
ctx->delete_fs_state(pipe, ctx->fs_resolve_uint[i][j][f]);
}
- ctx->delete_fs_state(pipe, ctx->fs_empty);
- ctx->delete_fs_state(pipe, ctx->fs_write_one_cbuf);
- ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs);
+ if (ctx->fs_empty)
+ ctx->delete_fs_state(pipe, ctx->fs_empty);
+ if (ctx->fs_write_one_cbuf)
+ ctx->delete_fs_state(pipe, ctx->fs_write_one_cbuf);
+ if (ctx->fs_write_all_cbufs)
+ ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs);
pipe->delete_sampler_state(pipe, ctx->sampler_state_rect_linear);
pipe->delete_sampler_state(pipe, ctx->sampler_state_rect);
@@ -1021,7 +1075,11 @@ static void blitter_set_common_draw_rect_state(struct blitter_context_priv *ctx,
pipe->bind_rasterizer_state(pipe, scissor ? ctx->rs_state_scissor
: ctx->rs_state);
- pipe->bind_vs_state(pipe, vs_layered ? ctx->vs_layered : ctx->vs);
+ if (vs_layered)
+ bind_vs_layered(ctx);
+ else
+ bind_vs_passthrough(ctx);
+
if (ctx->has_geometry_shader)
pipe->bind_gs_state(pipe, NULL);
if (ctx->has_stream_out)
@@ -1118,7 +1176,7 @@ static void util_blitter_clear_custom(struct blitter_context *blitter,
struct pipe_context *pipe = ctx->base.pipe;
struct pipe_stencil_ref sr = { { 0 } };
- assert(ctx->vs_layered || num_layers <= 1);
+ assert(ctx->has_layered || num_layers <= 1);
blitter_set_running_flag(ctx);
blitter_check_saved_vertex_states(ctx);
@@ -1148,12 +1206,12 @@ static void util_blitter_clear_custom(struct blitter_context *blitter,
pipe->set_stencil_ref(pipe, &sr);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
- ctx->bind_fs_state(pipe, ctx->fs_write_all_cbufs);
+ bind_fs_write_all_cbufs(ctx);
pipe->set_sample_mask(pipe, ~0);
blitter_set_dst_dimensions(ctx, width, height);
- if (num_layers > 1 && ctx->vs_layered) {
+ if (num_layers > 1 && ctx->has_layered) {
blitter_set_common_draw_rect_state(ctx, FALSE, TRUE);
blitter_set_clear_color(ctx, color);
blitter_draw(ctx, 0, 0, width, height, depth, num_layers);
@@ -1680,7 +1738,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter,
/* bind states */
pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
- ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf);
+ bind_fs_write_one_cbuf(ctx);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
@@ -1748,7 +1806,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
/* hmm that should be illegal probably, or make it a no-op somewhere */
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
- ctx->bind_fs_state(pipe, ctx->fs_empty);
+ bind_fs_empty(ctx);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
@@ -1799,7 +1857,10 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] :
ctx->blend[0]);
pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
- ctx->bind_fs_state(pipe, cbsurf ? ctx->fs_write_one_cbuf : ctx->fs_empty);
+ if (cbsurf)
+ bind_fs_write_one_cbuf(ctx);
+ else
+ bind_fs_empty(ctx);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
@@ -1876,7 +1937,7 @@ void util_blitter_copy_buffer(struct blitter_context *blitter,
pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf[0]);
- pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+ bind_vs_pos_only(ctx);
if (ctx->has_geometry_shader)
pipe->bind_gs_state(pipe, NULL);
pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state);
@@ -1936,7 +1997,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
pipe->bind_vertex_elements_state(pipe,
ctx->velem_state_readbuf[num_channels-1]);
- pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+ bind_vs_pos_only(ctx);
if (ctx->has_geometry_shader)
pipe->bind_gs_state(pipe, NULL);
pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state);
@@ -1978,7 +2039,7 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter,
pipe->bind_blend_state(pipe, custom_blend);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
- ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf);
+ bind_fs_write_one_cbuf(ctx);
pipe->set_sample_mask(pipe, sample_mask);
memset(&surf_tmpl, 0, sizeof(surf_tmpl));
@@ -2041,7 +2102,7 @@ void util_blitter_custom_color(struct blitter_context *blitter,
pipe->bind_blend_state(pipe, custom_blend ? custom_blend
: ctx->blend[PIPE_MASK_RGBA]);
pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
- ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf);
+ bind_fs_write_one_cbuf(ctx);
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
pipe->set_sample_mask(pipe, (1ull << MAX2(1, dstsurf->texture->nr_samples)) - 1);
diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp
index b289c2975..739a9f412 100644
--- a/mesalib/src/glsl/ir.cpp
+++ b/mesalib/src/glsl/ir.cpp
@@ -255,6 +255,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_dFdy_fine:
case ir_unop_bitfield_reverse:
case ir_unop_interpolate_at_centroid:
+ case ir_unop_saturate:
this->type = op0->type;
break;
@@ -534,6 +535,7 @@ static const char *const operator_strs[] = {
"bit_count",
"find_msb",
"find_lsb",
+ "sat",
"noise",
"interpolate_at_centroid",
"+",
diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h
index e9051732b..8003f88ce 100644
--- a/mesalib/src/glsl/ir.h
+++ b/mesalib/src/glsl/ir.h
@@ -1250,6 +1250,7 @@ enum ir_expression_operation {
ir_unop_find_lsb,
/*@}*/
+ ir_unop_saturate,
ir_unop_noise,
/**
diff --git a/mesalib/src/glsl/ir_builder.cpp b/mesalib/src/glsl/ir_builder.cpp
index f03941443..a2f6f2967 100644
--- a/mesalib/src/glsl/ir_builder.cpp
+++ b/mesalib/src/glsl/ir_builder.cpp
@@ -271,11 +271,7 @@ clamp(operand a, operand b, operand c)
ir_expression *
saturate(operand a)
{
- void *mem_ctx = ralloc_parent(a.val);
-
- return expr(ir_binop_max,
- expr(ir_binop_min, a, new(mem_ctx) ir_constant(1.0f)),
- new(mem_ctx) ir_constant(0.0f));
+ return expr(ir_unop_saturate, a);
}
ir_expression *
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index 96060217c..1e8b3a3cc 100644
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -1469,6 +1469,12 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
}
break;
+ case ir_unop_saturate:
+ for (unsigned c = 0; c < components; c++) {
+ data.f[c] = CLAMP(op[0]->value.f[c], 0.0f, 1.0f);
+ }
+ break;
+
case ir_triop_bitfield_extract: {
int offset = op[1]->value.i[0];
int bits = op[2]->value.i[0];
diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h
index b83c22592..1c6f72b54 100644
--- a/mesalib/src/glsl/ir_optimization.h
+++ b/mesalib/src/glsl/ir_optimization.h
@@ -40,6 +40,7 @@
#define LDEXP_TO_ARITH 0x100
#define CARRY_TO_ARITH 0x200
#define BORROW_TO_ARITH 0x400
+#define SAT_TO_CLAMP 0x800
/**
* \see class lower_packing_builtins_visitor
diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp
index 5b2067782..97a581dc2 100644
--- a/mesalib/src/glsl/ir_validate.cpp
+++ b/mesalib/src/glsl/ir_validate.cpp
@@ -241,6 +241,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_log:
case ir_unop_exp2:
case ir_unop_log2:
+ case ir_unop_saturate:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
assert(ir->type == ir->operands[0]->type);
break;
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index d5473adc3..57be4931d 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -2439,8 +2439,10 @@ check_explicit_uniform_locations(struct gl_context *ctx,
ir_variable *var = node->as_variable();
if ((var && var->data.mode == ir_var_uniform) &&
var->data.explicit_location) {
- if (!reserve_explicit_locations(prog, uniform_map, var))
+ if (!reserve_explicit_locations(prog, uniform_map, var)) {
+ delete uniform_map;
return;
+ }
}
}
}
diff --git a/mesalib/src/glsl/lower_instructions.cpp b/mesalib/src/glsl/lower_instructions.cpp
index 176070c87..684285350 100644
--- a/mesalib/src/glsl/lower_instructions.cpp
+++ b/mesalib/src/glsl/lower_instructions.cpp
@@ -41,6 +41,7 @@
* - BITFIELD_INSERT_TO_BFM_BFI
* - CARRY_TO_ARITH
* - BORROW_TO_ARITH
+ * - SAT_TO_CLAMP
*
* SUB_TO_ADD_NEG:
* ---------------
@@ -104,6 +105,10 @@
* ----------------
* Converts ir_borrow into (x < y).
*
+ * SAT_TO_CLAMP:
+ * -------------
+ * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
+ *
*/
#include "main/core.h" /* for M_LOG2E */
@@ -139,6 +144,7 @@ private:
void ldexp_to_arith(ir_expression *);
void carry_to_arith(ir_expression *);
void borrow_to_arith(ir_expression *);
+ void sat_to_clamp(ir_expression *);
};
} /* anonymous namespace */
@@ -484,6 +490,24 @@ lower_instructions_visitor::borrow_to_arith(ir_expression *ir)
this->progress = true;
}
+void
+lower_instructions_visitor::sat_to_clamp(ir_expression *ir)
+{
+ /* Translates
+ * ir_unop_saturate x
+ * into
+ * ir_binop_min (ir_binop_max(x, 0.0), 1.0)
+ */
+
+ ir->operation = ir_binop_min;
+ ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type,
+ ir->operands[0],
+ new(ir) ir_constant(0.0f));
+ ir->operands[1] = new(ir) ir_constant(1.0f);
+
+ this->progress = true;
+}
+
ir_visitor_status
lower_instructions_visitor::visit_leave(ir_expression *ir)
{
@@ -540,6 +564,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
borrow_to_arith(ir);
break;
+ case ir_unop_saturate:
+ if (lowering(SAT_TO_CLAMP))
+ sat_to_clamp(ir);
+ break;
+
default:
return visit_continue;
}
diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp
index ac7514acf..447618f9e 100644
--- a/mesalib/src/glsl/opt_algebraic.cpp
+++ b/mesalib/src/glsl/opt_algebraic.cpp
@@ -110,6 +110,48 @@ is_vec_basis(ir_constant *ir)
return (ir == NULL) ? false : ir->is_basis();
}
+static inline bool
+is_valid_vec_const(ir_constant *ir)
+{
+ if (ir == NULL)
+ return false;
+
+ if (!ir->type->is_scalar() && !ir->type->is_vector())
+ return false;
+
+ return true;
+}
+
+static inline bool
+is_less_than_one(ir_constant *ir)
+{
+ if (!is_valid_vec_const(ir))
+ return false;
+
+ unsigned component = 0;
+ for (int c = 0; c < ir->type->vector_elements; c++) {
+ if (ir->get_float_component(c) < 1.0f)
+ component++;
+ }
+
+ return (component == ir->type->vector_elements);
+}
+
+static inline bool
+is_greater_than_zero(ir_constant *ir)
+{
+ if (!is_valid_vec_const(ir))
+ return false;
+
+ unsigned component = 0;
+ for (int c = 0; c < ir->type->vector_elements; c++) {
+ if (ir->get_float_component(c) > 0.0f)
+ component++;
+ }
+
+ return (component == ir->type->vector_elements);
+}
+
static void
update_type(ir_expression *ir)
{
@@ -614,6 +656,62 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
break;
+ case ir_binop_min:
+ case ir_binop_max:
+ if (ir->type->base_type != GLSL_TYPE_FLOAT)
+ break;
+
+ /* Replace min(max) operations and its commutative combinations with
+ * a saturate operation
+ */
+ for (int op = 0; op < 2; op++) {
+ ir_expression *minmax = op_expr[op];
+ ir_constant *outer_const = op_const[1 - op];
+ ir_expression_operation op_cond = (ir->operation == ir_binop_max) ?
+ ir_binop_min : ir_binop_max;
+
+ if (!minmax || !outer_const || (minmax->operation != op_cond))
+ continue;
+
+ /* Found a min(max) combination. Now try to see if its operands
+ * meet our conditions that we can do just a single saturate operation
+ */
+ for (int minmax_op = 0; minmax_op < 2; minmax_op++) {
+ ir_rvalue *inner_val_a = minmax->operands[minmax_op];
+ ir_rvalue *inner_val_b = minmax->operands[1 - minmax_op];
+
+ if (!inner_val_a || !inner_val_b)
+ continue;
+
+ /* Found a {min|max} ({max|min} (x, 0.0), 1.0) operation and its variations */
+ if ((outer_const->is_one() && inner_val_a->is_zero()) ||
+ (inner_val_a->is_one() && outer_const->is_zero()))
+ return saturate(inner_val_b);
+
+ /* Found a {min|max} ({max|min} (x, 0.0), b) where b < 1.0
+ * and its variations
+ */
+ if (is_less_than_one(outer_const) && inner_val_b->is_zero())
+ return expr(ir_binop_min, saturate(inner_val_a), outer_const);
+
+ if (!inner_val_b->as_constant())
+ continue;
+
+ if (is_less_than_one(inner_val_b->as_constant()) && outer_const->is_zero())
+ return expr(ir_binop_min, saturate(inner_val_a), inner_val_b);
+
+ /* Found a {min|max} ({max|min} (x, b), 1.0), where b > 0.0
+ * and its variations
+ */
+ if (outer_const->is_one() && is_greater_than_zero(inner_val_b->as_constant()))
+ return expr(ir_binop_max, saturate(inner_val_a), inner_val_b);
+ if (inner_val_b->as_constant()->is_one() && is_greater_than_zero(outer_const))
+ return expr(ir_binop_max, saturate(inner_val_a), outer_const);
+ }
+ }
+
+ break;
+
case ir_unop_rcp:
if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
return op_expr[0]->operands[0];
diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h
index dc25ea467..4fb30ffba 100644
--- a/mesalib/src/mesa/main/mtypes.h
+++ b/mesalib/src/mesa/main/mtypes.h
@@ -2348,6 +2348,20 @@ struct gl_fragment_program_state
/**
+ * Context state for compute programs.
+ */
+struct gl_compute_program_state
+{
+ struct gl_compute_program *Current; /**< user-bound compute program */
+
+ /** Currently enabled and valid program (including internal programs
+ * and compiled shader programs).
+ */
+ struct gl_compute_program *_Current;
+};
+
+
+/**
* ATI_fragment_shader runtime state
*/
#define ATI_FS_INPUT_PRIMARY 0
@@ -3908,32 +3922,32 @@ typedef enum
struct gl_driver_flags
{
/** gl_context::Array::_DrawArrays (vertex array state) */
- GLbitfield NewArray;
+ uint64_t NewArray;
/** gl_context::TransformFeedback::CurrentObject */
- GLbitfield NewTransformFeedback;
+ uint64_t NewTransformFeedback;
/** gl_context::TransformFeedback::CurrentObject::shader_program */
- GLbitfield NewTransformFeedbackProg;
+ uint64_t NewTransformFeedbackProg;
/** gl_context::RasterDiscard */
- GLbitfield NewRasterizerDiscard;
+ uint64_t NewRasterizerDiscard;
/**
* gl_context::UniformBufferBindings
* gl_shader_program::UniformBlocks
*/
- GLbitfield NewUniformBuffer;
+ uint64_t NewUniformBuffer;
/**
* gl_context::AtomicBufferBindings
*/
- GLbitfield NewAtomicBuffer;
+ uint64_t NewAtomicBuffer;
/**
* gl_context::ImageUnits
*/
- GLbitfield NewImageUnits;
+ uint64_t NewImageUnits;
};
struct gl_uniform_buffer_binding
@@ -4153,6 +4167,7 @@ struct gl_context
struct gl_vertex_program_state VertexProgram;
struct gl_fragment_program_state FragmentProgram;
struct gl_geometry_program_state GeometryProgram;
+ struct gl_compute_program_state ComputeProgram;
struct gl_ati_fragment_shader_state ATIFragmentShader;
struct gl_pipeline_shader_state Pipeline; /**< GLSL pipeline shader object state */
@@ -4240,7 +4255,7 @@ struct gl_context
GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */
GLbitfield NewState; /**< bitwise-or of _NEW_* flags */
- GLbitfield NewDriverState;/**< bitwise-or of flags from DriverFlags */
+ uint64_t NewDriverState; /**< bitwise-or of flags from DriverFlags */
struct gl_driver_flags DriverFlags;
diff --git a/mesalib/src/mesa/main/textureview.c b/mesalib/src/mesa/main/textureview.c
index b3521e219..6e86a9a44 100644
--- a/mesalib/src/mesa/main/textureview.c
+++ b/mesalib/src/mesa/main/textureview.c
@@ -536,6 +536,9 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture,
/* Adjust width, height, depth to be appropriate for new target */
switch (target) {
case GL_TEXTURE_1D:
+ height = 1;
+ break;
+
case GL_TEXTURE_3D:
break;
diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp
index e5844c3c4..49e4a7a40 100644
--- a/mesalib/src/mesa/program/ir_to_mesa.cpp
+++ b/mesalib/src/mesa/program/ir_to_mesa.cpp
@@ -311,7 +311,6 @@ public:
int mul_operand);
bool try_emit_mad_for_and_not(ir_expression *ir,
int mul_operand);
- bool try_emit_sat(ir_expression *ir);
void emit_swz(ir_expression *ir);
@@ -866,50 +865,6 @@ ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
return true;
}
-bool
-ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
-{
- /* Saturates were only introduced to vertex programs in
- * NV_vertex_program3, so don't give them to drivers in the VP.
- */
- if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
- return false;
-
- ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
- if (!sat_src)
- return false;
-
- sat_src->accept(this);
- src_reg src = this->result;
-
- /* If we generated an expression instruction into a temporary in
- * processing the saturate's operand, apply the saturate to that
- * instruction. Otherwise, generate a MOV to do the saturate.
- *
- * Note that we have to be careful to only do this optimization if
- * the instruction in question was what generated src->result. For
- * example, ir_dereference_array might generate a MUL instruction
- * to create the reladdr, and return us a src reg using that
- * reladdr. That MUL result is not the value we're trying to
- * saturate.
- */
- ir_expression *sat_src_expr = sat_src->as_expression();
- ir_to_mesa_instruction *new_inst;
- new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
- if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
- sat_src_expr->operation == ir_binop_add ||
- sat_src_expr->operation == ir_binop_dot)) {
- new_inst->saturate = true;
- } else {
- this->result = get_temp(ir->type);
- ir_to_mesa_instruction *inst;
- inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
- inst->saturate = true;
- }
-
- return true;
-}
-
void
ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
src_reg *reg, int *num_reladdr)
@@ -1072,9 +1027,6 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
return;
}
- if (try_emit_sat(ir))
- return;
-
if (ir->operation == ir_quadop_vector) {
this->emit_swz(ir);
return;
@@ -1171,6 +1123,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
emit(ir, OPCODE_DDY, result_dst, op[0]);
break;
+ case ir_unop_saturate: {
+ ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV,
+ result_dst, op[0]);
+ inst->saturate = true;
+ break;
+ }
case ir_unop_noise: {
const enum prog_opcode opcode =
prog_opcode(OPCODE_NOISE1
@@ -2990,9 +2948,12 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
/* Lowering */
do_mat_op_to_vec(ir);
+ GLenum target = _mesa_shader_stage_to_program(prog->_LinkedShaders[i]->Stage);
lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
- | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
+ | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)
+ | ((target == GL_VERTEX_PROGRAM_ARB) ? SAT_TO_CLAMP
+ : 0)));
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
diff --git a/mesalib/src/mesa/program/prog_cache.c b/mesalib/src/mesa/program/prog_cache.c
index 07192a96a..34609f056 100644
--- a/mesalib/src/mesa/program/prog_cache.c
+++ b/mesalib/src/mesa/program/prog_cache.c
@@ -143,7 +143,7 @@ _mesa_new_program_cache(void)
if (cache) {
cache->size = 17;
cache->items =
- calloc(1, cache->size * sizeof(struct cache_item));
+ calloc(cache->size, sizeof(struct cache_item *));
if (!cache->items) {
free(cache);
return NULL;
diff --git a/mesalib/src/mesa/state_tracker/st_context.h b/mesalib/src/mesa/state_tracker/st_context.h
index 6d572bd49..58f14f954 100644
--- a/mesalib/src/mesa/state_tracker/st_context.h
+++ b/mesalib/src/mesa/state_tracker/st_context.h
@@ -56,7 +56,7 @@ struct u_upload_mgr;
struct st_state_flags {
GLuint mesa;
- GLuint st;
+ uint64_t st;
};
struct st_tracked_state {
diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 84bdc4f06..dd9c84f1a 100644
--- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -75,14 +75,6 @@ extern "C" {
(1 << PROGRAM_UNIFORM))
/**
- * Maximum number of temporary registers.
- *
- * It is too big for stack allocated arrays -- it will cause stack overflow on
- * Windows and likely Mac OS X.
- */
-#define MAX_TEMPS 4096
-
-/**
* Maximum number of arrays
*/
#define MAX_ARRAYS 256
@@ -446,7 +438,6 @@ public:
int mul_operand);
bool try_emit_mad_for_and_not(ir_expression *ir,
int mul_operand);
- bool try_emit_sat(ir_expression *ir);
void emit_swz(ir_expression *ir);
@@ -1270,53 +1261,6 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
return true;
}
-bool
-glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
-{
- /* Emit saturates in the vertex shader only if SM 3.0 is supported.
- */
- if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
- !st_context(this->ctx)->has_shader_model3) {
- return false;
- }
-
- ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
- if (!sat_src)
- return false;
-
- sat_src->accept(this);
- st_src_reg src = this->result;
-
- /* If we generated an expression instruction into a temporary in
- * processing the saturate's operand, apply the saturate to that
- * instruction. Otherwise, generate a MOV to do the saturate.
- *
- * Note that we have to be careful to only do this optimization if
- * the instruction in question was what generated src->result. For
- * example, ir_dereference_array might generate a MUL instruction
- * to create the reladdr, and return us a src reg using that
- * reladdr. That MUL result is not the value we're trying to
- * saturate.
- */
- ir_expression *sat_src_expr = sat_src->as_expression();
- if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
- sat_src_expr->operation == ir_binop_add ||
- sat_src_expr->operation == ir_binop_dot)) {
- glsl_to_tgsi_instruction *new_inst;
- new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
- new_inst->saturate = true;
- } else {
- this->result = get_temp(ir->type);
- st_dst_reg result_dst = st_dst_reg(this->result);
- result_dst.writemask = (1 << ir->type->vector_elements) - 1;
- glsl_to_tgsi_instruction *inst;
- inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
- inst->saturate = true;
- }
-
- return true;
-}
-
void
glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
st_src_reg *reg, int *num_reladdr)
@@ -1363,9 +1307,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
return;
}
- if (try_emit_sat(ir))
- return;
-
if (ir->operation == ir_quadop_vector)
assert(!"ir_quadop_vector should have been lowered");
@@ -1460,6 +1401,12 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
case ir_unop_cos_reduced:
emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
break;
+ case ir_unop_saturate: {
+ glsl_to_tgsi_instruction *inst;
+ inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+ inst->saturate = true;
+ break;
+ }
case ir_unop_dFdx:
case ir_unop_dFdx_coarse:
@@ -3301,14 +3248,10 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src)
void
glsl_to_tgsi_visitor::simplify_cmp(void)
{
- unsigned *tempWrites;
+ int tempWritesSize = 0;
+ unsigned *tempWrites = NULL;
unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
- tempWrites = new unsigned[MAX_TEMPS];
- if (!tempWrites) {
- return;
- }
- memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
memset(outputWrites, 0, sizeof(outputWrites));
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
@@ -3330,7 +3273,19 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
prevWriteMask = outputWrites[inst->dst.index];
outputWrites[inst->dst.index] |= inst->dst.writemask;
} else if (inst->dst.file == PROGRAM_TEMPORARY) {
- assert(inst->dst.index < MAX_TEMPS);
+ if (inst->dst.index >= tempWritesSize) {
+ const int inc = 4096;
+
+ tempWrites = (unsigned*)
+ realloc(tempWrites,
+ (tempWritesSize + inc) * sizeof(unsigned));
+ if (!tempWrites)
+ return;
+
+ memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
+ tempWritesSize += inc;
+ }
+
prevWriteMask = tempWrites[inst->dst.index];
tempWrites[inst->dst.index] |= inst->dst.writemask;
} else
@@ -3349,7 +3304,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
}
}
- delete [] tempWrites;
+ free(tempWrites);
}
/* Replaces all references to a temporary register index with another index. */
@@ -4158,7 +4113,9 @@ struct label {
struct st_translate {
struct ureg_program *ureg;
- struct ureg_dst temps[MAX_TEMPS];
+ unsigned temps_size;
+ struct ureg_dst *temps;
+
struct ureg_dst arrays[MAX_ARRAYS];
struct ureg_src *constants;
struct ureg_src *immediates;
@@ -4299,7 +4256,19 @@ dst_register(struct st_translate *t,
return ureg_dst_undef();
case PROGRAM_TEMPORARY:
- assert(index < Elements(t->temps));
+ /* Allocate space for temporaries on demand. */
+ if (index >= t->temps_size) {
+ const int inc = 4096;
+
+ t->temps = (struct ureg_dst*)
+ realloc(t->temps,
+ (t->temps_size + inc) * sizeof(struct ureg_dst));
+ if (!t->temps)
+ return ureg_dst_undef();
+
+ memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
+ t->temps_size += inc;
+ }
if (ureg_dst_is_undef(t->temps[index]))
t->temps[index] = ureg_DECL_local_temporary(t->ureg);
@@ -5158,6 +5127,7 @@ st_translate_program(
out:
if (t) {
+ free(t->temps);
free(t->insn);
free(t->labels);
free(t->constants);
@@ -5429,6 +5399,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
lower_offset_arrays(ir);
do_mat_op_to_vec(ir);
+ /* Emit saturates in the vertex shader only if SM 3.0 is supported. */
+ bool vs_sm3 = (_mesa_shader_stage_to_program(prog->_LinkedShaders[i]->Stage) ==
+ GL_VERTEX_PROGRAM_ARB) && st_context(ctx)->has_shader_model3;
lower_instructions(ir,
MOD_TO_FRACT |
DIV_TO_MUL_RCP |
@@ -5438,7 +5411,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
CARRY_TO_ARITH |
BORROW_TO_ARITH |
(options->EmitNoPow ? POW_TO_EXP2 : 0) |
- (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));
+ (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
+ (vs_sm3 ? SAT_TO_CLAMP : 0));
lower_ubo_reference(prog->_LinkedShaders[i], ir);
do_vec_index_to_cond_assign(ir);