aboutsummaryrefslogtreecommitdiff
path: root/mesalib/src/glsl
diff options
context:
space:
mode:
Diffstat (limited to 'mesalib/src/glsl')
-rw-r--r--mesalib/src/glsl/Makefile.sources1
-rw-r--r--mesalib/src/glsl/ast_to_hir.cpp177
-rw-r--r--mesalib/src/glsl/ast_type.cpp2
-rw-r--r--mesalib/src/glsl/builtin_variables.cpp6
-rw-r--r--mesalib/src/glsl/glcpp/glcpp-parse.y6
-rw-r--r--mesalib/src/glsl/glsl_lexer.ll1
-rw-r--r--mesalib/src/glsl/glsl_parser.yy38
-rw-r--r--mesalib/src/glsl/glsl_parser_extras.cpp5
-rw-r--r--mesalib/src/glsl/glsl_parser_extras.h21
-rw-r--r--mesalib/src/glsl/glsl_symbol_table.h2
-rw-r--r--mesalib/src/glsl/glsl_types.cpp30
-rw-r--r--mesalib/src/glsl/glsl_types.h6
-rw-r--r--mesalib/src/glsl/hir_field_selection.cpp1
-rw-r--r--mesalib/src/glsl/ir.cpp1
-rw-r--r--mesalib/src/glsl/ir_basic_block.cpp2
-rw-r--r--mesalib/src/glsl/ir_constant_expression.cpp1
-rw-r--r--mesalib/src/glsl/ir_expression_flattening.cpp2
-rw-r--r--mesalib/src/glsl/ir_optimization.h4
-rw-r--r--mesalib/src/glsl/ir_uniform.h12
-rw-r--r--mesalib/src/glsl/link_uniform_block_active_visitor.h2
-rw-r--r--mesalib/src/glsl/link_uniform_initializers.cpp2
-rw-r--r--mesalib/src/glsl/link_uniforms.cpp61
-rw-r--r--mesalib/src/glsl/linker.cpp114
-rw-r--r--mesalib/src/glsl/list.h568
-rw-r--r--mesalib/src/glsl/opt_algebraic.cpp59
-rw-r--r--mesalib/src/glsl/opt_dead_builtin_varyings.cpp2
-rw-r--r--mesalib/src/glsl/opt_rebalance_tree.cpp300
-rw-r--r--mesalib/src/glsl/standalone_scaffolding.cpp1
-rw-r--r--mesalib/src/glsl/test_optpass.cpp2
29 files changed, 1132 insertions, 297 deletions
diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources
index 5945590a5..b54eae72d 100644
--- a/mesalib/src/glsl/Makefile.sources
+++ b/mesalib/src/glsl/Makefile.sources
@@ -96,6 +96,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/opt_function_inlining.cpp \
$(GLSL_SRCDIR)/opt_if_simplification.cpp \
$(GLSL_SRCDIR)/opt_noop_swizzle.cpp \
+ $(GLSL_SRCDIR)/opt_rebalance_tree.cpp \
$(GLSL_SRCDIR)/opt_redundant_jumps.cpp \
$(GLSL_SRCDIR)/opt_structure_splitting.cpp \
$(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \
diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp
index d1c77f1ec..7ba04a808 100644
--- a/mesalib/src/glsl/ast_to_hir.cpp
+++ b/mesalib/src/glsl/ast_to_hir.cpp
@@ -49,7 +49,6 @@
* parser (and lexer) sources.
*/
-#include "main/core.h" /* for struct gl_extensions */
#include "glsl_symbol_table.h"
#include "glsl_parser_extras.h"
#include "ast.h"
@@ -2182,6 +2181,41 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
{
bool fail = false;
+ /* Checks for GL_ARB_explicit_uniform_location. */
+ if (qual->flags.q.uniform) {
+ if (!state->check_explicit_uniform_location_allowed(loc, var))
+ return;
+
+ const struct gl_context *const ctx = state->ctx;
+ unsigned max_loc = qual->location + var->type->uniform_locations() - 1;
+
+ /* ARB_explicit_uniform_location specification states:
+ *
+ * "The explicitly defined locations and the generated locations
+ * must be in the range of 0 to MAX_UNIFORM_LOCATIONS minus one."
+ *
+ * "Valid locations for default-block uniform variable locations
+ * are in the range of 0 to the implementation-defined maximum
+ * number of uniform locations."
+ */
+ if (qual->location < 0) {
+ _mesa_glsl_error(loc, state,
+ "explicit location < 0 for uniform %s", var->name);
+ return;
+ }
+
+ if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
+ _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
+ ">= MAX_UNIFORM_LOCATIONS (%u)", var->name,
+ ctx->Const.MaxUserAssignableUniformLocations);
+ return;
+ }
+
+ var->data.explicit_location = true;
+ var->data.location = qual->location;
+ return;
+ }
+
/* Between GL_ARB_explicit_attrib_location an
* GL_ARB_separate_shader_objects, the inputs and outputs of any shader
* stage can be assigned explicit locations. The checking here associates
@@ -2435,6 +2469,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
_mesa_shader_stage_to_string(state->stage));
}
+ /* Disallow layout qualifiers which may only appear on layout declarations. */
+ if (qual->flags.q.prim_type) {
+ _mesa_glsl_error(loc, state,
+ "Primitive type may only be specified on GS input or output "
+ "layout declaration, not on variables.");
+ }
+
/* Section 6.1.1 (Function Calling Conventions) of the GLSL 1.10 spec says:
*
* "However, the const qualifier cannot be used with out or inout."
@@ -2649,6 +2690,36 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
const bool uses_deprecated_qualifier = qual->flags.q.attribute
|| qual->flags.q.varying;
+
+ /* Validate auxiliary storage qualifiers */
+
+ /* From section 4.3.4 of the GLSL 1.30 spec:
+ * "It is an error to use centroid in in a vertex shader."
+ *
+ * From section 4.3.4 of the GLSL ES 3.00 spec:
+ * "It is an error to use centroid in or interpolation qualifiers in
+ * a vertex shader input."
+ */
+
+ /* Section 4.3.6 of the GLSL 1.30 specification states:
+ * "It is an error to use centroid out in a fragment shader."
+ *
+ * The GL_ARB_shading_language_420pack extension specification states:
+ * "It is an error to use auxiliary storage qualifiers or interpolation
+ * qualifiers on an output in a fragment shader."
+ */
+ if (qual->flags.q.sample && (!is_varying_var(var, state->stage) || uses_deprecated_qualifier)) {
+ _mesa_glsl_error(loc, state,
+ "sample qualifier may only be used on `in` or `out` "
+ "variables between shader stages");
+ }
+ if (qual->flags.q.centroid && !is_varying_var(var, state->stage)) {
+ _mesa_glsl_error(loc, state,
+ "centroid qualifier may only be used with `in', "
+ "`out' or `varying' variables between shader stages");
+ }
+
+
/* Is the 'layout' keyword used with parameters that allow relaxed checking.
* Many implementations of GL_ARB_fragment_coord_conventions_enable and some
* implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable
@@ -3606,45 +3677,6 @@ ast_declarator_list::hir(exec_list *instructions,
}
- /* From section 4.3.4 of the GLSL 1.30 spec:
- * "It is an error to use centroid in in a vertex shader."
- *
- * From section 4.3.4 of the GLSL ES 3.00 spec:
- * "It is an error to use centroid in or interpolation qualifiers in
- * a vertex shader input."
- */
- if (state->is_version(130, 300)
- && this->type->qualifier.flags.q.centroid
- && this->type->qualifier.flags.q.in
- && state->stage == MESA_SHADER_VERTEX) {
-
- _mesa_glsl_error(&loc, state,
- "'centroid in' cannot be used in a vertex shader");
- }
-
- if (state->stage == MESA_SHADER_VERTEX
- && this->type->qualifier.flags.q.sample
- && this->type->qualifier.flags.q.in) {
-
- _mesa_glsl_error(&loc, state,
- "'sample in' cannot be used in a vertex shader");
- }
-
- /* Section 4.3.6 of the GLSL 1.30 specification states:
- * "It is an error to use centroid out in a fragment shader."
- *
- * The GL_ARB_shading_language_420pack extension specification states:
- * "It is an error to use auxiliary storage qualifiers or interpolation
- * qualifiers on an output in a fragment shader."
- */
- if (state->stage == MESA_SHADER_FRAGMENT &&
- this->type->qualifier.flags.q.out &&
- this->type->qualifier.has_auxiliary_storage()) {
- _mesa_glsl_error(&loc, state,
- "auxiliary storage qualifiers cannot be used on "
- "fragment shader outputs");
- }
-
/* Precision qualifiers exists only in GLSL versions 1.00 and >= 1.30.
*/
if (this->type->qualifier.precision != ast_precision_none) {
@@ -4632,9 +4664,51 @@ ast_case_label::hir(exec_list *instructions,
ir_dereference_variable *deref_test_var =
new(ctx) ir_dereference_variable(state->switch_state.test_var);
- ir_rvalue *const test_cond = new(ctx) ir_expression(ir_binop_all_equal,
- label_const,
- deref_test_var);
+ ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal,
+ label_const,
+ deref_test_var);
+
+ /*
+ * From GLSL 4.40 specification section 6.2 ("Selection"):
+ *
+ * "The type of the init-expression value in a switch statement must
+ * be a scalar int or uint. The type of the constant-expression value
+ * in a case label also must be a scalar int or uint. When any pair
+ * of these values is tested for "equal value" and the types do not
+ * match, an implicit conversion will be done to convert the int to a
+ * uint (see section 4.1.10 “Implicit Conversions”) before the compare
+ * is done."
+ */
+ if (label_const->type != state->switch_state.test_var->type) {
+ YYLTYPE loc = this->test_value->get_location();
+
+ const glsl_type *type_a = label_const->type;
+ const glsl_type *type_b = state->switch_state.test_var->type;
+
+ /* Check if int->uint implicit conversion is supported. */
+ bool integer_conversion_supported =
+ glsl_type::int_type->can_implicitly_convert_to(glsl_type::uint_type,
+ state);
+
+ if ((!type_a->is_integer() || !type_b->is_integer()) ||
+ !integer_conversion_supported) {
+ _mesa_glsl_error(&loc, state, "type mismatch with switch "
+ "init-expression and case label (%s != %s)",
+ type_a->name, type_b->name);
+ } else {
+ /* Conversion of the case label. */
+ if (type_a->base_type == GLSL_TYPE_INT) {
+ if (!apply_implicit_conversion(glsl_type::uint_type,
+ test_cond->operands[0], state))
+ _mesa_glsl_error(&loc, state, "implicit type conversion error");
+ } else {
+ /* Conversion of the init-expression value. */
+ if (!apply_implicit_conversion(glsl_type::uint_type,
+ test_cond->operands[1], state))
+ _mesa_glsl_error(&loc, state, "implicit type conversion error");
+ }
+ }
+ }
ir_assignment *set_fallthru_on_test =
new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond);
@@ -5041,6 +5115,13 @@ ast_process_structure_or_interface_block(exec_list *instructions,
"with uniform interface blocks");
}
+ if ((qual->flags.q.uniform || !is_interface) &&
+ qual->has_auxiliary_storage()) {
+ _mesa_glsl_error(&loc, state,
+ "auxiliary storage qualifiers cannot be used "
+ "in uniform blocks or structures.");
+ }
+
if (field_type->is_matrix() ||
(field_type->is_array() && field_type->fields.array->is_matrix())) {
fields[i].row_major = block_row_major;
@@ -5090,7 +5171,7 @@ ast_struct_specifier::hir(exec_list *instructions,
*/
if (state->language_version != 110 && state->struct_specifier_depth != 0)
_mesa_glsl_error(&loc, state,
- "embedded structure declartions are not allowed");
+ "embedded structure declarations are not allowed");
state->struct_specifier_depth++;
@@ -5206,6 +5287,12 @@ ast_interface_block::hir(exec_list *instructions,
bool block_row_major = this->layout.flags.q.row_major;
exec_list declared_variables;
glsl_struct_field *fields;
+
+ /* Treat an interface block as one level of nesting, so that embedded struct
+ * specifiers will be disallowed.
+ */
+ state->struct_specifier_depth++;
+
unsigned int num_variables =
ast_process_structure_or_interface_block(&declared_variables,
state,
@@ -5217,6 +5304,8 @@ ast_interface_block::hir(exec_list *instructions,
redeclaring_per_vertex,
var_mode);
+ state->struct_specifier_depth--;
+
if (!redeclaring_per_vertex)
validate_identifier(this->block_name, loc, state);
diff --git a/mesalib/src/glsl/ast_type.cpp b/mesalib/src/glsl/ast_type.cpp
index 0ee2c495a..77053d5b1 100644
--- a/mesalib/src/glsl/ast_type.cpp
+++ b/mesalib/src/glsl/ast_type.cpp
@@ -247,7 +247,7 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
q.flags.q.local_size != 0 &&
state->in_qualifier->flags.q.local_size == 0;
- valid_in_mask.flags.q.local_size = 1;
+ valid_in_mask.flags.q.local_size = 7;
break;
default:
_mesa_glsl_error(loc, state,
diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp
index 9b35850ee..b9c69d23c 100644
--- a/mesalib/src/glsl/builtin_variables.cpp
+++ b/mesalib/src/glsl/builtin_variables.cpp
@@ -26,7 +26,6 @@
#include "glsl_symbol_table.h"
#include "main/core.h"
#include "main/uniforms.h"
-#include "program/prog_parameter.h"
#include "program/prog_statevars.h"
#include "program/prog_instruction.h"
@@ -939,6 +938,11 @@ builtin_variable_generator::generate_fs_special_vars()
if (state->ARB_gpu_shader5_enable) {
add_system_value(SYSTEM_VALUE_SAMPLE_MASK_IN, array(int_t, 1), "gl_SampleMaskIn");
}
+
+ if (state->ARB_fragment_layer_viewport_enable) {
+ add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer");
+ add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ }
}
diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y
index 98875837c..d8c395778 100644
--- a/mesalib/src/glsl/glcpp/glcpp-parse.y
+++ b/mesalib/src/glsl/glcpp/glcpp-parse.y
@@ -2086,9 +2086,15 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
add_builtin_define(parser, "GL_ARB_fragment_coord_conventions",
1);
+ if (extensions->ARB_fragment_layer_viewport)
+ add_builtin_define(parser, "GL_ARB_fragment_layer_viewport", 1);
+
if (extensions->ARB_explicit_attrib_location)
add_builtin_define(parser, "GL_ARB_explicit_attrib_location", 1);
+ if (extensions->ARB_explicit_uniform_location)
+ add_builtin_define(parser, "GL_ARB_explicit_uniform_location", 1);
+
if (extensions->ARB_shader_texture_lod)
add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1);
diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll
index 6c3f9b692..db7b1d179 100644
--- a/mesalib/src/glsl/glsl_lexer.ll
+++ b/mesalib/src/glsl/glsl_lexer.ll
@@ -396,6 +396,7 @@ layout {
|| yyextra->AMD_conservative_depth_enable
|| yyextra->ARB_conservative_depth_enable
|| yyextra->ARB_explicit_attrib_location_enable
+ || yyextra->ARB_explicit_uniform_location_enable
|| yyextra->has_separate_shader_objects()
|| yyextra->ARB_uniform_buffer_object_enable
|| yyextra->ARB_fragment_coord_conventions_enable
diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy
index eddab0518..240995901 100644
--- a/mesalib/src/glsl/glsl_parser.yy
+++ b/mesalib/src/glsl/glsl_parser.yy
@@ -1558,11 +1558,6 @@ type_qualifier:
if ($2.flags.q.invariant)
_mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier");
- if ($2.has_layout()) {
- _mesa_glsl_error(&@1, state,
- "\"invariant\" cannot be used with layout(...)");
- }
-
if (!state->ARB_shading_language_420pack_enable && $2.flags.q.precise)
_mesa_glsl_error(&@1, state,
"\"invariant\" must come after \"precise\"");
@@ -1585,11 +1580,6 @@ type_qualifier:
if ($2.has_interpolation())
_mesa_glsl_error(&@1, state, "duplicate interpolation qualifier");
- if ($2.has_layout()) {
- _mesa_glsl_error(&@1, state, "interpolation qualifiers cannot be used "
- "with layout(...)");
- }
-
if (!state->ARB_shading_language_420pack_enable &&
($2.flags.q.precise || $2.flags.q.invariant)) {
_mesa_glsl_error(&@1, state, "interpolation qualifiers must come "
@@ -1601,28 +1591,18 @@ type_qualifier:
}
| layout_qualifier type_qualifier
{
- /* The GLSL 1.50 grammar indicates that a layout(...) declaration can be
- * used standalone or immediately before a storage qualifier. It cannot
- * be used with interpolation qualifiers or invariant. There does not
- * appear to be any text indicating that it must come before the storage
- * qualifier, but always seems to in examples.
+ /* In the absence of ARB_shading_language_420pack, layout qualifiers may
+ * appear no later than auxiliary storage qualifiers. There is no
+ * particularly clear spec language mandating this, but in all examples
+ * the layout qualifier precedes the storage qualifier.
+ *
+ * We allow combinations of layout with interpolation, invariant or
+ * precise qualifiers since these are useful in ARB_separate_shader_objects.
+ * There is no clear spec guidance on this either.
*/
if (!state->ARB_shading_language_420pack_enable && $2.has_layout())
_mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
- if ($2.flags.q.invariant)
- _mesa_glsl_error(&@1, state, "layout(...) cannot be used with "
- "the \"invariant\" qualifier");
-
- if ($2.flags.q.precise)
- _mesa_glsl_error(&@1, state, "layout(...) cannot be used with "
- "the \"precise\" qualifier");
-
- if ($2.has_interpolation()) {
- _mesa_glsl_error(&@1, state, "layout(...) cannot be used with "
- "interpolation qualifiers");
- }
-
$$ = $1;
$$.merge_qualifier(&@1, state, $2);
}
@@ -2180,7 +2160,7 @@ condition:
;
/*
- * siwtch_statement grammar is based on the syntax described in the body
+ * switch_statement grammar is based on the syntax described in the body
* of the GLSL spec, not in it's appendix!!!
*/
switch_statement:
diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp
index f3c5bd049..11a9a4320 100644
--- a/mesalib/src/glsl/glsl_parser_extras.cpp
+++ b/mesalib/src/glsl/glsl_parser_extras.cpp
@@ -515,7 +515,9 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(ARB_draw_buffers, true, false, dummy_true),
EXT(ARB_draw_instanced, true, false, ARB_draw_instanced),
EXT(ARB_explicit_attrib_location, true, false, ARB_explicit_attrib_location),
+ EXT(ARB_explicit_uniform_location, true, false, ARB_explicit_uniform_location),
EXT(ARB_fragment_coord_conventions, true, false, ARB_fragment_coord_conventions),
+ EXT(ARB_fragment_layer_viewport, true, false, ARB_fragment_layer_viewport),
EXT(ARB_gpu_shader5, true, false, ARB_gpu_shader5),
EXT(ARB_sample_shading, true, false, ARB_sample_shading),
EXT(ARB_separate_shader_objects, true, false, dummy_true),
@@ -1568,7 +1570,8 @@ do_common_optimization(exec_list *ir, bool linked,
progress = do_constant_variable_unlinked(ir) || progress;
progress = do_constant_folding(ir) || progress;
progress = do_cse(ir) || progress;
- progress = do_algebraic(ir, native_integers) || progress;
+ progress = do_rebalance_tree(ir) || progress;
+ progress = do_algebraic(ir, native_integers, options) || progress;
progress = do_lower_jumps(ir) || progress;
progress = do_vec_index_to_swizzle(ir) || progress;
progress = lower_vector_insert(ir, false) || progress;
diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h
index 0416a9c72..aa4a114e4 100644
--- a/mesalib/src/glsl/glsl_parser_extras.h
+++ b/mesalib/src/glsl/glsl_parser_extras.h
@@ -151,6 +151,21 @@ struct _mesa_glsl_parse_state {
return true;
}
+   /**
+    * Report whether a uniform may carry an explicit location qualifier.
+    *
+    * Returns true only when has_explicit_attrib_location() holds and
+    * ARB_explicit_uniform_location_enable is set.  Otherwise a compile
+    * error is emitted at \c locp and false is returned.  The variable
+    * argument is unused.
+    */
+   bool check_explicit_uniform_location_allowed(YYLTYPE *locp,
+                                                const ir_variable *)
+   {
+      const bool allowed = this->has_explicit_attrib_location() &&
+                           this->ARB_explicit_uniform_location_enable;
+
+      if (allowed)
+         return true;
+
+      _mesa_glsl_error(locp, this,
+                       "uniform explicit location requires "
+                       "GL_ARB_explicit_uniform_location and either "
+                       "GL_ARB_explicit_attrib_location or GLSL 330.");
+      return false;
+   }
+
bool has_explicit_attrib_location() const
{
return ARB_explicit_attrib_location_enable || is_version(330, 300);
@@ -188,7 +203,7 @@ struct _mesa_glsl_parse_state {
/**
* Number of nested struct_specifier levels
*
- * Outside a struct_specifer, this is zero.
+ * Outside a struct_specifier, this is zero.
*/
unsigned struct_specifier_depth;
@@ -363,8 +378,12 @@ struct _mesa_glsl_parse_state {
bool ARB_draw_instanced_warn;
bool ARB_explicit_attrib_location_enable;
bool ARB_explicit_attrib_location_warn;
+ bool ARB_explicit_uniform_location_enable;
+ bool ARB_explicit_uniform_location_warn;
bool ARB_fragment_coord_conventions_enable;
bool ARB_fragment_coord_conventions_warn;
+ bool ARB_fragment_layer_viewport_enable;
+ bool ARB_fragment_layer_viewport_warn;
bool ARB_gpu_shader5_enable;
bool ARB_gpu_shader5_warn;
bool ARB_sample_shading_enable;
diff --git a/mesalib/src/glsl/glsl_symbol_table.h b/mesalib/src/glsl/glsl_symbol_table.h
index f323fc305..25282641f 100644
--- a/mesalib/src/glsl/glsl_symbol_table.h
+++ b/mesalib/src/glsl/glsl_symbol_table.h
@@ -32,9 +32,9 @@ extern "C" {
#include "program/symbol_table.h"
}
#include "ir.h"
-#include "glsl_types.h"
class symbol_table_entry;
+struct glsl_type;
/**
* Facade class for _mesa_symbol_table
diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp
index e77146cdf..f9cd258fe 100644
--- a/mesalib/src/glsl/glsl_types.cpp
+++ b/mesalib/src/glsl/glsl_types.cpp
@@ -22,9 +22,7 @@
*/
#include <stdio.h>
-#include <stdlib.h>
-#include "main/core.h" /* for Elements */
-#include "glsl_symbol_table.h"
+#include "main/core.h" /* for Elements, MAX2 */
#include "glsl_parser_extras.h"
#include "glsl_types.h"
extern "C" {
@@ -677,6 +675,32 @@ glsl_type::component_slots() const
return 0;
}
+/**
+ * Count the uniform locations consumed by this type.
+ *
+ * Every scalar, vector, matrix, sampler and image occupies exactly one
+ * location.  Arrays occupy the element count times the locations of one
+ * element, and structures / interfaces occupy the sum of their fields'
+ * locations.  Any other base type consumes no location.
+ *
+ * This deliberately does not defer to component_slots(): that function
+ * counts individual components, whereas the linker hands out one remap
+ * table entry per array element of each leaf uniform.
+ */
+unsigned
+glsl_type::uniform_locations() const
+{
+   unsigned size = 0;
+
+   switch (this->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_IMAGE:
+      /* Scalars, vectors and matrices all share these base types. */
+      return 1;
+
+   case GLSL_TYPE_STRUCT:
+   case GLSL_TYPE_INTERFACE:
+      for (unsigned i = 0; i < this->length; i++)
+         size += this->fields.structure[i].type->uniform_locations();
+      return size;
+
+   case GLSL_TYPE_ARRAY:
+      return this->length * this->fields.array->uniform_locations();
+
+   default:
+      return 0;
+   }
+}
+
bool
glsl_type::can_implicitly_convert_to(const glsl_type *desired,
_mesa_glsl_parse_state *state) const
diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h
index 35a4e6acc..f6d4a02ab 100644
--- a/mesalib/src/glsl/glsl_types.h
+++ b/mesalib/src/glsl/glsl_types.h
@@ -256,6 +256,12 @@ struct glsl_type {
unsigned component_slots() const;
/**
+ * Calculate the number of unique values from glGetUniformLocation for the
+ * elements of the type.
+ */
+ unsigned uniform_locations() const;
+
+ /**
* Calculate the number of attribute slots required to hold this type
*
* This implements the language rules of GLSL 1.50 for counting the number
diff --git a/mesalib/src/glsl/hir_field_selection.cpp b/mesalib/src/glsl/hir_field_selection.cpp
index 1e92c89ae..0fa976811 100644
--- a/mesalib/src/glsl/hir_field_selection.cpp
+++ b/mesalib/src/glsl/hir_field_selection.cpp
@@ -22,7 +22,6 @@
*/
#include "ir.h"
-#include "program/symbol_table.h"
#include "glsl_parser_extras.h"
#include "ast.h"
#include "glsl_types.h"
diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp
index 8fed768a2..10c00068e 100644
--- a/mesalib/src/glsl/ir.cpp
+++ b/mesalib/src/glsl/ir.cpp
@@ -23,7 +23,6 @@
#include <string.h>
#include "main/core.h" /* for MAX2 */
#include "ir.h"
-#include "ir_visitor.h"
#include "glsl_types.h"
ir_rvalue::ir_rvalue(enum ir_node_type t)
diff --git a/mesalib/src/glsl/ir_basic_block.cpp b/mesalib/src/glsl/ir_basic_block.cpp
index 426fda2f2..74ee4b696 100644
--- a/mesalib/src/glsl/ir_basic_block.cpp
+++ b/mesalib/src/glsl/ir_basic_block.cpp
@@ -28,9 +28,7 @@
*/
#include "ir.h"
-#include "ir_visitor.h"
#include "ir_basic_block.h"
-#include "glsl_types.h"
/**
* Calls a user function for every basic block in the instruction stream.
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index 8afe8f776..7b4a22df4 100644
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -36,7 +36,6 @@
#include <math.h>
#include "main/core.h" /* for MAX2, MIN2, CLAMP */
#include "ir.h"
-#include "ir_visitor.h"
#include "glsl_types.h"
#include "program/hash_table.h"
diff --git a/mesalib/src/glsl/ir_expression_flattening.cpp b/mesalib/src/glsl/ir_expression_flattening.cpp
index c1cadb122..0b1ada519 100644
--- a/mesalib/src/glsl/ir_expression_flattening.cpp
+++ b/mesalib/src/glsl/ir_expression_flattening.cpp
@@ -32,10 +32,8 @@
*/
#include "ir.h"
-#include "ir_visitor.h"
#include "ir_rvalue_visitor.h"
#include "ir_expression_flattening.h"
-#include "glsl_types.h"
class ir_expression_flattening_visitor : public ir_rvalue_visitor {
public:
diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h
index c63921c26..b83c22592 100644
--- a/mesalib/src/glsl/ir_optimization.h
+++ b/mesalib/src/glsl/ir_optimization.h
@@ -71,7 +71,9 @@ bool do_common_optimization(exec_list *ir, bool linked,
const struct gl_shader_compiler_options *options,
bool native_integers);
-bool do_algebraic(exec_list *instructions, bool native_integers);
+bool do_rebalance_tree(exec_list *instructions);
+bool do_algebraic(exec_list *instructions, bool native_integers,
+ const struct gl_shader_compiler_options *options);
bool do_constant_folding(exec_list *instructions);
bool do_constant_variable(exec_list *instructions);
bool do_constant_variable_unlinked(exec_list *instructions);
diff --git a/mesalib/src/glsl/ir_uniform.h b/mesalib/src/glsl/ir_uniform.h
index 3508509d4..2f7352825 100644
--- a/mesalib/src/glsl/ir_uniform.h
+++ b/mesalib/src/glsl/ir_uniform.h
@@ -32,12 +32,17 @@
#include "program/prog_parameter.h" /* For union gl_constant_value. */
+/**
+ * Used by GL_ARB_explicit_uniform_location extension code in the linker
+ * and glUniform* functions to identify inactive explicit uniform locations.
+ */
+#define INACTIVE_UNIFORM_EXPLICIT_LOCATION ((gl_uniform_storage *) -1)
#ifdef __cplusplus
extern "C" {
#endif
-enum gl_uniform_driver_format {
+enum PACKED gl_uniform_driver_format {
uniform_native = 0, /**< Store data in the native format. */
uniform_int_float, /**< Store integer data as floats. */
uniform_bool_float, /**< Store boolean data as floats. */
@@ -66,11 +71,8 @@ struct gl_uniform_driver_storage {
/**
* Base format of the stored data.
- *
- * This field must have a value from \c GLSL_TYPE_UINT through \c
- * GLSL_TYPE_SAMPLER.
*/
- uint8_t format;
+ enum gl_uniform_driver_format format;
/**
* Pointer to the base of the data.
diff --git a/mesalib/src/glsl/link_uniform_block_active_visitor.h b/mesalib/src/glsl/link_uniform_block_active_visitor.h
index d76dbcaf1..524cd6b91 100644
--- a/mesalib/src/glsl/link_uniform_block_active_visitor.h
+++ b/mesalib/src/glsl/link_uniform_block_active_visitor.h
@@ -26,8 +26,6 @@
#define LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H
#include "ir.h"
-#include "ir_visitor.h"
-#include "glsl_types.h"
#include "main/hash_table.h"
struct link_uniform_block_active {
diff --git a/mesalib/src/glsl/link_uniform_initializers.cpp b/mesalib/src/glsl/link_uniform_initializers.cpp
index 2100e0517..d755cec98 100644
--- a/mesalib/src/glsl/link_uniform_initializers.cpp
+++ b/mesalib/src/glsl/link_uniform_initializers.cpp
@@ -25,8 +25,6 @@
#include "ir.h"
#include "linker.h"
#include "ir_uniform.h"
-#include "glsl_symbol_table.h"
-#include "program/hash_table.h"
/* These functions are put in a "private" namespace instead of being marked
* static so that the unit tests can access them. See
diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp
index ba66053ed..66f6d4db3 100644
--- a/mesalib/src/glsl/link_uniforms.cpp
+++ b/mesalib/src/glsl/link_uniforms.cpp
@@ -37,6 +37,11 @@
*/
/**
+ * Used by linker to indicate uniforms that have no location set.
+ */
+#define UNMAPPED_UNIFORM_LOC ~0u
+
+/**
* Count the backing storage requirements for a type
*/
static unsigned
@@ -386,6 +391,9 @@ public:
void set_and_process(struct gl_shader_program *prog,
ir_variable *var)
{
+ current_var = var;
+ field_counter = 0;
+
ubo_block_index = -1;
if (var->is_in_uniform_block()) {
if (var->is_interface_instance() && var->type->is_array()) {
@@ -542,6 +550,22 @@ private:
return;
}
+ /* Assign explicit locations. */
+ if (current_var->data.explicit_location) {
+ /* Set sequential locations for struct fields. */
+ if (record_type != NULL) {
+ const unsigned entries = MAX2(1, this->uniforms[id].array_elements);
+ this->uniforms[id].remap_location =
+ current_var->data.location + field_counter;
+ field_counter += entries;
+ } else {
+ this->uniforms[id].remap_location = current_var->data.location;
+ }
+ } else {
+ /* Initialize to indicate that no location is set. */
+ this->uniforms[id].remap_location = UNMAPPED_UNIFORM_LOC;
+ }
+
this->uniforms[id].name = ralloc_strdup(this->uniforms, name);
this->uniforms[id].type = base_type;
this->uniforms[id].initialized = 0;
@@ -597,6 +621,17 @@ public:
gl_texture_index targets[MAX_SAMPLERS];
/**
+ * Current variable being processed.
+ */
+ ir_variable *current_var;
+
+ /**
+ * Field counter is used to take care that uniform structures
+ * with explicit locations get sequential locations.
+ */
+ unsigned field_counter;
+
+ /**
* Mask of samplers used by the current shader stage.
*/
unsigned shader_samplers_used;
@@ -798,10 +833,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog)
prog->UniformStorage = NULL;
prog->NumUserUniformStorage = 0;
- ralloc_free(prog->UniformRemapTable);
- prog->UniformRemapTable = NULL;
- prog->NumUniformRemapTable = 0;
-
if (prog->UniformHash != NULL) {
prog->UniformHash->clear();
} else {
@@ -914,8 +945,28 @@ link_assign_uniform_locations(struct gl_shader_program *prog)
sizeof(prog->_LinkedShaders[i]->SamplerTargets));
}
- /* Build the uniform remap table that is used to set/get uniform locations */
+ /* Reserve all the explicit locations of the active uniforms. */
for (unsigned i = 0; i < num_user_uniforms; i++) {
+ if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) {
+ /* How many new entries for this uniform? */
+ const unsigned entries = MAX2(1, uniforms[i].array_elements);
+
+ /* Point the remap table entries at the correct gl_uniform_storage. */
+ for (unsigned j = 0; j < entries; j++) {
+ unsigned element_loc = uniforms[i].remap_location + j;
+ assert(prog->UniformRemapTable[element_loc] ==
+ INACTIVE_UNIFORM_EXPLICIT_LOCATION);
+ prog->UniformRemapTable[element_loc] = &uniforms[i];
+ }
+ }
+ }
+
+ /* Reserve locations for rest of the uniforms. */
+ for (unsigned i = 0; i < num_user_uniforms; i++) {
+
+ /* Explicit ones have been set already. */
+ if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC)
+ continue;
/* how many new entries for this uniform? */
const unsigned entries = MAX2(1, uniforms[i].array_elements);
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index a43d23082..0b6a71679 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -74,6 +74,7 @@
#include "link_varyings.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
+#include "ir_uniform.h"
extern "C" {
#include "main/shaderobj.h"
@@ -2224,6 +2225,115 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
linker_error(prog, "Too many combined image uniforms and fragment outputs");
}
+
+/**
+ * Reserve the explicit locations requested by one uniform variable.
+ *
+ * Grows prog->UniformRemapTable when the variable's last location does not
+ * fit, then marks every slot the variable occupies as
+ * INACTIVE_UNIFORM_EXPLICIT_LOCATION.  A slot that is already marked is
+ * accepted only when \c map records this variable's name at the same base
+ * location (the same uniform seen earlier); any other overlap is reported
+ * as a linker error.
+ *
+ * \param prog  program whose uniform remap table is updated
+ * \param map   uniform name -> base location of the uniforms reserved so far
+ * \param var   uniform variable carrying an explicit location
+ *
+ * \return true on success, false after a linker error has been recorded.
+ */
+static bool
+reserve_explicit_locations(struct gl_shader_program *prog,
+                           string_to_uint_map *map, ir_variable *var)
+{
+   unsigned slots = var->type->uniform_locations();
+   unsigned max_loc = var->data.location + slots - 1;
+
+   /* Resize remap table if locations do not fit in the current one. */
+   if (max_loc + 1 > prog->NumUniformRemapTable) {
+      prog->UniformRemapTable =
+         reralloc(prog, prog->UniformRemapTable,
+                  gl_uniform_storage *,
+                  max_loc + 1);
+
+      if (!prog->UniformRemapTable) {
+         linker_error(prog, "Out of memory during linking.");
+         return false;
+      }
+
+      /* Initialize allocated space. */
+      for (unsigned i = prog->NumUniformRemapTable; i < max_loc + 1; i++)
+         prog->UniformRemapTable[i] = NULL;
+
+      prog->NumUniformRemapTable = max_loc + 1;
+   }
+
+   for (unsigned i = 0; i < slots; i++) {
+      unsigned loc = var->data.location + i;
+
+      /* Check if location is already used. */
+      if (prog->UniformRemapTable[loc] == INACTIVE_UNIFORM_EXPLICIT_LOCATION) {
+
+         /* Possibly same uniform from a different stage, this is ok.
+          * The map stores base locations, hence the "loc - i".
+          */
+         unsigned hash_loc;
+         if (map->get(hash_loc, var->name) && hash_loc == loc - i)
+            continue;
+
+         /* ARB_explicit_uniform_location specification states:
+          *
+          *     "No two default-block uniform variables in the program can have
+          *     the same location, even if they are unused, otherwise a compiler
+          *     or linker error will be generated."
+          */
+         linker_error(prog,
+                      "location qualifier for uniform %s overlaps "
+                      "previously used location",
+                      var->name);
+         return false;
+      }
+
+      /* Initialize location as inactive before optimization
+       * rounds and location assignment.
+       */
+      prog->UniformRemapTable[loc] = INACTIVE_UNIFORM_EXPLICIT_LOCATION;
+   }
+
+   /* Note, base location used for arrays. */
+   map->put(var->data.location, var->name);
+
+   return true;
+}
+
+/**
+ * Check and reserve all explicit uniform locations, called before
+ * any optimizations happen to handle also inactive uniforms and
+ * inactive array elements that may get trimmed away.
+ *
+ * Does nothing unless ctx->Extensions.ARB_explicit_uniform_location is set.
+ * Walks the IR of every linked shader stage and reserves the locations of
+ * each uniform variable that has an explicit location.  Stops at the first
+ * failure; the error is recorded on \c prog by the reservation helper.
+ */
+static void
+check_explicit_uniform_locations(struct gl_context *ctx,
+                                 struct gl_shader_program *prog)
+{
+   if (!ctx->Extensions.ARB_explicit_uniform_location)
+      return;
+
+   /* This map is used to detect if overlapping explicit locations
+    * occur with the same uniform (from different stage) or a different one.
+    */
+   string_to_uint_map *uniform_map = new string_to_uint_map;
+
+   if (!uniform_map) {
+      linker_error(prog, "Out of memory during linking.");
+      return;
+   }
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_shader *sh = prog->_LinkedShaders[i];
+
+      if (!sh)
+         continue;
+
+      foreach_list(node, sh->ir) {
+         ir_variable *var = ((ir_instruction *)node)->as_variable();
+         if ((var && var->data.mode == ir_var_uniform) &&
+             var->data.explicit_location) {
+            if (!reserve_explicit_locations(prog, uniform_map, var)) {
+               /* Release the map on the failure path as well. */
+               delete uniform_map;
+               return;
+            }
+         }
+      }
+   }
+
+   delete uniform_map;
+}
+
void
link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
{
@@ -2372,6 +2482,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
break;
}
+ check_explicit_uniform_locations(ctx, prog);
+ if (!prog->LinkStatus)
+ goto done;
+
/* Validate the inputs of each stage with the output of the preceding
* stage.
*/
diff --git a/mesalib/src/glsl/list.h b/mesalib/src/glsl/list.h
index 694b686b0..576bc14e4 100644
--- a/mesalib/src/glsl/list.h
+++ b/mesalib/src/glsl/list.h
@@ -83,67 +83,29 @@ struct exec_node {
/* empty */
}
- const exec_node *get_next() const
- {
- return next;
- }
+ const exec_node *get_next() const;
+ exec_node *get_next();
- exec_node *get_next()
- {
- return next;
- }
+ const exec_node *get_prev() const;
+ exec_node *get_prev();
- const exec_node *get_prev() const
- {
- return prev;
- }
-
- exec_node *get_prev()
- {
- return prev;
- }
-
- void remove()
- {
- next->prev = prev;
- prev->next = next;
- next = NULL;
- prev = NULL;
- }
+ void remove();
/**
* Link a node with itself
*
* This creates a sort of degenerate list that is occasionally useful.
*/
- void self_link()
- {
- next = this;
- prev = this;
- }
+ void self_link();
/**
* Insert a node in the list after the current node
*/
- void insert_after(exec_node *after)
- {
- after->next = this->next;
- after->prev = this;
-
- this->next->prev = after;
- this->next = after;
- }
+ void insert_after(exec_node *after);
/**
* Insert a node in the list before the current node
*/
- void insert_before(exec_node *before)
- {
- before->next = this;
- before->prev = this->prev;
-
- this->prev->next = before;
- this->prev = before;
- }
+ void insert_before(exec_node *before);
/**
* Insert another list in the list before the current node
@@ -153,33 +115,165 @@ struct exec_node {
/**
* Replace the current node with the given node.
*/
- void replace_with(exec_node *replacement)
- {
- replacement->prev = this->prev;
- replacement->next = this->next;
-
- this->prev->next = replacement;
- this->next->prev = replacement;
- }
+ void replace_with(exec_node *replacement);
/**
* Is this the sentinel at the tail of the list?
*/
- bool is_tail_sentinel() const
- {
- return this->next == NULL;
- }
+ bool is_tail_sentinel() const;
/**
* Is this the sentinel at the head of the list?
*/
- bool is_head_sentinel() const
- {
- return this->prev == NULL;
- }
+ bool is_head_sentinel() const;
#endif
};
+/**
+ * Reset a node so that both its \c next and \c prev links are NULL.
+ */
+static inline void
+exec_node_init(struct exec_node *n)
+{
+ n->next = NULL;
+ n->prev = NULL;
+}
+
+/**
+ * Return the \c next link of a const node as a const pointer.
+ */
+static inline const struct exec_node *
+exec_node_get_next_const(const struct exec_node *n)
+{
+ return n->next;
+}
+
+/**
+ * Return the \c next link of a node.
+ */
+static inline struct exec_node *
+exec_node_get_next(struct exec_node *n)
+{
+ return n->next;
+}
+
+/**
+ * Return the \c prev link of a const node as a const pointer.
+ */
+static inline const struct exec_node *
+exec_node_get_prev_const(const struct exec_node *n)
+{
+ return n->prev;
+}
+
+/**
+ * Return the \c prev link of a node.
+ */
+static inline struct exec_node *
+exec_node_get_prev(struct exec_node *n)
+{
+ return n->prev;
+}
+
+/**
+ * Unlink a node from its neighbours.
+ *
+ * The two neighbours are linked to each other, then both links of \p n are
+ * reset to NULL.  \c n->next and \c n->prev are dereferenced unconditionally,
+ * so both must point at valid nodes (or list sentinels) on entry.
+ */
+static inline void
+exec_node_remove(struct exec_node *n)
+{
+ n->next->prev = n->prev;
+ n->prev->next = n->next;
+ n->next = NULL;
+ n->prev = NULL;
+}
+
+/**
+ * Link a node with itself: both \c next and \c prev are set to \p n.
+ */
+static inline void
+exec_node_self_link(struct exec_node *n)
+{
+ n->next = n;
+ n->prev = n;
+}
+
+/**
+ * Insert \p after into the list directly behind \p n.
+ *
+ * \c n->next is dereferenced, so \p n must already have a successor.
+ */
+static inline void
+exec_node_insert_after(struct exec_node *n, struct exec_node *after)
+{
+ after->next = n->next;
+ after->prev = n;
+
+ n->next->prev = after;
+ n->next = after;
+}
+
+/**
+ * Insert \p before into the list directly in front of \p n.
+ *
+ * \c n->prev is dereferenced, so \p n must already have a predecessor.
+ */
+static inline void
+exec_node_insert_node_before(struct exec_node *n, struct exec_node *before)
+{
+ before->next = n;
+ before->prev = n->prev;
+
+ n->prev->next = before;
+ n->prev = before;
+}
+
+/**
+ * Put \p replacement into the list position currently occupied by \p n.
+ *
+ * The neighbours of \p n are re-pointed at \p replacement.  The links stored
+ * in \p n itself are left unchanged.
+ */
+static inline void
+exec_node_replace_with(struct exec_node *n, struct exec_node *replacement)
+{
+ replacement->prev = n->prev;
+ replacement->next = n->next;
+
+ n->prev->next = replacement;
+ n->next->prev = replacement;
+}
+
+/**
+ * Is this the sentinel at the tail of the list?  True when \c next is NULL.
+ */
+static inline bool
+exec_node_is_tail_sentinel(const struct exec_node *n)
+{
+ return n->next == NULL;
+}
+
+/**
+ * Is this the sentinel at the head of the list?  True when \c prev is NULL.
+ */
+static inline bool
+exec_node_is_head_sentinel(const struct exec_node *n)
+{
+ return n->prev == NULL;
+}
+
+#ifdef __cplusplus
+/* Out-of-line definitions of the exec_node member functions.  Each one
+ * forwards to the exec_node_* C helper for the same operation, passing
+ * \c this as the node argument.
+ */
+inline const exec_node *exec_node::get_next() const
+{
+ return exec_node_get_next_const(this);
+}
+
+inline exec_node *exec_node::get_next()
+{
+ return exec_node_get_next(this);
+}
+
+inline const exec_node *exec_node::get_prev() const
+{
+ return exec_node_get_prev_const(this);
+}
+
+inline exec_node *exec_node::get_prev()
+{
+ return exec_node_get_prev(this);
+}
+
+inline void exec_node::remove()
+{
+ exec_node_remove(this);
+}
+
+inline void exec_node::self_link()
+{
+ exec_node_self_link(this);
+}
+
+inline void exec_node::insert_after(exec_node *after)
+{
+ exec_node_insert_after(this, after);
+}
+
+/* Single-node overload; forwards to exec_node_insert_node_before(). */
+inline void exec_node::insert_before(exec_node *before)
+{
+ exec_node_insert_node_before(this, before);
+}
+
+inline void exec_node::replace_with(exec_node *replacement)
+{
+ exec_node_replace_with(this, replacement);
+}
+
+inline bool exec_node::is_tail_sentinel() const
+{
+ return exec_node_is_tail_sentinel(this);
+}
+
+inline bool exec_node::is_head_sentinel() const
+{
+ return exec_node_is_head_sentinel(this);
+}
+#endif
#ifdef __cplusplus
/* This macro will not work correctly if `t' uses virtual inheritance. If you
@@ -221,75 +315,19 @@ struct exec_list {
make_empty();
}
- void make_empty()
- {
- head = (exec_node *) & tail;
- tail = NULL;
- tail_pred = (exec_node *) & head;
- }
-
- bool is_empty() const
- {
- /* There are three ways to test whether a list is empty or not.
- *
- * - Check to see if the \c head points to the \c tail.
- * - Check to see if the \c tail_pred points to the \c head.
- * - Check to see if the \c head is the sentinel node by test whether its
- * \c next pointer is \c NULL.
- *
- * The first two methods tend to generate better code on modern systems
- * because they save a pointer dereference.
- */
- return head == (exec_node *) &tail;
- }
-
- const exec_node *get_head() const
- {
- return !is_empty() ? head : NULL;
- }
-
- exec_node *get_head()
- {
- return !is_empty() ? head : NULL;
- }
-
- const exec_node *get_tail() const
- {
- return !is_empty() ? tail_pred : NULL;
- }
-
- exec_node *get_tail()
- {
- return !is_empty() ? tail_pred : NULL;
- }
+ void make_empty();
- void push_head(exec_node *n)
- {
- n->next = head;
- n->prev = (exec_node *) &head;
+ bool is_empty() const;
- n->next->prev = n;
- head = n;
- }
+ const exec_node *get_head() const;
+ exec_node *get_head();
- void push_tail(exec_node *n)
- {
- n->next = (exec_node *) &tail;
- n->prev = tail_pred;
+ const exec_node *get_tail() const;
+ exec_node *get_tail();
- n->prev->next = n;
- tail_pred = n;
- }
-
- void push_degenerate_list_at_head(exec_node *n)
- {
- assert(n->prev->next == n);
-
- n->prev->next = head;
- head->prev = n->prev;
- n->prev = (exec_node *) &head;
- head = n;
- }
+ void push_head(exec_node *n);
+ void push_tail(exec_node *n);
+ void push_degenerate_list_at_head(exec_node *n);
/**
* Remove the first node from a list and return it
@@ -299,87 +337,239 @@ struct exec_list {
*
* \sa exec_list::get_head
*/
- exec_node *pop_head()
- {
- exec_node *const n = this->get_head();
- if (n != NULL)
- n->remove();
-
- return n;
- }
+ exec_node *pop_head();
/**
* Move all of the nodes from this list to the target list
*/
- void move_nodes_to(exec_list *target)
- {
- if (is_empty()) {
- target->make_empty();
- } else {
- target->head = head;
- target->tail = NULL;
- target->tail_pred = tail_pred;
-
- target->head->prev = (exec_node *) &target->head;
- target->tail_pred->next = (exec_node *) &target->tail;
-
- make_empty();
- }
- }
+ void move_nodes_to(exec_list *target);
/**
* Append all nodes from the source list to the target list
*/
- void
- append_list(exec_list *source)
- {
- if (source->is_empty())
- return;
-
- /* Link the first node of the source with the last node of the target list.
- */
- this->tail_pred->next = source->head;
- source->head->prev = this->tail_pred;
-
- /* Make the tail of the source list be the tail of the target list.
- */
- this->tail_pred = source->tail_pred;
- this->tail_pred->next = (exec_node *) &this->tail;
-
- /* Make the source list empty for good measure.
- */
- source->make_empty();
- }
+ void append_list(exec_list *source);
#endif
};
+/**
+ * Put a list into the empty state: \c head points at the \c tail field,
+ * \c tail is NULL and \c tail_pred points at the \c head field.
+ */
+static inline void
+exec_list_make_empty(struct exec_list *list)
+{
+ list->head = (struct exec_node *) & list->tail;
+ list->tail = NULL;
+ list->tail_pred = (struct exec_node *) & list->head;
+}
-#ifdef __cplusplus
-inline void exec_node::insert_before(exec_list *before)
+/**
+ * Return true when the list holds no nodes, i.e. \c head points at the
+ * list's own \c tail field.
+ */
+static inline bool
+exec_list_is_empty(const struct exec_list *list)
+{
+ /* There are three ways to test whether a list is empty or not.
+ *
+ * - Check to see if the \c head points to the \c tail.
+ * - Check to see if the \c tail_pred points to the \c head.
+ * - Check to see if the \c head is the sentinel node by testing whether its
+ * \c next pointer is \c NULL.
+ *
+ * The first two methods tend to generate better code on modern systems
+ * because they save a pointer dereference.
+ */
+ return list->head == (struct exec_node *) &list->tail;
+}
+
+/**
+ * Return the first node of a const list, or NULL when the list is empty.
+ */
+static inline const struct exec_node *
+exec_list_get_head_const(const struct exec_list *list)
+{
+   if (exec_list_is_empty(list))
+      return NULL;
+
+   return list->head;
+}
+
+/**
+ * Return the first node of a list, or NULL when the list is empty.
+ */
+static inline struct exec_node *
+exec_list_get_head(struct exec_list *list)
+{
+   if (exec_list_is_empty(list))
+      return NULL;
+
+   return list->head;
+}
+
+/**
+ * Return the last node of a const list (\c tail_pred), or NULL when the list
+ * is empty.
+ */
+static inline const struct exec_node *
+exec_list_get_tail_const(const struct exec_list *list)
+{
+   if (exec_list_is_empty(list))
+      return NULL;
+
+   return list->tail_pred;
+}
+
+/**
+ * Return the last node of a list (\c tail_pred), or NULL when the list is
+ * empty.
+ */
+static inline struct exec_node *
+exec_list_get_tail(struct exec_list *list)
+{
+   if (exec_list_is_empty(list))
+      return NULL;
+
+   return list->tail_pred;
+}
+
+/**
+ * Insert \p n at the front of \p list.
+ *
+ * The node's \c prev is pointed at the list's \c head field, which acts as
+ * the head sentinel; the previous first element (or the tail sentinel of an
+ * empty list) becomes the node's successor.
+ */
+static inline void
+exec_list_push_head(struct exec_list *list, struct exec_node *n)
+{
+ n->next = list->head;
+ n->prev = (struct exec_node *) &list->head;
+
+ n->next->prev = n;
+ list->head = n;
+}
+
+/**
+ * Insert \p n at the end of \p list.
+ *
+ * The node's \c next is pointed at the list's \c tail field, which acts as
+ * the tail sentinel; the node previously referenced by \c tail_pred becomes
+ * the node's predecessor.
+ */
+static inline void
+exec_list_push_tail(struct exec_list *list, struct exec_node *n)
+{
+ n->next = (struct exec_node *) &list->tail;
+ n->prev = list->tail_pred;
+
+ n->prev->next = n;
+ list->tail_pred = n;
+}
+
+/**
+ * Splice a chain of nodes that starts at \p n onto the front of \p list.
+ *
+ * \p n becomes the new first node of the list, and the node found through
+ * \c n->prev on entry is linked directly in front of the old first node.
+ * Asserts that \c n->prev->next is \p n.
+ */
+static inline void
+exec_list_push_degenerate_list_at_head(struct exec_list *list, struct exec_node *n)
+{
+ assert(n->prev->next == n);
+
+ n->prev->next = list->head;
+ list->head->prev = n->prev;
+ n->prev = (struct exec_node *) &list->head;
+ list->head = n;
+}
+
+/**
+ * Remove the first node from a list and return it.
+ *
+ * Returns NULL, leaving the list untouched, when the list is empty.
+ */
+static inline struct exec_node *
+exec_list_pop_head(struct exec_list *list)
+{
+   struct exec_node *const first = exec_list_get_head(list);
+
+   if (first == NULL)
+      return NULL;
+
+   exec_node_remove(first);
+   return first;
+}
+
+/**
+ * Move all of the nodes from \p list to \p target.
+ *
+ * The previous head/tail fields of \p target are overwritten and \p list
+ * is left empty.  An empty \p list simply empties \p target.
+ */
+static inline void
+exec_list_move_nodes_to(struct exec_list *list, struct exec_list *target)
+{
+   if (exec_list_is_empty(list)) {
+      exec_list_make_empty(target);
+      return;
+   }
+
+   /* Hand the node chain over to the target... */
+   target->head = list->head;
+   target->tail = NULL;
+   target->tail_pred = list->tail_pred;
+
+   /* ...and re-point the end nodes at the target's sentinels. */
+   target->head->prev = (struct exec_node *) &target->head;
+   target->tail_pred->next = (struct exec_node *) &target->tail;
+
+   exec_list_make_empty(list);
+}
+
+/**
+ * Append all nodes from \p source to the end of \p list.
+ *
+ * \p source is left empty afterwards.  Nothing happens when \p source is
+ * already empty.
+ */
+static inline void
+exec_list_append(struct exec_list *list, struct exec_list *source)
+{
+ if (exec_list_is_empty(source))
+ return;
+
+ /* Link the first node of the source with the last node of the target list.
+ */
+ list->tail_pred->next = source->head;
+ source->head->prev = list->tail_pred;
+
+ /* Make the tail of the source list be the tail of the target list.
+ */
+ list->tail_pred = source->tail_pred;
+ list->tail_pred->next = (struct exec_node *) &list->tail;
+
+ /* Make the source list empty for good measure.
+ */
+ exec_list_make_empty(source);
+}
+
+/**
+ * Splice every node of \p before into the list that contains \p n, directly
+ * in front of \p n, and leave \p before empty.
+ *
+ * Nothing happens when \p before is already empty.
+ */
+static inline void
+exec_node_insert_list_before(struct exec_node *n, struct exec_list *before)
 {
- if (before->is_empty())
+ if (exec_list_is_empty(before))
 return;
- before->tail_pred->next = this;
- before->head->prev = this->prev;
+ before->tail_pred->next = n;
+ before->head->prev = n->prev;
- this->prev->next = before->head;
- this->prev = before->tail_pred;
+ n->prev->next = before->head;
+ n->prev = before->tail_pred;
- before->make_empty();
+ exec_list_make_empty(before);
+}
+
+#ifdef __cplusplus
+/* Out-of-line definitions of the exec_list member functions, plus the
+ * list-taking overload of exec_node::insert_before().  Each one forwards to
+ * the C helper for the same operation, passing \c this as first argument.
+ */
+inline void exec_list::make_empty()
+{
+ exec_list_make_empty(this);
+}
+
+inline bool exec_list::is_empty() const
+{
+ return exec_list_is_empty(this);
+}
+
+inline const exec_node *exec_list::get_head() const
+{
+ return exec_list_get_head_const(this);
+}
+
+inline exec_node *exec_list::get_head()
+{
+ return exec_list_get_head(this);
+}
+
+inline const exec_node *exec_list::get_tail() const
+{
+ return exec_list_get_tail_const(this);
+}
+
+inline exec_node *exec_list::get_tail()
+{
+ return exec_list_get_tail(this);
+}
+
+inline void exec_list::push_head(exec_node *n)
+{
+ exec_list_push_head(this, n);
+}
+
+inline void exec_list::push_tail(exec_node *n)
+{
+ exec_list_push_tail(this, n);
+}
+
+inline void exec_list::push_degenerate_list_at_head(exec_node *n)
+{
+ exec_list_push_degenerate_list_at_head(this, n);
+}
+
+inline exec_node *exec_list::pop_head()
+{
+ return exec_list_pop_head(this);
+}
+
+inline void exec_list::move_nodes_to(exec_list *target)
+{
+ exec_list_move_nodes_to(this, target);
+}
+
+inline void exec_list::append_list(exec_list *source)
+{
+ exec_list_append(this, source);
+}
+
+/* Whole-list overload; forwards to exec_node_insert_list_before(). */
+inline void exec_node::insert_before(exec_list *before)
+{
+ exec_node_insert_list_before(this, before);
+}
 #endif
 /**
 * This version is safe even if the current node is removed.
+ *
+ * The successor of the current node is stored in \c __next before the loop
+ * body runs, and the loop advances through that stored pointer rather than
+ * through the current node's own \c next link.  Iteration stops at the tail
+ * sentinel, whose \c next is NULL.
 */
-#define foreach_list_safe(__node, __list) \
- for (exec_node * __node = (__list)->head, * __next = __node->next \
- ; __next != NULL \
+#define foreach_list_safe(__node, __list) \
+ for (struct exec_node * __node = (__list)->head, * __next = __node->next \
+ ; __next != NULL \
 ; __node = __next, __next = __next->next)
+/**
+ * Iterate over every node of a list, stopping at the tail sentinel (the
+ * node whose \c next is NULL).
+ *
+ * The current node's \c next link is read after the loop body has run, so
+ * the body must leave that link intact.
+ */
 #define foreach_list(__node, __list) \
- for (exec_node * __node = (__list)->head \
+ for (struct exec_node * __node = (__list)->head \
 ; (__node)->next != NULL \
 ; (__node) = (__node)->next)
@@ -389,19 +579,19 @@ inline void exec_node::insert_before(exec_list *before)
* This is safe against either current node being removed or replaced.
*/
#define foreach_two_lists(__node1, __list1, __node2, __list2) \
- for (exec_node * __node1 = (__list1)->head, \
- * __node2 = (__list2)->head, \
- * __next1 = __node1->next, \
- * __next2 = __node2->next \
+ for (struct exec_node * __node1 = (__list1)->head, \
+ * __node2 = (__list2)->head, \
+ * __next1 = __node1->next, \
+ * __next2 = __node2->next \
; __next1 != NULL && __next2 != NULL \
; __node1 = __next1, \
__node2 = __next2, \
__next1 = __next1->next, \
__next2 = __next2->next)
-#define foreach_list_const(__node, __list) \
- for (const exec_node * __node = (__list)->head \
- ; (__node)->next != NULL \
+/**
+ * Same iteration as foreach_list, but \c __node is declared as a pointer to
+ * a const node.
+ */
+#define foreach_list_const(__node, __list) \
+ for (const struct exec_node * __node = (__list)->head \
+ ; (__node)->next != NULL \
 ; (__node) = (__node)->next)
#define foreach_list_typed(__type, __node, __field, __list) \
diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp
index 9d5539252..ac7514acf 100644
--- a/mesalib/src/glsl/opt_algebraic.cpp
+++ b/mesalib/src/glsl/opt_algebraic.cpp
@@ -45,7 +45,9 @@ namespace {
class ir_algebraic_visitor : public ir_rvalue_visitor {
public:
- ir_algebraic_visitor(bool native_integers)
+ ir_algebraic_visitor(bool native_integers,
+ const struct gl_shader_compiler_options *options)
+ : options(options)
{
this->progress = false;
this->mem_ctx = NULL;
@@ -69,6 +71,7 @@ public:
ir_rvalue *swizzle_if_required(ir_expression *expr,
ir_rvalue *operand);
+ const struct gl_shader_compiler_options *options;
void *mem_ctx;
bool native_integers;
@@ -116,6 +119,46 @@ update_type(ir_expression *ir)
ir->type = ir->operands[1]->type;
}
+/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0)
+ *
+ * \p expr0 and \p expr1 are the two operands of the outer addition; either
+ * may be NULL.  Returns the replacement dot() expression, or NULL when the
+ * pattern does not match.
+ */
+static ir_expression *
+try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
+{
+   /* Both halves have to be float additions. */
+   if (!expr0 || expr0->operation != ir_binop_add ||
+       !expr0->type->is_float())
+      return NULL;
+
+   if (!expr1 || expr1->operation != ir_binop_add ||
+       !expr1->type->is_float())
+      return NULL;
+
+   ir_swizzle *const leaves[4] = {
+      expr0->operands[0]->as_swizzle(),
+      expr0->operands[1]->as_swizzle(),
+      expr1->operands[0]->as_swizzle(),
+      expr1->operands[1]->as_swizzle(),
+   };
+
+   /* Every leaf has to be a swizzle that selects exactly one component. */
+   for (unsigned i = 0; i < 4; i++) {
+      if (!leaves[i] || leaves[i]->mask.num_components != 1)
+         return NULL;
+   }
+
+   /* Between them the four leaves must select each component once. */
+   bool swiz_seen[4] = {false, false, false, false};
+   for (unsigned i = 0; i < 4; i++)
+      swiz_seen[leaves[i]->mask.x] = true;
+
+   for (unsigned i = 0; i < 4; i++) {
+      if (!swiz_seen[i])
+         return NULL;
+   }
+
+   /* All four swizzles must read from the same value. */
+   for (unsigned i = 1; i < 4; i++) {
+      if (!leaves[0]->val->equals(leaves[i]->val))
+         return NULL;
+   }
+
+   return dot(leaves[0]->val, new(mem_ctx) ir_constant(1.0f, 4));
+}
+
void
ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
int op1,
@@ -329,6 +372,14 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
if (op_const[1] && !op_const[0])
reassociate_constant(ir, 1, op_const[1], op_expr[0]);
+ /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
+ if (options->OptimizeForAOS) {
+ ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1],
+ mem_ctx);
+ if (expr)
+ return expr;
+ }
+
/* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a).
*
* (-x + y) * a + x
@@ -380,6 +431,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
}
}
}
+
break;
case ir_binop_sub:
@@ -647,9 +699,10 @@ ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
}
bool
-do_algebraic(exec_list *instructions, bool native_integers)
+do_algebraic(exec_list *instructions, bool native_integers,
+ const struct gl_shader_compiler_options *options)
{
- ir_algebraic_visitor v(native_integers);
+ ir_algebraic_visitor v(native_integers, options);
visit_list_elements(&v, instructions);
diff --git a/mesalib/src/glsl/opt_dead_builtin_varyings.cpp b/mesalib/src/glsl/opt_dead_builtin_varyings.cpp
index 6612592aa..50c8aa763 100644
--- a/mesalib/src/glsl/opt_dead_builtin_varyings.cpp
+++ b/mesalib/src/glsl/opt_dead_builtin_varyings.cpp
@@ -334,7 +334,7 @@ public:
}
void prepare_array(exec_list *ir,
- struct ir_variable **new_var,
+ ir_variable **new_var,
int max_elements, unsigned start_location,
const char *var_name, const char *mode_str,
unsigned usage, unsigned external_usage)
diff --git a/mesalib/src/glsl/opt_rebalance_tree.cpp b/mesalib/src/glsl/opt_rebalance_tree.cpp
new file mode 100644
index 000000000..773aab3f6
--- /dev/null
+++ b/mesalib/src/glsl/opt_rebalance_tree.cpp
@@ -0,0 +1,300 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file opt_rebalance_tree.cpp
+ *
+ * Rebalances a reduction expression tree.
+ *
+ * For reduction operations (e.g., x + y + z + w) we generate an expression
+ * tree like
+ *
+ * +
+ * / \
+ * + w
+ * / \
+ * + z
+ * / \
+ * x y
+ *
+ * which we can rebalance into
+ *
+ * +
+ * / \
+ * / \
+ * + +
+ * / \ / \
+ * x y z w
+ *
+ * to get a better instruction scheduling.
+ *
+ * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
+ * and Bette L. Warren.
+ *
+ * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
+ * explanation of the tree_to_vine() (rightward rotation) and
+ * vine_to_tree() (leftward rotation) algorithms.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+
+/* The DSW algorithm generates a degenerate tree (really, a linked list) in
+ * tree_to_vine(). We'd rather not leave a binary expression with only one
+ * operand, so trivial modifications (the ternary operators below) are needed
+ * to ensure that we only rotate around the ir_expression nodes of the tree.
+ *
+ * \p root is the pseudo-root; the tree being flattened hangs off its
+ * operands[1].  The return value counts how many times the walk stepped
+ * down the operands[1] chain, i.e. the number of nodes on the finished vine
+ * (the final non-expression leaf included).
+ */
+static unsigned
+tree_to_vine(ir_expression *root)
+{
+ unsigned size = 0;
+ ir_rvalue *vine_tail = root;
+ ir_rvalue *remainder = root->operands[1];
+
+ while (remainder != NULL) {
+ ir_expression *remainder_temp = remainder->as_expression();
+ /* Only an ir_expression left child can be rotated around. */
+ ir_expression *remainder_left = remainder_temp ?
+ remainder_temp->operands[0]->as_expression() : NULL;
+
+ if (remainder_left == NULL) {
+ /* move vine_tail down one */
+ vine_tail = remainder;
+ /* A non-expression node ends the vine. */
+ remainder = remainder->as_expression() ?
+ ((ir_expression *)remainder)->operands[1] : NULL;
+ size++;
+ } else {
+ /* rotate */
+ ir_expression *tempptr = remainder_left;
+ ((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
+ tempptr->operands[1] = remainder;
+ remainder = tempptr;
+ ((ir_expression *)vine_tail)->operands[1] = tempptr;
+ }
+ }
+
+ return size;
+}
+
+/* Perform \p count rotations along the operands[1] chain that starts at
+ * \p root.
+ *
+ * In each step the operands[1] child of the scanner is bypassed: the
+ * scanner is linked to the child's own operands[1] node and moves onto it,
+ * and the bypassed child becomes that node's operands[0].
+ *
+ * The casts assume that every node touched on the chain is an
+ * ir_expression; nothing here checks it.
+ */
+static void
+compression(ir_expression *root, unsigned count)
+{
+ ir_expression *scanner = root;
+
+ for (unsigned i = 0; i < count; i++) {
+ ir_expression *child = (ir_expression *)scanner->operands[1];
+ scanner->operands[1] = child->operands[1];
+ scanner = (ir_expression *)scanner->operands[1];
+ child->operands[1] = scanner->operands[0];
+ scanner->operands[0] = child;
+ }
+}
+
+/* Turn the vine hanging off \p root back into a tree by running repeated
+ * compression() passes.  \p size is the node count reported by
+ * tree_to_vine().
+ *
+ * Each pass performs half of the remaining count (rounded down) rotations,
+ * and the remaining count then shrinks by that number plus one.  The loop
+ * ends once a pass would perform no rotation.
+ */
+static void
+vine_to_tree(ir_expression *root, unsigned size)
+{
+   int remaining = size - 1;
+   int rotations = remaining / 2;
+
+   while (rotations > 0) {
+      compression(root, rotations);
+      remaining -= rotations + 1;
+      rotations = remaining / 2;
+   }
+}
+
+namespace {
+
+/**
+ * Visitor whose handle_rvalue() tries to rebalance reduction expression
+ * trees and records whether it replaced anything.
+ */
+class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
+public:
+   ir_rebalance_visitor()
+      : progress(false)
+   {
+   }
+
+   void handle_rvalue(ir_rvalue **rvalue);
+
+   /** Set to true once handle_rvalue() has replaced an rvalue. */
+   bool progress;
+};
+
+/** State shared between is_reduction() calls while one tree is inspected. */
+struct is_reduction_data {
+   /** Operation of the expression nodes seen so far (0 before the first). */
+   ir_expression_operation operation;
+   /** Type of the expression nodes seen so far (NULL before the first). */
+   const glsl_type *type;
+   /** Number of expression nodes counted. */
+   unsigned num_expr;
+   /** Cleared as soon as the tree is found not to qualify. */
+   bool is_reduction;
+   /** Set once a constant has been encountered. */
+   bool contains_constant;
+};
+
+} /* anonymous namespace */
+
+/* Return true for the binary operations this pass treats as reductions:
+ * add, mul, the bitwise and logical and/xor/or operations, min and max.
+ */
+static bool
+is_reduction_operation(ir_expression_operation operation)
+{
+   bool reducible = false;
+
+   switch (operation) {
+   case ir_binop_add:
+   case ir_binop_mul:
+   case ir_binop_bit_and:
+   case ir_binop_bit_xor:
+   case ir_binop_bit_or:
+   case ir_binop_logic_and:
+   case ir_binop_logic_xor:
+   case ir_binop_logic_or:
+   case ir_binop_min:
+   case ir_binop_max:
+      reducible = true;
+      break;
+   default:
+      break;
+   }
+
+   return reducible;
+}
+
+/* Per-node callback that decides whether a tree is a rebalanceable
+ * reduction.  \p data points at a struct is_reduction_data that collects
+ * the verdict across calls.
+ *
+ * Note that this function does not attempt to recognize that reduction trees
+ * are already balanced.
+ *
+ * The verdict becomes false for a number of reasons other than the tree not
+ * being a mathematical reduction. Namely,
+ *
+ * - if the tree contains multiple constants that we may be able to combine.
+ * - if the tree contains matrices:
+ *   - they might contain vec4's with many constant components that we can
+ *     simplify after splitting.
+ *   - applying the matrix chain ordering optimization is more than just
+ *     balancing an expression tree.
+ * - if the tree contains operations on multiple types.
+ * - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
+ *   would trick the visiting pass.
+ */
+static void
+is_reduction(ir_instruction *ir, void *data)
+{
+   struct is_reduction_data *state = (struct is_reduction_data *)data;
+
+   /* The tree has already been rejected; nothing more to learn. */
+   if (!state->is_reduction)
+      return;
+
+   /* A tree with several constants is left alone: those constants can be
+    * folded together as long as they do not end up in separate subtrees.
+    */
+   if (ir->as_constant()) {
+      if (state->contains_constant)
+         state->is_reduction = false;
+
+      state->contains_constant = true;
+      return;
+   }
+
+   /* The subtrees of array/record dereferences do not belong to the
+    * expression tree being balanced, so such trees are skipped entirely.
+    */
+   const bool is_array_or_record_deref =
+      ir->ir_type == ir_type_dereference_array ||
+      ir->ir_type == ir_type_dereference_record;
+   if (is_array_or_record_deref) {
+      state->is_reduction = false;
+      return;
+   }
+
+   ir_expression *expr = ir->as_expression();
+   if (expr == NULL)
+      return;
+
+   /* Non-constant matrices might still hold constant vec4s that can be
+    * folded once the matrix is split up; handling them needs more work.
+    */
+   if (expr->type->is_matrix()) {
+      state->is_reduction = false;
+      return;
+   }
+
+   /* Every expression in the tree has to share a single type. */
+   if (state->type != NULL && state->type != expr->type) {
+      state->is_reduction = false;
+      return;
+   }
+   state->type = expr->type;
+
+   state->num_expr++;
+
+   if (!is_reduction_operation(expr->operation)) {
+      state->is_reduction = false;
+      return;
+   }
+
+   /* Every expression in the tree has to use the same operation. */
+   if (state->operation != 0 && state->operation != expr->operation)
+      state->is_reduction = false;
+
+   state->operation = expr->operation;
+}
+
+/* Rebalance the tree rooted at \p expr when is_reduction() accepts it and
+ * it holds more than two expression nodes.
+ *
+ * Returns the root of the rebalanced tree, or \p expr itself when nothing
+ * was done.
+ */
+static ir_rvalue *
+handle_expression(ir_expression *expr)
+{
+ struct is_reduction_data ird;
+ ird.operation = (ir_expression_operation)0;
+ ird.type = NULL;
+ ird.num_expr = 0;
+ ird.is_reduction = true;
+ ird.contains_constant = false;
+
+ visit_tree(expr, is_reduction, (void *)&ird);
+
+ if (ird.is_reduction && ird.num_expr > 2) {
+ /* Temporary pseudo-root living on the stack: the real tree is its
+ * operands[1], and the rebalanced root is read back from there.
+ */
+ ir_constant z = ir_constant(0.0f);
+ ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);
+
+ unsigned size = tree_to_vine(&pseudo_root);
+ vine_to_tree(&pseudo_root, size);
+
+ expr = (ir_expression *)pseudo_root.operands[1];
+ }
+ return expr;
+}
+
+/* Try to rebalance the expression tree stored in \p *rvalue.
+ *
+ * Only expressions whose root operation is a reduction operation are
+ * considered.  \p *rvalue is overwritten, and progress recorded, only when
+ * handle_expression() hands back a different root.
+ */
+void
+ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (*rvalue == NULL)
+      return;
+
+   ir_expression *expr = (*rvalue)->as_expression();
+   if (expr == NULL)
+      return;
+
+   if (!is_reduction_operation(expr->operation))
+      return;
+
+   ir_rvalue *balanced = handle_expression(expr);
+
+   /* handle_expression() returns the root it was given when it does not
+    * rebalance (e.g. because the tree is not a reduction).  According to
+    * the original author, a reduction that is already balanced also comes
+    * back with an identical root.  Either way there is nothing to record.
+    */
+   if (balanced != *rvalue) {
+      *rvalue = balanced;
+      this->progress = true;
+   }
+}
+
+/**
+ * Run the rebalancing visitor over \p instructions.
+ *
+ * \return the visitor's progress flag, i.e. true when at least one rvalue
+ * was replaced by a rebalanced tree.
+ */
+bool
+do_rebalance_tree(exec_list *instructions)
+{
+ ir_rebalance_visitor v;
+
+ v.run(instructions);
+
+ return v.progress;
+}
diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp
index 6c25010b7..809732c7e 100644
--- a/mesalib/src/glsl/standalone_scaffolding.cpp
+++ b/mesalib/src/glsl/standalone_scaffolding.cpp
@@ -98,6 +98,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
ctx->Extensions.ARB_ES3_compatibility = true;
ctx->Extensions.ARB_explicit_attrib_location = true;
ctx->Extensions.ARB_fragment_coord_conventions = true;
+ ctx->Extensions.ARB_fragment_layer_viewport = true;
ctx->Extensions.ARB_gpu_shader5 = true;
ctx->Extensions.ARB_sample_shading = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
diff --git a/mesalib/src/glsl/test_optpass.cpp b/mesalib/src/glsl/test_optpass.cpp
index db5cb2662..e4878bf15 100644
--- a/mesalib/src/glsl/test_optpass.cpp
+++ b/mesalib/src/glsl/test_optpass.cpp
@@ -65,7 +65,7 @@ do_optimization(struct exec_list *ir, const char *optimization,
if (sscanf(optimization, "do_common_optimization ( %d ) ", &int_0) == 1) {
return do_common_optimization(ir, int_0 != 0, false, options, true);
} else if (strcmp(optimization, "do_algebraic") == 0) {
- return do_algebraic(ir, true);
+ return do_algebraic(ir, true, options);
} else if (strcmp(optimization, "do_constant_folding") == 0) {
return do_constant_folding(ir);
} else if (strcmp(optimization, "do_constant_variable") == 0) {