diff options
Diffstat (limited to 'mesalib/src/glsl')
29 files changed, 1132 insertions, 297 deletions
diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources index 5945590a5..b54eae72d 100644 --- a/mesalib/src/glsl/Makefile.sources +++ b/mesalib/src/glsl/Makefile.sources @@ -96,6 +96,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/opt_function_inlining.cpp \ $(GLSL_SRCDIR)/opt_if_simplification.cpp \ $(GLSL_SRCDIR)/opt_noop_swizzle.cpp \ + $(GLSL_SRCDIR)/opt_rebalance_tree.cpp \ $(GLSL_SRCDIR)/opt_redundant_jumps.cpp \ $(GLSL_SRCDIR)/opt_structure_splitting.cpp \ $(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \ diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp index d1c77f1ec..7ba04a808 100644 --- a/mesalib/src/glsl/ast_to_hir.cpp +++ b/mesalib/src/glsl/ast_to_hir.cpp @@ -49,7 +49,6 @@ * parser (and lexer) sources. */ -#include "main/core.h" /* for struct gl_extensions */ #include "glsl_symbol_table.h" #include "glsl_parser_extras.h" #include "ast.h" @@ -2182,6 +2181,41 @@ validate_explicit_location(const struct ast_type_qualifier *qual, { bool fail = false; + /* Checks for GL_ARB_explicit_uniform_location. */ + if (qual->flags.q.uniform) { + if (!state->check_explicit_uniform_location_allowed(loc, var)) + return; + + const struct gl_context *const ctx = state->ctx; + unsigned max_loc = qual->location + var->type->uniform_locations() - 1; + + /* ARB_explicit_uniform_location specification states: + * + * "The explicitly defined locations and the generated locations + * must be in the range of 0 to MAX_UNIFORM_LOCATIONS minus one." + * + * "Valid locations for default-block uniform variable locations + * are in the range of 0 to the implementation-defined maximum + * number of uniform locations." 
+ */ + if (qual->location < 0) { + _mesa_glsl_error(loc, state, + "explicit location < 0 for uniform %s", var->name); + return; + } + + if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) { + _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s " + ">= MAX_UNIFORM_LOCATIONS (%u)", var->name, + ctx->Const.MaxUserAssignableUniformLocations); + return; + } + + var->data.explicit_location = true; + var->data.location = qual->location; + return; + } + /* Between GL_ARB_explicit_attrib_location an * GL_ARB_separate_shader_objects, the inputs and outputs of any shader * stage can be assigned explicit locations. The checking here associates @@ -2435,6 +2469,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, _mesa_shader_stage_to_string(state->stage)); } + /* Disallow layout qualifiers which may only appear on layout declarations. */ + if (qual->flags.q.prim_type) { + _mesa_glsl_error(loc, state, + "Primitive type may only be specified on GS input or output " + "layout declaration, not on variables."); + } + /* Section 6.1.1 (Function Calling Conventions) of the GLSL 1.10 spec says: * * "However, the const qualifier cannot be used with out or inout." @@ -2649,6 +2690,36 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, const bool uses_deprecated_qualifier = qual->flags.q.attribute || qual->flags.q.varying; + + /* Validate auxiliary storage qualifiers */ + + /* From section 4.3.4 of the GLSL 1.30 spec: + * "It is an error to use centroid in in a vertex shader." + * + * From section 4.3.4 of the GLSL ES 3.00 spec: + * "It is an error to use centroid in or interpolation qualifiers in + * a vertex shader input." + */ + + /* Section 4.3.6 of the GLSL 1.30 specification states: + * "It is an error to use centroid out in a fragment shader." 
+ * + * The GL_ARB_shading_language_420pack extension specification states: + * "It is an error to use auxiliary storage qualifiers or interpolation + * qualifiers on an output in a fragment shader." + */ + if (qual->flags.q.sample && (!is_varying_var(var, state->stage) || uses_deprecated_qualifier)) { + _mesa_glsl_error(loc, state, + "sample qualifier may only be used on `in` or `out` " + "variables between shader stages"); + } + if (qual->flags.q.centroid && !is_varying_var(var, state->stage)) { + _mesa_glsl_error(loc, state, + "centroid qualifier may only be used with `in', " + "`out' or `varying' variables between shader stages"); + } + + /* Is the 'layout' keyword used with parameters that allow relaxed checking. * Many implementations of GL_ARB_fragment_coord_conventions_enable and some * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable @@ -3606,45 +3677,6 @@ ast_declarator_list::hir(exec_list *instructions, } - /* From section 4.3.4 of the GLSL 1.30 spec: - * "It is an error to use centroid in in a vertex shader." - * - * From section 4.3.4 of the GLSL ES 3.00 spec: - * "It is an error to use centroid in or interpolation qualifiers in - * a vertex shader input." - */ - if (state->is_version(130, 300) - && this->type->qualifier.flags.q.centroid - && this->type->qualifier.flags.q.in - && state->stage == MESA_SHADER_VERTEX) { - - _mesa_glsl_error(&loc, state, - "'centroid in' cannot be used in a vertex shader"); - } - - if (state->stage == MESA_SHADER_VERTEX - && this->type->qualifier.flags.q.sample - && this->type->qualifier.flags.q.in) { - - _mesa_glsl_error(&loc, state, - "'sample in' cannot be used in a vertex shader"); - } - - /* Section 4.3.6 of the GLSL 1.30 specification states: - * "It is an error to use centroid out in a fragment shader." 
- * - * The GL_ARB_shading_language_420pack extension specification states: - * "It is an error to use auxiliary storage qualifiers or interpolation - * qualifiers on an output in a fragment shader." - */ - if (state->stage == MESA_SHADER_FRAGMENT && - this->type->qualifier.flags.q.out && - this->type->qualifier.has_auxiliary_storage()) { - _mesa_glsl_error(&loc, state, - "auxiliary storage qualifiers cannot be used on " - "fragment shader outputs"); - } - /* Precision qualifiers exists only in GLSL versions 1.00 and >= 1.30. */ if (this->type->qualifier.precision != ast_precision_none) { @@ -4632,9 +4664,51 @@ ast_case_label::hir(exec_list *instructions, ir_dereference_variable *deref_test_var = new(ctx) ir_dereference_variable(state->switch_state.test_var); - ir_rvalue *const test_cond = new(ctx) ir_expression(ir_binop_all_equal, - label_const, - deref_test_var); + ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal, + label_const, + deref_test_var); + + /* + * From GLSL 4.40 specification section 6.2 ("Selection"): + * + * "The type of the init-expression value in a switch statement must + * be a scalar int or uint. The type of the constant-expression value + * in a case label also must be a scalar int or uint. When any pair + * of these values is tested for "equal value" and the types do not + * match, an implicit conversion will be done to convert the int to a + * uint (see section 4.1.10 “Implicit Conversions”) before the compare + * is done." + */ + if (label_const->type != state->switch_state.test_var->type) { + YYLTYPE loc = this->test_value->get_location(); + + const glsl_type *type_a = label_const->type; + const glsl_type *type_b = state->switch_state.test_var->type; + + /* Check if int->uint implicit conversion is supported. 
*/ + bool integer_conversion_supported = + glsl_type::int_type->can_implicitly_convert_to(glsl_type::uint_type, + state); + + if ((!type_a->is_integer() || !type_b->is_integer()) || + !integer_conversion_supported) { + _mesa_glsl_error(&loc, state, "type mismatch with switch " + "init-expression and case label (%s != %s)", + type_a->name, type_b->name); + } else { + /* Conversion of the case label. */ + if (type_a->base_type == GLSL_TYPE_INT) { + if (!apply_implicit_conversion(glsl_type::uint_type, + test_cond->operands[0], state)) + _mesa_glsl_error(&loc, state, "implicit type conversion error"); + } else { + /* Conversion of the init-expression value. */ + if (!apply_implicit_conversion(glsl_type::uint_type, + test_cond->operands[1], state)) + _mesa_glsl_error(&loc, state, "implicit type conversion error"); + } + } + } ir_assignment *set_fallthru_on_test = new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond); @@ -5041,6 +5115,13 @@ ast_process_structure_or_interface_block(exec_list *instructions, "with uniform interface blocks"); } + if ((qual->flags.q.uniform || !is_interface) && + qual->has_auxiliary_storage()) { + _mesa_glsl_error(&loc, state, + "auxiliary storage qualifiers cannot be used " + "in uniform blocks or structures."); + } + if (field_type->is_matrix() || (field_type->is_array() && field_type->fields.array->is_matrix())) { fields[i].row_major = block_row_major; @@ -5090,7 +5171,7 @@ ast_struct_specifier::hir(exec_list *instructions, */ if (state->language_version != 110 && state->struct_specifier_depth != 0) _mesa_glsl_error(&loc, state, - "embedded structure declartions are not allowed"); + "embedded structure declarations are not allowed"); state->struct_specifier_depth++; @@ -5206,6 +5287,12 @@ ast_interface_block::hir(exec_list *instructions, bool block_row_major = this->layout.flags.q.row_major; exec_list declared_variables; glsl_struct_field *fields; + + /* Treat an interface block as one level of nesting, so that embedded struct + 
* specifiers will be disallowed. + */ + state->struct_specifier_depth++; + unsigned int num_variables = ast_process_structure_or_interface_block(&declared_variables, state, @@ -5217,6 +5304,8 @@ ast_interface_block::hir(exec_list *instructions, redeclaring_per_vertex, var_mode); + state->struct_specifier_depth--; + if (!redeclaring_per_vertex) validate_identifier(this->block_name, loc, state); diff --git a/mesalib/src/glsl/ast_type.cpp b/mesalib/src/glsl/ast_type.cpp index 0ee2c495a..77053d5b1 100644 --- a/mesalib/src/glsl/ast_type.cpp +++ b/mesalib/src/glsl/ast_type.cpp @@ -247,7 +247,7 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, q.flags.q.local_size != 0 && state->in_qualifier->flags.q.local_size == 0; - valid_in_mask.flags.q.local_size = 1; + valid_in_mask.flags.q.local_size = 7; break; default: _mesa_glsl_error(loc, state, diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp index 9b35850ee..b9c69d23c 100644 --- a/mesalib/src/glsl/builtin_variables.cpp +++ b/mesalib/src/glsl/builtin_variables.cpp @@ -26,7 +26,6 @@ #include "glsl_symbol_table.h" #include "main/core.h" #include "main/uniforms.h" -#include "program/prog_parameter.h" #include "program/prog_statevars.h" #include "program/prog_instruction.h" @@ -939,6 +938,11 @@ builtin_variable_generator::generate_fs_special_vars() if (state->ARB_gpu_shader5_enable) { add_system_value(SYSTEM_VALUE_SAMPLE_MASK_IN, array(int_t, 1), "gl_SampleMaskIn"); } + + if (state->ARB_fragment_layer_viewport_enable) { + add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + } } diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y index 98875837c..d8c395778 100644 --- a/mesalib/src/glsl/glcpp/glcpp-parse.y +++ b/mesalib/src/glsl/glcpp/glcpp-parse.y @@ -2086,9 +2086,15 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, 
"GL_ARB_fragment_coord_conventions", 1); + if (extensions->ARB_fragment_layer_viewport) + add_builtin_define(parser, "GL_ARB_fragment_layer_viewport", 1); + if (extensions->ARB_explicit_attrib_location) add_builtin_define(parser, "GL_ARB_explicit_attrib_location", 1); + if (extensions->ARB_explicit_uniform_location) + add_builtin_define(parser, "GL_ARB_explicit_uniform_location", 1); + if (extensions->ARB_shader_texture_lod) add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1); diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll index 6c3f9b692..db7b1d179 100644 --- a/mesalib/src/glsl/glsl_lexer.ll +++ b/mesalib/src/glsl/glsl_lexer.ll @@ -396,6 +396,7 @@ layout { || yyextra->AMD_conservative_depth_enable || yyextra->ARB_conservative_depth_enable || yyextra->ARB_explicit_attrib_location_enable + || yyextra->ARB_explicit_uniform_location_enable || yyextra->has_separate_shader_objects() || yyextra->ARB_uniform_buffer_object_enable || yyextra->ARB_fragment_coord_conventions_enable diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy index b69802ddb..2b2de3047 100644 --- a/mesalib/src/glsl/glsl_parser.yy +++ b/mesalib/src/glsl/glsl_parser.yy @@ -1559,11 +1559,6 @@ type_qualifier: if ($2.flags.q.invariant) _mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier"); - if ($2.has_layout()) { - _mesa_glsl_error(&@1, state, - "\"invariant\" cannot be used with layout(...)"); - } - if (!state->ARB_shading_language_420pack_enable && $2.flags.q.precise) _mesa_glsl_error(&@1, state, "\"invariant\" must come after \"precise\""); @@ -1586,11 +1581,6 @@ type_qualifier: if ($2.has_interpolation()) _mesa_glsl_error(&@1, state, "duplicate interpolation qualifier"); - if ($2.has_layout()) { - _mesa_glsl_error(&@1, state, "interpolation qualifiers cannot be used " - "with layout(...)"); - } - if (!state->ARB_shading_language_420pack_enable && ($2.flags.q.precise || $2.flags.q.invariant)) { _mesa_glsl_error(&@1, state, 
"interpolation qualifiers must come " @@ -1602,28 +1592,18 @@ type_qualifier: } | layout_qualifier type_qualifier { - /* The GLSL 1.50 grammar indicates that a layout(...) declaration can be - * used standalone or immediately before a storage qualifier. It cannot - * be used with interpolation qualifiers or invariant. There does not - * appear to be any text indicating that it must come before the storage - * qualifier, but always seems to in examples. + /* In the absence of ARB_shading_language_420pack, layout qualifiers may + * appear no later than auxiliary storage qualifiers. There is no + * particularly clear spec language mandating this, but in all examples + * the layout qualifier precedes the storage qualifier. + * + * We allow combinations of layout with interpolation, invariant or + * precise qualifiers since these are useful in ARB_separate_shader_objects. + * There is no clear spec guidance on this either. */ if (!state->ARB_shading_language_420pack_enable && $2.has_layout()) _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); - if ($2.flags.q.invariant) - _mesa_glsl_error(&@1, state, "layout(...) cannot be used with " - "the \"invariant\" qualifier"); - - if ($2.flags.q.precise) - _mesa_glsl_error(&@1, state, "layout(...) cannot be used with " - "the \"precise\" qualifier"); - - if ($2.has_interpolation()) { - _mesa_glsl_error(&@1, state, "layout(...) cannot be used with " - "interpolation qualifiers"); - } - $$ = $1; $$.merge_qualifier(&@1, state, $2); } @@ -2181,7 +2161,7 @@ condition: ; /* - * siwtch_statement grammar is based on the syntax described in the body + * switch_statement grammar is based on the syntax described in the body * of the GLSL spec, not in it's appendix!!! 
*/ switch_statement: diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp index f3c5bd049..11a9a4320 100644 --- a/mesalib/src/glsl/glsl_parser_extras.cpp +++ b/mesalib/src/glsl/glsl_parser_extras.cpp @@ -515,7 +515,9 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_draw_buffers, true, false, dummy_true), EXT(ARB_draw_instanced, true, false, ARB_draw_instanced), EXT(ARB_explicit_attrib_location, true, false, ARB_explicit_attrib_location), + EXT(ARB_explicit_uniform_location, true, false, ARB_explicit_uniform_location), EXT(ARB_fragment_coord_conventions, true, false, ARB_fragment_coord_conventions), + EXT(ARB_fragment_layer_viewport, true, false, ARB_fragment_layer_viewport), EXT(ARB_gpu_shader5, true, false, ARB_gpu_shader5), EXT(ARB_sample_shading, true, false, ARB_sample_shading), EXT(ARB_separate_shader_objects, true, false, dummy_true), @@ -1568,7 +1570,8 @@ do_common_optimization(exec_list *ir, bool linked, progress = do_constant_variable_unlinked(ir) || progress; progress = do_constant_folding(ir) || progress; progress = do_cse(ir) || progress; - progress = do_algebraic(ir, native_integers) || progress; + progress = do_rebalance_tree(ir) || progress; + progress = do_algebraic(ir, native_integers, options) || progress; progress = do_lower_jumps(ir) || progress; progress = do_vec_index_to_swizzle(ir) || progress; progress = lower_vector_insert(ir, false) || progress; diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h index 49402fa21..2a5aea477 100644 --- a/mesalib/src/glsl/glsl_parser_extras.h +++ b/mesalib/src/glsl/glsl_parser_extras.h @@ -155,6 +155,21 @@ struct _mesa_glsl_parse_state { return true; } + bool check_explicit_uniform_location_allowed(YYLTYPE *locp, + const ir_variable *) + { + if (!this->has_explicit_attrib_location() || + !this->ARB_explicit_uniform_location_enable) { + _mesa_glsl_error(locp, this, + "uniform explicit location 
requires " + "GL_ARB_explicit_uniform_location and either " + "GL_ARB_explicit_attrib_location or GLSL 330."); + return false; + } + + return true; + } + bool has_explicit_attrib_location() const { return ARB_explicit_attrib_location_enable || is_version(330, 300); @@ -192,7 +207,7 @@ struct _mesa_glsl_parse_state { /** * Number of nested struct_specifier levels * - * Outside a struct_specifer, this is zero. + * Outside a struct_specifier, this is zero. */ unsigned struct_specifier_depth; @@ -367,8 +382,12 @@ struct _mesa_glsl_parse_state { bool ARB_draw_instanced_warn; bool ARB_explicit_attrib_location_enable; bool ARB_explicit_attrib_location_warn; + bool ARB_explicit_uniform_location_enable; + bool ARB_explicit_uniform_location_warn; bool ARB_fragment_coord_conventions_enable; bool ARB_fragment_coord_conventions_warn; + bool ARB_fragment_layer_viewport_enable; + bool ARB_fragment_layer_viewport_warn; bool ARB_gpu_shader5_enable; bool ARB_gpu_shader5_warn; bool ARB_sample_shading_enable; diff --git a/mesalib/src/glsl/glsl_symbol_table.h b/mesalib/src/glsl/glsl_symbol_table.h index 83d7935d8..db8863a20 100644 --- a/mesalib/src/glsl/glsl_symbol_table.h +++ b/mesalib/src/glsl/glsl_symbol_table.h @@ -32,9 +32,9 @@ extern "C" { #include "program/symbol_table.h" } #include "ir.h" -#include "glsl_types.h" class symbol_table_entry; +struct glsl_type; /** * Facade class for _mesa_symbol_table diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp index e77146cdf..f9cd258fe 100644 --- a/mesalib/src/glsl/glsl_types.cpp +++ b/mesalib/src/glsl/glsl_types.cpp @@ -22,9 +22,7 @@ */ #include <stdio.h> -#include <stdlib.h> -#include "main/core.h" /* for Elements */ -#include "glsl_symbol_table.h" +#include "main/core.h" /* for Elements, MAX2 */ #include "glsl_parser_extras.h" #include "glsl_types.h" extern "C" { @@ -677,6 +675,32 @@ glsl_type::component_slots() const return 0; } +unsigned +glsl_type::uniform_locations() const +{ + if (this->is_matrix()) + 
return 1; + + unsigned size = 0; + + switch (this->base_type) { + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: + for (unsigned i = 0; i < this->length; i++) + size += this->fields.structure[i].type->uniform_locations(); + return size; + case GLSL_TYPE_ARRAY: + return this->length * this->fields.array->uniform_locations(); + default: + break; + } + + /* The location count for many types matches with component_slots() result, + * all exceptions should be handled above. + */ + return component_slots(); +} + bool glsl_type::can_implicitly_convert_to(const glsl_type *desired, _mesa_glsl_parse_state *state) const diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h index 35a4e6acc..f6d4a02ab 100644 --- a/mesalib/src/glsl/glsl_types.h +++ b/mesalib/src/glsl/glsl_types.h @@ -256,6 +256,12 @@ struct glsl_type { unsigned component_slots() const; /** + * Calculate the number of unique values from glGetUniformLocation for the + * elements of the type. + */ + unsigned uniform_locations() const; + + /** * Calculate the number of attribute slots required to hold this type * * This implements the language rules of GLSL 1.50 for counting the number diff --git a/mesalib/src/glsl/hir_field_selection.cpp b/mesalib/src/glsl/hir_field_selection.cpp index 1e92c89ae..0fa976811 100644 --- a/mesalib/src/glsl/hir_field_selection.cpp +++ b/mesalib/src/glsl/hir_field_selection.cpp @@ -22,7 +22,6 @@ */ #include "ir.h" -#include "program/symbol_table.h" #include "glsl_parser_extras.h" #include "ast.h" #include "glsl_types.h" diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp index 2f4a0bec8..67dbac1eb 100644 --- a/mesalib/src/glsl/ir.cpp +++ b/mesalib/src/glsl/ir.cpp @@ -23,7 +23,6 @@ #include <string.h> #include "main/core.h" /* for MAX2 */ #include "ir.h" -#include "ir_visitor.h" #include "glsl_types.h" ir_rvalue::ir_rvalue(enum ir_node_type t) diff --git a/mesalib/src/glsl/ir_basic_block.cpp b/mesalib/src/glsl/ir_basic_block.cpp index 426fda2f2..74ee4b696
100644 --- a/mesalib/src/glsl/ir_basic_block.cpp +++ b/mesalib/src/glsl/ir_basic_block.cpp @@ -28,9 +28,7 @@ */ #include "ir.h" -#include "ir_visitor.h" #include "ir_basic_block.h" -#include "glsl_types.h" /** * Calls a user function for every basic block in the instruction stream. diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp index f5cb12343..73380d243 100755 --- a/mesalib/src/glsl/ir_constant_expression.cpp +++ b/mesalib/src/glsl/ir_constant_expression.cpp @@ -36,7 +36,6 @@ #include <math.h> #include "main/core.h" /* for MAX2, MIN2, CLAMP */ #include "ir.h" -#include "ir_visitor.h" #include "glsl_types.h" #include "program/hash_table.h" diff --git a/mesalib/src/glsl/ir_expression_flattening.cpp b/mesalib/src/glsl/ir_expression_flattening.cpp index c1cadb122..0b1ada519 100644 --- a/mesalib/src/glsl/ir_expression_flattening.cpp +++ b/mesalib/src/glsl/ir_expression_flattening.cpp @@ -32,10 +32,8 @@ */ #include "ir.h" -#include "ir_visitor.h" #include "ir_rvalue_visitor.h" #include "ir_expression_flattening.h" -#include "glsl_types.h" class ir_expression_flattening_visitor : public ir_rvalue_visitor { public: diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h index c63921c26..b83c22592 100644 --- a/mesalib/src/glsl/ir_optimization.h +++ b/mesalib/src/glsl/ir_optimization.h @@ -71,7 +71,9 @@ bool do_common_optimization(exec_list *ir, bool linked, const struct gl_shader_compiler_options *options, bool native_integers); -bool do_algebraic(exec_list *instructions, bool native_integers); +bool do_rebalance_tree(exec_list *instructions); +bool do_algebraic(exec_list *instructions, bool native_integers, + const struct gl_shader_compiler_options *options); bool do_constant_folding(exec_list *instructions); bool do_constant_variable(exec_list *instructions); bool do_constant_variable_unlinked(exec_list *instructions); diff --git a/mesalib/src/glsl/ir_uniform.h 
b/mesalib/src/glsl/ir_uniform.h index 3508509d4..2f7352825 100644 --- a/mesalib/src/glsl/ir_uniform.h +++ b/mesalib/src/glsl/ir_uniform.h @@ -32,12 +32,17 @@ #include "program/prog_parameter.h" /* For union gl_constant_value. */ +/** + * Used by GL_ARB_explicit_uniform_location extension code in the linker + * and glUniform* functions to identify inactive explicit uniform locations. + */ +#define INACTIVE_UNIFORM_EXPLICIT_LOCATION ((gl_uniform_storage *) -1) #ifdef __cplusplus extern "C" { #endif -enum gl_uniform_driver_format { +enum PACKED gl_uniform_driver_format { uniform_native = 0, /**< Store data in the native format. */ uniform_int_float, /**< Store integer data as floats. */ uniform_bool_float, /**< Store boolean data as floats. */ @@ -66,11 +71,8 @@ struct gl_uniform_driver_storage { /** * Base format of the stored data. - * - * This field must have a value from \c GLSL_TYPE_UINT through \c - * GLSL_TYPE_SAMPLER. */ - uint8_t format; + enum gl_uniform_driver_format format; /** * Pointer to the base of the data. 
diff --git a/mesalib/src/glsl/link_uniform_block_active_visitor.h b/mesalib/src/glsl/link_uniform_block_active_visitor.h index d76dbcaf1..524cd6b91 100644 --- a/mesalib/src/glsl/link_uniform_block_active_visitor.h +++ b/mesalib/src/glsl/link_uniform_block_active_visitor.h @@ -26,8 +26,6 @@ #define LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H #include "ir.h" -#include "ir_visitor.h" -#include "glsl_types.h" #include "main/hash_table.h" struct link_uniform_block_active { diff --git a/mesalib/src/glsl/link_uniform_initializers.cpp b/mesalib/src/glsl/link_uniform_initializers.cpp index 2100e0517..d755cec98 100644 --- a/mesalib/src/glsl/link_uniform_initializers.cpp +++ b/mesalib/src/glsl/link_uniform_initializers.cpp @@ -25,8 +25,6 @@ #include "ir.h" #include "linker.h" #include "ir_uniform.h" -#include "glsl_symbol_table.h" -#include "program/hash_table.h" /* These functions are put in a "private" namespace instead of being marked * static so that the unit tests can access them. See diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp index 377fed64a..5dcb7b571 100644 --- a/mesalib/src/glsl/link_uniforms.cpp +++ b/mesalib/src/glsl/link_uniforms.cpp @@ -37,6 +37,11 @@ */ /** + * Used by linker to indicate uniforms that have no location set. + */ +#define UNMAPPED_UNIFORM_LOC ~0u + +/** * Count the backing storage requirements for a type */ static unsigned @@ -386,6 +391,9 @@ public: void set_and_process(struct gl_shader_program *prog, ir_variable *var) { + current_var = var; + field_counter = 0; + ubo_block_index = -1; if (var->is_in_uniform_block()) { if (var->is_interface_instance() && var->type->is_array()) { @@ -542,6 +550,22 @@ private: return; } + /* Assign explicit locations. */ + if (current_var->data.explicit_location) { + /* Set sequential locations for struct fields. 
*/ + if (record_type != NULL) { + const unsigned entries = MAX2(1, this->uniforms[id].array_elements); + this->uniforms[id].remap_location = + current_var->data.location + field_counter; + field_counter += entries; + } else { + this->uniforms[id].remap_location = current_var->data.location; + } + } else { + /* Initialize to indicate that no location is set */ + this->uniforms[id].remap_location = UNMAPPED_UNIFORM_LOC; + } + this->uniforms[id].name = ralloc_strdup(this->uniforms, name); this->uniforms[id].type = base_type; this->uniforms[id].initialized = 0; @@ -597,6 +621,17 @@ public: gl_texture_index targets[MAX_SAMPLERS]; /** + * Current variable being processed. + */ + ir_variable *current_var; + + /** + * Field counter is used to take care that uniform structures + * with explicit locations get sequential locations. + */ + unsigned field_counter; + + /** * Mask of samplers used by the current shader stage. */ unsigned shader_samplers_used; @@ -798,10 +833,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog) prog->UniformStorage = NULL; prog->NumUserUniformStorage = 0; - ralloc_free(prog->UniformRemapTable); - prog->UniformRemapTable = NULL; - prog->NumUniformRemapTable = 0; - if (prog->UniformHash != NULL) { prog->UniformHash->clear(); } else { @@ -914,8 +945,28 @@ link_assign_uniform_locations(struct gl_shader_program *prog) sizeof(prog->_LinkedShaders[i]->SamplerTargets)); } - /* Build the uniform remap table that is used to set/get uniform locations */ + /* Reserve all the explicit locations of the active uniforms. */ for (unsigned i = 0; i < num_user_uniforms; i++) { + if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) { + /* How many new entries for this uniform? */ + const unsigned entries = MAX2(1, uniforms[i].array_elements); + + /* Set remap table entries point to correct gl_uniform_storage.
*/ + for (unsigned j = 0; j < entries; j++) { + unsigned element_loc = uniforms[i].remap_location + j; + assert(prog->UniformRemapTable[element_loc] == + INACTIVE_UNIFORM_EXPLICIT_LOCATION); + prog->UniformRemapTable[element_loc] = &uniforms[i]; + } + } + } + + /* Reserve locations for rest of the uniforms. */ + for (unsigned i = 0; i < num_user_uniforms; i++) { + + /* Explicit ones have been set already. */ + if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) + continue; /* how many new entries for this uniform? */ const unsigned entries = MAX2(1, uniforms[i].array_elements); diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp index a43d23082..0b6a71679 100644 --- a/mesalib/src/glsl/linker.cpp +++ b/mesalib/src/glsl/linker.cpp @@ -74,6 +74,7 @@ #include "link_varyings.h" #include "ir_optimization.h" #include "ir_rvalue_visitor.h" +#include "ir_uniform.h" extern "C" { #include "main/shaderobj.h" @@ -2224,6 +2225,115 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog) linker_error(prog, "Too many combined image uniforms and fragment outputs"); } + +/** + * Initializes explicit location slots to INACTIVE_UNIFORM_EXPLICIT_LOCATION + * for a variable, checks for overlaps between other uniforms using explicit + * locations. + */ +static bool +reserve_explicit_locations(struct gl_shader_program *prog, + string_to_uint_map *map, ir_variable *var) +{ + unsigned slots = var->type->uniform_locations(); + unsigned max_loc = var->data.location + slots - 1; + + /* Resize remap table if locations do not fit in the current one. */ + if (max_loc + 1 > prog->NumUniformRemapTable) { + prog->UniformRemapTable = + reralloc(prog, prog->UniformRemapTable, + gl_uniform_storage *, + max_loc + 1); + + if (!prog->UniformRemapTable) { + linker_error(prog, "Out of memory during linking."); + return false; + } + + /* Initialize allocated space. 
*/ + for (unsigned i = prog->NumUniformRemapTable; i < max_loc + 1; i++) + prog->UniformRemapTable[i] = NULL; + + prog->NumUniformRemapTable = max_loc + 1; + } + + for (unsigned i = 0; i < slots; i++) { + unsigned loc = var->data.location + i; + + /* Check if location is already used. */ + if (prog->UniformRemapTable[loc] == INACTIVE_UNIFORM_EXPLICIT_LOCATION) { + + /* Possibly same uniform from a different stage, this is ok. */ + unsigned hash_loc; + if (map->get(hash_loc, var->name) && hash_loc == loc - i) + continue; + + /* ARB_explicit_uniform_location specification states: + * + * "No two default-block uniform variables in the program can have + * the same location, even if they are unused, otherwise a compiler + * or linker error will be generated." + */ + linker_error(prog, + "location qualifier for uniform %s overlaps " + "previously used location", + var->name); + return false; + } + + /* Initialize location as inactive before optimization + * rounds and location assignment. + */ + prog->UniformRemapTable[loc] = INACTIVE_UNIFORM_EXPLICIT_LOCATION; + } + + /* Note, base location used for arrays. */ + map->put(var->data.location, var->name); + + return true; +} + +/** + * Check and reserve all explicit uniform locations, called before + * any optimizations happen to handle also inactive uniforms and + * inactive array elements that may get trimmed away. + */ +static void +check_explicit_uniform_locations(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + if (!ctx->Extensions.ARB_explicit_uniform_location) + return; + + /* This map is used to detect if overlapping explicit locations + * occur with the same uniform (from different stage) or a different one.
+ */ + string_to_uint_map *uniform_map = new string_to_uint_map; + + if (!uniform_map) { + linker_error(prog, "Out of memory during linking."); + return; + } + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + if (!sh) + continue; + + foreach_list(node, sh->ir) { + ir_variable *var = ((ir_instruction *)node)->as_variable(); + if (var && var->data.mode == ir_var_uniform && + var->data.explicit_location && + !reserve_explicit_locations(prog, uniform_map, var)) { + delete uniform_map; /* do not leak the map on error */ + return; + } + } + } + delete uniform_map; +} + void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) { @@ -2372,6 +2482,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) break; } + check_explicit_uniform_locations(ctx, prog); + if (!prog->LinkStatus) + goto done; + /* Validate the inputs of each stage with the output of the preceding * stage. */ diff --git a/mesalib/src/glsl/list.h b/mesalib/src/glsl/list.h index b2e249657..fa6ec12cc 100644 --- a/mesalib/src/glsl/list.h +++ b/mesalib/src/glsl/list.h @@ -87,67 +87,29 @@ struct exec_node { /* empty */ } - const exec_node *get_next() const - { - return next; - } + const exec_node *get_next() const; + exec_node *get_next(); - exec_node *get_next() - { - return next; - } + const exec_node *get_prev() const; + exec_node *get_prev(); - const exec_node *get_prev() const - { - return prev; - } - - exec_node *get_prev() - { - return prev; - } - - void remove() - { - next->prev = prev; - prev->next = next; - next = NULL; - prev = NULL; - } + void remove(); /** * Link a node with itself * * This creates a sort of degenerate list that is occasionally useful.
*/ - void self_link() - { - next = this; - prev = this; - } + void self_link(); /** * Insert a node in the list after the current node */ - void insert_after(exec_node *after) - { - after->next = this->next; - after->prev = this; - - this->next->prev = after; - this->next = after; - } + void insert_after(exec_node *after); /** * Insert a node in the list before the current node */ - void insert_before(exec_node *before) - { - before->next = this; - before->prev = this->prev; - - this->prev->next = before; - this->prev = before; - } + void insert_before(exec_node *before); /** * Insert another list in the list before the current node @@ -157,33 +119,165 @@ struct exec_node { /** * Replace the current node with the given node. */ - void replace_with(exec_node *replacement) - { - replacement->prev = this->prev; - replacement->next = this->next; - - this->prev->next = replacement; - this->next->prev = replacement; - } + void replace_with(exec_node *replacement); /** * Is this the sentinel at the tail of the list? */ - bool is_tail_sentinel() const - { - return this->next == NULL; - } + bool is_tail_sentinel() const; /** * Is this the sentinel at the head of the list? 
*/ - bool is_head_sentinel() const - { - return this->prev == NULL; - } + bool is_head_sentinel() const; #endif }; +static inline void +exec_node_init(struct exec_node *n) +{ + n->next = NULL; + n->prev = NULL; +} + +static inline const struct exec_node * +exec_node_get_next_const(const struct exec_node *n) +{ + return n->next; +} + +static inline struct exec_node * +exec_node_get_next(struct exec_node *n) +{ + return n->next; +} + +static inline const struct exec_node * +exec_node_get_prev_const(const struct exec_node *n) +{ + return n->prev; +} + +static inline struct exec_node * +exec_node_get_prev(struct exec_node *n) +{ + return n->prev; +} + +static inline void +exec_node_remove(struct exec_node *n) +{ + n->next->prev = n->prev; + n->prev->next = n->next; + n->next = NULL; + n->prev = NULL; +} + +static inline void +exec_node_self_link(struct exec_node *n) +{ + n->next = n; + n->prev = n; +} + +static inline void +exec_node_insert_after(struct exec_node *n, struct exec_node *after) +{ + after->next = n->next; + after->prev = n; + + n->next->prev = after; + n->next = after; +} + +static inline void +exec_node_insert_node_before(struct exec_node *n, struct exec_node *before) +{ + before->next = n; + before->prev = n->prev; + + n->prev->next = before; + n->prev = before; +} + +static inline void +exec_node_replace_with(struct exec_node *n, struct exec_node *replacement) +{ + replacement->prev = n->prev; + replacement->next = n->next; + + n->prev->next = replacement; + n->next->prev = replacement; +} + +static inline bool +exec_node_is_tail_sentinel(const struct exec_node *n) +{ + return n->next == NULL; +} + +static inline bool +exec_node_is_head_sentinel(const struct exec_node *n) +{ + return n->prev == NULL; +} + +#ifdef __cplusplus +inline const exec_node *exec_node::get_next() const +{ + return exec_node_get_next_const(this); +} + +inline exec_node *exec_node::get_next() +{ + return exec_node_get_next(this); +} + +inline const exec_node 
*exec_node::get_prev() const +{ + return exec_node_get_prev_const(this); +} + +inline exec_node *exec_node::get_prev() +{ + return exec_node_get_prev(this); +} + +inline void exec_node::remove() +{ + exec_node_remove(this); +} + +inline void exec_node::self_link() +{ + exec_node_self_link(this); +} + +inline void exec_node::insert_after(exec_node *after) +{ + exec_node_insert_after(this, after); +} + +inline void exec_node::insert_before(exec_node *before) +{ + exec_node_insert_node_before(this, before); +} + +inline void exec_node::replace_with(exec_node *replacement) +{ + exec_node_replace_with(this, replacement); +} + +inline bool exec_node::is_tail_sentinel() const +{ + return exec_node_is_tail_sentinel(this); +} + +inline bool exec_node::is_head_sentinel() const +{ + return exec_node_is_head_sentinel(this); +} +#endif #ifdef __cplusplus /* This macro will not work correctly if `t' uses virtual inheritance. If you @@ -229,75 +323,19 @@ struct exec_list { make_empty(); } - void make_empty() - { - head = (exec_node *) & tail; - tail = NULL; - tail_pred = (exec_node *) & head; - } - - bool is_empty() const - { - /* There are three ways to test whether a list is empty or not. - * - * - Check to see if the \c head points to the \c tail. - * - Check to see if the \c tail_pred points to the \c head. - * - Check to see if the \c head is the sentinel node by test whether its - * \c next pointer is \c NULL. - * - * The first two methods tend to generate better code on modern systems - * because they save a pointer dereference. - */ - return head == (exec_node *) &tail; - } - - const exec_node *get_head() const - { - return !is_empty() ? head : NULL; - } - - exec_node *get_head() - { - return !is_empty() ? head : NULL; - } - - const exec_node *get_tail() const - { - return !is_empty() ? tail_pred : NULL; - } - - exec_node *get_tail() - { - return !is_empty() ? 
tail_pred : NULL; - } + void make_empty(); - void push_head(exec_node *n) - { - n->next = head; - n->prev = (exec_node *) &head; + bool is_empty() const; - n->next->prev = n; - head = n; - } + const exec_node *get_head() const; + exec_node *get_head(); - void push_tail(exec_node *n) - { - n->next = (exec_node *) &tail; - n->prev = tail_pred; + const exec_node *get_tail() const; + exec_node *get_tail(); - n->prev->next = n; - tail_pred = n; - } - - void push_degenerate_list_at_head(exec_node *n) - { - assert(n->prev->next == n); - - n->prev->next = head; - head->prev = n->prev; - n->prev = (exec_node *) &head; - head = n; - } + void push_head(exec_node *n); + void push_tail(exec_node *n); + void push_degenerate_list_at_head(exec_node *n); /** * Remove the first node from a list and return it @@ -307,87 +345,239 @@ struct exec_list { * * \sa exec_list::get_head */ - exec_node *pop_head() - { - exec_node *const n = this->get_head(); - if (n != NULL) - n->remove(); - - return n; - } + exec_node *pop_head(); /** * Move all of the nodes from this list to the target list */ - void move_nodes_to(exec_list *target) - { - if (is_empty()) { - target->make_empty(); - } else { - target->head = head; - target->tail = NULL; - target->tail_pred = tail_pred; - - target->head->prev = (exec_node *) &target->head; - target->tail_pred->next = (exec_node *) &target->tail; - - make_empty(); - } - } + void move_nodes_to(exec_list *target); /** * Append all nodes from the source list to the target list */ - void - append_list(exec_list *source) - { - if (source->is_empty()) - return; - - /* Link the first node of the source with the last node of the target list. - */ - this->tail_pred->next = source->head; - source->head->prev = this->tail_pred; - - /* Make the tail of the source list be the tail of the target list. - */ - this->tail_pred = source->tail_pred; - this->tail_pred->next = (exec_node *) &this->tail; - - /* Make the source list empty for good measure. 
- */ - source->make_empty(); - } + void append_list(exec_list *source); #endif }; +static inline void +exec_list_make_empty(struct exec_list *list) +{ + list->head = (struct exec_node *) & list->tail; + list->tail = NULL; + list->tail_pred = (struct exec_node *) & list->head; +} -#ifdef __cplusplus -inline void exec_node::insert_before(exec_list *before) +static inline bool +exec_list_is_empty(const struct exec_list *list) +{ + /* There are three ways to test whether a list is empty or not. + * + * - Check to see if the \c head points to the \c tail. + * - Check to see if the \c tail_pred points to the \c head. + * - Check to see if the \c head is the sentinel node by test whether its + * \c next pointer is \c NULL. + * + * The first two methods tend to generate better code on modern systems + * because they save a pointer dereference. + */ + return list->head == (struct exec_node *) &list->tail; +} + +static inline const struct exec_node * +exec_list_get_head_const(const struct exec_list *list) +{ + return !exec_list_is_empty(list) ? list->head : NULL; +} + +static inline struct exec_node * +exec_list_get_head(struct exec_list *list) +{ + return !exec_list_is_empty(list) ? list->head : NULL; +} + +static inline const struct exec_node * +exec_list_get_tail_const(const struct exec_list *list) +{ + return !exec_list_is_empty(list) ? list->tail_pred : NULL; +} + +static inline struct exec_node * +exec_list_get_tail(struct exec_list *list) +{ + return !exec_list_is_empty(list) ? 
list->tail_pred : NULL; +} + +static inline void +exec_list_push_head(struct exec_list *list, struct exec_node *n) +{ + n->next = list->head; + n->prev = (struct exec_node *) &list->head; + + n->next->prev = n; + list->head = n; +} + +static inline void +exec_list_push_tail(struct exec_list *list, struct exec_node *n) +{ + n->next = (struct exec_node *) &list->tail; + n->prev = list->tail_pred; + + n->prev->next = n; + list->tail_pred = n; +} + +static inline void +exec_list_push_degenerate_list_at_head(struct exec_list *list, struct exec_node *n) +{ + assert(n->prev->next == n); + + n->prev->next = list->head; + list->head->prev = n->prev; + n->prev = (struct exec_node *) &list->head; + list->head = n; +} + +static inline struct exec_node * +exec_list_pop_head(struct exec_list *list) +{ + struct exec_node *const n = exec_list_get_head(list); + if (n != NULL) + exec_node_remove(n); + + return n; +} + +static inline void +exec_list_move_nodes_to(struct exec_list *list, struct exec_list *target) +{ + if (exec_list_is_empty(list)) { + exec_list_make_empty(target); + } else { + target->head = list->head; + target->tail = NULL; + target->tail_pred = list->tail_pred; + + target->head->prev = (struct exec_node *) &target->head; + target->tail_pred->next = (struct exec_node *) &target->tail; + + exec_list_make_empty(list); + } +} + +static inline void +exec_list_append(struct exec_list *list, struct exec_list *source) +{ + if (exec_list_is_empty(source)) + return; + + /* Link the first node of the source with the last node of the target list. + */ + list->tail_pred->next = source->head; + source->head->prev = list->tail_pred; + + /* Make the tail of the source list be the tail of the target list. + */ + list->tail_pred = source->tail_pred; + list->tail_pred->next = (struct exec_node *) &list->tail; + + /* Make the source list empty for good measure. 
+ */ + exec_list_make_empty(source); +} + +static inline void +exec_node_insert_list_before(struct exec_node *n, struct exec_list *before) { - if (before->is_empty()) + if (exec_list_is_empty(before)) return; - before->tail_pred->next = this; - before->head->prev = this->prev; + before->tail_pred->next = n; + before->head->prev = n->prev; - this->prev->next = before->head; - this->prev = before->tail_pred; + n->prev->next = before->head; + n->prev = before->tail_pred; - before->make_empty(); + exec_list_make_empty(before); +} + +#ifdef __cplusplus +inline void exec_list::make_empty() +{ + exec_list_make_empty(this); +} + +inline bool exec_list::is_empty() const +{ + return exec_list_is_empty(this); +} + +inline const exec_node *exec_list::get_head() const +{ + return exec_list_get_head_const(this); +} + +inline exec_node *exec_list::get_head() +{ + return exec_list_get_head(this); +} + +inline const exec_node *exec_list::get_tail() const +{ + return exec_list_get_tail_const(this); +} + +inline exec_node *exec_list::get_tail() +{ + return exec_list_get_tail(this); +} + +inline void exec_list::push_head(exec_node *n) +{ + exec_list_push_head(this, n); +} + +inline void exec_list::push_tail(exec_node *n) +{ + exec_list_push_tail(this, n); +} + +inline void exec_list::push_degenerate_list_at_head(exec_node *n) +{ + exec_list_push_degenerate_list_at_head(this, n); +} + +inline exec_node *exec_list::pop_head() +{ + return exec_list_pop_head(this); +} + +inline void exec_list::move_nodes_to(exec_list *target) +{ + exec_list_move_nodes_to(this, target); +} + +inline void exec_list::append_list(exec_list *source) +{ + exec_list_append(this, source); +} + +inline void exec_node::insert_before(exec_list *before) +{ + exec_node_insert_list_before(this, before); } #endif /** * This version is safe even if the current node is removed. 
*/ -#define foreach_list_safe(__node, __list) \ - for (exec_node * __node = (__list)->head, * __next = __node->next \ - ; __next != NULL \ +#define foreach_list_safe(__node, __list) \ + for (struct exec_node * __node = (__list)->head, * __next = __node->next \ + ; __next != NULL \ ; __node = __next, __next = __next->next) #define foreach_list(__node, __list) \ - for (exec_node * __node = (__list)->head \ + for (struct exec_node * __node = (__list)->head \ ; (__node)->next != NULL \ ; (__node) = (__node)->next) @@ -397,19 +587,19 @@ inline void exec_node::insert_before(exec_list *before) * This is safe against either current node being removed or replaced. */ #define foreach_two_lists(__node1, __list1, __node2, __list2) \ - for (exec_node * __node1 = (__list1)->head, \ - * __node2 = (__list2)->head, \ - * __next1 = __node1->next, \ - * __next2 = __node2->next \ + for (struct exec_node * __node1 = (__list1)->head, \ + * __node2 = (__list2)->head, \ + * __next1 = __node1->next, \ + * __next2 = __node2->next \ ; __next1 != NULL && __next2 != NULL \ ; __node1 = __next1, \ __node2 = __next2, \ __next1 = __next1->next, \ __next2 = __next2->next) -#define foreach_list_const(__node, __list) \ - for (const exec_node * __node = (__list)->head \ - ; (__node)->next != NULL \ +#define foreach_list_const(__node, __list) \ + for (const struct exec_node * __node = (__list)->head \ + ; (__node)->next != NULL \ ; (__node) = (__node)->next) #define foreach_list_typed(__type, __node, __field, __list) \ diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp index 9d5539252..ac7514acf 100644 --- a/mesalib/src/glsl/opt_algebraic.cpp +++ b/mesalib/src/glsl/opt_algebraic.cpp @@ -45,7 +45,9 @@ namespace { class ir_algebraic_visitor : public ir_rvalue_visitor { public: - ir_algebraic_visitor(bool native_integers) + ir_algebraic_visitor(bool native_integers, + const struct gl_shader_compiler_options *options) + : options(options) { this->progress = false; 
this->mem_ctx = NULL; @@ -69,6 +71,7 @@ public: ir_rvalue *swizzle_if_required(ir_expression *expr, ir_rvalue *operand); + const struct gl_shader_compiler_options *options; void *mem_ctx; bool native_integers; @@ -116,6 +119,46 @@ update_type(ir_expression *ir) ir->type = ir->operands[1]->type; } +/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ +static ir_expression * +try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx) +{ + if (expr0 && expr0->operation == ir_binop_add && + expr0->type->is_float() && + expr1 && expr1->operation == ir_binop_add && + expr1->type->is_float()) { + ir_swizzle *x = expr0->operands[0]->as_swizzle(); + ir_swizzle *y = expr0->operands[1]->as_swizzle(); + ir_swizzle *z = expr1->operands[0]->as_swizzle(); + ir_swizzle *w = expr1->operands[1]->as_swizzle(); + + if (!x || x->mask.num_components != 1 || + !y || y->mask.num_components != 1 || + !z || z->mask.num_components != 1 || + !w || w->mask.num_components != 1) { + return NULL; + } + + bool swiz_seen[4] = {false, false, false, false}; + swiz_seen[x->mask.x] = true; + swiz_seen[y->mask.x] = true; + swiz_seen[z->mask.x] = true; + swiz_seen[w->mask.x] = true; + + if (!swiz_seen[0] || !swiz_seen[1] || + !swiz_seen[2] || !swiz_seen[3]) { + return NULL; + } + + if (x->val->equals(y->val) && + x->val->equals(z->val) && + x->val->equals(w->val)) { + return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4)); + } + } + return NULL; +} + void ir_algebraic_visitor::reassociate_operands(ir_expression *ir1, int op1, @@ -329,6 +372,14 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if (op_const[1] && !op_const[0]) reassociate_constant(ir, 1, op_const[1], op_expr[0]); + /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ + if (options->OptimizeForAOS) { + ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1], + mem_ctx); + if (expr) + return expr; + } + /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a). 
* * (-x + y) * a + x @@ -380,6 +431,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) } } } + break; case ir_binop_sub: @@ -647,9 +699,10 @@ ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue) } bool -do_algebraic(exec_list *instructions, bool native_integers) +do_algebraic(exec_list *instructions, bool native_integers, + const struct gl_shader_compiler_options *options) { - ir_algebraic_visitor v(native_integers); + ir_algebraic_visitor v(native_integers, options); visit_list_elements(&v, instructions); diff --git a/mesalib/src/glsl/opt_dead_builtin_varyings.cpp b/mesalib/src/glsl/opt_dead_builtin_varyings.cpp index 6612592aa..50c8aa763 100644 --- a/mesalib/src/glsl/opt_dead_builtin_varyings.cpp +++ b/mesalib/src/glsl/opt_dead_builtin_varyings.cpp @@ -334,7 +334,7 @@ public: } void prepare_array(exec_list *ir, - struct ir_variable **new_var, + ir_variable **new_var, int max_elements, unsigned start_location, const char *var_name, const char *mode_str, unsigned usage, unsigned external_usage) diff --git a/mesalib/src/glsl/opt_rebalance_tree.cpp b/mesalib/src/glsl/opt_rebalance_tree.cpp new file mode 100644 index 000000000..773aab3f6 --- /dev/null +++ b/mesalib/src/glsl/opt_rebalance_tree.cpp @@ -0,0 +1,300 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_rebalance_tree.cpp + * + * Rebalances a reduction expression tree. + * + * For reduction operations (e.g., x + y + z + w) we generate an expression + * tree like + * + * + + * / \ + * + w + * / \ + * + z + * / \ + * x y + * + * which we can rebalance into + * + * + + * / \ + * / \ + * + + + * / \ / \ + * x y z w + * + * to get a better instruction scheduling. + * + * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout + * and Bette L. Warren. + * + * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable + * explanation of the of the tree_to_vine() (rightward rotation) and + * vine_to_tree() (leftward rotation) algorithms. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" + +/* The DSW algorithm generates a degenerate tree (really, a linked list) in + * tree_to_vine(). We'd rather not leave a binary expression with only one + * operand, so trivial modifications (the ternary operators below) are needed + * to ensure that we only rotate around the ir_expression nodes of the tree. + */ +static unsigned +tree_to_vine(ir_expression *root) +{ + unsigned size = 0; + ir_rvalue *vine_tail = root; + ir_rvalue *remainder = root->operands[1]; + + while (remainder != NULL) { + ir_expression *remainder_temp = remainder->as_expression(); + ir_expression *remainder_left = remainder_temp ? 
+ remainder_temp->operands[0]->as_expression() : NULL; + + if (remainder_left == NULL) { + /* move vine_tail down one */ + vine_tail = remainder; + remainder = remainder->as_expression() ? + ((ir_expression *)remainder)->operands[1] : NULL; + size++; + } else { + /* rotate */ + ir_expression *tempptr = remainder_left; + ((ir_expression *)remainder)->operands[0] = tempptr->operands[1]; + tempptr->operands[1] = remainder; + remainder = tempptr; + ((ir_expression *)vine_tail)->operands[1] = tempptr; + } + } + + return size; +} + +static void +compression(ir_expression *root, unsigned count) +{ + ir_expression *scanner = root; + + for (unsigned i = 0; i < count; i++) { + ir_expression *child = (ir_expression *)scanner->operands[1]; + scanner->operands[1] = child->operands[1]; + scanner = (ir_expression *)scanner->operands[1]; + child->operands[1] = scanner->operands[0]; + scanner->operands[0] = child; + } +} + +static void +vine_to_tree(ir_expression *root, unsigned size) +{ + int n = size - 1; + for (int m = n / 2; m > 0; m = n / 2) { + compression(root, m); + n -= m + 1; + } +} + +namespace { + +class ir_rebalance_visitor : public ir_rvalue_enter_visitor { +public: + ir_rebalance_visitor() + { + progress = false; + } + + void handle_rvalue(ir_rvalue **rvalue); + + bool progress; +}; + +struct is_reduction_data { + ir_expression_operation operation; + const glsl_type *type; + unsigned num_expr; + bool is_reduction; + bool contains_constant; +}; + +} /* anonymous namespace */ + +static bool +is_reduction_operation(ir_expression_operation operation) +{ + switch (operation) { + case ir_binop_add: + case ir_binop_mul: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + case ir_binop_logic_and: + case ir_binop_logic_xor: + case ir_binop_logic_or: + case ir_binop_min: + case ir_binop_max: + return true; + default: + return false; + } +} + +/* Note that this function does not attempt to recognize that reduction trees + * are already balanced. 
+ * + * We return false from this function for a number of reasons other than an + * expression tree not being a mathematical reduction. Namely, + * + * - if the tree contains multiple constants that we may be able to combine. + * - if the tree contains matrices: + * - they might contain vec4's with many constant components that we can + * simplify after splitting. + * - applying the matrix chain ordering optimization is more than just + * balancing an expression tree. + * - if the tree contains operations on multiple types. + * - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c + * would trick the visiting pass. + */ +static void +is_reduction(ir_instruction *ir, void *data) +{ + struct is_reduction_data *ird = (struct is_reduction_data *)data; + if (!ird->is_reduction) + return; + + /* We don't want to balance a tree that contains multiple constants, since + * we'll be able to constant fold them if they're not in separate subtrees. + */ + if (ir->as_constant()) { + if (ird->contains_constant) { + ird->is_reduction = false; + } + ird->contains_constant = true; + return; + } + + /* Array/record dereferences have subtrees that are not part of the expr + * tree we're balancing. Skip trees containing them. + */ + if (ir->ir_type == ir_type_dereference_array || + ir->ir_type == ir_type_dereference_record) { + ird->is_reduction = false; + return; + } + + ir_expression *expr = ir->as_expression(); + if (!expr) + return; + + /* Non-constant matrices might still contain constant vec4 that we can + * constant fold once split up. Handling matrices will need some more + * work. 
+ */ + if (expr->type->is_matrix()) { + ird->is_reduction = false; + return; + } + + if (ird->type != NULL && ird->type != expr->type) { + ird->is_reduction = false; + return; + } + ird->type = expr->type; + + ird->num_expr++; + if (is_reduction_operation(expr->operation)) { + if (ird->operation != 0 && ird->operation != expr->operation) + ird->is_reduction = false; + ird->operation = expr->operation; + } else { + ird->is_reduction = false; + } +} + +static ir_rvalue * +handle_expression(ir_expression *expr) +{ + struct is_reduction_data ird; + ird.operation = (ir_expression_operation)0; + ird.type = NULL; + ird.num_expr = 0; + ird.is_reduction = true; + ird.contains_constant = false; + + visit_tree(expr, is_reduction, (void *)&ird); + + if (ird.is_reduction && ird.num_expr > 2) { + ir_constant z = ir_constant(0.0f); + ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr); + + unsigned size = tree_to_vine(&pseudo_root); + vine_to_tree(&pseudo_root, size); + + expr = (ir_expression *)pseudo_root.operands[1]; + } + return expr; +} + +void +ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr || !is_reduction_operation(expr->operation)) + return; + + ir_rvalue *new_rvalue = handle_expression(expr); + + /* If we failed to rebalance the tree (e.g., because it wasn't a reduction, + * or some other set of cases) new_rvalue will point to the same root as + * before. + * + * Similarly, if the tree rooted at *rvalue was a reduction and was already + * balanced, the algorithm will rearrange the tree but will ultimately + * return an identical tree, so this check will handle that as well and + * will not set progress = true. 
+ */ + if (new_rvalue == *rvalue) + return; + + *rvalue = new_rvalue; + this->progress = true; +} + +bool +do_rebalance_tree(exec_list *instructions) +{ + ir_rebalance_visitor v; + + v.run(instructions); + + return v.progress; +} diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp index 6c25010b7..809732c7e 100644 --- a/mesalib/src/glsl/standalone_scaffolding.cpp +++ b/mesalib/src/glsl/standalone_scaffolding.cpp @@ -98,6 +98,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) ctx->Extensions.ARB_ES3_compatibility = true; ctx->Extensions.ARB_explicit_attrib_location = true; ctx->Extensions.ARB_fragment_coord_conventions = true; + ctx->Extensions.ARB_fragment_layer_viewport = true; ctx->Extensions.ARB_gpu_shader5 = true; ctx->Extensions.ARB_sample_shading = true; ctx->Extensions.ARB_shader_bit_encoding = true; diff --git a/mesalib/src/glsl/test_optpass.cpp b/mesalib/src/glsl/test_optpass.cpp index db5cb2662..e4878bf15 100644 --- a/mesalib/src/glsl/test_optpass.cpp +++ b/mesalib/src/glsl/test_optpass.cpp @@ -65,7 +65,7 @@ do_optimization(struct exec_list *ir, const char *optimization, if (sscanf(optimization, "do_common_optimization ( %d ) ", &int_0) == 1) { return do_common_optimization(ir, int_0 != 0, false, options, true); } else if (strcmp(optimization, "do_algebraic") == 0) { - return do_algebraic(ir, true); + return do_algebraic(ir, true, options); } else if (strcmp(optimization, "do_constant_folding") == 0) { return do_constant_folding(ir); } else if (strcmp(optimization, "do_constant_variable") == 0) { |