diff options
Diffstat (limited to 'mesalib/src/glsl')
51 files changed, 2033 insertions, 445 deletions
diff --git a/mesalib/src/glsl/Makefile.am b/mesalib/src/glsl/Makefile.am index b466a3b5c..23c6fe8bb 100644 --- a/mesalib/src/glsl/Makefile.am +++ b/mesalib/src/glsl/Makefile.am @@ -46,6 +46,7 @@ EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \ glcpp/glcpp-lex.l \ glcpp/glcpp-parse.y \ nir/nir_algebraic.py \ + nir/nir_builder_opcodes_h.py \ nir/nir_constant_expressions.py \ nir/nir_opcodes.py \ nir/nir_opcodes_c.py \ @@ -67,7 +68,7 @@ TESTS_ENVIRONMENT= \ export PYTHON2=$(PYTHON2); \ export PYTHON_FLAGS=$(PYTHON_FLAGS); -noinst_LTLIBRARIES = libglsl.la libglcpp.la +noinst_LTLIBRARIES = libnir.la libglsl.la libglcpp.la check_PROGRAMS = \ glcpp/glcpp \ glsl_test \ @@ -147,6 +148,12 @@ libglsl_la_SOURCES = \ $(LIBGLSL_FILES) \ $(NIR_FILES) +libnir_la_SOURCES = \ + glsl_types.cpp \ + builtin_types.cpp \ + glsl_symbol_table.cpp \ + $(NIR_FILES) + glsl_compiler_SOURCES = \ $(GLSL_COMPILER_CXX_FILES) @@ -251,8 +258,6 @@ nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py $(MKDIR_P) nir; \ $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@ -nir/nir.h: nir/nir_opcodes.h - nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py $(MKDIR_P) nir; \ $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@ diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources index b876642e8..c471eca23 100644 --- a/mesalib/src/glsl/Makefile.sources +++ b/mesalib/src/glsl/Makefile.sources @@ -22,6 +22,7 @@ NIR_FILES = \ nir/glsl_to_nir.h \ nir/nir.c \ nir/nir.h \ + nir/nir_builder.h \ nir/nir_constant_expressions.h \ nir/nir_dominance.c \ nir/nir_from_ssa.c \ @@ -32,21 +33,25 @@ NIR_FILES = \ nir/nir_lower_atomics.c \ nir/nir_lower_global_vars_to_local.c \ nir/nir_lower_locals_to_regs.c \ + nir/nir_lower_idiv.c \ nir/nir_lower_io.c \ nir/nir_lower_phis_to_scalar.c \ nir/nir_lower_samplers.cpp \ nir/nir_lower_system_values.c \ + nir/nir_lower_tex_projector.c \ nir/nir_lower_to_source_mods.c \ nir/nir_lower_vars_to_ssa.c \ nir/nir_lower_var_copies.c \ nir/nir_lower_vec_to_movs.c \ nir/nir_metadata.c \ + nir/nir_normalize_cubemap_coords.c \ nir/nir_opt_constant_folding.c \ nir/nir_opt_copy_propagate.c \ nir/nir_opt_cse.c \ nir/nir_opt_dce.c \ nir/nir_opt_gcm.c \ nir/nir_opt_global_to_local.c \ + nir/nir_opt_peephole_ffma.c \ nir/nir_opt_peephole_select.c \ nir/nir_opt_remove_phis.c \ nir/nir_print.c \ @@ -54,9 +59,11 @@ NIR_FILES = \ nir/nir_search.c \ nir/nir_search.h \ nir/nir_split_var_copies.c \ + nir/nir_sweep.c \ nir/nir_to_ssa.c \ nir/nir_types.h \ nir/nir_validate.c \ + nir/nir_vla.h \ nir/nir_worklist.c \ nir/nir_worklist.h \ nir/nir_types.cpp \ @@ -183,7 +190,8 @@ LIBGLSL_FILES = \ opt_vectorize.cpp \ program.h \ s_expression.cpp \ - s_expression.h + s_expression.h \ + shader_enums.h # glsl_compiler diff --git a/mesalib/src/glsl/ast_function.cpp b/mesalib/src/glsl/ast_function.cpp index 918be6966..87df93e68 100644 --- a/mesalib/src/glsl/ast_function.cpp +++ b/mesalib/src/glsl/ast_function.cpp @@ -1370,71 +1370,59 @@ emit_inline_matrix_constructor(const glsl_type *type, } else { const unsigned cols = type->matrix_columns; const unsigned rows = type->vector_elements; + unsigned remaining_slots = rows * cols; unsigned col_idx = 0; unsigned row_idx = 0; foreach_in_list(ir_rvalue, rhs, parameters) { - const unsigned components_remaining_this_column = rows - row_idx; - unsigned rhs_components = rhs->type->components(); - unsigned rhs_base = 0; - - /* Since the parameter might be used in the RHS of two assignments, - * generate a temporary and copy the paramter there. - */ - ir_variable *rhs_var = - new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary); - instructions->push_tail(rhs_var); - - ir_dereference *rhs_var_ref = - new(ctx) ir_dereference_variable(rhs_var); - ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL); - instructions->push_tail(inst); - - /* Assign the current parameter to as many components of the matrix - * as it will fill. - * - * NOTE: A single vector parameter can span two matrix columns. A - * single vec4, for example, can completely fill a mat2. - */ - if (rhs_components >= components_remaining_this_column) { - const unsigned count = MIN2(rhs_components, - components_remaining_this_column); - - rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var); - - ir_instruction *inst = assign_to_matrix_column(var, col_idx, - row_idx, - rhs_var_ref, 0, - count, ctx); - instructions->push_tail(inst); - - rhs_base = count; - - col_idx++; - row_idx = 0; - } - - /* If there is data left in the parameter and components left to be - * set in the destination, emit another assignment. It is possible - * that the assignment could be of a vec4 to the last element of the - * matrix. In this case col_idx==cols, but there is still data - * left in the source parameter. Obviously, don't emit an assignment - * to data outside the destination matrix. - */ - if ((col_idx < cols) && (rhs_base < rhs_components)) { - const unsigned count = rhs_components - rhs_base; - - rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var); - - ir_instruction *inst = assign_to_matrix_column(var, col_idx, - row_idx, - rhs_var_ref, - rhs_base, - count, ctx); - instructions->push_tail(inst); - - row_idx += count; - } + unsigned rhs_components = rhs->type->components(); + unsigned rhs_base = 0; + + if (remaining_slots == 0) + break; + + /* Since the parameter might be used in the RHS of two assignments, + * generate a temporary and copy the paramter there. + */ + ir_variable *rhs_var = + new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary); + instructions->push_tail(rhs_var); + + ir_dereference *rhs_var_ref = + new(ctx) ir_dereference_variable(rhs_var); + ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL); + instructions->push_tail(inst); + + do { + /* Assign the current parameter to as many components of the matrix + * as it will fill. + * + * NOTE: A single vector parameter can span two matrix columns. A + * single vec4, for example, can completely fill a mat2. + */ + unsigned count = MIN2(rows - row_idx, + rhs_components - rhs_base); + + rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var); + ir_instruction *inst = assign_to_matrix_column(var, col_idx, + row_idx, + rhs_var_ref, + rhs_base, + count, ctx); + instructions->push_tail(inst); + rhs_base += count; + row_idx += count; + remaining_slots -= count; + + /* Sometimes, there is still data left in the parameters and + * components left to be set in the destination but in other + * column. + */ + if (row_idx >= rows) { + row_idx = 0; + col_idx++; + } + } while(remaining_slots > 0 && rhs_base < rhs_components); } } @@ -1791,7 +1779,7 @@ ast_function_expression::hir(exec_list *instructions, return value; } - return ir_rvalue::error_value(ctx); + unreachable("not reached"); } ir_rvalue * diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp index d387b2e35..78369360f 100644 --- a/mesalib/src/glsl/ast_to_hir.cpp +++ b/mesalib/src/glsl/ast_to_hir.cpp @@ -375,66 +375,14 @@ arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, if (type_a == type_b) return type_a; } else { - if (type_a->is_matrix() && type_b->is_matrix()) { - /* Matrix multiply. The columns of A must match the rows of B. Given - * the other previously tested constraints, this means the vector type - * of a row from A must be the same as the vector type of a column from - * B. - */ - if (type_a->row_type() == type_b->column_type()) { - /* The resulting matrix has the number of columns of matrix B and - * the number of rows of matrix A. We get the row count of A by - * looking at the size of a vector that makes up a column. The - * transpose (size of a row) is done for B. - */ - const glsl_type *const type = - glsl_type::get_instance(type_a->base_type, - type_a->column_type()->vector_elements, - type_b->row_type()->vector_elements); - assert(type != glsl_type::error_type); - - return type; - } - } else if (type_a->is_matrix()) { - /* A is a matrix and B is a column vector. Columns of A must match - * rows of B. Given the other previously tested constraints, this - * means the vector type of a row from A must be the same as the - * vector the type of B. - */ - if (type_a->row_type() == type_b) { - /* The resulting vector has a number of elements equal to - * the number of rows of matrix A. */ - const glsl_type *const type = - glsl_type::get_instance(type_a->base_type, - type_a->column_type()->vector_elements, - 1); - assert(type != glsl_type::error_type); - - return type; - } - } else { - assert(type_b->is_matrix()); + const glsl_type *type = glsl_type::get_mul_type(type_a, type_b); - /* A is a row vector and B is a matrix. Columns of A must match rows - * of B. Given the other previously tested constraints, this means - * the type of A must be the same as the vector type of a column from - * B. - */ - if (type_a == type_b->column_type()) { - /* The resulting vector has a number of elements equal to - * the number of columns of matrix B. */ - const glsl_type *const type = - glsl_type::get_instance(type_a->base_type, - type_b->row_type()->vector_elements, - 1); - assert(type != glsl_type::error_type); - - return type; - } + if (type == glsl_type::error_type) { + _mesa_glsl_error(loc, state, + "size mismatch for matrix multiplication"); } - _mesa_glsl_error(loc, state, "size mismatch for matrix multiplication"); - return glsl_type::error_type; + return type; } @@ -5776,6 +5724,9 @@ ast_interface_block::hir(exec_list *instructions, var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout; + if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) + var->data.read_only = true; + if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in) handle_geometry_shader_input_decl(state, loc, var); @@ -5816,6 +5767,9 @@ ast_interface_block::hir(exec_list *instructions, var->data.sample = fields[i].sample; var->init_interface_type(block_type); + if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) + var->data.read_only = true; + if (fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED) { var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout; diff --git a/mesalib/src/glsl/builtin_functions.cpp b/mesalib/src/glsl/builtin_functions.cpp index e46db8552..98d0b1887 100755 --- a/mesalib/src/glsl/builtin_functions.cpp +++ b/mesalib/src/glsl/builtin_functions.cpp @@ -60,7 +60,7 @@ #include "ir_builder.h" #include "glsl_parser_extras.h" #include "program/prog_instruction.h" -#include <limits> +#include <math.h> #define M_PIf ((float) M_PI) #define M_PI_2f ((float) M_PI/2.0f) @@ -3215,7 +3215,7 @@ builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type ir_constant_data infinities; for (int i = 0; i < type->vector_elements; i++) { - infinities.f[i] = std::numeric_limits<float>::infinity(); + infinities.f[i] = INFINITY; } body.emit(ret(equal(abs(x), imm(type, infinities)))); diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll index 8dc3d106b..2785ed168 100644 --- a/mesalib/src/glsl/glsl_lexer.ll +++ b/mesalib/src/glsl/glsl_lexer.ll @@ -36,14 +36,13 @@ static int classify_identifier(struct _mesa_glsl_parse_state *, const char *); #define YY_USER_ACTION \ do { \ - yylloc->source = 0; \ yylloc->first_column = yycolumn + 1; \ yylloc->first_line = yylloc->last_line = yylineno + 1; \ yycolumn += yyleng; \ yylloc->last_column = yycolumn + 1; \ } while(0); -#define YY_USER_INIT yylineno = 0; yycolumn = 0; +#define YY_USER_INIT yylineno = 0; yycolumn = 0; yylloc->source = 0; /* A macro for handling reserved words and keywords across language versions. * @@ -188,6 +187,15 @@ HASH ^{SPC}#{SPC} * one-based. */ yylineno = strtol(ptr, &ptr, 0) - 1; + + /* From GLSL 3.30 and GLSL ES on, after processing the + * line directive (including its new-line), the implementation + * will behave as if it is compiling at the line number passed + * as argument. It was line number + 1 in older specifications. + */ + if (yyextra->is_version(330, 100)) + yylineno--; + yylloc->source = strtol(ptr, NULL, 0); } {HASH}line{SPCP}{INT}{SPC}$ { @@ -203,6 +211,14 @@ HASH ^{SPC}#{SPC} * one-based. */ yylineno = strtol(ptr, &ptr, 0) - 1; + + /* From GLSL 3.30 and GLSL ES on, after processing the + * line directive (including its new-line), the implementation + * will behave as if it is compiling at the line number passed + * as argument. It was line number + 1 in older specifications. + */ + if (yyextra->is_version(330, 100)) + yylineno--; } ^{SPC}#{SPC}pragma{SPCP}debug{SPC}\({SPC}on{SPC}\) { BEGIN PP; diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp index 79624bc26..0aa3c54fc 100644 --- a/mesalib/src/glsl/glsl_parser_extras.cpp +++ b/mesalib/src/glsl/glsl_parser_extras.cpp @@ -73,8 +73,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->uses_builtin_functions = false; /* Set default language version and extensions */ - this->language_version = ctx->Const.ForceGLSLVersion ? - ctx->Const.ForceGLSLVersion : 110; + this->language_version = 110; + this->forced_language_version = ctx->Const.ForceGLSLVersion; this->es_shader = false; this->ARB_texture_rectangle_enable = true; @@ -320,11 +320,14 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version, this->ARB_texture_rectangle_enable = false; } - this->language_version = version; + if (this->forced_language_version) + this->language_version = this->forced_language_version; + else + this->language_version = version; bool supported = false; for (unsigned i = 0; i < this->num_supported_versions; i++) { - if (this->supported_versions[i].ver == (unsigned) version + if (this->supported_versions[i].ver == this->language_version && this->supported_versions[i].es == this->es_shader) { supported = true; break; diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h index c5670fdb1..5047049fb 100644 --- a/mesalib/src/glsl/glsl_parser_extras.h +++ b/mesalib/src/glsl/glsl_parser_extras.h @@ -109,8 +109,10 @@ struct _mesa_glsl_parse_state { { unsigned required_version = this->es_shader ? required_glsl_es_version : required_glsl_version; + unsigned this_version = this->forced_language_version + ? this->forced_language_version : this->language_version; return required_version != 0 - && this->language_version >= required_version; + && this_version >= required_version; } bool check_version(unsigned required_glsl_version, @@ -230,6 +232,7 @@ struct _mesa_glsl_parse_state { bool es_shader; unsigned language_version; + unsigned forced_language_version; gl_shader_stage stage; /** diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp index 38b37a6a9..4aa36a794 100644 --- a/mesalib/src/glsl/glsl_types.cpp +++ b/mesalib/src/glsl/glsl_types.cpp @@ -825,6 +825,73 @@ glsl_type::get_interface_instance(const glsl_struct_field *fields, const glsl_type * +glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b) +{ + if (type_a == type_b) { + return type_a; + } else if (type_a->is_matrix() && type_b->is_matrix()) { + /* Matrix multiply. The columns of A must match the rows of B. Given + * the other previously tested constraints, this means the vector type + * of a row from A must be the same as the vector type of a column from + * B. + */ + if (type_a->row_type() == type_b->column_type()) { + /* The resulting matrix has the number of columns of matrix B and + * the number of rows of matrix A. We get the row count of A by + * looking at the size of a vector that makes up a column. The + * transpose (size of a row) is done for B. + */ + const glsl_type *const type = + get_instance(type_a->base_type, + type_a->column_type()->vector_elements, + type_b->row_type()->vector_elements); + assert(type != error_type); + + return type; + } + } else if (type_a->is_matrix()) { + /* A is a matrix and B is a column vector. Columns of A must match + * rows of B. Given the other previously tested constraints, this + * means the vector type of a row from A must be the same as the + * vector the type of B. + */ + if (type_a->row_type() == type_b) { + /* The resulting vector has a number of elements equal to + * the number of rows of matrix A. */ + const glsl_type *const type = + get_instance(type_a->base_type, + type_a->column_type()->vector_elements, + 1); + assert(type != error_type); + + return type; + } + } else { + assert(type_b->is_matrix()); + + /* A is a row vector and B is a matrix. Columns of A must match rows + * of B. Given the other previously tested constraints, this means + * the type of A must be the same as the vector type of a column from + * B. + */ + if (type_a == type_b->column_type()) { + /* The resulting vector has a number of elements equal to + * the number of columns of matrix B. */ + const glsl_type *const type = + get_instance(type_a->base_type, + type_b->row_type()->vector_elements, + 1); + assert(type != error_type); + + return type; + } + } + + return error_type; +} + + +const glsl_type * glsl_type::field_type(const char *name) const { if (this->base_type != GLSL_TYPE_STRUCT @@ -1077,15 +1144,6 @@ glsl_type::std140_base_alignment(bool row_major) const return base_alignment; } - /* A sampler may never occur in a UBO (without bindless of some sort), - * however it is convenient to use this alignment function even with - * regular uniforms. This allows use of this function on uniform structs - * that contain samplers. - */ - if (this->is_sampler()) { - return 0; - } - assert(!"not reached"); return -1; } diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h index 7359e9476..d383dd5be 100644 --- a/mesalib/src/glsl/glsl_types.h +++ b/mesalib/src/glsl/glsl_types.h @@ -276,6 +276,12 @@ struct glsl_type { const char *block_name); /** + * Get the type resulting from a multiplication of \p type_a * \p type_b + */ + static const glsl_type *get_mul_type(const glsl_type *type_a, + const glsl_type *type_b); + + /** * Query the total number of scalars that make up a scalar, vector or matrix */ unsigned components() const diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp index 516a53499..68c37275c 100755 --- a/mesalib/src/glsl/ir.cpp +++ b/mesalib/src/glsl/ir.cpp @@ -240,8 +240,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) case ir_unop_round_even: case ir_unop_sin: case ir_unop_cos: - case ir_unop_sin_reduced: - case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdx_coarse: case ir_unop_dFdx_fine: @@ -380,10 +378,12 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) } else if (op1->type->is_scalar()) { this->type = op0->type; } else { - /* FINISHME: matrix types */ - assert(!op0->type->is_matrix() && !op1->type->is_matrix()); - assert(op0->type == op1->type); - this->type = op0->type; + if (this->operation == ir_binop_mul) { + this->type = glsl_type::get_mul_type(op0->type, op1->type); + } else { + assert(op0->type == op1->type); + this->type = op0->type; + } } break; @@ -540,8 +540,6 @@ static const char *const operator_strs[] = { "round_even", "sin", "cos", - "sin_reduced", - "cos_reduced", "dFdx", "dFdxCoarse", "dFdxFine", diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h index fdc22edf1..fab1cd2d2 100644 --- a/mesalib/src/glsl/ir.h +++ b/mesalib/src/glsl/ir.h @@ -109,6 +109,31 @@ public: virtual ir_instruction *clone(void *mem_ctx, struct hash_table *ht) const = 0; + bool is_rvalue() const + { + return ir_type == ir_type_dereference_array || + ir_type == ir_type_dereference_record || + ir_type == ir_type_dereference_variable || + ir_type == ir_type_constant || + ir_type == ir_type_expression || + ir_type == ir_type_swizzle || + ir_type == ir_type_texture; + } + + bool is_dereference() const + { + return ir_type == ir_type_dereference_array || + ir_type == ir_type_dereference_record || + ir_type == ir_type_dereference_variable; + } + + bool is_jump() const + { + return ir_type == ir_type_loop_jump || + ir_type == ir_type_return || + ir_type == ir_type_discard; + } + /** * \name IR instruction downcast functions * @@ -117,45 +142,33 @@ public: * Additional downcast functions will be added as needed. */ /*@{*/ - class ir_rvalue *as_rvalue() - { - assume(this != NULL); - if (ir_type == ir_type_dereference_array || - ir_type == ir_type_dereference_record || - ir_type == ir_type_dereference_variable || - ir_type == ir_type_constant || - ir_type == ir_type_expression || - ir_type == ir_type_swizzle || - ir_type == ir_type_texture) - return (class ir_rvalue *) this; - return NULL; - } - - class ir_dereference *as_dereference() - { - assume(this != NULL); - if (ir_type == ir_type_dereference_array || - ir_type == ir_type_dereference_record || - ir_type == ir_type_dereference_variable) - return (class ir_dereference *) this; - return NULL; - } - - class ir_jump *as_jump() - { - assume(this != NULL); - if (ir_type == ir_type_loop_jump || - ir_type == ir_type_return || - ir_type == ir_type_discard) - return (class ir_jump *) this; - return NULL; - } + #define AS_BASE(TYPE) \ + class ir_##TYPE *as_##TYPE() \ + { \ + assume(this != NULL); \ + return is_##TYPE() ? (ir_##TYPE *) this : NULL; \ + } \ + const class ir_##TYPE *as_##TYPE() const \ + { \ + assume(this != NULL); \ + return is_##TYPE() ? (ir_##TYPE *) this : NULL; \ + } + + AS_BASE(rvalue) + AS_BASE(dereference) + AS_BASE(jump) + #undef AS_BASE #define AS_CHILD(TYPE) \ class ir_##TYPE * as_##TYPE() \ { \ assume(this != NULL); \ return ir_type == ir_type_##TYPE ? (ir_##TYPE *) this : NULL; \ + } \ + const class ir_##TYPE * as_##TYPE() const \ + { \ + assume(this != NULL); \ + return ir_type == ir_type_##TYPE ? (const ir_##TYPE *) this : NULL; \ } AS_CHILD(variable) AS_CHILD(function) @@ -183,7 +196,8 @@ public: * in particular. No support for other instruction types (assignments, * jumps, calls, etc.) is planned. */ - virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset); + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; protected: ir_instruction(enum ir_node_type t) @@ -1300,8 +1314,6 @@ enum ir_expression_operation { /*@{*/ ir_unop_sin, ir_unop_cos, - ir_unop_sin_reduced, /**< Reduced range sin. [-pi, pi] */ - ir_unop_cos_reduced, /**< Reduced range cos. [-pi, pi] */ /*@}*/ /** @@ -1598,7 +1610,8 @@ public: */ ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, ir_rvalue *op2); - virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset); + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; virtual ir_expression *clone(void *mem_ctx, struct hash_table *ht) const; @@ -1909,7 +1922,8 @@ public: virtual ir_visitor_status accept(ir_hierarchical_visitor *); - virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset); + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; /** * Return a string representing the ir_texture_opcode. @@ -2010,7 +2024,8 @@ public: virtual ir_visitor_status accept(ir_hierarchical_visitor *); - virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset); + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; bool is_lvalue() const { @@ -2063,7 +2078,8 @@ public: virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); - virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset); + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; /** * Get the variable that is ultimately referenced by an r-value @@ -2109,7 +2125,8 @@ public: virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); - virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset); + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; /** * Get the variable that is ultimately referenced by an r-value @@ -2219,7 +2236,8 @@ public: virtual ir_visitor_status accept(ir_hierarchical_visitor *); - virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset); + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; /** * Get a particular component of a constant as a specific type diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp index d198276c6..bbf2b0ca8 100755 --- a/mesalib/src/glsl/ir_constant_expression.cpp +++ b/mesalib/src/glsl/ir_constant_expression.cpp @@ -774,7 +774,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) break; case ir_unop_sin: - case ir_unop_sin_reduced: assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { data.f[c] = sinf(op[0]->value.f[c]); @@ -782,7 +781,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) break; case ir_unop_cos: - case ir_unop_cos_reduced: assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { data.f[c] = cosf(op[0]->value.f[c]); diff --git a/mesalib/src/glsl/ir_equals.cpp b/mesalib/src/glsl/ir_equals.cpp index 65376cd94..cc1964eef 100644 --- a/mesalib/src/glsl/ir_equals.cpp +++ b/mesalib/src/glsl/ir_equals.cpp @@ -28,7 +28,8 @@ * can't access a's vtable in that case. */ static bool -possibly_null_equals(ir_instruction *a, ir_instruction *b, enum ir_node_type ignore) +possibly_null_equals(const ir_instruction *a, const ir_instruction *b, + enum ir_node_type ignore) { if (!a || !b) return !a && !b; @@ -41,13 +42,13 @@ possibly_null_equals(ir_instruction *a, ir_instruction *b, enum ir_node_type ign * about. */ bool -ir_instruction::equals(ir_instruction *, enum ir_node_type) +ir_instruction::equals(const ir_instruction *, enum ir_node_type) const { return false; } bool -ir_constant::equals(ir_instruction *ir, enum ir_node_type) +ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const { const ir_constant *other = ir->as_constant(); if (!other) @@ -65,7 +66,8 @@ ir_constant::equals(ir_instruction *ir, enum ir_node_type) } bool -ir_dereference_variable::equals(ir_instruction *ir, enum ir_node_type) +ir_dereference_variable::equals(const ir_instruction *ir, + enum ir_node_type) const { const ir_dereference_variable *other = ir->as_dereference_variable(); if (!other) @@ -75,7 +77,8 @@ ir_dereference_variable::equals(ir_instruction *ir, enum ir_node_type) } bool -ir_dereference_array::equals(ir_instruction *ir, enum ir_node_type ignore) +ir_dereference_array::equals(const ir_instruction *ir, + enum ir_node_type ignore) const { const ir_dereference_array *other = ir->as_dereference_array(); if (!other) @@ -94,7 +97,8 @@ ir_dereference_array::equals(ir_instruction *ir, enum ir_node_type ignore) } bool -ir_swizzle::equals(ir_instruction *ir, enum ir_node_type ignore) +ir_swizzle::equals(const ir_instruction *ir, + enum ir_node_type ignore) const { const ir_swizzle *other = ir->as_swizzle(); if (!other) @@ -116,7 +120,7 @@ ir_swizzle::equals(ir_instruction *ir, enum ir_node_type ignore) } bool -ir_texture::equals(ir_instruction *ir, enum ir_node_type ignore) +ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const { const ir_texture *other = ir->as_texture(); if (!other) @@ -179,7 +183,7 @@ ir_texture::equals(ir_instruction *ir, enum ir_node_type ignore) } bool -ir_expression::equals(ir_instruction *ir, enum ir_node_type ignore) +ir_expression::equals(const ir_instruction *ir, enum ir_node_type ignore) const { const ir_expression *other = ir->as_expression(); if (!other) diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp index 7a7688cb2..cfe0df3dc 100644 --- a/mesalib/src/glsl/ir_validate.cpp +++ b/mesalib/src/glsl/ir_validate.cpp @@ -334,8 +334,6 @@ ir_validate::visit_leave(ir_expression *ir) break; case ir_unop_sin: case ir_unop_cos: - case ir_unop_sin_reduced: - case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdx_coarse: case ir_unop_dFdx_fine: @@ -543,9 +541,9 @@ ir_validate::visit_leave(ir_expression *ir) case ir_binop_logic_and: case ir_binop_logic_xor: case ir_binop_logic_or: - assert(ir->type == glsl_type::bool_type); - assert(ir->operands[0]->type == glsl_type::bool_type); - assert(ir->operands[1]->type == glsl_type::bool_type); + assert(ir->type->base_type == GLSL_TYPE_BOOL); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); + assert(ir->operands[1]->type->base_type == GLSL_TYPE_BOOL); break; case ir_binop_dot: diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp index 972ba8100..51b939098 100644 --- a/mesalib/src/glsl/link_uniforms.cpp +++ b/mesalib/src/glsl/link_uniforms.cpp @@ -547,6 +547,8 @@ private: virtual void enter_record(const glsl_type *type, const char *name, bool row_major) { assert(type->is_record()); + if (this->ubo_block_index == -1) + return; this->ubo_byte_offset = glsl_align( this->ubo_byte_offset, type->std140_base_alignment(row_major)); } @@ -554,6 +556,8 @@ private: virtual void leave_record(const glsl_type *type, const char *name, bool row_major) { assert(type->is_record()); + if (this->ubo_block_index == -1) + return; this->ubo_byte_offset = glsl_align( this->ubo_byte_offset, type->std140_base_alignment(row_major)); } diff --git a/mesalib/src/glsl/link_varyings.cpp b/mesalib/src/glsl/link_varyings.cpp index 22617990f..605748a9c 100644 --- a/mesalib/src/glsl/link_varyings.cpp +++ b/mesalib/src/glsl/link_varyings.cpp @@ -263,6 +263,19 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, if (output != NULL) { cross_validate_types_and_qualifiers(prog, input, output, consumer->Stage, producer->Stage); + } else { + /* Check for input vars with unmatched output vars in prev stage + * taking into account that interface blocks could have a matching + * output but with different name, so we ignore them. + */ + assert(!input->data.assigned); + if (input->data.used && !input->get_interface_type() && + !input->data.explicit_location && !prog->SeparateShader) + linker_error(prog, + "%s shader input `%s' " + "has no matching output in the previous stage\n", + _mesa_shader_stage_to_string(consumer->Stage), + input->name); } } } diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp index 4349f0973..b6baa5d36 100644 --- a/mesalib/src/glsl/linker.cpp +++ b/mesalib/src/glsl/linker.cpp @@ -1377,24 +1377,13 @@ link_fs_input_layout_qualifiers(struct gl_shader_program *prog, * "If gl_FragCoord is redeclared in any fragment shader in a program, * it must be redeclared in all the fragment shaders in that program * that have a static use gl_FragCoord." - * - * Exclude the case when one of the 'linked_shader' or 'shader' redeclares - * gl_FragCoord with no layout qualifiers but the other one doesn't - * redeclare it. If we strictly follow GLSL 1.50 spec's language, it - * should be a link error. But, generating link error for this case will - * be a wrong behaviour which spec didn't intend to do and it could also - * break some applications. */ if ((linked_shader->redeclares_gl_fragcoord && !shader->redeclares_gl_fragcoord - && shader->uses_gl_fragcoord - && (linked_shader->origin_upper_left - || linked_shader->pixel_center_integer)) + && shader->uses_gl_fragcoord) || (shader->redeclares_gl_fragcoord && !linked_shader->redeclares_gl_fragcoord - && linked_shader->uses_gl_fragcoord - && (shader->origin_upper_left - || shader->pixel_center_integer))) { + && linked_shader->uses_gl_fragcoord)) { linker_error(prog, "fragment shader defined with conflicting " "layout qualifiers for gl_FragCoord\n"); } @@ -2503,6 +2492,194 @@ check_explicit_uniform_locations(struct gl_context *ctx, delete uniform_map; } +static bool +add_program_resource(struct gl_shader_program *prog, GLenum type, + const void *data, uint8_t stages) +{ + assert(data); + + /* If resource already exists, do not add it again. */ + for (unsigned i = 0; i < prog->NumProgramResourceList; i++) + if (prog->ProgramResourceList[i].Data == data) + return true; + + prog->ProgramResourceList = + reralloc(prog, + prog->ProgramResourceList, + gl_program_resource, + prog->NumProgramResourceList + 1); + + if (!prog->ProgramResourceList) { + linker_error(prog, "Out of memory during linking.\n"); + return false; + } + + struct gl_program_resource *res = + &prog->ProgramResourceList[prog->NumProgramResourceList]; + + res->Type = type; + res->Data = data; + res->StageReferences = stages; + + prog->NumProgramResourceList++; + + return true; +} + +/** + * Function builds a stage reference bitmask from variable name. + */ +static uint8_t +build_stageref(struct gl_shader_program *shProg, const char *name) +{ + uint8_t stages = 0; + + /* Note, that we assume MAX 8 stages, if there will be more stages, type + * used for reference mask in gl_program_resource will need to be changed. + */ + assert(MESA_SHADER_STAGES < 8); + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = shProg->_LinkedShaders[i]; + if (!sh) + continue; + ir_variable *var = sh->symbols->get_variable(name); + if (var) + stages |= (1 << i); + } + return stages; +} + +static bool +add_interface_variables(struct gl_shader_program *shProg, + struct gl_shader *sh, GLenum programInterface) +{ + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *var = node->as_variable(); + + if (!var) + continue; + + switch (var->data.mode) { + /* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes): + * "For GetActiveAttrib, all active vertex shader input variables + * are enumerated, including the special built-in inputs gl_VertexID + * and gl_InstanceID." + */ + case ir_var_system_value: + if (var->data.location != SYSTEM_VALUE_VERTEX_ID && + var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE && + var->data.location != SYSTEM_VALUE_INSTANCE_ID) + continue; + case ir_var_shader_in: + if (programInterface != GL_PROGRAM_INPUT) + continue; + break; + case ir_var_shader_out: + if (programInterface != GL_PROGRAM_OUTPUT) + continue; + break; + default: + continue; + }; + + if (!add_program_resource(shProg, programInterface, var, + build_stageref(shProg, var->name))) + return false; + } + return true; +} + +/** + * Builds up a list of program resources that point to existing + * resource data. + */ +static void +build_program_resource_list(struct gl_context *ctx, + struct gl_shader_program *shProg) +{ + /* Rebuild resource list. */ + if (shProg->ProgramResourceList) { + ralloc_free(shProg->ProgramResourceList); + shProg->ProgramResourceList = NULL; + shProg->NumProgramResourceList = 0; + } + + int input_stage = MESA_SHADER_STAGES, output_stage = 0; + + /* Determine first input and final output stage. These are used to + * detect which variables should be enumerated in the resource list + * for GL_PROGRAM_INPUT and GL_PROGRAM_OUTPUT. + */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (!shProg->_LinkedShaders[i]) + continue; + if (input_stage == MESA_SHADER_STAGES) + input_stage = i; + output_stage = i; + } + + /* Empty shader, no resources. */ + if (input_stage == MESA_SHADER_STAGES && output_stage == 0) + return; + + /* Add inputs and outputs to the resource list. */ + if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage], + GL_PROGRAM_INPUT)) + return; + + if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage], + GL_PROGRAM_OUTPUT)) + return; + + /* Add transform feedback varyings. */ + if (shProg->LinkedTransformFeedback.NumVarying > 0) { + for (int i = 0; i < shProg->LinkedTransformFeedback.NumVarying; i++) { + uint8_t stageref = + build_stageref(shProg, + shProg->LinkedTransformFeedback.Varyings[i].Name); + if (!add_program_resource(shProg, GL_TRANSFORM_FEEDBACK_VARYING, + &shProg->LinkedTransformFeedback.Varyings[i], + stageref)) + return; + } + } + + /* Add uniforms from uniform storage. */ + for (unsigned i = 0; i < shProg->NumUserUniformStorage; i++) { + /* Do not add uniforms internally used by Mesa. */ + if (shProg->UniformStorage[i].hidden) + continue; + + uint8_t stageref = + build_stageref(shProg, shProg->UniformStorage[i].name); + if (!add_program_resource(shProg, GL_UNIFORM, + &shProg->UniformStorage[i], stageref)) + return; + } + + /* Add program uniform blocks. */ + for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) { + if (!add_program_resource(shProg, GL_UNIFORM_BLOCK, + &shProg->UniformBlocks[i], 0)) + return; + } + + /* Add atomic counter buffers. */ + for (unsigned i = 0; i < shProg->NumAtomicBuffers; i++) { + if (!add_program_resource(shProg, GL_ATOMIC_COUNTER_BUFFER, + &shProg->AtomicBuffers[i], 0)) + return; + } + + /* TODO - following extensions will require more resource types: + * + * GL_ARB_shader_storage_buffer_object + * GL_ARB_shader_subroutine + */ +} + + void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) { @@ -2737,10 +2914,18 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) goto done; } - unsigned first; - for (first = 0; first <= MESA_SHADER_FRAGMENT; first++) { - if (prog->_LinkedShaders[first] != NULL) - break; + unsigned first, last; + + first = MESA_SHADER_STAGES; + last = 0; + + /* Determine first and last stage. */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (!prog->_LinkedShaders[i]) + continue; + if (first == MESA_SHADER_STAGES) + first = i; + last = i; } if (num_tfeedback_decls != 0) { @@ -2769,13 +2954,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) * ensures that inter-shader outputs written to in an earlier stage are * eliminated if they are (transitively) not used in a later stage. */ - int last, next; - for (last = MESA_SHADER_FRAGMENT; last >= 0; last--) { - if (prog->_LinkedShaders[last] != NULL) - break; - } + int next; - if (last >= 0 && last < MESA_SHADER_FRAGMENT) { + if (first < MESA_SHADER_FRAGMENT) { gl_shader *const sh = prog->_LinkedShaders[last]; if (first == MESA_SHADER_GEOMETRY) { @@ -2787,13 +2968,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) * MESA_SHADER_GEOMETRY. */ if (!assign_varying_locations(ctx, mem_ctx, prog, - NULL, sh, + NULL, prog->_LinkedShaders[first], num_tfeedback_decls, tfeedback_decls, prog->Geom.VerticesIn)) goto done; } - if (num_tfeedback_decls != 0 || prog->SeparateShader) { + if (last != MESA_SHADER_FRAGMENT && + (num_tfeedback_decls != 0 || prog->SeparateShader)) { /* There was no fragment shader, but we still have to assign varying * locations for use by transform feedback. */ @@ -2905,6 +3087,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) } } + build_program_resource_list(ctx, prog); + if (!prog->LinkStatus) + goto done; + /* FINISHME: Assign fragment shader output locations. */ done: diff --git a/mesalib/src/glsl/loop_controls.cpp b/mesalib/src/glsl/loop_controls.cpp index d7f0b2809..51804bb5f 100644 --- a/mesalib/src/glsl/loop_controls.cpp +++ b/mesalib/src/glsl/loop_controls.cpp @@ -139,7 +139,7 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, iter = new(mem_ctx) ir_constant(double(iter_value + bias[i])); break; default: - unreachable(!"Unsupported type for loop iterator."); + unreachable("Unsupported type for loop iterator."); } ir_expression *const mul = diff --git a/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp b/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp index 2243f479a..44967dcdb 100644 --- a/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp +++ b/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp @@ -49,7 +49,6 @@ public: { instructions = insts; progress = false; - index = 0; } bool run() @@ -63,7 +62,6 @@ public: private: exec_list *instructions; bool progress; - unsigned index; }; void @@ -82,7 +80,7 @@ lower_const_array_visitor::handle_rvalue(ir_rvalue **rvalue) void *mem_ctx = ralloc_parent(con); - char *uniform_name = ralloc_asprintf(mem_ctx, "constarray__%d", index++); + char *uniform_name = ralloc_asprintf(mem_ctx, "constarray__%p", dra); ir_variable *uni = new(mem_ctx) ir_variable(con->type, uniform_name, ir_var_uniform); diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp index 357944da6..f6b8331d4 100644 --- a/mesalib/src/glsl/nir/glsl_to_nir.cpp +++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp @@ -88,6 +88,8 @@ private: exec_list *cf_node_list; nir_instr *result; /* result of the expression tree last visited */ + nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir); + /* the head of the dereference chain we're creating */ nir_deref_var *deref_head; /* the tail of the dereference chain we're creating */ @@ -156,6 +158,14 @@ nir_visitor::~nir_visitor() _mesa_hash_table_destroy(this->overload_table, NULL); } +nir_deref_var * +nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir) +{ + ir->accept(this); + ralloc_steal(mem_ctx, this->deref_head); + return this->deref_head; +} + static nir_constant * constant_copy(ir_constant *ir, void *mem_ctx) { @@ -582,13 +592,11 @@ void nir_visitor::visit(ir_return *ir) { if (ir->value != NULL) { - ir->value->accept(this); nir_intrinsic_instr *copy = nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_var_create(this->shader, - this->impl->return_var); - copy->variables[1] = this->deref_head; + copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var); + copy->variables[1] = evaluate_deref(©->instr, ir->value); } nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return); @@ -613,8 +621,7 @@ nir_visitor::visit(ir_call *ir) nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); ir_dereference *param = (ir_dereference *) ir->actual_parameters.get_head(); - param->accept(this); - instr->variables[0] = this->deref_head; + instr->variables[0] = evaluate_deref(&instr->instr, param); nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); @@ -623,8 +630,7 @@ nir_visitor::visit(ir_call *ir) nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); store_instr->num_components = 1; - ir->return_deref->accept(this); - store_instr->variables[0] = this->deref_head; + store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref); store_instr->src[0].is_ssa = true; store_instr->src[0].ssa = &instr->dest.ssa; @@ -642,13 +648,11 @@ nir_visitor::visit(ir_call *ir) unsigned i = 0; foreach_in_list(ir_dereference, param, &ir->actual_parameters) { - param->accept(this); - instr->params[i] = this->deref_head; + instr->params[i] = evaluate_deref(&instr->instr, param); i++; } - ir->return_deref->accept(this); - instr->return_deref = this->deref_head; + instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref); nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); } @@ -663,12 +667,8 @@ nir_visitor::visit(ir_assignment *ir) nir_intrinsic_instr *copy = nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); - ir->lhs->accept(this); - copy->variables[0] = this->deref_head; - - ir->rhs->accept(this); - copy->variables[1] = this->deref_head; - + copy->variables[0] = evaluate_deref(©->instr, ir->lhs); + copy->variables[1] = evaluate_deref(©->instr, ir->rhs); if (ir->condition) { nir_if *if_stmt = nir_if_create(this->shader); @@ -700,6 +700,7 @@ nir_visitor::visit(ir_assignment *ir) load->num_components = ir->lhs->type->vector_elements; nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); load->variables[0] = lhs_deref; + ralloc_steal(load, load->variables[0]); nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr); nir_op vec_op; @@ -741,7 +742,7 @@ nir_visitor::visit(ir_assignment *ir) nir_intrinsic_instr *store = nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); store->num_components = ir->lhs->type->vector_elements; - nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref); + nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref); store->variables[0] = nir_deref_as_var(store_deref); store->src[0] = src; @@ -816,6 +817,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir) nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var); load_instr->num_components = ir->type->vector_elements; load_instr->variables[0] = this->deref_head; + ralloc_steal(load_instr, load_instr->variables[0]); add_instr(&load_instr->instr, ir->type->vector_elements); } @@ -959,6 +961,7 @@ nir_visitor::visit(ir_expression *ir) nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op); intrin->num_components = deref->type->vector_elements; intrin->variables[0] = this->deref_head; + ralloc_steal(intrin, intrin->variables[0]); if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset || intrin->intrinsic == nir_intrinsic_interp_var_at_sample) @@ -1087,12 +1090,6 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break; case ir_unop_sin: emit(nir_op_fsin, dest_size, srcs); break; case ir_unop_cos: emit(nir_op_fcos, dest_size, srcs); break; - case ir_unop_sin_reduced: - emit(nir_op_fsin_reduced, dest_size, srcs); - break; - case ir_unop_cos_reduced: - emit(nir_op_fcos_reduced, dest_size, srcs); - break; case ir_unop_dFdx: emit(nir_op_fddx, dest_size, srcs); break; case ir_unop_dFdy: emit(nir_op_fddy, dest_size, srcs); break; case ir_unop_dFdx_fine: emit(nir_op_fddx_fine, dest_size, srcs); break; @@ -1210,6 +1207,9 @@ nir_visitor::visit(ir_expression *ir) case ir_binop_bit_and: case ir_binop_bit_or: case ir_binop_bit_xor: + case ir_binop_logic_and: + case ir_binop_logic_or: + case ir_binop_logic_xor: case ir_binop_lshift: case ir_binop_rshift: switch (ir->operation) { @@ -1270,6 +1270,24 @@ nir_visitor::visit(ir_expression *ir) case ir_binop_bit_xor: op = nir_op_ixor; break; + case ir_binop_logic_and: + if (supports_ints) + op = nir_op_iand; + else + op = nir_op_fand; + break; + case ir_binop_logic_or: + if (supports_ints) + op = nir_op_ior; + else + op = nir_op_for; + break; + case ir_binop_logic_xor: + if (supports_ints) + op = nir_op_ixor; + else + op = nir_op_fxor; + break; case ir_binop_lshift: op = nir_op_ishl; break; @@ -1444,24 +1462,6 @@ nir_visitor::visit(ir_expression *ir) } } break; - case ir_binop_logic_and: - if (supports_ints) - emit(nir_op_iand, dest_size, srcs); - else - emit(nir_op_fand, dest_size, srcs); - break; - case ir_binop_logic_or: - if (supports_ints) - emit(nir_op_ior, dest_size, srcs); - else - emit(nir_op_for, dest_size, srcs); - break; - case ir_binop_logic_xor: - if (supports_ints) - emit(nir_op_ixor, dest_size, srcs); - else - emit(nir_op_fxor, dest_size, srcs); - break; case ir_binop_dot: switch (ir->operands[0]->type->vector_elements) { case 2: emit(nir_op_fdot2, dest_size, srcs); break; @@ -1633,8 +1633,7 @@ nir_visitor::visit(ir_texture *ir) unreachable("not reached"); } - ir->sampler->accept(this); - instr->sampler = this->deref_head; + instr->sampler = evaluate_deref(&instr->instr, ir->sampler); unsigned src_number = 0; @@ -1759,7 +1758,7 @@ nir_visitor::visit(ir_dereference_record *ir) int field_index = this->deref_tail->type->field_index(ir->field); assert(field_index >= 0); - nir_deref_struct *deref = nir_deref_struct_create(this->shader, field_index); + nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index); deref->deref.type = ir->type; this->deref_tail->child = &deref->deref; this->deref_tail = &deref->deref; @@ -1783,5 +1782,6 @@ nir_visitor::visit(ir_dereference_array *ir) ir->array->accept(this); this->deref_tail->child = &deref->deref; + ralloc_steal(this->deref_tail, deref); this->deref_tail = &deref->deref; } diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c index 6459d5108..c6e53612b 100644 --- a/mesalib/src/glsl/nir/nir.c +++ b/mesalib/src/glsl/nir/nir.c @@ -58,11 +58,11 @@ reg_create(void *mem_ctx, struct exec_list *list) nir_register *reg = ralloc(mem_ctx, nir_register); reg->parent_instr = NULL; - reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + reg->uses = _mesa_set_create(reg, _mesa_hash_pointer, _mesa_key_pointer_equal); - reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + reg->defs = _mesa_set_create(reg, _mesa_hash_pointer, _mesa_key_pointer_equal); - reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer, _mesa_key_pointer_equal); reg->num_components = 0; @@ -108,7 +108,7 @@ nir_function_create(nir_shader *shader, const char *name) exec_list_push_tail(&shader->functions, &func->node); exec_list_make_empty(&func->overload_list); - func->name = name; + func->name = ralloc_strdup(func, name); func->shader = shader; return func; @@ -285,10 +285,10 @@ nir_block_create(void *mem_ctx) cf_init(&block->cf_node, nir_cf_node_block); block->successors[0] = block->successors[1] = NULL; - block->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); block->imm_dom = NULL; - block->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); exec_list_make_empty(&block->instr_list); @@ -381,11 +381,11 @@ alu_src_init(nir_alu_src *src) } nir_alu_instr * -nir_alu_instr_create(void *mem_ctx, nir_op op) +nir_alu_instr_create(nir_shader *shader, nir_op op) { unsigned num_srcs = nir_op_infos[op].num_inputs; nir_alu_instr *instr = - ralloc_size(mem_ctx, + ralloc_size(shader, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); instr_init(&instr->instr, nir_instr_type_alu); @@ -398,18 +398,18 @@ nir_alu_instr_create(void *mem_ctx, nir_op op) } nir_jump_instr * -nir_jump_instr_create(void *mem_ctx, nir_jump_type type) +nir_jump_instr_create(nir_shader *shader, nir_jump_type type) { - nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr); + nir_jump_instr *instr = ralloc(shader, nir_jump_instr); instr_init(&instr->instr, nir_instr_type_jump); instr->type = type; return instr; } nir_load_const_instr * -nir_load_const_instr_create(void *mem_ctx, unsigned num_components) +nir_load_const_instr_create(nir_shader *shader, unsigned num_components) { - nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr); + nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr); instr_init(&instr->instr, nir_instr_type_load_const); nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); @@ -418,11 +418,11 @@ nir_load_const_instr_create(void *mem_ctx, unsigned num_components) } nir_intrinsic_instr * -nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op) +nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) { unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; nir_intrinsic_instr *instr = - ralloc_size(mem_ctx, + ralloc_size(shader, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); instr_init(&instr->instr, nir_instr_type_intrinsic); @@ -438,29 +438,29 @@ nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op) } nir_call_instr * -nir_call_instr_create(void *mem_ctx, nir_function_overload *callee) +nir_call_instr_create(nir_shader *shader, nir_function_overload *callee) { - nir_call_instr *instr = ralloc(mem_ctx, nir_call_instr); + nir_call_instr *instr = ralloc(shader, nir_call_instr); instr_init(&instr->instr, nir_instr_type_call); instr->callee = callee; instr->num_params = callee->num_params; - instr->params = ralloc_array(mem_ctx, nir_deref_var *, instr->num_params); + instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params); instr->return_deref = NULL; return instr; } nir_tex_instr * -nir_tex_instr_create(void *mem_ctx, unsigned num_srcs) +nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) { - nir_tex_instr *instr = ralloc(mem_ctx, nir_tex_instr); + nir_tex_instr *instr = ralloc(shader, nir_tex_instr); instr_init(&instr->instr, nir_instr_type_tex); dest_init(&instr->dest); instr->num_srcs = num_srcs; - instr->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs); + instr->src = ralloc_array(instr, nir_tex_src, num_srcs); for (unsigned i = 0; i < num_srcs; i++) src_init(&instr->src[i].src); @@ -472,9 +472,9 @@ nir_tex_instr_create(void *mem_ctx, unsigned num_srcs) } nir_phi_instr * -nir_phi_instr_create(void *mem_ctx) +nir_phi_instr_create(nir_shader *shader) { - nir_phi_instr *instr = ralloc(mem_ctx, nir_phi_instr); + nir_phi_instr *instr = ralloc(shader, nir_phi_instr); instr_init(&instr->instr, nir_instr_type_phi); dest_init(&instr->dest); @@ -483,9 +483,9 @@ nir_phi_instr_create(void *mem_ctx) } nir_parallel_copy_instr * -nir_parallel_copy_instr_create(void *mem_ctx) +nir_parallel_copy_instr_create(nir_shader *shader) { - nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr); + nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr); instr_init(&instr->instr, nir_instr_type_parallel_copy); exec_list_make_empty(&instr->entries); @@ -494,9 +494,9 @@ nir_parallel_copy_instr_create(void *mem_ctx) } nir_ssa_undef_instr * -nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components) +nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components) { - nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr); + nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr); instr_init(&instr->instr, nir_instr_type_ssa_undef); nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); @@ -543,7 +543,7 @@ copy_deref_var(void *mem_ctx, nir_deref_var *deref) nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var); ret->deref.type = deref->deref.type; if (deref->deref.child) - ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + ret->deref.child = nir_copy_deref(ret, deref->deref.child); return ret; } @@ -558,7 +558,7 @@ copy_deref_array(void *mem_ctx, nir_deref_array *deref) } ret->deref.type = deref->deref.type; if (deref->deref.child) - ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + ret->deref.child = nir_copy_deref(ret, deref->deref.child); return ret; } @@ -568,7 +568,7 @@ copy_deref_struct(void *mem_ctx, nir_deref_struct *deref) nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index); ret->deref.type = deref->deref.type; if (deref->deref.child) - ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + ret->deref.child = nir_copy_deref(ret, deref->deref.child); return ret; } @@ -1834,13 +1834,11 @@ void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, unsigned num_components, const char *name) { - void *mem_ctx = ralloc_parent(instr); - def->name = name; def->parent_instr = instr; - def->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + def->uses = _mesa_set_create(instr, _mesa_hash_pointer, _mesa_key_pointer_equal); - def->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + def->if_uses = _mesa_set_create(instr, _mesa_hash_pointer, _mesa_key_pointer_equal); def->num_components = num_components; diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h index 29fe94243..74772c798 100644 --- a/mesalib/src/glsl/nir/nir.h +++ b/mesalib/src/glsl/nir/nir.h @@ -34,6 +34,7 @@ #include "util/set.h" #include "util/bitset.h" #include "nir_types.h" +#include "glsl/shader_enums.h" #include <stdio.h> #include "nir_opcodes.h" @@ -529,6 +530,16 @@ nir_src_for_reg(nir_register *reg) return src; } +static inline nir_instr * +nir_src_get_parent_instr(const nir_src *src) +{ + if (src->is_ssa) { + return src->ssa->parent_instr; + } else { + return src->reg.reg->parent_instr; + } +} + static inline nir_dest nir_dest_for_reg(nir_register *reg) { @@ -1365,11 +1376,17 @@ typedef struct nir_function { typedef struct nir_shader_compiler_options { bool lower_ffma; + bool lower_flrp; bool lower_fpow; bool lower_fsat; bool lower_fsqrt; /** lowers fneg and ineg to fsub and isub. */ bool lower_negate; + /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ + bool lower_sub; + + /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ + bool lower_scmp; /** * Does the driver support real 32-bit integers? (Otherwise, integers @@ -1414,6 +1431,9 @@ typedef struct nir_shader { * access plus one */ unsigned num_inputs, num_uniforms, num_outputs; + + /** the number of uniforms that are only accessed directly */ + unsigned num_direct_uniforms; } nir_shader; #define nir_foreach_overload(shader, overload) \ @@ -1466,26 +1486,26 @@ void nir_metadata_require(nir_function_impl *impl, nir_metadata required); void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); /** creates an instruction with default swizzle/writemask/etc. with NULL registers */ -nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op); +nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); -nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type); +nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); -nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx, +nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, unsigned num_components); -nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx, +nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op); -nir_call_instr *nir_call_instr_create(void *mem_ctx, +nir_call_instr *nir_call_instr_create(nir_shader *shader, nir_function_overload *callee); -nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs); +nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); -nir_phi_instr *nir_phi_instr_create(void *mem_ctx); +nir_phi_instr *nir_phi_instr_create(nir_shader *shader); -nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx); +nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); -nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx, +nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components); nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var); @@ -1550,7 +1570,7 @@ void nir_print_instr(const nir_instr *instr, FILE *fp); #ifdef DEBUG void nir_validate_shader(nir_shader *shader); #else -static inline void nir_validate_shader(nir_shader *shader) { } +static inline void nir_validate_shader(nir_shader *shader) { (void) shader; } #endif /* DEBUG */ void nir_calc_dominance_impl(nir_function_impl *impl); @@ -1596,14 +1616,18 @@ void nir_lower_alu_to_scalar(nir_shader *shader); void nir_lower_phis_to_scalar(nir_shader *shader); void nir_lower_samplers(nir_shader *shader, - struct gl_shader_program *shader_program, - struct gl_program *prog); + const struct gl_shader_program *shader_program, + gl_shader_stage stage); void nir_lower_system_values(nir_shader *shader); +void nir_lower_tex_projector(nir_shader *shader); +void nir_lower_idiv(nir_shader *shader); void nir_lower_atomics(nir_shader *shader); void nir_lower_to_source_mods(nir_shader *shader); +void nir_normalize_cubemap_coords(nir_shader *shader); + void nir_live_variables_impl(nir_function_impl *impl); bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); @@ -1612,6 +1636,7 @@ void nir_convert_to_ssa(nir_shader *shader); void nir_convert_from_ssa(nir_shader *shader); bool nir_opt_algebraic(nir_shader *shader); +bool nir_opt_algebraic_late(nir_shader *shader); bool nir_opt_constant_folding(nir_shader *shader); bool nir_opt_global_to_local(nir_shader *shader); @@ -1631,6 +1656,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader); bool nir_opt_remove_phis(nir_shader *shader); +void nir_sweep(nir_shader *shader); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/mesalib/src/glsl/nir/nir_algebraic.py b/mesalib/src/glsl/nir/nir_algebraic.py index afab1a008..bbf4f08ef 100644 --- a/mesalib/src/glsl/nir/nir_algebraic.py +++ b/mesalib/src/glsl/nir/nir_algebraic.py @@ -181,12 +181,23 @@ _algebraic_pass_template = mako.template.Template(""" #include "nir.h" #include "nir_search.h" +#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS +#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS + struct transform { const nir_search_expression *search; const nir_search_value *replace; unsigned condition_offset; }; +struct opt_state { + void *mem_ctx; + bool progress; + const bool *condition_flags; +}; + +#endif + % for (opcode, xform_list) in xform_dict.iteritems(): % for xform in xform_list: ${xform.search.render()} @@ -200,12 +211,6 @@ static const struct transform ${pass_name}_${opcode}_xforms[] = { }; % endfor -struct opt_state { - void *mem_ctx; - bool progress; - const bool *condition_flags; -}; - static bool ${pass_name}_block(nir_block *block, void *void_state) { diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h index 7c4f7fd96..d1419ee21 100644 --- a/mesalib/src/glsl/nir/nir_builder.h +++ b/mesalib/src/glsl/nir/nir_builder.h @@ -28,6 +28,9 @@ struct exec_list; typedef struct nir_builder { struct exec_list *cf_node_list; + nir_instr *before_instr; + nir_instr *after_instr; + nir_shader *shader; nir_function_impl *impl; } nir_builder; @@ -45,8 +48,75 @@ nir_builder_insert_after_cf_list(nir_builder *build, struct exec_list *cf_node_list) { build->cf_node_list = cf_node_list; + build->before_instr = NULL; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr) +{ + build->cf_node_list = NULL; + build->before_instr = before_instr; + build->after_instr = NULL; } +static inline void +nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr) +{ + build->cf_node_list = NULL; + build->before_instr = NULL; + build->after_instr = after_instr; +} + +static inline void +nir_builder_instr_insert(nir_builder *build, nir_instr *instr) +{ + if (build->cf_node_list) { + nir_instr_insert_after_cf_list(build->cf_node_list, instr); + } else if (build->before_instr) { + nir_instr_insert_before(build->before_instr, instr); + } else { + assert(build->after_instr); + nir_instr_insert_after(build->after_instr, instr); + build->after_instr = instr; + } +} + +static inline nir_ssa_def * +nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value) +{ + nir_load_const_instr *load_const = + nir_load_const_instr_create(build->shader, num_components); + if (!load_const) + return NULL; + + load_const->value = value; + + nir_builder_instr_insert(build, &load_const->instr); + + return &load_const->def; +} + +static inline nir_ssa_def * +nir_imm_float(nir_builder *build, float x) +{ + nir_const_value v = { { .f = {x, 0, 0, 0} } }; + return nir_build_imm(build, 1, v); +} + +static inline nir_ssa_def * +nir_imm_vec4(nir_builder *build, float x, float y, float z, float w) +{ + nir_const_value v = { { .f = {x, y, z, w} } }; + return nir_build_imm(build, 4, v); +} + +static inline nir_ssa_def * +nir_imm_int(nir_builder *build, int x) +{ + nir_const_value v = { { .i = {x, 0, 0, 0} } }; + return nir_build_imm(build, 1, v); +} static inline nir_ssa_def * nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, @@ -90,7 +160,7 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); instr->dest.write_mask = (1 << num_components) - 1; - nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr); + nir_builder_instr_insert(build, &instr->instr); return &instr->dest.dest.ssa; } @@ -127,4 +197,67 @@ nir_##op(nir_builder *build, nir_ssa_def *src0, \ #include "nir_builder_opcodes.h" +/** + * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def. + */ +static inline nir_ssa_def * +nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) +{ + nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); + mov->dest.write_mask = (1 << num_components) - 1; + mov->src[0] = src; + nir_builder_instr_insert(build, &mov->instr); + + return &mov->dest.dest.ssa; +} + +static inline nir_ssa_def * +nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) +{ + nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); + mov->dest.write_mask = (1 << num_components) - 1; + mov->src[0] = src; + nir_builder_instr_insert(build, &mov->instr); + + return &mov->dest.dest.ssa; +} + +/** + * Construct an fmov or imov that reswizzles the source's components. + */ +static inline nir_ssa_def * +nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], + unsigned num_components, bool use_fmov) +{ + nir_alu_src alu_src; + memset(&alu_src, 0, sizeof(alu_src)); + alu_src.src = nir_src_for_ssa(src); + for (int i = 0; i < 4; i++) + alu_src.swizzle[i] = swiz[i]; + + return use_fmov ? nir_fmov_alu(build, alu_src, num_components) : + nir_imov_alu(build, alu_src, num_components); +} + +/** + * Turns a nir_src into a nir_ssa_def * so it can be passed to + * nir_build_alu()-based builder calls. + */ +static inline nir_ssa_def * +nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) +{ + if (src.is_ssa && src.ssa->num_components == num_components) + return src.ssa; + + nir_alu_src alu; + memset(&alu, 0, sizeof(alu)); + alu.src = src; + for (int j = 0; j < 4; j++) + alu.swizzle[j] = j; + + return nir_imov_alu(build, alu, num_components); +} + #endif /* NIR_BUILDER_H */ diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c index c3090fb06..184698abd 100644 --- a/mesalib/src/glsl/nir/nir_from_ssa.c +++ b/mesalib/src/glsl/nir/nir_from_ssa.c @@ -509,12 +509,13 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state) reg->num_components = def->num_components; reg->num_array_elems = 0; - /* This register comes from an SSA definition that was not part of a - * phi-web. Therefore, we know it has a single unique definition - * that dominates all of its uses. Therefore, we can copy the + /* This register comes from an SSA definition that is defined and not + * part of a phi-web. Therefore, we know it has a single unique + * definition that dominates all of its uses; we can copy the * parent_instr from the SSA def safely. */ - reg->parent_instr = def->parent_instr; + if (def->parent_instr->type != nir_instr_type_ssa_undef) + reg->parent_instr = def->parent_instr; _mesa_hash_table_insert(state->ssa_table, def, reg); return reg; diff --git a/mesalib/src/glsl/nir/nir_lower_idiv.c b/mesalib/src/glsl/nir/nir_lower_idiv.c new file mode 100644 index 000000000..7b6803207 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_idiv.c @@ -0,0 +1,155 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Rob Clark <robclark@freedesktop.org> + */ + +#include "nir.h" +#include "nir_builder.h" + +/* Lowers idiv/udiv/umod + * Based on NV50LegalizeSSA::handleDIV() + * + * Note that this is probably not enough precision for compute shaders. + * Perhaps we want a second higher precision (looping) version of this? + * Or perhaps we assume if you can do compute shaders you can also + * branch out to a pre-optimized shader library routine.. + */ + +static void +convert_instr(nir_builder *bld, nir_alu_instr *alu) +{ + nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r; + nir_op op = alu->op; + bool is_signed; + + if ((op != nir_op_idiv) && + (op != nir_op_udiv) && + (op != nir_op_umod)) + return; + + is_signed = (op == nir_op_idiv); + + nir_builder_insert_before_instr(bld, &alu->instr); + + numer = nir_ssa_for_src(bld, alu->src[0].src, + nir_ssa_alu_instr_src_components(alu, 0)); + denom = nir_ssa_for_src(bld, alu->src[1].src, + nir_ssa_alu_instr_src_components(alu, 1)); + + if (is_signed) { + af = nir_i2f(bld, numer); + bf = nir_i2f(bld, denom); + af = nir_fabs(bld, af); + bf = nir_fabs(bld, bf); + a = nir_iabs(bld, numer); + b = nir_iabs(bld, denom); + } else { + af = nir_u2f(bld, numer); + bf = nir_u2f(bld, denom); + a = numer; + b = denom; + } + + /* get first result: */ + bf = nir_frcp(bld, bf); + bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */ + q = nir_fmul(bld, af, bf); + + if (is_signed) { + q = nir_f2i(bld, q); + } else { + q = nir_f2u(bld, q); + } + + /* get error of first result: */ + r = nir_imul(bld, q, b); + r = nir_isub(bld, a, r); + r = nir_u2f(bld, r); + r = nir_fmul(bld, r, bf); + r = nir_f2u(bld, r); + + /* add quotients: */ + q = nir_iadd(bld, q, r); + + /* correction: if modulus >= divisor, add 1 */ + r = nir_imul(bld, q, b); + r = nir_isub(bld, a, r); + + r = nir_ige(bld, r, b); + r = nir_b2i(bld, r); + + q = nir_iadd(bld, q, r); + if (is_signed) { + /* fix the sign: */ + r = nir_ixor(bld, numer, denom); + r = nir_ushr(bld, r, nir_imm_int(bld, 31)); + r = nir_i2b(bld, r); + b = nir_ineg(bld, q); + q = nir_bcsel(bld, r, b, q); + } + + if (op == nir_op_umod) { + /* division result in q */ + r = nir_imul(bld, q, b); + q = nir_isub(bld, a, r); + } + + assert(alu->dest.dest.is_ssa); + nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, + nir_src_for_ssa(q), + ralloc_parent(alu)); +} + +static bool +convert_block(nir_block *block, void *state) +{ + nir_builder *b = state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_alu) + convert_instr(b, nir_instr_as_alu(instr)); + } + + return true; +} + +static void +convert_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(impl, convert_block, &b); + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_lower_idiv(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + convert_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c index 7cd93ea0a..4bdb80072 100644 --- a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c +++ b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c @@ -223,7 +223,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state) else nir_instr_insert_after_block(src->pred, &mov->instr); - nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src); + nir_phi_src *new_src = ralloc(new_phi, nir_phi_src); new_src->pred = src->pred; new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa); diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp index 3015dbd09..cf8ab8325 100644 --- a/mesalib/src/glsl/nir/nir_lower_samplers.cpp +++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp @@ -36,33 +36,26 @@ extern "C" { } static unsigned -get_sampler_index(struct gl_shader_program *shader_program, const char *name, - const struct gl_program *prog) +get_sampler_index(const struct gl_shader_program *shader_program, + gl_shader_stage stage, const char *name) { - GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); - unsigned location; if (!shader_program->UniformHash->get(location, name)) { - linker_error(shader_program, - "failed to find sampler named %s.\n", name); + assert(!"failed to find sampler"); return 0; } - if (!shader_program->UniformStorage[location].sampler[shader].active) { - assert(0 && "cannot return a sampler"); - linker_error(shader_program, - "cannot return a sampler named %s, because it is not " - "used in this shader stage. This is a driver bug.\n", - name); + if (!shader_program->UniformStorage[location].sampler[stage].active) { + assert(!"cannot return a sampler"); return 0; } - return shader_program->UniformStorage[location].sampler[shader].index; + return shader_program->UniformStorage[location].sampler[stage].index; } static void -lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, - const struct gl_program *prog, void *mem_ctx) +lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program, + gl_shader_stage stage, void *mem_ctx) { if (instr->sampler == NULL) return; @@ -90,7 +83,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset); break; case nir_deref_array_type_indirect: { - instr->src = reralloc(mem_ctx, instr->src, nir_tex_src, + instr->src = reralloc(instr, instr->src, nir_tex_src, instr->num_srcs + 1); memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src); instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; @@ -133,15 +126,15 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, } } - instr->sampler_index += get_sampler_index(shader_program, name, prog); + instr->sampler_index += get_sampler_index(shader_program, stage, name); instr->sampler = NULL; } typedef struct { void *mem_ctx; - struct gl_shader_program *shader_program; - struct gl_program *prog; + const struct gl_shader_program *shader_program; + gl_shader_stage stage; } lower_state; static bool @@ -152,7 +145,7 @@ lower_block_cb(nir_block *block, void *_state) nir_foreach_instr(block, instr) { if (instr->type == nir_instr_type_tex) { nir_tex_instr *tex_instr = nir_instr_as_tex(instr); - lower_sampler(tex_instr, state->shader_program, state->prog, + lower_sampler(tex_instr, state->shader_program, state->stage, state->mem_ctx); } } @@ -161,24 +154,24 @@ lower_block_cb(nir_block *block, void *_state) } static void -lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program, - struct gl_program *prog) +lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program, + gl_shader_stage stage) { lower_state state; state.mem_ctx = ralloc_parent(impl); state.shader_program = shader_program; - state.prog = prog; + state.stage = stage; nir_foreach_block(impl, lower_block_cb, &state); } extern "C" void -nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program, - struct gl_program *prog) +nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program, + gl_shader_stage stage) { nir_foreach_overload(shader, overload) { if (overload->impl) - lower_impl(overload->impl, shader_program, prog); + lower_impl(overload->impl, shader_program, stage); } } diff --git a/mesalib/src/glsl/nir/nir_lower_tex_projector.c b/mesalib/src/glsl/nir/nir_lower_tex_projector.c new file mode 100644 index 000000000..6b0e9c340 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_tex_projector.c @@ -0,0 +1,143 @@ +/* + * Copyright © 2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * This lowering pass converts the coordinate division for texture projection + * to be done in ALU instructions instead of asking the texture operation to + * do so. + */ + +#include "nir.h" +#include "nir_builder.h" + +static nir_ssa_def * +channel(nir_builder *b, nir_ssa_def *def, int c) +{ + return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false); +} + +static bool +nir_lower_tex_projector_block(nir_block *block, void *void_state) +{ + nir_builder *b = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + nir_builder_insert_before_instr(b, &tex->instr); + + /* Find the projector in the srcs list, if present. */ + int proj_index; + for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) { + if (tex->src[proj_index].src_type == nir_tex_src_projector) + break; + } + if (proj_index == tex->num_srcs) + continue; + nir_ssa_def *inv_proj = + nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1)); + + /* Walk through the sources projecting the arguments. */ + for (int i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + case nir_tex_src_comparitor: + break; + default: + continue; + } + nir_ssa_def *unprojected = + nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); + nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj); + + /* Array indices don't get projected, so make an new vector with the + * coordinate's array index untouched. + */ + if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) { + switch (tex->coord_components) { + case 4: + projected = nir_vec4(b, + channel(b, projected, 0), + channel(b, projected, 1), + channel(b, projected, 2), + channel(b, unprojected, 3)); + break; + case 3: + projected = nir_vec3(b, + channel(b, projected, 0), + channel(b, projected, 1), + channel(b, unprojected, 2)); + break; + case 2: + projected = nir_vec2(b, + channel(b, projected, 0), + channel(b, unprojected, 1)); + break; + default: + unreachable("bad texture coord count for array"); + break; + } + } + + nir_instr_rewrite_src(&tex->instr, + &tex->src[i].src, + nir_src_for_ssa(projected)); + } + + /* Now move the later tex sources down the array so that the projector + * disappears. + */ + nir_src dead; + memset(&dead, 0, sizeof dead); + nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead); + memmove(&tex->src[proj_index], + &tex->src[proj_index + 1], + (tex->num_srcs - proj_index) * sizeof(*tex->src)); + tex->num_srcs--; + } + + return true; +} + +static void +nir_lower_tex_projector_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(impl, nir_lower_tex_projector_block, &b); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_lower_tex_projector(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_lower_tex_projector_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c index 85ebb281c..58389a7c7 100644 --- a/mesalib/src/glsl/nir/nir_lower_var_copies.c +++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c @@ -148,13 +148,10 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, unsigned num_components = glsl_get_vector_elements(src_tail->type); - nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref); - nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref); - nir_intrinsic_instr *load = nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var); load->num_components = num_components; - load->variables[0] = nir_deref_as_var(src_deref); + load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref)); nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); nir_instr_insert_before(©_instr->instr, &load->instr); @@ -162,7 +159,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, nir_intrinsic_instr *store = nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var); store->num_components = num_components; - store->variables[0] = nir_deref_as_var(dest_deref); + store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref)); + store->src[0].is_ssa = true; store->src[0].ssa = &load->dest.ssa; diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c index 86e6ab416..2ca74d71b 100644 --- a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -642,7 +642,7 @@ add_phi_sources(nir_block *block, nir_block *pred, struct deref_node *node = entry->data; - nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src); + nir_phi_src *src = ralloc(phi, nir_phi_src); src->pred = pred; src->src.is_ssa = true; src->src.ssa = get_ssa_def_for_block(node, pred, state); diff --git a/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c new file mode 100644 index 000000000..0da8447ac --- /dev/null +++ b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c @@ -0,0 +1,110 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand <jason@jlekstrand.net> + */ + +#include "nir.h" +#include "nir_builder.h" + +/** + * This file implements a NIR lowering pass to perform the normalization of + * the cubemap coordinates to have the largest magnitude component be -1.0 + * or 1.0. This is based on the old GLSL IR based pass by Eric. + */ + +static nir_ssa_def * +channel(nir_builder *b, nir_ssa_def *def, int c) +{ + return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false); +} + +static bool +normalize_cubemap_coords_block(nir_block *block, void *void_state) +{ + nir_builder *b = void_state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) + continue; + + nir_builder_insert_before_instr(b, &tex->instr); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type != nir_tex_src_coord) + continue; + + nir_ssa_def *orig_coord = + nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); + assert(orig_coord->num_components >= 3); + + nir_ssa_def *abs = nir_fabs(b, orig_coord); + nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0), + nir_fmax(b, channel(b, abs, 1), + channel(b, abs, 2))); + + nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm)); + + /* Array indices don't have to be normalized, so make a new vector + * with the coordinate's array index untouched. + */ + if (tex->coord_components == 4) { + normalized = nir_vec4(b, + channel(b, normalized, 0), + channel(b, normalized, 1), + channel(b, normalized, 2), + channel(b, orig_coord, 3)); + } + + nir_instr_rewrite_src(&tex->instr, + &tex->src[i].src, + nir_src_for_ssa(normalized)); + } + } + + return true; +} + +static void +normalize_cubemap_coords_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(impl, normalize_cubemap_coords_block, &b); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_normalize_cubemap_coords(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) + if (overload->impl) + normalize_cubemap_coords_impl(overload->impl); +} diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py index 062cd628b..264806f5d 100644 --- a/mesalib/src/glsl/nir/nir_opcodes.py +++ b/mesalib/src/glsl/nir/nir_opcodes.py @@ -161,12 +161,12 @@ unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. # Float-to-boolean conversion -unop_convert("f2b", tfloat, tbool, "src0 == 0.0f") +unop_convert("f2b", tfloat, tbool, "src0 != 0.0f") # Boolean-to-float conversion unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f") # Int-to-boolean conversion -unop_convert("i2b", tint, tbool, "src0 == 0") -unop_convert("b2i", tbool, tint, "src0 ? 0 : -1") # Boolean-to-int conversion +unop_convert("i2b", tint, tbool, "src0 != 0") +unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion. unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}") @@ -191,8 +191,6 @@ unop("fround_even", tfloat, "_mesa_roundevenf(src0)") unop("fsin", tfloat, "sinf(src0)") unop("fcos", tfloat, "cosf(src0)") -unop("fsin_reduced", tfloat, "sinf(src0)") -unop("fcos_reduced", tfloat, "cosf(src0)") # Partial derivatives. diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py index ef855aa77..cdb19241c 100644 --- a/mesalib/src/glsl/nir/nir_opt_algebraic.py +++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py @@ -75,6 +75,9 @@ optimizations = [ (('flrp', a, b, 1.0), b), (('flrp', a, a, b), a), (('flrp', 0.0, a, b), ('fmul', a, b)), + (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'), + (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'), + (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'), (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'), # Comparison simplifications @@ -82,10 +85,6 @@ optimizations = [ (('inot', ('fge', a, b)), ('flt', a, b)), (('inot', ('ilt', a, b)), ('ige', a, b)), (('inot', ('ige', a, b)), ('ilt', a, b)), - (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))), - (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))), - (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), - (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)), (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)), (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)), @@ -95,6 +94,18 @@ optimizations = [ (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), (('fsat', ('fsat', a)), ('fsat', a)), (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)), + (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))), + (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))), + (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'), + (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'), + (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'), + (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'), + # Emulating booleans + (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))), + (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), + (('iand', 'a@bool', 1.0), ('b2f', a)), + (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. + (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. # Comparison with the same args. Note that these are not done for # the float versions because NaN always returns false on float # inequalities. @@ -122,7 +133,7 @@ optimizations = [ (('ishr', 0, a), 0), (('ishr', a, 0), a), (('ushr', 0, a), 0), - (('ushr', a, 0), 0), + (('ushr', a, 0), a), # Exponential/logarithmic identities (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a (('fexp', ('flog', a)), a), # e^ln(a) = a @@ -134,6 +145,26 @@ optimizations = [ (('fpow', a, 1.0), a), (('fpow', a, 2.0), ('fmul', a, a)), (('fpow', 2.0, a), ('fexp2', a)), + (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), + (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))), + (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), + (('frcp', ('fexp', a)), ('fexp', ('fneg', a))), + (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), + (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))), + (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))), + (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))), + (('flog2', ('frcp', a)), ('fneg', ('flog2', a))), + (('flog', ('frcp', a)), ('fneg', ('flog', a))), + (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), + (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))), + (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), + (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))), + (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), + (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))), + (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), + (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))), + (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), + (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))), # Division and reciprocal (('fdiv', 1.0, a), ('frcp', a)), (('frcp', ('frcp', a)), a), @@ -154,18 +185,21 @@ optimizations = [ (('bcsel', a, b, b), b), (('fcsel', a, b, b), b), + # Conversions + (('f2i', ('ftrunc', a)), ('f2i', a)), + (('f2u', ('ftrunc', a)), ('f2u', a)), + # Subtracts (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), (('isub', a, ('isub', 0, b)), ('iadd', a, b)), + (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'), + (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'), (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), (('ineg', a), ('isub', 0, a), 'options->lower_negate'), (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)), (('iadd', a, ('isub', 0, b)), ('isub', a, b)), (('fabs', ('fsub', 0.0, a)), ('fabs', a)), (('iabs', ('isub', 0, a)), ('iabs', a)), - -# This one may not be exact - (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), ] # Add optimizations to handle the case where the result of a ternary is @@ -189,4 +223,17 @@ for op in ['flt', 'fge', 'feq', 'fne', ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))), ] +# This section contains "late" optimizations that should be run after the +# regular optimizations have finished. Optimizations should go here if +# they help code generation but do not necessarily produce code that is +# more easily optimizable. +late_optimizations = [ + (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))), + (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))), + (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), + (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), +] + print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render() +print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late", + late_optimizations).render() diff --git a/mesalib/src/glsl/nir/nir_opt_cse.c b/mesalib/src/glsl/nir/nir_opt_cse.c index 9b383202d..553906e12 100644 --- a/mesalib/src/glsl/nir/nir_opt_cse.c +++ b/mesalib/src/glsl/nir/nir_opt_cse.c @@ -37,20 +37,19 @@ struct cse_state { }; static bool -nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask) +nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1, + unsigned src2) { - if (src1.abs != src2.abs || src1.negate != src2.negate) + if (alu1->src[src1].abs != alu2->src[src2].abs || + alu1->src[src1].negate != alu2->src[src2].negate) return false; - for (int i = 0; i < 4; ++i) { - if (!(read_mask & (1 << i))) - continue; - - if (src1.swizzle[i] != src2.swizzle[i]) + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) { + if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i]) return false; } - return nir_srcs_equal(src1.src, src2.src); + return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src); } static bool @@ -73,10 +72,17 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2) if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components) return false; - for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { - if (!nir_alu_srcs_equal(alu1->src[i], alu2->src[i], - (1 << alu1->dest.dest.ssa.num_components) - 1)) - return false; + if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[alu1->op].num_inputs == 2); + return (nir_alu_srcs_equal(alu1, alu2, 0, 0) && + nir_alu_srcs_equal(alu1, alu2, 1, 1)) || + (nir_alu_srcs_equal(alu1, alu2, 0, 1) && + nir_alu_srcs_equal(alu1, alu2, 1, 0)); + } else { + for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { + if (!nir_alu_srcs_equal(alu1, alu2, i, i)) + return false; + } } return true; } @@ -154,12 +160,14 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2) static bool src_is_ssa(nir_src *src, void *data) { + (void) data; return src->is_ssa; } static bool dest_is_ssa(nir_dest *dest, void *data) { + (void) data; return dest->is_ssa; } diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c new file mode 100644 index 000000000..9d5646fe6 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c @@ -0,0 +1,261 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements a small peephole optimization that looks for a multiply that + * is only ever used in an add and replaces both with an fma. + */ + +struct peephole_ffma_state { + void *mem_ctx; + nir_function_impl *impl; + bool progress; +}; + +static inline bool +are_all_uses_fadd(nir_ssa_def *def) +{ + if (def->if_uses->entries > 0) + return false; + + struct set_entry *use_iter; + set_foreach(def->uses, use_iter) { + nir_instr *use_instr = (nir_instr *)use_iter->key; + + if (use_instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *use_alu = nir_instr_as_alu(use_instr); + switch (use_alu->op) { + case nir_op_fadd: + break; /* This one's ok */ + + case nir_op_imov: + case nir_op_fmov: + case nir_op_fneg: + case nir_op_fabs: + assert(use_alu->dest.dest.is_ssa); + if (!are_all_uses_fadd(&use_alu->dest.dest.ssa)) + return false; + break; + + default: + return false; + } + } + + return true; +} + +static nir_alu_instr * +get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) +{ + assert(src->src.is_ssa && !src->abs && !src->negate); + + nir_instr *instr = src->src.ssa->parent_instr; + if (instr->type != nir_instr_type_alu) + return NULL; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_imov: + case nir_op_fmov: + alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + break; + + case nir_op_fneg: + alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + *negate = !*negate; + break; + + case nir_op_fabs: + alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + *negate = false; + *abs = true; + break; + + case nir_op_fmul: + /* Only absorb a fmul into a ffma if the fmul is is only used in fadd + * operations. This prevents us from being too aggressive with our + * fusing which can actually lead to more instructions. + */ + if (!are_all_uses_fadd(&alu->dest.dest.ssa)) + return NULL; + break; + + default: + return NULL; + } + + if (!alu) + return NULL; + + for (unsigned i = 0; i < 4; i++) { + if (!(alu->dest.write_mask & (1 << i))) + break; + + swizzle[i] = swizzle[src->swizzle[i]]; + } + + return alu; +} + +static bool +nir_opt_peephole_ffma_block(nir_block *block, void *void_state) +{ + struct peephole_ffma_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *add = nir_instr_as_alu(instr); + if (add->op != nir_op_fadd) + continue; + + /* TODO: Maybe bail if this expression is considered "precise"? */ + + assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa); + + /* This, is the case a + a. We would rather handle this with an + * algebraic reduction than fuse it. Also, we want to only fuse + * things where the multiply is used only once and, in this case, + * it would be used twice by the same instruction. + */ + if (add->src[0].src.ssa == add->src[1].src.ssa) + continue; + + nir_alu_instr *mul; + uint8_t add_mul_src, swizzle[4]; + bool negate, abs; + for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) { + for (unsigned i = 0; i < 4; i++) + swizzle[i] = i; + + negate = false; + abs = false; + + mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs); + + if (mul != NULL) + break; + } + + if (mul == NULL) + continue; + + nir_ssa_def *mul_src[2]; + mul_src[0] = mul->src[0].src.ssa; + mul_src[1] = mul->src[1].src.ssa; + + if (abs) { + for (unsigned i = 0; i < 2; i++) { + nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx, + nir_op_fabs); + abs->src[0].src = nir_src_for_ssa(mul_src[i]); + nir_ssa_dest_init(&abs->instr, &abs->dest.dest, + mul_src[i]->num_components, NULL); + abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1; + nir_instr_insert_before(&add->instr, &abs->instr); + mul_src[i] = &abs->dest.dest.ssa; + } + } + + if (negate) { + nir_alu_instr *neg = nir_alu_instr_create(state->mem_ctx, + nir_op_fneg); + neg->src[0].src = nir_src_for_ssa(mul_src[0]); + nir_ssa_dest_init(&neg->instr, &neg->dest.dest, + mul_src[0]->num_components, NULL); + neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1; + nir_instr_insert_before(&add->instr, &neg->instr); + mul_src[0] = &neg->dest.dest.ssa; + } + + nir_alu_instr *ffma = nir_alu_instr_create(state->mem_ctx, nir_op_ffma); + ffma->dest.saturate = add->dest.saturate; + ffma->dest.write_mask = add->dest.write_mask; + + for (unsigned i = 0; i < 2; i++) { + ffma->src[i].src = nir_src_for_ssa(mul_src[i]); + for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++) + ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]]; + } + nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], + state->mem_ctx); + + assert(add->dest.dest.is_ssa); + + nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest, + add->dest.dest.ssa.num_components, + add->dest.dest.ssa.name); + nir_ssa_def_rewrite_uses(&add->dest.dest.ssa, + nir_src_for_ssa(&ffma->dest.dest.ssa), + state->mem_ctx); + + nir_instr_insert_before(&add->instr, &ffma->instr); + assert(add->dest.dest.ssa.uses->entries == 0); + nir_instr_remove(&add->instr); + + state->progress = true; + } + + return true; +} + +static bool +nir_opt_peephole_ffma_impl(nir_function_impl *impl) +{ + struct peephole_ffma_state state; + + state.mem_ctx = ralloc_parent(impl); + state.impl = impl; + state.progress = false; + + nir_foreach_block(impl, nir_opt_peephole_ffma_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return state.progress; +} + +bool +nir_opt_peephole_ffma(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) { + if (overload->impl) + progress |= nir_opt_peephole_ffma_impl(overload->impl); + } + + return progress; +} diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c index b89451b09..f400cfd66 100644 --- a/mesalib/src/glsl/nir/nir_opt_peephole_select.c +++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c @@ -84,7 +84,9 @@ block_check_for_allowed_instrs(nir_block *block) case nir_instr_type_alu: { /* It must be a move operation */ nir_alu_instr *mov = nir_instr_as_alu(instr); - if (mov->op != nir_op_fmov && mov->op != nir_op_imov) + if (mov->op != nir_op_fmov && mov->op != nir_op_imov && + mov->op != nir_op_fneg && mov->op != nir_op_ineg && + mov->op != nir_op_fabs && mov->op != nir_op_iabs) return false; /* Can't handle saturate */ diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c index fa11a312e..fb8c9344c 100644 --- a/mesalib/src/glsl/nir/nir_print.c +++ b/mesalib/src/glsl/nir/nir_print.c @@ -137,25 +137,37 @@ print_dest(nir_dest *dest, FILE *fp) } static void -print_alu_src(nir_alu_src *src, FILE *fp) +print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp) { - if (src->negate) + if (instr->src[src].negate) fprintf(fp, "-"); - if (src->abs) + if (instr->src[src].abs) fprintf(fp, "abs("); - print_src(&src->src, fp); + print_src(&instr->src[src].src, fp); - if (src->swizzle[0] != 0 || - src->swizzle[1] != 1 || - src->swizzle[2] != 2 || - src->swizzle[3] != 3) { + bool print_swizzle = false; + for (unsigned i = 0; i < 4; i++) { + if (!nir_alu_instr_channel_used(instr, src, i)) + continue; + + if (instr->src[src].swizzle[i] != i) { + print_swizzle = true; + break; + } + } + + if (print_swizzle) { fprintf(fp, "."); - for (unsigned i = 0; i < 4; i++) - fprintf(fp, "%c", "xyzw"[src->swizzle[i]]); + for (unsigned i = 0; i < 4; i++) { + if (!nir_alu_instr_channel_used(instr, src, i)) + continue; + + fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]); + } } - if (src->abs) + if (instr->src[src].abs) fprintf(fp, ")"); } @@ -189,7 +201,7 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp) if (i != 0) fprintf(fp, ", "); - print_alu_src(&instr->src[i], fp); + print_alu_src(instr, i, fp); } } diff --git a/mesalib/src/glsl/nir/nir_remove_dead_variables.c b/mesalib/src/glsl/nir/nir_remove_dead_variables.c index e7f8aeacb..4417e2a48 100644 --- a/mesalib/src/glsl/nir/nir_remove_dead_variables.c +++ b/mesalib/src/glsl/nir/nir_remove_dead_variables.c @@ -98,22 +98,14 @@ add_var_use_shader(nir_shader *shader, struct set *live) } static void -remove_dead_local_vars(nir_function_impl *impl, struct set *live) +remove_dead_vars(struct exec_list *var_list, struct set *live) { - foreach_list_typed_safe(nir_variable, var, node, &impl->locals) { + foreach_list_typed_safe(nir_variable, var, node, var_list) { struct set_entry *entry = _mesa_set_search(live, var); - if (entry == NULL) - exec_node_remove(&var->node); - } -} - -static void -remove_dead_global_vars(nir_shader *shader, struct set *live) -{ - foreach_list_typed_safe(nir_variable, var, node, &shader->globals) { - struct set_entry *entry = _mesa_set_search(live, var); - if (entry == NULL) + if (entry == NULL) { exec_node_remove(&var->node); + ralloc_free(var); + } } } @@ -125,11 +117,11 @@ nir_remove_dead_variables(nir_shader *shader) add_var_use_shader(shader, live); - remove_dead_global_vars(shader, live); + remove_dead_vars(&shader->globals, live); nir_foreach_overload(shader, overload) { if (overload->impl) - remove_dead_local_vars(overload->impl, live); + remove_dead_vars(&overload->impl->locals, live); } _mesa_set_destroy(live, NULL); diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c index 73a802be7..5ba016085 100644 --- a/mesalib/src/glsl/nir/nir_search.c +++ b/mesalib/src/glsl/nir/nir_search.c @@ -218,8 +218,8 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, if (matched) return true; - if (nir_op_infos[instr->op].num_inputs == 2 && - (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE)) { + if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[instr->op].num_inputs == 2); if (!match_value(expr->srcs[0], instr, 1, num_components, swizzle, state)) return false; diff --git a/mesalib/src/glsl/nir/nir_split_var_copies.c b/mesalib/src/glsl/nir/nir_split_var_copies.c index 4d663b51b..fc72c078c 100644 --- a/mesalib/src/glsl/nir/nir_split_var_copies.c +++ b/mesalib/src/glsl/nir/nir_split_var_copies.c @@ -188,8 +188,8 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy, * belongs to the copy instruction and b) the deref chains may * have some of the same links due to the way we constructed them */ - nir_deref *src = nir_copy_deref(state->mem_ctx, src_head); - nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head); + nir_deref *src = nir_copy_deref(new_copy, src_head); + nir_deref *dest = nir_copy_deref(new_copy, dest_head); new_copy->variables[0] = nir_deref_as_var(dest); new_copy->variables[1] = nir_deref_as_var(src); diff --git a/mesalib/src/glsl/nir/nir_sweep.c b/mesalib/src/glsl/nir/nir_sweep.c new file mode 100644 index 000000000..d3549756a --- /dev/null +++ b/mesalib/src/glsl/nir/nir_sweep.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" + +/** + * \file nir_sweep.c + * + * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated + * memory - anything still connected to the program will be kept, and any dead memory + * we dropped on the floor will be freed. + * + * The expectation is that drivers should call this when finished compiling the shader + * (after any optimization, lowering, and so on). However, it's also fine to call it + * earlier, and even many times, trading CPU cycles for memory savings. + */ + +#define steal_list(mem_ctx, type, list) \ + foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); } + +static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node); + +static bool +sweep_src_indirect(nir_src *src, void *nir) +{ + if (!src->is_ssa && src->reg.indirect) + ralloc_steal(nir, src->reg.indirect); + + return true; +} + +static bool +sweep_dest_indirect(nir_dest *dest, void *nir) +{ + if (!dest->is_ssa && dest->reg.indirect) + ralloc_steal(nir, dest->reg.indirect); + + return true; +} + +static void +sweep_block(nir_shader *nir, nir_block *block) +{ + ralloc_steal(nir, block); + + nir_foreach_instr(block, instr) { + ralloc_steal(nir, instr); + + nir_foreach_src(instr, sweep_src_indirect, nir); + nir_foreach_dest(instr, sweep_dest_indirect, nir); + } +} + +static void +sweep_if(nir_shader *nir, nir_if *iff) +{ + ralloc_steal(nir, iff); + + foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) { + sweep_cf_node(nir, cf_node); + } + + foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) { + sweep_cf_node(nir, cf_node); + } +} + +static void +sweep_loop(nir_shader *nir, nir_loop *loop) +{ + ralloc_steal(nir, loop); + + foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { + sweep_cf_node(nir, cf_node); + } +} + +static void +sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node) +{ + switch (cf_node->type) { + case nir_cf_node_block: + sweep_block(nir, nir_cf_node_as_block(cf_node)); + break; + case nir_cf_node_if: + sweep_if(nir, nir_cf_node_as_if(cf_node)); + break; + case nir_cf_node_loop: + sweep_loop(nir, nir_cf_node_as_loop(cf_node)); + break; + default: + unreachable("Invalid CF node type"); + } +} + +static void +sweep_impl(nir_shader *nir, nir_function_impl *impl) +{ + ralloc_steal(nir, impl); + + ralloc_steal(nir, impl->params); + ralloc_steal(nir, impl->return_var); + steal_list(nir, nir_variable, &impl->locals); + steal_list(nir, nir_register, &impl->registers); + + foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) { + sweep_cf_node(nir, cf_node); + } + + sweep_block(nir, impl->end_block); + + /* Wipe out all the metadata, if any. */ + nir_metadata_preserve(impl, nir_metadata_none); +} + +static void +sweep_function(nir_shader *nir, nir_function *f) +{ + ralloc_steal(nir, f); + + foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) { + ralloc_steal(nir, overload); + ralloc_steal(nir, overload->params); + if (overload->impl) + sweep_impl(nir, overload->impl); + } +} + +void +nir_sweep(nir_shader *nir) +{ + void *rubbish = ralloc_context(NULL); + + /* First, move ownership of all the memory to a temporary context; assume dead. */ + ralloc_adopt(rubbish, nir); + + /* Variables and registers are not dead. Steal them back. */ + steal_list(nir, nir_variable, &nir->uniforms); + steal_list(nir, nir_variable, &nir->inputs); + steal_list(nir, nir_variable, &nir->outputs); + steal_list(nir, nir_variable, &nir->globals); + steal_list(nir, nir_variable, &nir->system_values); + steal_list(nir, nir_register, &nir->registers); + + /* Recurse into functions, stealing their contents back. */ + foreach_list_typed(nir_function, func, node, &nir->functions) { + sweep_function(nir, func); + } + + /* Free everything we didn't steal back. */ + ralloc_free(rubbish); +} diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c index 47cf45393..53ff54766 100644 --- a/mesalib/src/glsl/nir/nir_to_ssa.c +++ b/mesalib/src/glsl/nir/nir_to_ssa.c @@ -47,7 +47,7 @@ insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx) set_foreach(block->predecessors, entry) { nir_block *pred = (nir_block *) entry->key; - nir_phi_src *src = ralloc(mem_ctx, nir_phi_src); + nir_phi_src *src = ralloc(instr, nir_phi_src); src->pred = pred; src->src.is_ssa = false; src->src.reg.base_offset = 0; diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp index a13c3e12a..f0d0b46d2 100644 --- a/mesalib/src/glsl/nir/nir_types.cpp +++ b/mesalib/src/glsl/nir/nir_types.cpp @@ -143,6 +143,12 @@ glsl_void_type(void) } const glsl_type * +glsl_float_type(void) +{ + return glsl_type::float_type; +} + +const glsl_type * glsl_vec4_type(void) { return glsl_type::vec4_type; diff --git a/mesalib/src/glsl/nir/nir_types.h b/mesalib/src/glsl/nir/nir_types.h index 494051a67..276d4ad62 100644 --- a/mesalib/src/glsl/nir/nir_types.h +++ b/mesalib/src/glsl/nir/nir_types.h @@ -69,6 +69,7 @@ bool glsl_type_is_scalar(const struct glsl_type *type); bool glsl_type_is_matrix(const struct glsl_type *type); const struct glsl_type *glsl_void_type(void); +const struct glsl_type *glsl_float_type(void); const struct glsl_type *glsl_vec4_type(void); const struct glsl_type *glsl_array_type(const struct glsl_type *base, unsigned elements); diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c index f247ae069..a7aa79837 100644 --- a/mesalib/src/glsl/nir/nir_validate.c +++ b/mesalib/src/glsl/nir/nir_validate.c @@ -295,6 +295,8 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state) static void validate_deref_chain(nir_deref *deref, validate_state *state) { + assert(deref->child == NULL || ralloc_parent(deref->child) == deref); + nir_deref *parent = NULL; while (deref != NULL) { switch (deref->deref_type) { @@ -336,9 +338,10 @@ validate_var_use(nir_variable *var, validate_state *state) } static void -validate_deref_var(nir_deref_var *deref, validate_state *state) +validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state) { assert(deref != NULL); + assert(ralloc_parent(deref) == parent_mem_ctx); assert(deref->deref.type == deref->var->type); validate_var_use(deref->var, state); @@ -386,7 +389,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; for (unsigned i = 0; i < num_vars; i++) { - validate_deref_var(instr->variables[i], state); + validate_deref_var(instr, instr->variables[i], state); } switch (instr->intrinsic) { @@ -423,7 +426,7 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state) } if (instr->sampler != NULL) - validate_deref_var(instr->sampler, state); + validate_deref_var(instr, instr->sampler, state); } static void @@ -438,10 +441,10 @@ validate_call_instr(nir_call_instr *instr, validate_state *state) for (unsigned i = 0; i < instr->num_params; i++) { assert(instr->callee->params[i].type == instr->params[i]->deref.type); - validate_deref_var(instr->params[i], state); + validate_deref_var(instr, instr->params[i], state); } - validate_deref_var(instr->return_deref, state); + validate_deref_var(instr, instr->return_deref, state); } static void @@ -680,8 +683,7 @@ validate_cf_node(nir_cf_node *node, validate_state *state) break; default: - assert(!"Invalid ALU instruction type"); - break; + unreachable("Invalid CF node type"); } } diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp index 69c03ea8b..3d2f2ca0b 100644 --- a/mesalib/src/glsl/opt_algebraic.cpp +++ b/mesalib/src/glsl/opt_algebraic.cpp @@ -290,6 +290,20 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL}; unsigned int i; + if (ir->operation == ir_binop_mul && + ir->operands[0]->type->is_matrix() && + ir->operands[1]->type->is_vector()) { + ir_expression *matrix_mul = ir->operands[0]->as_expression(); + + if (matrix_mul && matrix_mul->operation == ir_binop_mul && + matrix_mul->operands[0]->type->is_matrix() && + matrix_mul->operands[1]->type->is_matrix()) { + + return mul(matrix_mul->operands[0], + mul(matrix_mul->operands[1], ir->operands[1])); + } + } + assert(ir->get_num_operands() <= 4); for (i = 0; i < ir->get_num_operands(); i++) { if (ir->operands[i]->type->is_matrix()) @@ -421,6 +435,18 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) break; } + case ir_unop_saturate: + if (op_expr[0] && op_expr[0]->operation == ir_binop_add) { + ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression(); + ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression(); + + if (b2f_0 && b2f_0->operation == ir_unop_b2f && + b2f_1 && b2f_1->operation == ir_unop_b2f) { + return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0])); + } + } + break; + case ir_binop_add: if (is_vec_zero(op_const[0])) return ir->operands[1]; @@ -518,6 +544,10 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if (is_vec_negative_one(op_const[1])) return neg(ir->operands[0]); + if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f && + op_expr[1] && op_expr[1]->operation == ir_unop_b2f) { + return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0])); + } /* Reassociate multiplication of constants so that we can do * constant folding. @@ -544,6 +574,8 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) continue; ir_expression *add_expr = floor_expr->operands[0]->as_expression(); + if (!add_expr) + continue; for (int j = 0; j < 2; j++) { ir_expression *abs_expr = add_expr->operands[j]->as_expression(); diff --git a/mesalib/src/glsl/opt_cse.cpp b/mesalib/src/glsl/opt_cse.cpp index b0b67f496..4b8e9a07b 100644 --- a/mesalib/src/glsl/opt_cse.cpp +++ b/mesalib/src/glsl/opt_cse.cpp @@ -63,6 +63,17 @@ public: var = NULL; } + void init(ir_instruction *base_ir, ir_rvalue **val) + { + this->val = val; + this->base_ir = base_ir; + this->var = NULL; + + assert(val); + assert(*val); + assert(base_ir); + } + /** * The pointer to the expression that we might be able to reuse * @@ -116,6 +127,18 @@ private: ir_rvalue *try_cse(ir_rvalue *rvalue); void add_to_ae(ir_rvalue **rvalue); + /** + * Move all nodes from the ae list to the free list + */ + void empty_ae_list(); + + /** + * Get and initialize a new ae_entry + * + * This will either come from the free list or be freshly allocated. + */ + ae_entry *get_ae_entry(ir_rvalue **rvalue); + /** List of ae_entry: The available expressions to reuse */ exec_list *ae; @@ -126,6 +149,11 @@ private: * right. */ exec_list *validate_instructions; + + /** + * List of available-for-use ae_entry objects. + */ + exec_list free_ae_entries; }; /** @@ -322,6 +350,25 @@ cse_visitor::try_cse(ir_rvalue *rvalue) return NULL; } +void +cse_visitor::empty_ae_list() +{ + free_ae_entries.append_list(ae); +} + +ae_entry * +cse_visitor::get_ae_entry(ir_rvalue **rvalue) +{ + ae_entry *entry = (ae_entry *) free_ae_entries.pop_head(); + if (entry) { + entry->init(base_ir, rvalue); + } else { + entry = new(mem_ctx) ae_entry(base_ir, rvalue); + } + + return entry; +} + /** Add the rvalue to the list of available expressions for CSE. */ void cse_visitor::add_to_ae(ir_rvalue **rvalue) @@ -332,7 +379,7 @@ cse_visitor::add_to_ae(ir_rvalue **rvalue) printf("\n"); } - ae->push_tail(new(mem_ctx) ae_entry(base_ir, rvalue)); + ae->push_tail(get_ae_entry(rvalue)); if (debug) dump_ae(ae); @@ -370,33 +417,33 @@ cse_visitor::visit_enter(ir_if *ir) { handle_rvalue(&ir->condition); - ae->make_empty(); + empty_ae_list(); visit_list_elements(this, &ir->then_instructions); - ae->make_empty(); + empty_ae_list(); visit_list_elements(this, &ir->else_instructions); - ae->make_empty(); + empty_ae_list(); return visit_continue_with_parent; } ir_visitor_status cse_visitor::visit_enter(ir_function_signature *ir) { - ae->make_empty(); + empty_ae_list(); visit_list_elements(this, &ir->body); - ae->make_empty(); + empty_ae_list(); return visit_continue_with_parent; } ir_visitor_status cse_visitor::visit_enter(ir_loop *ir) { - ae->make_empty(); + empty_ae_list(); visit_list_elements(this, &ir->body_instructions); - ae->make_empty(); + empty_ae_list(); return visit_continue_with_parent; } diff --git a/mesalib/src/glsl/s_expression.cpp b/mesalib/src/glsl/s_expression.cpp index 7eaa491e2..f82e155a6 100644 --- a/mesalib/src/glsl/s_expression.cpp +++ b/mesalib/src/glsl/s_expression.cpp @@ -23,8 +23,8 @@ */ #include <assert.h> -#include <limits> #include <stdio.h> +#include <math.h> #include "s_expression.h" s_symbol::s_symbol(const char *str, size_t n) @@ -70,7 +70,7 @@ read_atom(void *ctx, const char *&src, char *&symbol_buffer) // requires strtof to parse '+INF' as +Infinity, but we still support some // non-C99-compliant compilers (e.g. MSVC). if (n == 4 && strncmp(src, "+INF", 4) == 0) { - expr = new(ctx) s_float(std::numeric_limits<float>::infinity()); + expr = new(ctx) s_float(INFINITY); } else { // Check if the atom is a number. char *float_end = NULL; diff --git a/mesalib/src/glsl/shader_enums.h b/mesalib/src/glsl/shader_enums.h new file mode 100644 index 000000000..79e0f6b5f --- /dev/null +++ b/mesalib/src/glsl/shader_enums.h @@ -0,0 +1,187 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SHADER_ENUMS_H +#define SHADER_ENUMS_H + +/** + * Shader stages. Note that these will become 5 with tessellation. + * + * The order must match how shaders are ordered in the pipeline. + * The GLSL linker assumes that if i<j, then the j-th shader is + * executed later than the i-th shader. + */ +typedef enum +{ + MESA_SHADER_VERTEX = 0, + MESA_SHADER_GEOMETRY = 1, + MESA_SHADER_FRAGMENT = 2, + MESA_SHADER_COMPUTE = 3, +} gl_shader_stage; + +#define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1) + +/** + * Bitflags for system values. + */ +#define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID) +#define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS) +#define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN) +/** + * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be + * one of these values. If a NIR variable's mode is nir_var_system_value, it + * will be one of these values. + */ +typedef enum +{ + /** + * \name Vertex shader system values + */ + /*@{*/ + /** + * OpenGL-style vertex ID. + * + * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the + * OpenGL 3.3 core profile spec says: + * + * "gl_VertexID holds the integer index i implicitly passed by + * DrawArrays or one of the other drawing commands defined in section + * 2.8.3." + * + * Section 2.8.3 (Drawing Commands) of the same spec says: + * + * "The commands....are equivalent to the commands with the same base + * name (without the BaseVertex suffix), except that the ith element + * transferred by the corresponding draw call will be taken from + * element indices[i] + basevertex of each enabled array." + * + * Additionally, the overview in the GL_ARB_shader_draw_parameters spec + * says: + * + * "In unextended GL, vertex shaders have inputs named gl_VertexID and + * gl_InstanceID, which contain, respectively the index of the vertex + * and instance. The value of gl_VertexID is the implicitly passed + * index of the vertex being processed, which includes the value of + * baseVertex, for those commands that accept it." + * + * gl_VertexID gets basevertex added in. This differs from DirectX where + * SV_VertexID does \b not get basevertex added in. + * + * \note + * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be + * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus + * \c SYSTEM_VALUE_BASE_VERTEX. + * + * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX + */ + SYSTEM_VALUE_VERTEX_ID, + + /** + * Instanced ID as supplied to gl_InstanceID + * + * Values assigned to gl_InstanceID always begin with zero, regardless of + * the value of baseinstance. + * + * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec + * says: + * + * "gl_InstanceID holds the integer instance number of the current + * primitive in an instanced draw call (see section 10.5)." + * + * Through a big chain of pseudocode, section 10.5 describes that + * baseinstance is not counted by gl_InstanceID. In that section, notice + * + * "If an enabled vertex attribute array is instanced (it has a + * non-zero divisor as specified by VertexAttribDivisor), the element + * index that is transferred to the GL, for all vertices, is given by + * + * floor(instance/divisor) + baseinstance + * + * If an array corresponding to an attribute required by a vertex + * shader is not enabled, then the corresponding element is taken from + * the current attribute state (see section 10.2)." + * + * Note that baseinstance is \b not included in the value of instance. + */ + SYSTEM_VALUE_INSTANCE_ID, + + /** + * DirectX-style vertex ID. + * + * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include + * the value of basevertex. + * + * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX + */ + SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, + + /** + * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar + * functions. + * + * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE + */ + SYSTEM_VALUE_BASE_VERTEX, + /*@}*/ + + /** + * \name Geometry shader system values + */ + /*@{*/ + SYSTEM_VALUE_INVOCATION_ID, + /*@}*/ + + /** + * \name Fragment shader system values + */ + /*@{*/ + SYSTEM_VALUE_FRONT_FACE, /**< (not done yet) */ + SYSTEM_VALUE_SAMPLE_ID, + SYSTEM_VALUE_SAMPLE_POS, + SYSTEM_VALUE_SAMPLE_MASK_IN, + /*@}*/ + + SYSTEM_VALUE_MAX /**< Number of values */ +} gl_system_value; + + +/** + * The possible interpolation qualifiers that can be applied to a fragment + * shader input in GLSL. + * + * Note: INTERP_QUALIFIER_NONE must be 0 so that memsetting the + * gl_fragment_program data structure to 0 causes the default behavior. + */ +enum glsl_interp_qualifier +{ + INTERP_QUALIFIER_NONE = 0, + INTERP_QUALIFIER_SMOOTH, + INTERP_QUALIFIER_FLAT, + INTERP_QUALIFIER_NOPERSPECTIVE, + INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */ +}; + + +#endif /* SHADER_ENUMS_H */ |