aboutsummaryrefslogtreecommitdiff
path: root/mesalib/src/glsl
diff options
context:
space:
mode:
Diffstat (limited to 'mesalib/src/glsl')
-rw-r--r--mesalib/src/glsl/Makefile.am11
-rw-r--r--mesalib/src/glsl/Makefile.sources10
-rw-r--r--mesalib/src/glsl/ast_function.cpp112
-rw-r--r--mesalib/src/glsl/ast_to_hir.cpp68
-rw-r--r--mesalib/src/glsl/builtin_functions.cpp4
-rw-r--r--mesalib/src/glsl/glsl_lexer.ll20
-rw-r--r--mesalib/src/glsl/glsl_parser_extras.cpp11
-rw-r--r--mesalib/src/glsl/glsl_parser_extras.h5
-rw-r--r--mesalib/src/glsl/glsl_types.cpp76
-rw-r--r--mesalib/src/glsl/glsl_types.h6
-rw-r--r--mesalib/src/glsl/ir.cpp14
-rw-r--r--mesalib/src/glsl/ir.h102
-rw-r--r--mesalib/src/glsl/ir_constant_expression.cpp2
-rw-r--r--mesalib/src/glsl/ir_equals.cpp20
-rw-r--r--mesalib/src/glsl/ir_validate.cpp8
-rw-r--r--mesalib/src/glsl/link_uniforms.cpp4
-rw-r--r--mesalib/src/glsl/link_varyings.cpp13
-rw-r--r--mesalib/src/glsl/linker.cpp236
-rw-r--r--mesalib/src/glsl/loop_controls.cpp2
-rw-r--r--mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp4
-rw-r--r--mesalib/src/glsl/nir/glsl_to_nir.cpp92
-rw-r--r--mesalib/src/glsl/nir/nir.c64
-rw-r--r--mesalib/src/glsl/nir/nir.h51
-rw-r--r--mesalib/src/glsl/nir/nir_algebraic.py17
-rw-r--r--mesalib/src/glsl/nir/nir_builder.h135
-rw-r--r--mesalib/src/glsl/nir/nir_from_ssa.c9
-rw-r--r--mesalib/src/glsl/nir/nir_lower_idiv.c155
-rw-r--r--mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c2
-rw-r--r--mesalib/src/glsl/nir/nir_lower_samplers.cpp45
-rw-r--r--mesalib/src/glsl/nir/nir_lower_tex_projector.c143
-rw-r--r--mesalib/src/glsl/nir/nir_lower_var_copies.c8
-rw-r--r--mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c2
-rw-r--r--mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c110
-rw-r--r--mesalib/src/glsl/nir/nir_opcodes.py8
-rw-r--r--mesalib/src/glsl/nir/nir_opt_algebraic.py63
-rw-r--r--mesalib/src/glsl/nir/nir_opt_cse.c32
-rw-r--r--mesalib/src/glsl/nir/nir_opt_peephole_ffma.c261
-rw-r--r--mesalib/src/glsl/nir/nir_opt_peephole_select.c4
-rw-r--r--mesalib/src/glsl/nir/nir_print.c36
-rw-r--r--mesalib/src/glsl/nir/nir_remove_dead_variables.c22
-rw-r--r--mesalib/src/glsl/nir/nir_search.c4
-rw-r--r--mesalib/src/glsl/nir/nir_split_var_copies.c4
-rw-r--r--mesalib/src/glsl/nir/nir_sweep.c172
-rw-r--r--mesalib/src/glsl/nir/nir_to_ssa.c2
-rw-r--r--mesalib/src/glsl/nir/nir_types.cpp6
-rw-r--r--mesalib/src/glsl/nir/nir_types.h1
-rw-r--r--mesalib/src/glsl/nir/nir_validate.c16
-rw-r--r--mesalib/src/glsl/opt_algebraic.cpp32
-rw-r--r--mesalib/src/glsl/opt_cse.cpp63
-rw-r--r--mesalib/src/glsl/s_expression.cpp4
-rw-r--r--mesalib/src/glsl/shader_enums.h187
51 files changed, 2033 insertions, 445 deletions
diff --git a/mesalib/src/glsl/Makefile.am b/mesalib/src/glsl/Makefile.am
index b466a3b5c..23c6fe8bb 100644
--- a/mesalib/src/glsl/Makefile.am
+++ b/mesalib/src/glsl/Makefile.am
@@ -46,6 +46,7 @@ EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \
glcpp/glcpp-lex.l \
glcpp/glcpp-parse.y \
nir/nir_algebraic.py \
+ nir/nir_builder_opcodes_h.py \
nir/nir_constant_expressions.py \
nir/nir_opcodes.py \
nir/nir_opcodes_c.py \
@@ -67,7 +68,7 @@ TESTS_ENVIRONMENT= \
export PYTHON2=$(PYTHON2); \
export PYTHON_FLAGS=$(PYTHON_FLAGS);
-noinst_LTLIBRARIES = libglsl.la libglcpp.la
+noinst_LTLIBRARIES = libnir.la libglsl.la libglcpp.la
check_PROGRAMS = \
glcpp/glcpp \
glsl_test \
@@ -147,6 +148,12 @@ libglsl_la_SOURCES = \
$(LIBGLSL_FILES) \
$(NIR_FILES)
+libnir_la_SOURCES = \
+ glsl_types.cpp \
+ builtin_types.cpp \
+ glsl_symbol_table.cpp \
+ $(NIR_FILES)
+
glsl_compiler_SOURCES = \
$(GLSL_COMPILER_CXX_FILES)
@@ -251,8 +258,6 @@ nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
$(MKDIR_P) nir; \
$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
-nir/nir.h: nir/nir_opcodes.h
-
nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
$(MKDIR_P) nir; \
$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources
index b876642e8..c471eca23 100644
--- a/mesalib/src/glsl/Makefile.sources
+++ b/mesalib/src/glsl/Makefile.sources
@@ -22,6 +22,7 @@ NIR_FILES = \
nir/glsl_to_nir.h \
nir/nir.c \
nir/nir.h \
+ nir/nir_builder.h \
nir/nir_constant_expressions.h \
nir/nir_dominance.c \
nir/nir_from_ssa.c \
@@ -32,21 +33,25 @@ NIR_FILES = \
nir/nir_lower_atomics.c \
nir/nir_lower_global_vars_to_local.c \
nir/nir_lower_locals_to_regs.c \
+ nir/nir_lower_idiv.c \
nir/nir_lower_io.c \
nir/nir_lower_phis_to_scalar.c \
nir/nir_lower_samplers.cpp \
nir/nir_lower_system_values.c \
+ nir/nir_lower_tex_projector.c \
nir/nir_lower_to_source_mods.c \
nir/nir_lower_vars_to_ssa.c \
nir/nir_lower_var_copies.c \
nir/nir_lower_vec_to_movs.c \
nir/nir_metadata.c \
+ nir/nir_normalize_cubemap_coords.c \
nir/nir_opt_constant_folding.c \
nir/nir_opt_copy_propagate.c \
nir/nir_opt_cse.c \
nir/nir_opt_dce.c \
nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
+ nir/nir_opt_peephole_ffma.c \
nir/nir_opt_peephole_select.c \
nir/nir_opt_remove_phis.c \
nir/nir_print.c \
@@ -54,9 +59,11 @@ NIR_FILES = \
nir/nir_search.c \
nir/nir_search.h \
nir/nir_split_var_copies.c \
+ nir/nir_sweep.c \
nir/nir_to_ssa.c \
nir/nir_types.h \
nir/nir_validate.c \
+ nir/nir_vla.h \
nir/nir_worklist.c \
nir/nir_worklist.h \
nir/nir_types.cpp \
@@ -183,7 +190,8 @@ LIBGLSL_FILES = \
opt_vectorize.cpp \
program.h \
s_expression.cpp \
- s_expression.h
+ s_expression.h \
+ shader_enums.h
# glsl_compiler
diff --git a/mesalib/src/glsl/ast_function.cpp b/mesalib/src/glsl/ast_function.cpp
index 918be6966..87df93e68 100644
--- a/mesalib/src/glsl/ast_function.cpp
+++ b/mesalib/src/glsl/ast_function.cpp
@@ -1370,71 +1370,59 @@ emit_inline_matrix_constructor(const glsl_type *type,
} else {
const unsigned cols = type->matrix_columns;
const unsigned rows = type->vector_elements;
+ unsigned remaining_slots = rows * cols;
unsigned col_idx = 0;
unsigned row_idx = 0;
foreach_in_list(ir_rvalue, rhs, parameters) {
- const unsigned components_remaining_this_column = rows - row_idx;
- unsigned rhs_components = rhs->type->components();
- unsigned rhs_base = 0;
-
- /* Since the parameter might be used in the RHS of two assignments,
- * generate a temporary and copy the paramter there.
- */
- ir_variable *rhs_var =
- new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary);
- instructions->push_tail(rhs_var);
-
- ir_dereference *rhs_var_ref =
- new(ctx) ir_dereference_variable(rhs_var);
- ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL);
- instructions->push_tail(inst);
-
- /* Assign the current parameter to as many components of the matrix
- * as it will fill.
- *
- * NOTE: A single vector parameter can span two matrix columns. A
- * single vec4, for example, can completely fill a mat2.
- */
- if (rhs_components >= components_remaining_this_column) {
- const unsigned count = MIN2(rhs_components,
- components_remaining_this_column);
-
- rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var);
-
- ir_instruction *inst = assign_to_matrix_column(var, col_idx,
- row_idx,
- rhs_var_ref, 0,
- count, ctx);
- instructions->push_tail(inst);
-
- rhs_base = count;
-
- col_idx++;
- row_idx = 0;
- }
-
- /* If there is data left in the parameter and components left to be
- * set in the destination, emit another assignment. It is possible
- * that the assignment could be of a vec4 to the last element of the
- * matrix. In this case col_idx==cols, but there is still data
- * left in the source parameter. Obviously, don't emit an assignment
- * to data outside the destination matrix.
- */
- if ((col_idx < cols) && (rhs_base < rhs_components)) {
- const unsigned count = rhs_components - rhs_base;
-
- rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var);
-
- ir_instruction *inst = assign_to_matrix_column(var, col_idx,
- row_idx,
- rhs_var_ref,
- rhs_base,
- count, ctx);
- instructions->push_tail(inst);
-
- row_idx += count;
- }
+ unsigned rhs_components = rhs->type->components();
+ unsigned rhs_base = 0;
+
+ if (remaining_slots == 0)
+ break;
+
+ /* Since the parameter might be used in the RHS of two assignments,
+ * generate a temporary and copy the parameter there.
+ */
+ ir_variable *rhs_var =
+ new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary);
+ instructions->push_tail(rhs_var);
+
+ ir_dereference *rhs_var_ref =
+ new(ctx) ir_dereference_variable(rhs_var);
+ ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL);
+ instructions->push_tail(inst);
+
+ do {
+ /* Assign the current parameter to as many components of the matrix
+ * as it will fill.
+ *
+ * NOTE: A single vector parameter can span two matrix columns. A
+ * single vec4, for example, can completely fill a mat2.
+ */
+ unsigned count = MIN2(rows - row_idx,
+ rhs_components - rhs_base);
+
+ rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var);
+ ir_instruction *inst = assign_to_matrix_column(var, col_idx,
+ row_idx,
+ rhs_var_ref,
+ rhs_base,
+ count, ctx);
+ instructions->push_tail(inst);
+ rhs_base += count;
+ row_idx += count;
+ remaining_slots -= count;
+
+ /* Sometimes there is still data left in the parameter and
+ * components left to be set in the destination, but in another
+ * column.
+ */
+ if (row_idx >= rows) {
+ row_idx = 0;
+ col_idx++;
+ }
+ } while(remaining_slots > 0 && rhs_base < rhs_components);
}
}
@@ -1791,7 +1779,7 @@ ast_function_expression::hir(exec_list *instructions,
return value;
}
- return ir_rvalue::error_value(ctx);
+ unreachable("not reached");
}
ir_rvalue *
diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp
index d387b2e35..78369360f 100644
--- a/mesalib/src/glsl/ast_to_hir.cpp
+++ b/mesalib/src/glsl/ast_to_hir.cpp
@@ -375,66 +375,14 @@ arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
if (type_a == type_b)
return type_a;
} else {
- if (type_a->is_matrix() && type_b->is_matrix()) {
- /* Matrix multiply. The columns of A must match the rows of B. Given
- * the other previously tested constraints, this means the vector type
- * of a row from A must be the same as the vector type of a column from
- * B.
- */
- if (type_a->row_type() == type_b->column_type()) {
- /* The resulting matrix has the number of columns of matrix B and
- * the number of rows of matrix A. We get the row count of A by
- * looking at the size of a vector that makes up a column. The
- * transpose (size of a row) is done for B.
- */
- const glsl_type *const type =
- glsl_type::get_instance(type_a->base_type,
- type_a->column_type()->vector_elements,
- type_b->row_type()->vector_elements);
- assert(type != glsl_type::error_type);
-
- return type;
- }
- } else if (type_a->is_matrix()) {
- /* A is a matrix and B is a column vector. Columns of A must match
- * rows of B. Given the other previously tested constraints, this
- * means the vector type of a row from A must be the same as the
- * vector the type of B.
- */
- if (type_a->row_type() == type_b) {
- /* The resulting vector has a number of elements equal to
- * the number of rows of matrix A. */
- const glsl_type *const type =
- glsl_type::get_instance(type_a->base_type,
- type_a->column_type()->vector_elements,
- 1);
- assert(type != glsl_type::error_type);
-
- return type;
- }
- } else {
- assert(type_b->is_matrix());
+ const glsl_type *type = glsl_type::get_mul_type(type_a, type_b);
- /* A is a row vector and B is a matrix. Columns of A must match rows
- * of B. Given the other previously tested constraints, this means
- * the type of A must be the same as the vector type of a column from
- * B.
- */
- if (type_a == type_b->column_type()) {
- /* The resulting vector has a number of elements equal to
- * the number of columns of matrix B. */
- const glsl_type *const type =
- glsl_type::get_instance(type_a->base_type,
- type_b->row_type()->vector_elements,
- 1);
- assert(type != glsl_type::error_type);
-
- return type;
- }
+ if (type == glsl_type::error_type) {
+ _mesa_glsl_error(loc, state,
+ "size mismatch for matrix multiplication");
}
- _mesa_glsl_error(loc, state, "size mismatch for matrix multiplication");
- return glsl_type::error_type;
+ return type;
}
@@ -5776,6 +5724,9 @@ ast_interface_block::hir(exec_list *instructions,
var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
+ if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
+ var->data.read_only = true;
+
if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in)
handle_geometry_shader_input_decl(state, loc, var);
@@ -5816,6 +5767,9 @@ ast_interface_block::hir(exec_list *instructions,
var->data.sample = fields[i].sample;
var->init_interface_type(block_type);
+ if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
+ var->data.read_only = true;
+
if (fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED) {
var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
diff --git a/mesalib/src/glsl/builtin_functions.cpp b/mesalib/src/glsl/builtin_functions.cpp
index c6075722c..524b8d6e8 100644
--- a/mesalib/src/glsl/builtin_functions.cpp
+++ b/mesalib/src/glsl/builtin_functions.cpp
@@ -60,7 +60,7 @@
#include "ir_builder.h"
#include "glsl_parser_extras.h"
#include "program/prog_instruction.h"
-#include <limits>
+#include <math.h>
#define M_PIf ((float) M_PI)
#define M_PI_2f ((float) M_PI_2)
@@ -3215,7 +3215,7 @@ builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type
ir_constant_data infinities;
for (int i = 0; i < type->vector_elements; i++) {
- infinities.f[i] = std::numeric_limits<float>::infinity();
+ infinities.f[i] = INFINITY;
}
body.emit(ret(equal(abs(x), imm(type, infinities))));
diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll
index 8dc3d106b..2785ed168 100644
--- a/mesalib/src/glsl/glsl_lexer.ll
+++ b/mesalib/src/glsl/glsl_lexer.ll
@@ -36,14 +36,13 @@ static int classify_identifier(struct _mesa_glsl_parse_state *, const char *);
#define YY_USER_ACTION \
do { \
- yylloc->source = 0; \
yylloc->first_column = yycolumn + 1; \
yylloc->first_line = yylloc->last_line = yylineno + 1; \
yycolumn += yyleng; \
yylloc->last_column = yycolumn + 1; \
} while(0);
-#define YY_USER_INIT yylineno = 0; yycolumn = 0;
+#define YY_USER_INIT yylineno = 0; yycolumn = 0; yylloc->source = 0;
/* A macro for handling reserved words and keywords across language versions.
*
@@ -188,6 +187,15 @@ HASH ^{SPC}#{SPC}
* one-based.
*/
yylineno = strtol(ptr, &ptr, 0) - 1;
+
+ /* From GLSL 3.30 and GLSL ES on, after processing the
+ * line directive (including its new-line), the implementation
+ * will behave as if it is compiling at the line number passed
+ * as argument. It was line number + 1 in older specifications.
+ */
+ if (yyextra->is_version(330, 100))
+ yylineno--;
+
yylloc->source = strtol(ptr, NULL, 0);
}
{HASH}line{SPCP}{INT}{SPC}$ {
@@ -203,6 +211,14 @@ HASH ^{SPC}#{SPC}
* one-based.
*/
yylineno = strtol(ptr, &ptr, 0) - 1;
+
+ /* From GLSL 3.30 and GLSL ES on, after processing the
+ * line directive (including its new-line), the implementation
+ * will behave as if it is compiling at the line number passed
+ * as argument. It was line number + 1 in older specifications.
+ */
+ if (yyextra->is_version(330, 100))
+ yylineno--;
}
^{SPC}#{SPC}pragma{SPCP}debug{SPC}\({SPC}on{SPC}\) {
BEGIN PP;
diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp
index 79624bc26..0aa3c54fc 100644
--- a/mesalib/src/glsl/glsl_parser_extras.cpp
+++ b/mesalib/src/glsl/glsl_parser_extras.cpp
@@ -73,8 +73,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
this->uses_builtin_functions = false;
/* Set default language version and extensions */
- this->language_version = ctx->Const.ForceGLSLVersion ?
- ctx->Const.ForceGLSLVersion : 110;
+ this->language_version = 110;
+ this->forced_language_version = ctx->Const.ForceGLSLVersion;
this->es_shader = false;
this->ARB_texture_rectangle_enable = true;
@@ -320,11 +320,14 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version,
this->ARB_texture_rectangle_enable = false;
}
- this->language_version = version;
+ if (this->forced_language_version)
+ this->language_version = this->forced_language_version;
+ else
+ this->language_version = version;
bool supported = false;
for (unsigned i = 0; i < this->num_supported_versions; i++) {
- if (this->supported_versions[i].ver == (unsigned) version
+ if (this->supported_versions[i].ver == this->language_version
&& this->supported_versions[i].es == this->es_shader) {
supported = true;
break;
diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h
index 0975c86ed..dae7864fd 100644
--- a/mesalib/src/glsl/glsl_parser_extras.h
+++ b/mesalib/src/glsl/glsl_parser_extras.h
@@ -105,8 +105,10 @@ struct _mesa_glsl_parse_state {
{
unsigned required_version = this->es_shader ?
required_glsl_es_version : required_glsl_version;
+ unsigned this_version = this->forced_language_version
+ ? this->forced_language_version : this->language_version;
return required_version != 0
- && this->language_version >= required_version;
+ && this_version >= required_version;
}
bool check_version(unsigned required_glsl_version,
@@ -226,6 +228,7 @@ struct _mesa_glsl_parse_state {
bool es_shader;
unsigned language_version;
+ unsigned forced_language_version;
gl_shader_stage stage;
/**
diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp
index 38b37a6a9..4aa36a794 100644
--- a/mesalib/src/glsl/glsl_types.cpp
+++ b/mesalib/src/glsl/glsl_types.cpp
@@ -825,6 +825,73 @@ glsl_type::get_interface_instance(const glsl_struct_field *fields,
const glsl_type *
+glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b)
+{
+ if (type_a == type_b) {
+ return type_a;
+ } else if (type_a->is_matrix() && type_b->is_matrix()) {
+ /* Matrix multiply. The columns of A must match the rows of B. Given
+ * the other previously tested constraints, this means the vector type
+ * of a row from A must be the same as the vector type of a column from
+ * B.
+ */
+ if (type_a->row_type() == type_b->column_type()) {
+ /* The resulting matrix has the number of columns of matrix B and
+ * the number of rows of matrix A. We get the row count of A by
+ * looking at the size of a vector that makes up a column. The
+ * transpose (size of a row) is done for B.
+ */
+ const glsl_type *const type =
+ get_instance(type_a->base_type,
+ type_a->column_type()->vector_elements,
+ type_b->row_type()->vector_elements);
+ assert(type != error_type);
+
+ return type;
+ }
+ } else if (type_a->is_matrix()) {
+ /* A is a matrix and B is a column vector. Columns of A must match
+ * rows of B. Given the other previously tested constraints, this
+ * means the vector type of a row from A must be the same as the
+ * vector type of B.
+ */
+ if (type_a->row_type() == type_b) {
+ /* The resulting vector has a number of elements equal to
+ * the number of rows of matrix A. */
+ const glsl_type *const type =
+ get_instance(type_a->base_type,
+ type_a->column_type()->vector_elements,
+ 1);
+ assert(type != error_type);
+
+ return type;
+ }
+ } else {
+ assert(type_b->is_matrix());
+
+ /* A is a row vector and B is a matrix. Columns of A must match rows
+ * of B. Given the other previously tested constraints, this means
+ * the type of A must be the same as the vector type of a column from
+ * B.
+ */
+ if (type_a == type_b->column_type()) {
+ /* The resulting vector has a number of elements equal to
+ * the number of columns of matrix B. */
+ const glsl_type *const type =
+ get_instance(type_a->base_type,
+ type_b->row_type()->vector_elements,
+ 1);
+ assert(type != error_type);
+
+ return type;
+ }
+ }
+
+ return error_type;
+}
+
+
+const glsl_type *
glsl_type::field_type(const char *name) const
{
if (this->base_type != GLSL_TYPE_STRUCT
@@ -1077,15 +1144,6 @@ glsl_type::std140_base_alignment(bool row_major) const
return base_alignment;
}
- /* A sampler may never occur in a UBO (without bindless of some sort),
- * however it is convenient to use this alignment function even with
- * regular uniforms. This allows use of this function on uniform structs
- * that contain samplers.
- */
- if (this->is_sampler()) {
- return 0;
- }
-
assert(!"not reached");
return -1;
}
diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h
index 7359e9476..d383dd5be 100644
--- a/mesalib/src/glsl/glsl_types.h
+++ b/mesalib/src/glsl/glsl_types.h
@@ -276,6 +276,12 @@ struct glsl_type {
const char *block_name);
/**
+ * Get the type resulting from a multiplication of \p type_a * \p type_b
+ */
+ static const glsl_type *get_mul_type(const glsl_type *type_a,
+ const glsl_type *type_b);
+
+ /**
* Query the total number of scalars that make up a scalar, vector or matrix
*/
unsigned components() const
diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp
index 54656f899..9e3238552 100644
--- a/mesalib/src/glsl/ir.cpp
+++ b/mesalib/src/glsl/ir.cpp
@@ -240,8 +240,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_round_even:
case ir_unop_sin:
case ir_unop_cos:
- case ir_unop_sin_reduced:
- case ir_unop_cos_reduced:
case ir_unop_dFdx:
case ir_unop_dFdx_coarse:
case ir_unop_dFdx_fine:
@@ -380,10 +378,12 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)
} else if (op1->type->is_scalar()) {
this->type = op0->type;
} else {
- /* FINISHME: matrix types */
- assert(!op0->type->is_matrix() && !op1->type->is_matrix());
- assert(op0->type == op1->type);
- this->type = op0->type;
+ if (this->operation == ir_binop_mul) {
+ this->type = glsl_type::get_mul_type(op0->type, op1->type);
+ } else {
+ assert(op0->type == op1->type);
+ this->type = op0->type;
+ }
}
break;
@@ -540,8 +540,6 @@ static const char *const operator_strs[] = {
"round_even",
"sin",
"cos",
- "sin_reduced",
- "cos_reduced",
"dFdx",
"dFdxCoarse",
"dFdxFine",
diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h
index fdc22edf1..fab1cd2d2 100644
--- a/mesalib/src/glsl/ir.h
+++ b/mesalib/src/glsl/ir.h
@@ -109,6 +109,31 @@ public:
virtual ir_instruction *clone(void *mem_ctx,
struct hash_table *ht) const = 0;
+ bool is_rvalue() const
+ {
+ return ir_type == ir_type_dereference_array ||
+ ir_type == ir_type_dereference_record ||
+ ir_type == ir_type_dereference_variable ||
+ ir_type == ir_type_constant ||
+ ir_type == ir_type_expression ||
+ ir_type == ir_type_swizzle ||
+ ir_type == ir_type_texture;
+ }
+
+ bool is_dereference() const
+ {
+ return ir_type == ir_type_dereference_array ||
+ ir_type == ir_type_dereference_record ||
+ ir_type == ir_type_dereference_variable;
+ }
+
+ bool is_jump() const
+ {
+ return ir_type == ir_type_loop_jump ||
+ ir_type == ir_type_return ||
+ ir_type == ir_type_discard;
+ }
+
/**
* \name IR instruction downcast functions
*
@@ -117,45 +142,33 @@ public:
* Additional downcast functions will be added as needed.
*/
/*@{*/
- class ir_rvalue *as_rvalue()
- {
- assume(this != NULL);
- if (ir_type == ir_type_dereference_array ||
- ir_type == ir_type_dereference_record ||
- ir_type == ir_type_dereference_variable ||
- ir_type == ir_type_constant ||
- ir_type == ir_type_expression ||
- ir_type == ir_type_swizzle ||
- ir_type == ir_type_texture)
- return (class ir_rvalue *) this;
- return NULL;
- }
-
- class ir_dereference *as_dereference()
- {
- assume(this != NULL);
- if (ir_type == ir_type_dereference_array ||
- ir_type == ir_type_dereference_record ||
- ir_type == ir_type_dereference_variable)
- return (class ir_dereference *) this;
- return NULL;
- }
-
- class ir_jump *as_jump()
- {
- assume(this != NULL);
- if (ir_type == ir_type_loop_jump ||
- ir_type == ir_type_return ||
- ir_type == ir_type_discard)
- return (class ir_jump *) this;
- return NULL;
- }
+ #define AS_BASE(TYPE) \
+ class ir_##TYPE *as_##TYPE() \
+ { \
+ assume(this != NULL); \
+ return is_##TYPE() ? (ir_##TYPE *) this : NULL; \
+ } \
+ const class ir_##TYPE *as_##TYPE() const \
+ { \
+ assume(this != NULL); \
+ return is_##TYPE() ? (ir_##TYPE *) this : NULL; \
+ }
+
+ AS_BASE(rvalue)
+ AS_BASE(dereference)
+ AS_BASE(jump)
+ #undef AS_BASE
#define AS_CHILD(TYPE) \
class ir_##TYPE * as_##TYPE() \
{ \
assume(this != NULL); \
return ir_type == ir_type_##TYPE ? (ir_##TYPE *) this : NULL; \
+ } \
+ const class ir_##TYPE * as_##TYPE() const \
+ { \
+ assume(this != NULL); \
+ return ir_type == ir_type_##TYPE ? (const ir_##TYPE *) this : NULL; \
}
AS_CHILD(variable)
AS_CHILD(function)
@@ -183,7 +196,8 @@ public:
* in particular. No support for other instruction types (assignments,
* jumps, calls, etc.) is planned.
*/
- virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
protected:
ir_instruction(enum ir_node_type t)
@@ -1300,8 +1314,6 @@ enum ir_expression_operation {
/*@{*/
ir_unop_sin,
ir_unop_cos,
- ir_unop_sin_reduced, /**< Reduced range sin. [-pi, pi] */
- ir_unop_cos_reduced, /**< Reduced range cos. [-pi, pi] */
/*@}*/
/**
@@ -1598,7 +1610,8 @@ public:
*/
ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, ir_rvalue *op2);
- virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
virtual ir_expression *clone(void *mem_ctx, struct hash_table *ht) const;
@@ -1909,7 +1922,8 @@ public:
virtual ir_visitor_status accept(ir_hierarchical_visitor *);
- virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
/**
* Return a string representing the ir_texture_opcode.
@@ -2010,7 +2024,8 @@ public:
virtual ir_visitor_status accept(ir_hierarchical_visitor *);
- virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
bool is_lvalue() const
{
@@ -2063,7 +2078,8 @@ public:
virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
- virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
/**
* Get the variable that is ultimately referenced by an r-value
@@ -2109,7 +2125,8 @@ public:
virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
- virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
/**
* Get the variable that is ultimately referenced by an r-value
@@ -2219,7 +2236,8 @@ public:
virtual ir_visitor_status accept(ir_hierarchical_visitor *);
- virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+ virtual bool equals(const ir_instruction *ir,
+ enum ir_node_type ignore = ir_type_unset) const;
/**
* Get a particular component of a constant as a specific type
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index ecebc3cdc..171b8e954 100644
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -781,7 +781,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
break;
case ir_unop_sin:
- case ir_unop_sin_reduced:
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
for (unsigned c = 0; c < op[0]->type->components(); c++) {
data.f[c] = sinf(op[0]->value.f[c]);
@@ -789,7 +788,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
break;
case ir_unop_cos:
- case ir_unop_cos_reduced:
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
for (unsigned c = 0; c < op[0]->type->components(); c++) {
data.f[c] = cosf(op[0]->value.f[c]);
diff --git a/mesalib/src/glsl/ir_equals.cpp b/mesalib/src/glsl/ir_equals.cpp
index 65376cd94..cc1964eef 100644
--- a/mesalib/src/glsl/ir_equals.cpp
+++ b/mesalib/src/glsl/ir_equals.cpp
@@ -28,7 +28,8 @@
* can't access a's vtable in that case.
*/
static bool
-possibly_null_equals(ir_instruction *a, ir_instruction *b, enum ir_node_type ignore)
+possibly_null_equals(const ir_instruction *a, const ir_instruction *b,
+ enum ir_node_type ignore)
{
if (!a || !b)
return !a && !b;
@@ -41,13 +42,13 @@ possibly_null_equals(ir_instruction *a, ir_instruction *b, enum ir_node_type ign
* about.
*/
bool
-ir_instruction::equals(ir_instruction *, enum ir_node_type)
+ir_instruction::equals(const ir_instruction *, enum ir_node_type) const
{
return false;
}
bool
-ir_constant::equals(ir_instruction *ir, enum ir_node_type)
+ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const
{
const ir_constant *other = ir->as_constant();
if (!other)
@@ -65,7 +66,8 @@ ir_constant::equals(ir_instruction *ir, enum ir_node_type)
}
bool
-ir_dereference_variable::equals(ir_instruction *ir, enum ir_node_type)
+ir_dereference_variable::equals(const ir_instruction *ir,
+ enum ir_node_type) const
{
const ir_dereference_variable *other = ir->as_dereference_variable();
if (!other)
@@ -75,7 +77,8 @@ ir_dereference_variable::equals(ir_instruction *ir, enum ir_node_type)
}
bool
-ir_dereference_array::equals(ir_instruction *ir, enum ir_node_type ignore)
+ir_dereference_array::equals(const ir_instruction *ir,
+ enum ir_node_type ignore) const
{
const ir_dereference_array *other = ir->as_dereference_array();
if (!other)
@@ -94,7 +97,8 @@ ir_dereference_array::equals(ir_instruction *ir, enum ir_node_type ignore)
}
bool
-ir_swizzle::equals(ir_instruction *ir, enum ir_node_type ignore)
+ir_swizzle::equals(const ir_instruction *ir,
+ enum ir_node_type ignore) const
{
const ir_swizzle *other = ir->as_swizzle();
if (!other)
@@ -116,7 +120,7 @@ ir_swizzle::equals(ir_instruction *ir, enum ir_node_type ignore)
}
bool
-ir_texture::equals(ir_instruction *ir, enum ir_node_type ignore)
+ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const
{
const ir_texture *other = ir->as_texture();
if (!other)
@@ -179,7 +183,7 @@ ir_texture::equals(ir_instruction *ir, enum ir_node_type ignore)
}
bool
-ir_expression::equals(ir_instruction *ir, enum ir_node_type ignore)
+ir_expression::equals(const ir_instruction *ir, enum ir_node_type ignore) const
{
const ir_expression *other = ir->as_expression();
if (!other)
diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp
index 7a7688cb2..cfe0df3dc 100644
--- a/mesalib/src/glsl/ir_validate.cpp
+++ b/mesalib/src/glsl/ir_validate.cpp
@@ -334,8 +334,6 @@ ir_validate::visit_leave(ir_expression *ir)
break;
case ir_unop_sin:
case ir_unop_cos:
- case ir_unop_sin_reduced:
- case ir_unop_cos_reduced:
case ir_unop_dFdx:
case ir_unop_dFdx_coarse:
case ir_unop_dFdx_fine:
@@ -543,9 +541,9 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_binop_logic_and:
case ir_binop_logic_xor:
case ir_binop_logic_or:
- assert(ir->type == glsl_type::bool_type);
- assert(ir->operands[0]->type == glsl_type::bool_type);
- assert(ir->operands[1]->type == glsl_type::bool_type);
+ assert(ir->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->operands[1]->type->base_type == GLSL_TYPE_BOOL);
break;
case ir_binop_dot:
diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp
index 799c74bb9..59adc298b 100644
--- a/mesalib/src/glsl/link_uniforms.cpp
+++ b/mesalib/src/glsl/link_uniforms.cpp
@@ -547,6 +547,8 @@ private:
virtual void enter_record(const glsl_type *type, const char *name,
bool row_major) {
assert(type->is_record());
+ if (this->ubo_block_index == -1)
+ return;
this->ubo_byte_offset = glsl_align(
this->ubo_byte_offset, type->std140_base_alignment(row_major));
}
@@ -554,6 +556,8 @@ private:
virtual void leave_record(const glsl_type *type, const char *name,
bool row_major) {
assert(type->is_record());
+ if (this->ubo_block_index == -1)
+ return;
this->ubo_byte_offset = glsl_align(
this->ubo_byte_offset, type->std140_base_alignment(row_major));
}
diff --git a/mesalib/src/glsl/link_varyings.cpp b/mesalib/src/glsl/link_varyings.cpp
index 22617990f..605748a9c 100644
--- a/mesalib/src/glsl/link_varyings.cpp
+++ b/mesalib/src/glsl/link_varyings.cpp
@@ -263,6 +263,19 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
if (output != NULL) {
cross_validate_types_and_qualifiers(prog, input, output,
consumer->Stage, producer->Stage);
+ } else {
+ /* Check for input vars with unmatched output vars in prev stage
+ * taking into account that interface blocks could have a matching
+ * output but with different name, so we ignore them.
+ */
+ assert(!input->data.assigned);
+ if (input->data.used && !input->get_interface_type() &&
+ !input->data.explicit_location && !prog->SeparateShader)
+ linker_error(prog,
+ "%s shader input `%s' "
+ "has no matching output in the previous stage\n",
+ _mesa_shader_stage_to_string(consumer->Stage),
+ input->name);
}
}
}
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index 4349f0973..b6baa5d36 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -1377,24 +1377,13 @@ link_fs_input_layout_qualifiers(struct gl_shader_program *prog,
* "If gl_FragCoord is redeclared in any fragment shader in a program,
* it must be redeclared in all the fragment shaders in that program
* that have a static use gl_FragCoord."
- *
- * Exclude the case when one of the 'linked_shader' or 'shader' redeclares
- * gl_FragCoord with no layout qualifiers but the other one doesn't
- * redeclare it. If we strictly follow GLSL 1.50 spec's language, it
- * should be a link error. But, generating link error for this case will
- * be a wrong behaviour which spec didn't intend to do and it could also
- * break some applications.
*/
if ((linked_shader->redeclares_gl_fragcoord
&& !shader->redeclares_gl_fragcoord
- && shader->uses_gl_fragcoord
- && (linked_shader->origin_upper_left
- || linked_shader->pixel_center_integer))
+ && shader->uses_gl_fragcoord)
|| (shader->redeclares_gl_fragcoord
&& !linked_shader->redeclares_gl_fragcoord
- && linked_shader->uses_gl_fragcoord
- && (shader->origin_upper_left
- || shader->pixel_center_integer))) {
+ && linked_shader->uses_gl_fragcoord)) {
linker_error(prog, "fragment shader defined with conflicting "
"layout qualifiers for gl_FragCoord\n");
}
@@ -2503,6 +2492,194 @@ check_explicit_uniform_locations(struct gl_context *ctx,
delete uniform_map;
}
+/**
+ * Append one entry to the program resource list of \p prog.
+ *
+ * \param prog    program whose ProgramResourceList is extended; it is also
+ *                used as the ralloc context for the list storage.
+ * \param type    value stored in the new entry's Type field.
+ * \param data    pointer stored in the new entry's Data field; must not be
+ *                NULL.  It doubles as the identity of the resource: if an
+ *                existing entry already carries the same pointer nothing is
+ *                added.
+ * \param stages  value stored in the new entry's StageReferences field.
+ *
+ * \return true if the resource is in the list on return (newly added or
+ *         already present), false if growing the list failed, in which case
+ *         a linker error has been recorded on \p prog.
+ */
+static bool
+add_program_resource(struct gl_shader_program *prog, GLenum type,
+                     const void *data, uint8_t stages)
+{
+   assert(data);
+
+   /* If resource already exists, do not add it again. */
+   for (unsigned i = 0; i < prog->NumProgramResourceList; i++)
+      if (prog->ProgramResourceList[i].Data == data)
+         return true;
+
+   /* Grow into a temporary first: storing a failed (NULL) result directly
+    * in prog->ProgramResourceList would drop the list pointer while
+    * NumProgramResourceList still reports the old, non-zero entry count.
+    */
+   struct gl_program_resource *grown =
+      reralloc(prog,
+               prog->ProgramResourceList,
+               gl_program_resource,
+               prog->NumProgramResourceList + 1);
+
+   if (!grown) {
+      linker_error(prog, "Out of memory during linking.\n");
+      return false;
+   }
+
+   prog->ProgramResourceList = grown;
+
+   struct gl_program_resource *res =
+      &prog->ProgramResourceList[prog->NumProgramResourceList];
+
+   res->Type = type;
+   res->Data = data;
+   res->StageReferences = stages;
+
+   prog->NumProgramResourceList++;
+
+   return true;
+}
+
+/**
+ * Build the stage reference bitmask for the variable called \p name.
+ *
+ * Bit i of the result is set when linked stage i of \p shProg exists and
+ * its symbol table returns a variable for \p name.
+ */
+static uint8_t
+build_stageref(struct gl_shader_program *shProg, const char *name)
+{
+   /* The mask is a uint8_t, one bit per stage.  If more stages are ever
+    * added, the type used for the reference mask in gl_program_resource
+    * has to be widened as well.
+    */
+   assert(MESA_SHADER_STAGES < 8);
+
+   uint8_t mask = 0;
+
+   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+      struct gl_shader *linked = shProg->_LinkedShaders[stage];
+
+      /* Stages without a linked shader contribute nothing. */
+      if (linked && linked->symbols->get_variable(name))
+         mask |= (1 << stage);
+   }
+
+   return mask;
+}
+
+/**
+ * Add the variables declared in \p sh that belong to \p programInterface
+ * to the program resource list of \p shProg.
+ *
+ * Shader inputs are selected for GL_PROGRAM_INPUT, shader outputs for
+ * GL_PROGRAM_OUTPUT.  The vertex-id, zero-based vertex-id and instance-id
+ * system values are treated like shader inputs; every other variable mode
+ * is skipped.
+ *
+ * \return false as soon as a resource could not be added, true otherwise.
+ */
+static bool
+add_interface_variables(struct gl_shader_program *shProg,
+                        struct gl_shader *sh, GLenum programInterface)
+{
+   foreach_in_list(ir_instruction, inst, sh->ir) {
+      ir_variable *var = inst->as_variable();
+
+      if (!var)
+         continue;
+
+      bool selected = false;
+
+      switch (var->data.mode) {
+      case ir_var_system_value:
+         /* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes):
+          * "For GetActiveAttrib, all active vertex shader input variables
+          * are enumerated, including the special built-in inputs gl_VertexID
+          * and gl_InstanceID."
+          *
+          * Those system values are therefore handled as program inputs.
+          */
+         selected =
+            (var->data.location == SYSTEM_VALUE_VERTEX_ID ||
+             var->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE ||
+             var->data.location == SYSTEM_VALUE_INSTANCE_ID) &&
+            programInterface == GL_PROGRAM_INPUT;
+         break;
+      case ir_var_shader_in:
+         selected = (programInterface == GL_PROGRAM_INPUT);
+         break;
+      case ir_var_shader_out:
+         selected = (programInterface == GL_PROGRAM_OUTPUT);
+         break;
+      default:
+         break;
+      }
+
+      if (!selected)
+         continue;
+
+      const uint8_t stageref = build_stageref(shProg, var->name);
+
+      if (!add_program_resource(shProg, programInterface, var, stageref))
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Rebuild the program resource list of \p shProg.
+ *
+ * The list only points at resource data that already exists elsewhere in
+ * the program.  Entries are added in this order: inputs of the first linked
+ * stage, outputs of the last linked stage, transform feedback varyings,
+ * non-hidden uniforms, uniform blocks and atomic counter buffers.  The
+ * function returns early, leaving a partial list, as soon as one entry
+ * cannot be added.
+ */
+static void
+build_program_resource_list(struct gl_context *ctx,
+                            struct gl_shader_program *shProg)
+{
+   /* Throw away whatever list a previous build left behind. */
+   if (shProg->ProgramResourceList) {
+      ralloc_free(shProg->ProgramResourceList);
+      shProg->ProgramResourceList = NULL;
+      shProg->NumProgramResourceList = 0;
+   }
+
+   /* Find the first and the last linked stage.  They decide which
+    * variables are enumerated for GL_PROGRAM_INPUT and GL_PROGRAM_OUTPUT.
+    * input_stage keeps the MESA_SHADER_STAGES sentinel when no stage is
+    * linked at all.
+    */
+   int input_stage = MESA_SHADER_STAGES;
+   int output_stage = 0;
+
+   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+      if (shProg->_LinkedShaders[stage]) {
+         if (input_stage == MESA_SHADER_STAGES)
+            input_stage = stage;
+         output_stage = stage;
+      }
+   }
+
+   /* Empty shader, no resources. */
+   if (input_stage == MESA_SHADER_STAGES)
+      return;
+
+   /* Inputs of the first stage, then outputs of the last stage. */
+   if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage],
+                                GL_PROGRAM_INPUT))
+      return;
+
+   if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage],
+                                GL_PROGRAM_OUTPUT))
+      return;
+
+   /* Transform feedback varyings. */
+   for (int i = 0; i < shProg->LinkedTransformFeedback.NumVarying; i++) {
+      const uint8_t stageref =
+         build_stageref(shProg,
+                        shProg->LinkedTransformFeedback.Varyings[i].Name);
+
+      if (!add_program_resource(shProg, GL_TRANSFORM_FEEDBACK_VARYING,
+                                &shProg->LinkedTransformFeedback.Varyings[i],
+                                stageref))
+         return;
+   }
+
+   /* Uniforms from uniform storage, except those Mesa uses internally. */
+   for (unsigned i = 0; i < shProg->NumUserUniformStorage; i++) {
+      if (!shProg->UniformStorage[i].hidden) {
+         const uint8_t stageref =
+            build_stageref(shProg, shProg->UniformStorage[i].name);
+
+         if (!add_program_resource(shProg, GL_UNIFORM,
+                                   &shProg->UniformStorage[i], stageref))
+            return;
+      }
+   }
+
+   /* Program uniform blocks (no stage references recorded). */
+   for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+      if (!add_program_resource(shProg, GL_UNIFORM_BLOCK,
+                                &shProg->UniformBlocks[i], 0))
+         return;
+   }
+
+   /* Atomic counter buffers (no stage references recorded). */
+   for (unsigned i = 0; i < shProg->NumAtomicBuffers; i++) {
+      if (!add_program_resource(shProg, GL_ATOMIC_COUNTER_BUFFER,
+                                &shProg->AtomicBuffers[i], 0))
+         return;
+   }
+
+   /* TODO - following extensions will require more resource types:
+    *
+    *    GL_ARB_shader_storage_buffer_object
+    *    GL_ARB_shader_subroutine
+    */
+}
+
+
void
link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
{
@@ -2737,10 +2914,18 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
goto done;
}
- unsigned first;
- for (first = 0; first <= MESA_SHADER_FRAGMENT; first++) {
- if (prog->_LinkedShaders[first] != NULL)
- break;
+ unsigned first, last;
+
+ first = MESA_SHADER_STAGES;
+ last = 0;
+
+ /* Determine first and last stage. */
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (!prog->_LinkedShaders[i])
+ continue;
+ if (first == MESA_SHADER_STAGES)
+ first = i;
+ last = i;
}
if (num_tfeedback_decls != 0) {
@@ -2769,13 +2954,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
* ensures that inter-shader outputs written to in an earlier stage are
* eliminated if they are (transitively) not used in a later stage.
*/
- int last, next;
- for (last = MESA_SHADER_FRAGMENT; last >= 0; last--) {
- if (prog->_LinkedShaders[last] != NULL)
- break;
- }
+ int next;
- if (last >= 0 && last < MESA_SHADER_FRAGMENT) {
+ if (first < MESA_SHADER_FRAGMENT) {
gl_shader *const sh = prog->_LinkedShaders[last];
if (first == MESA_SHADER_GEOMETRY) {
@@ -2787,13 +2968,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
* MESA_SHADER_GEOMETRY.
*/
if (!assign_varying_locations(ctx, mem_ctx, prog,
- NULL, sh,
+ NULL, prog->_LinkedShaders[first],
num_tfeedback_decls, tfeedback_decls,
prog->Geom.VerticesIn))
goto done;
}
- if (num_tfeedback_decls != 0 || prog->SeparateShader) {
+ if (last != MESA_SHADER_FRAGMENT &&
+ (num_tfeedback_decls != 0 || prog->SeparateShader)) {
/* There was no fragment shader, but we still have to assign varying
* locations for use by transform feedback.
*/
@@ -2905,6 +3087,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
}
}
+ build_program_resource_list(ctx, prog);
+ if (!prog->LinkStatus)
+ goto done;
+
/* FINISHME: Assign fragment shader output locations. */
done:
diff --git a/mesalib/src/glsl/loop_controls.cpp b/mesalib/src/glsl/loop_controls.cpp
index d7f0b2809..51804bb5f 100644
--- a/mesalib/src/glsl/loop_controls.cpp
+++ b/mesalib/src/glsl/loop_controls.cpp
@@ -139,7 +139,7 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment,
iter = new(mem_ctx) ir_constant(double(iter_value + bias[i]));
break;
default:
- unreachable(!"Unsupported type for loop iterator.");
+ unreachable("Unsupported type for loop iterator.");
}
ir_expression *const mul =
diff --git a/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp b/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp
index 2243f479a..44967dcdb 100644
--- a/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp
+++ b/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp
@@ -49,7 +49,6 @@ public:
{
instructions = insts;
progress = false;
- index = 0;
}
bool run()
@@ -63,7 +62,6 @@ public:
private:
exec_list *instructions;
bool progress;
- unsigned index;
};
void
@@ -82,7 +80,7 @@ lower_const_array_visitor::handle_rvalue(ir_rvalue **rvalue)
void *mem_ctx = ralloc_parent(con);
- char *uniform_name = ralloc_asprintf(mem_ctx, "constarray__%d", index++);
+ char *uniform_name = ralloc_asprintf(mem_ctx, "constarray__%p", dra);
ir_variable *uni =
new(mem_ctx) ir_variable(con->type, uniform_name, ir_var_uniform);
diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp
index 357944da6..f6b8331d4 100644
--- a/mesalib/src/glsl/nir/glsl_to_nir.cpp
+++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp
@@ -88,6 +88,8 @@ private:
exec_list *cf_node_list;
nir_instr *result; /* result of the expression tree last visited */
+ nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
+
/* the head of the dereference chain we're creating */
nir_deref_var *deref_head;
/* the tail of the dereference chain we're creating */
@@ -156,6 +158,14 @@ nir_visitor::~nir_visitor()
_mesa_hash_table_destroy(this->overload_table, NULL);
}
+/**
+ * Visit \p ir and hand the dereference chain head left in deref_head over
+ * to \p mem_ctx.
+ *
+ * After the visit, deref_head is reparented to \p mem_ctx with
+ * ralloc_steal() and returned.
+ */
+nir_deref_var *
+nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir)
+{
+   ir->accept(this);
+
+   nir_deref_var *head = this->deref_head;
+   ralloc_steal(mem_ctx, head);
+
+   return head;
+}
+
static nir_constant *
constant_copy(ir_constant *ir, void *mem_ctx)
{
@@ -582,13 +592,11 @@ void
nir_visitor::visit(ir_return *ir)
{
if (ir->value != NULL) {
- ir->value->accept(this);
nir_intrinsic_instr *copy =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
- copy->variables[0] = nir_deref_var_create(this->shader,
- this->impl->return_var);
- copy->variables[1] = this->deref_head;
+ copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var);
+ copy->variables[1] = evaluate_deref(&copy->instr, ir->value);
}
nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
@@ -613,8 +621,7 @@ nir_visitor::visit(ir_call *ir)
nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
ir_dereference *param =
(ir_dereference *) ir->actual_parameters.get_head();
- param->accept(this);
- instr->variables[0] = this->deref_head;
+ instr->variables[0] = evaluate_deref(&instr->instr, param);
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
@@ -623,8 +630,7 @@ nir_visitor::visit(ir_call *ir)
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store_instr->num_components = 1;
- ir->return_deref->accept(this);
- store_instr->variables[0] = this->deref_head;
+ store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref);
store_instr->src[0].is_ssa = true;
store_instr->src[0].ssa = &instr->dest.ssa;
@@ -642,13 +648,11 @@ nir_visitor::visit(ir_call *ir)
unsigned i = 0;
foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
- param->accept(this);
- instr->params[i] = this->deref_head;
+ instr->params[i] = evaluate_deref(&instr->instr, param);
i++;
}
- ir->return_deref->accept(this);
- instr->return_deref = this->deref_head;
+ instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
}
@@ -663,12 +667,8 @@ nir_visitor::visit(ir_assignment *ir)
nir_intrinsic_instr *copy =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
- ir->lhs->accept(this);
- copy->variables[0] = this->deref_head;
-
- ir->rhs->accept(this);
- copy->variables[1] = this->deref_head;
-
+ copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs);
+ copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs);
if (ir->condition) {
nir_if *if_stmt = nir_if_create(this->shader);
@@ -700,6 +700,7 @@ nir_visitor::visit(ir_assignment *ir)
load->num_components = ir->lhs->type->vector_elements;
nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
load->variables[0] = lhs_deref;
+ ralloc_steal(load, load->variables[0]);
nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr);
nir_op vec_op;
@@ -741,7 +742,7 @@ nir_visitor::visit(ir_assignment *ir)
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
store->num_components = ir->lhs->type->vector_elements;
- nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref);
+ nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
store->variables[0] = nir_deref_as_var(store_deref);
store->src[0] = src;
@@ -816,6 +817,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
load_instr->num_components = ir->type->vector_elements;
load_instr->variables[0] = this->deref_head;
+ ralloc_steal(load_instr, load_instr->variables[0]);
add_instr(&load_instr->instr, ir->type->vector_elements);
}
@@ -959,6 +961,7 @@ nir_visitor::visit(ir_expression *ir)
nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
intrin->num_components = deref->type->vector_elements;
intrin->variables[0] = this->deref_head;
+ ralloc_steal(intrin, intrin->variables[0]);
if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
@@ -1087,12 +1090,6 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break;
case ir_unop_sin: emit(nir_op_fsin, dest_size, srcs); break;
case ir_unop_cos: emit(nir_op_fcos, dest_size, srcs); break;
- case ir_unop_sin_reduced:
- emit(nir_op_fsin_reduced, dest_size, srcs);
- break;
- case ir_unop_cos_reduced:
- emit(nir_op_fcos_reduced, dest_size, srcs);
- break;
case ir_unop_dFdx: emit(nir_op_fddx, dest_size, srcs); break;
case ir_unop_dFdy: emit(nir_op_fddy, dest_size, srcs); break;
case ir_unop_dFdx_fine: emit(nir_op_fddx_fine, dest_size, srcs); break;
@@ -1210,6 +1207,9 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_bit_and:
case ir_binop_bit_or:
case ir_binop_bit_xor:
+ case ir_binop_logic_and:
+ case ir_binop_logic_or:
+ case ir_binop_logic_xor:
case ir_binop_lshift:
case ir_binop_rshift:
switch (ir->operation) {
@@ -1270,6 +1270,24 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_bit_xor:
op = nir_op_ixor;
break;
+ case ir_binop_logic_and:
+ if (supports_ints)
+ op = nir_op_iand;
+ else
+ op = nir_op_fand;
+ break;
+ case ir_binop_logic_or:
+ if (supports_ints)
+ op = nir_op_ior;
+ else
+ op = nir_op_for;
+ break;
+ case ir_binop_logic_xor:
+ if (supports_ints)
+ op = nir_op_ixor;
+ else
+ op = nir_op_fxor;
+ break;
case ir_binop_lshift:
op = nir_op_ishl;
break;
@@ -1444,24 +1462,6 @@ nir_visitor::visit(ir_expression *ir)
}
}
break;
- case ir_binop_logic_and:
- if (supports_ints)
- emit(nir_op_iand, dest_size, srcs);
- else
- emit(nir_op_fand, dest_size, srcs);
- break;
- case ir_binop_logic_or:
- if (supports_ints)
- emit(nir_op_ior, dest_size, srcs);
- else
- emit(nir_op_for, dest_size, srcs);
- break;
- case ir_binop_logic_xor:
- if (supports_ints)
- emit(nir_op_ixor, dest_size, srcs);
- else
- emit(nir_op_fxor, dest_size, srcs);
- break;
case ir_binop_dot:
switch (ir->operands[0]->type->vector_elements) {
case 2: emit(nir_op_fdot2, dest_size, srcs); break;
@@ -1633,8 +1633,7 @@ nir_visitor::visit(ir_texture *ir)
unreachable("not reached");
}
- ir->sampler->accept(this);
- instr->sampler = this->deref_head;
+ instr->sampler = evaluate_deref(&instr->instr, ir->sampler);
unsigned src_number = 0;
@@ -1759,7 +1758,7 @@ nir_visitor::visit(ir_dereference_record *ir)
int field_index = this->deref_tail->type->field_index(ir->field);
assert(field_index >= 0);
- nir_deref_struct *deref = nir_deref_struct_create(this->shader, field_index);
+ nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index);
deref->deref.type = ir->type;
this->deref_tail->child = &deref->deref;
this->deref_tail = &deref->deref;
@@ -1783,5 +1782,6 @@ nir_visitor::visit(ir_dereference_array *ir)
ir->array->accept(this);
this->deref_tail->child = &deref->deref;
+ ralloc_steal(this->deref_tail, deref);
this->deref_tail = &deref->deref;
}
diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c
index 6459d5108..c6e53612b 100644
--- a/mesalib/src/glsl/nir/nir.c
+++ b/mesalib/src/glsl/nir/nir.c
@@ -58,11 +58,11 @@ reg_create(void *mem_ctx, struct exec_list *list)
nir_register *reg = ralloc(mem_ctx, nir_register);
reg->parent_instr = NULL;
- reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ reg->uses = _mesa_set_create(reg, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ reg->defs = _mesa_set_create(reg, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer,
_mesa_key_pointer_equal);
reg->num_components = 0;
@@ -108,7 +108,7 @@ nir_function_create(nir_shader *shader, const char *name)
exec_list_push_tail(&shader->functions, &func->node);
exec_list_make_empty(&func->overload_list);
- func->name = name;
+ func->name = ralloc_strdup(func, name);
func->shader = shader;
return func;
@@ -285,10 +285,10 @@ nir_block_create(void *mem_ctx)
cf_init(&block->cf_node, nir_cf_node_block);
block->successors[0] = block->successors[1] = NULL;
- block->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
block->imm_dom = NULL;
- block->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
exec_list_make_empty(&block->instr_list);
@@ -381,11 +381,11 @@ alu_src_init(nir_alu_src *src)
}
nir_alu_instr *
-nir_alu_instr_create(void *mem_ctx, nir_op op)
+nir_alu_instr_create(nir_shader *shader, nir_op op)
{
unsigned num_srcs = nir_op_infos[op].num_inputs;
nir_alu_instr *instr =
- ralloc_size(mem_ctx,
+ ralloc_size(shader,
sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
instr_init(&instr->instr, nir_instr_type_alu);
@@ -398,18 +398,18 @@ nir_alu_instr_create(void *mem_ctx, nir_op op)
}
nir_jump_instr *
-nir_jump_instr_create(void *mem_ctx, nir_jump_type type)
+nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
- nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr);
+ nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
instr_init(&instr->instr, nir_instr_type_jump);
instr->type = type;
return instr;
}
nir_load_const_instr *
-nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
+nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
{
- nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr);
+ nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
instr_init(&instr->instr, nir_instr_type_load_const);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
@@ -418,11 +418,11 @@ nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
}
nir_intrinsic_instr *
-nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
+nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
nir_intrinsic_instr *instr =
- ralloc_size(mem_ctx,
+ ralloc_size(shader,
sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
instr_init(&instr->instr, nir_instr_type_intrinsic);
@@ -438,29 +438,29 @@ nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
}
nir_call_instr *
-nir_call_instr_create(void *mem_ctx, nir_function_overload *callee)
+nir_call_instr_create(nir_shader *shader, nir_function_overload *callee)
{
- nir_call_instr *instr = ralloc(mem_ctx, nir_call_instr);
+ nir_call_instr *instr = ralloc(shader, nir_call_instr);
instr_init(&instr->instr, nir_instr_type_call);
instr->callee = callee;
instr->num_params = callee->num_params;
- instr->params = ralloc_array(mem_ctx, nir_deref_var *, instr->num_params);
+ instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params);
instr->return_deref = NULL;
return instr;
}
nir_tex_instr *
-nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
+nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
- nir_tex_instr *instr = ralloc(mem_ctx, nir_tex_instr);
+ nir_tex_instr *instr = ralloc(shader, nir_tex_instr);
instr_init(&instr->instr, nir_instr_type_tex);
dest_init(&instr->dest);
instr->num_srcs = num_srcs;
- instr->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs);
+ instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
for (unsigned i = 0; i < num_srcs; i++)
src_init(&instr->src[i].src);
@@ -472,9 +472,9 @@ nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
}
nir_phi_instr *
-nir_phi_instr_create(void *mem_ctx)
+nir_phi_instr_create(nir_shader *shader)
{
- nir_phi_instr *instr = ralloc(mem_ctx, nir_phi_instr);
+ nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
instr_init(&instr->instr, nir_instr_type_phi);
dest_init(&instr->dest);
@@ -483,9 +483,9 @@ nir_phi_instr_create(void *mem_ctx)
}
nir_parallel_copy_instr *
-nir_parallel_copy_instr_create(void *mem_ctx)
+nir_parallel_copy_instr_create(nir_shader *shader)
{
- nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr);
+ nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
instr_init(&instr->instr, nir_instr_type_parallel_copy);
exec_list_make_empty(&instr->entries);
@@ -494,9 +494,9 @@ nir_parallel_copy_instr_create(void *mem_ctx)
}
nir_ssa_undef_instr *
-nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components)
+nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
{
- nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr);
+ nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
instr_init(&instr->instr, nir_instr_type_ssa_undef);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
@@ -543,7 +543,7 @@ copy_deref_var(void *mem_ctx, nir_deref_var *deref)
nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
ret->deref.type = deref->deref.type;
if (deref->deref.child)
- ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
return ret;
}
@@ -558,7 +558,7 @@ copy_deref_array(void *mem_ctx, nir_deref_array *deref)
}
ret->deref.type = deref->deref.type;
if (deref->deref.child)
- ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
return ret;
}
@@ -568,7 +568,7 @@ copy_deref_struct(void *mem_ctx, nir_deref_struct *deref)
nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index);
ret->deref.type = deref->deref.type;
if (deref->deref.child)
- ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
return ret;
}
@@ -1834,13 +1834,11 @@ void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components, const char *name)
{
- void *mem_ctx = ralloc_parent(instr);
-
def->name = name;
def->parent_instr = instr;
- def->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ def->uses = _mesa_set_create(instr, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- def->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ def->if_uses = _mesa_set_create(instr, _mesa_hash_pointer,
_mesa_key_pointer_equal);
def->num_components = num_components;
diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h
index 29fe94243..74772c798 100644
--- a/mesalib/src/glsl/nir/nir.h
+++ b/mesalib/src/glsl/nir/nir.h
@@ -34,6 +34,7 @@
#include "util/set.h"
#include "util/bitset.h"
#include "nir_types.h"
+#include "glsl/shader_enums.h"
#include <stdio.h>
#include "nir_opcodes.h"
@@ -529,6 +530,16 @@ nir_src_for_reg(nir_register *reg)
return src;
}
+/**
+ * Return the parent instruction recorded for the value \p src reads:
+ * the parent_instr of the SSA definition for an SSA source, otherwise the
+ * parent_instr of the referenced register.
+ */
+static inline nir_instr *
+nir_src_get_parent_instr(const nir_src *src)
+{
+   return src->is_ssa ? src->ssa->parent_instr
+                      : src->reg.reg->parent_instr;
+}
+
static inline nir_dest
nir_dest_for_reg(nir_register *reg)
{
@@ -1365,11 +1376,17 @@ typedef struct nir_function {
typedef struct nir_shader_compiler_options {
bool lower_ffma;
+ bool lower_flrp;
bool lower_fpow;
bool lower_fsat;
bool lower_fsqrt;
/** lowers fneg and ineg to fsub and isub. */
bool lower_negate;
+ /** lowers fsub and isub to fadd+fneg and iadd+ineg. */
+ bool lower_sub;
+
+ /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
+ bool lower_scmp;
/**
* Does the driver support real 32-bit integers? (Otherwise, integers
@@ -1414,6 +1431,9 @@ typedef struct nir_shader {
* access plus one
*/
unsigned num_inputs, num_uniforms, num_outputs;
+
+ /** the number of uniforms that are only accessed directly */
+ unsigned num_direct_uniforms;
} nir_shader;
#define nir_foreach_overload(shader, overload) \
@@ -1466,26 +1486,26 @@ void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
-nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op);
+nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
-nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type);
+nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
-nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx,
+nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
unsigned num_components);
-nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx,
+nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
nir_intrinsic_op op);
-nir_call_instr *nir_call_instr_create(void *mem_ctx,
+nir_call_instr *nir_call_instr_create(nir_shader *shader,
nir_function_overload *callee);
-nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs);
+nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
-nir_phi_instr *nir_phi_instr_create(void *mem_ctx);
+nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
-nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx);
+nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
-nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx,
+nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
unsigned num_components);
nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
@@ -1550,7 +1570,7 @@ void nir_print_instr(const nir_instr *instr, FILE *fp);
#ifdef DEBUG
void nir_validate_shader(nir_shader *shader);
#else
-static inline void nir_validate_shader(nir_shader *shader) { }
+static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
#endif /* DEBUG */
void nir_calc_dominance_impl(nir_function_impl *impl);
@@ -1596,14 +1616,18 @@ void nir_lower_alu_to_scalar(nir_shader *shader);
void nir_lower_phis_to_scalar(nir_shader *shader);
void nir_lower_samplers(nir_shader *shader,
- struct gl_shader_program *shader_program,
- struct gl_program *prog);
+ const struct gl_shader_program *shader_program,
+ gl_shader_stage stage);
void nir_lower_system_values(nir_shader *shader);
+void nir_lower_tex_projector(nir_shader *shader);
+void nir_lower_idiv(nir_shader *shader);
void nir_lower_atomics(nir_shader *shader);
void nir_lower_to_source_mods(nir_shader *shader);
+void nir_normalize_cubemap_coords(nir_shader *shader);
+
void nir_live_variables_impl(nir_function_impl *impl);
bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
@@ -1612,6 +1636,7 @@ void nir_convert_to_ssa(nir_shader *shader);
void nir_convert_from_ssa(nir_shader *shader);
bool nir_opt_algebraic(nir_shader *shader);
+bool nir_opt_algebraic_late(nir_shader *shader);
bool nir_opt_constant_folding(nir_shader *shader);
bool nir_opt_global_to_local(nir_shader *shader);
@@ -1631,6 +1656,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader);
bool nir_opt_remove_phis(nir_shader *shader);
+void nir_sweep(nir_shader *shader);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/mesalib/src/glsl/nir/nir_algebraic.py b/mesalib/src/glsl/nir/nir_algebraic.py
index afab1a008..bbf4f08ef 100644
--- a/mesalib/src/glsl/nir/nir_algebraic.py
+++ b/mesalib/src/glsl/nir/nir_algebraic.py
@@ -181,12 +181,23 @@ _algebraic_pass_template = mako.template.Template("""
#include "nir.h"
#include "nir_search.h"
+#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+
struct transform {
const nir_search_expression *search;
const nir_search_value *replace;
unsigned condition_offset;
};
+struct opt_state {
+ void *mem_ctx;
+ bool progress;
+ const bool *condition_flags;
+};
+
+#endif
+
% for (opcode, xform_list) in xform_dict.iteritems():
% for xform in xform_list:
${xform.search.render()}
@@ -200,12 +211,6 @@ static const struct transform ${pass_name}_${opcode}_xforms[] = {
};
% endfor
-struct opt_state {
- void *mem_ctx;
- bool progress;
- const bool *condition_flags;
-};
-
static bool
${pass_name}_block(nir_block *block, void *void_state)
{
diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h
index 7c4f7fd96..d1419ee21 100644
--- a/mesalib/src/glsl/nir/nir_builder.h
+++ b/mesalib/src/glsl/nir/nir_builder.h
@@ -28,6 +28,9 @@ struct exec_list;
typedef struct nir_builder {
struct exec_list *cf_node_list;
+ nir_instr *before_instr;
+ nir_instr *after_instr;
+
nir_shader *shader;
nir_function_impl *impl;
} nir_builder;
@@ -45,8 +48,75 @@ nir_builder_insert_after_cf_list(nir_builder *build,
struct exec_list *cf_node_list)
{
build->cf_node_list = cf_node_list;
+ build->before_instr = NULL;
+ build->after_instr = NULL;
+}
+
+static inline void
+nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr)
+{
+ build->cf_node_list = NULL;
+ build->before_instr = before_instr;
+ build->after_instr = NULL;
}
+static inline void
+nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr)
+{
+ build->cf_node_list = NULL;
+ build->before_instr = NULL;
+ build->after_instr = after_instr;
+}
+
+static inline void
+nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
+{
+ if (build->cf_node_list) {
+ nir_instr_insert_after_cf_list(build->cf_node_list, instr);
+ } else if (build->before_instr) {
+ nir_instr_insert_before(build->before_instr, instr);
+ } else {
+ assert(build->after_instr);
+ nir_instr_insert_after(build->after_instr, instr);
+ build->after_instr = instr;
+ }
+}
+
+static inline nir_ssa_def *
+nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value)
+{
+ nir_load_const_instr *load_const =
+ nir_load_const_instr_create(build->shader, num_components);
+ if (!load_const)
+ return NULL;
+
+ load_const->value = value;
+
+ nir_builder_instr_insert(build, &load_const->instr);
+
+ return &load_const->def;
+}
+
+static inline nir_ssa_def *
+nir_imm_float(nir_builder *build, float x)
+{
+ nir_const_value v = { { .f = {x, 0, 0, 0} } };
+ return nir_build_imm(build, 1, v);
+}
+
+static inline nir_ssa_def *
+nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
+{
+ nir_const_value v = { { .f = {x, y, z, w} } };
+ return nir_build_imm(build, 4, v);
+}
+
+static inline nir_ssa_def *
+nir_imm_int(nir_builder *build, int x)
+{
+ nir_const_value v = { { .i = {x, 0, 0, 0} } };
+ return nir_build_imm(build, 1, v);
+}
static inline nir_ssa_def *
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
@@ -90,7 +160,7 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
instr->dest.write_mask = (1 << num_components) - 1;
- nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr);
+ nir_builder_instr_insert(build, &instr->instr);
return &instr->dest.dest.ssa;
}
@@ -127,4 +197,67 @@ nir_##op(nir_builder *build, nir_ssa_def *src0, \
#include "nir_builder_opcodes.h"
+/**
+ * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
+ */
+static inline nir_ssa_def *
+nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+ nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ mov->dest.write_mask = (1 << num_components) - 1;
+ mov->src[0] = src;
+ nir_builder_instr_insert(build, &mov->instr);
+
+ return &mov->dest.dest.ssa;
+}
+
+static inline nir_ssa_def *
+nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+ nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ mov->dest.write_mask = (1 << num_components) - 1;
+ mov->src[0] = src;
+ nir_builder_instr_insert(build, &mov->instr);
+
+ return &mov->dest.dest.ssa;
+}
+
+/**
+ * Construct an fmov or imov that reswizzles the source's components.
+ */
+static inline nir_ssa_def *
+nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
+ unsigned num_components, bool use_fmov)
+{
+ nir_alu_src alu_src;
+ memset(&alu_src, 0, sizeof(alu_src));
+ alu_src.src = nir_src_for_ssa(src);
+ for (int i = 0; i < 4; i++)
+ alu_src.swizzle[i] = swiz[i];
+
+ return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :
+ nir_imov_alu(build, alu_src, num_components);
+}
+
+/**
+ * Turns a nir_src into a nir_ssa_def * so it can be passed to
+ * nir_build_alu()-based builder calls.
+ */
+static inline nir_ssa_def *
+nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
+{
+ if (src.is_ssa && src.ssa->num_components == num_components)
+ return src.ssa;
+
+ nir_alu_src alu;
+ memset(&alu, 0, sizeof(alu));
+ alu.src = src;
+ for (int j = 0; j < 4; j++)
+ alu.swizzle[j] = j;
+
+ return nir_imov_alu(build, alu, num_components);
+}
+
#endif /* NIR_BUILDER_H */
diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c
index c3090fb06..184698abd 100644
--- a/mesalib/src/glsl/nir/nir_from_ssa.c
+++ b/mesalib/src/glsl/nir/nir_from_ssa.c
@@ -509,12 +509,13 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
reg->num_components = def->num_components;
reg->num_array_elems = 0;
- /* This register comes from an SSA definition that was not part of a
- * phi-web. Therefore, we know it has a single unique definition
- * that dominates all of its uses. Therefore, we can copy the
+ /* This register comes from an SSA definition that is defined and not
+ * part of a phi-web. Therefore, we know it has a single unique
+ * definition that dominates all of its uses; we can copy the
* parent_instr from the SSA def safely.
*/
- reg->parent_instr = def->parent_instr;
+ if (def->parent_instr->type != nir_instr_type_ssa_undef)
+ reg->parent_instr = def->parent_instr;
_mesa_hash_table_insert(state->ssa_table, def, reg);
return reg;
diff --git a/mesalib/src/glsl/nir/nir_lower_idiv.c b/mesalib/src/glsl/nir/nir_lower_idiv.c
new file mode 100644
index 000000000..7b6803207
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_idiv.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Lowers idiv/udiv/umod
+ * Based on NV50LegalizeSSA::handleDIV()
+ *
+ * Note that this is probably not enough precision for compute shaders.
+ * Perhaps we want a second higher precision (looping) version of this?
+ * Or perhaps we assume if you can do compute shaders you can also
+ * branch out to a pre-optimized shader library routine..
+ */
+
+static void
+convert_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
+ nir_op op = alu->op;
+ bool is_signed;
+ /* Only idiv/udiv/umod are lowered; any other ALU op is left untouched. */
+ if ((op != nir_op_idiv) &&
+ (op != nir_op_udiv) &&
+ (op != nir_op_umod))
+ return;
+
+ is_signed = (op == nir_op_idiv);
+
+ nir_builder_insert_before_instr(bld, &alu->instr);
+
+ numer = nir_ssa_for_src(bld, alu->src[0].src,
+ nir_ssa_alu_instr_src_components(alu, 0));
+ denom = nir_ssa_for_src(bld, alu->src[1].src,
+ nir_ssa_alu_instr_src_components(alu, 1));
+
+ if (is_signed) {
+ af = nir_i2f(bld, numer);
+ bf = nir_i2f(bld, denom);
+ af = nir_fabs(bld, af);
+ bf = nir_fabs(bld, bf);
+ a = nir_iabs(bld, numer);
+ b = nir_iabs(bld, denom);
+ } else {
+ af = nir_u2f(bld, numer);
+ bf = nir_u2f(bld, denom);
+ a = numer;
+ b = denom;
+ }
+
+ /* get first result (float estimate of |numer| / |denom|): */
+ bf = nir_frcp(bld, bf);
+ bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really: integer subtract on the float's bits, presumably so the estimate never overshoots */
+ q = nir_fmul(bld, af, bf);
+
+ if (is_signed) {
+ q = nir_f2i(bld, q);
+ } else {
+ q = nir_f2u(bld, q);
+ }
+
+ /* get error of first result (remainder a - q*b, divided again in float): */
+ r = nir_imul(bld, q, b);
+ r = nir_isub(bld, a, r);
+ r = nir_u2f(bld, r);
+ r = nir_fmul(bld, r, bf);
+ r = nir_f2u(bld, r);
+
+ /* add quotients: */
+ q = nir_iadd(bld, q, r);
+
+ /* correction: if modulus >= divisor, add 1 */
+ r = nir_imul(bld, q, b);
+ r = nir_isub(bld, a, r);
+ /* a and b are unsigned magnitudes here (up to 2^32-1), so compare unsigned */
+ r = nir_uge(bld, r, b);
+ r = nir_b2i(bld, r);
+
+ q = nir_iadd(bld, q, r);
+ if (is_signed) {
+ /* fix the sign: negate q when numer and denom have different sign bits */
+ r = nir_ixor(bld, numer, denom);
+ r = nir_ushr(bld, r, nir_imm_int(bld, 31));
+ r = nir_i2b(bld, r);
+ b = nir_ineg(bld, q);
+ q = nir_bcsel(bld, r, b, q);
+ }
+
+ if (op == nir_op_umod) {
+ /* division result in q; the modulus is a - q*b */
+ r = nir_imul(bld, q, b);
+ q = nir_isub(bld, a, r);
+ }
+
+ assert(alu->dest.dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+ nir_src_for_ssa(q),
+ ralloc_parent(alu));
+}
+
+static bool
+convert_block(nir_block *block, void *state)
+{
+ nir_builder *b = state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_alu)
+ convert_instr(b, nir_instr_as_alu(instr));
+ }
+
+ return true;
+}
+
+static void
+convert_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(impl, convert_block, &b);
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+}
+
+void
+nir_lower_idiv(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl)
+ convert_impl(overload->impl);
+ }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
index 7cd93ea0a..4bdb80072 100644
--- a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -223,7 +223,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
else
nir_instr_insert_after_block(src->pred, &mov->instr);
- nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src);
+ nir_phi_src *new_src = ralloc(new_phi, nir_phi_src);
new_src->pred = src->pred;
new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa);
diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
index 3015dbd09..cf8ab8325 100644
--- a/mesalib/src/glsl/nir/nir_lower_samplers.cpp
+++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
@@ -36,33 +36,26 @@ extern "C" {
}
static unsigned
-get_sampler_index(struct gl_shader_program *shader_program, const char *name,
- const struct gl_program *prog)
+get_sampler_index(const struct gl_shader_program *shader_program,
+ gl_shader_stage stage, const char *name)
{
- GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
-
unsigned location;
if (!shader_program->UniformHash->get(location, name)) {
- linker_error(shader_program,
- "failed to find sampler named %s.\n", name);
+ assert(!"failed to find sampler");
return 0;
}
- if (!shader_program->UniformStorage[location].sampler[shader].active) {
- assert(0 && "cannot return a sampler");
- linker_error(shader_program,
- "cannot return a sampler named %s, because it is not "
- "used in this shader stage. This is a driver bug.\n",
- name);
+ if (!shader_program->UniformStorage[location].sampler[stage].active) {
+ assert(!"cannot return a sampler");
return 0;
}
- return shader_program->UniformStorage[location].sampler[shader].index;
+ return shader_program->UniformStorage[location].sampler[stage].index;
}
static void
-lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
- const struct gl_program *prog, void *mem_ctx)
+lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage, void *mem_ctx)
{
if (instr->sampler == NULL)
return;
@@ -90,7 +83,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset);
break;
case nir_deref_array_type_indirect: {
- instr->src = reralloc(mem_ctx, instr->src, nir_tex_src,
+ instr->src = reralloc(instr, instr->src, nir_tex_src,
instr->num_srcs + 1);
memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src);
instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
@@ -133,15 +126,15 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
}
}
- instr->sampler_index += get_sampler_index(shader_program, name, prog);
+ instr->sampler_index += get_sampler_index(shader_program, stage, name);
instr->sampler = NULL;
}
typedef struct {
void *mem_ctx;
- struct gl_shader_program *shader_program;
- struct gl_program *prog;
+ const struct gl_shader_program *shader_program;
+ gl_shader_stage stage;
} lower_state;
static bool
@@ -152,7 +145,7 @@ lower_block_cb(nir_block *block, void *_state)
nir_foreach_instr(block, instr) {
if (instr->type == nir_instr_type_tex) {
nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
- lower_sampler(tex_instr, state->shader_program, state->prog,
+ lower_sampler(tex_instr, state->shader_program, state->stage,
state->mem_ctx);
}
}
@@ -161,24 +154,24 @@ lower_block_cb(nir_block *block, void *_state)
}
static void
-lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program,
- struct gl_program *prog)
+lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage)
{
lower_state state;
state.mem_ctx = ralloc_parent(impl);
state.shader_program = shader_program;
- state.prog = prog;
+ state.stage = stage;
nir_foreach_block(impl, lower_block_cb, &state);
}
extern "C" void
-nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program,
- struct gl_program *prog)
+nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage)
{
nir_foreach_overload(shader, overload) {
if (overload->impl)
- lower_impl(overload->impl, shader_program, prog);
+ lower_impl(overload->impl, shader_program, stage);
}
}
diff --git a/mesalib/src/glsl/nir/nir_lower_tex_projector.c b/mesalib/src/glsl/nir/nir_lower_tex_projector.c
new file mode 100644
index 000000000..6b0e9c340
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_tex_projector.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This lowering pass converts the coordinate division for texture projection
+ * to be done in ALU instructions instead of asking the texture operation to
+ * do so.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static nir_ssa_def *
+channel(nir_builder *b, nir_ssa_def *def, int c)
+{
+ return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
+}
+
+static bool
+nir_lower_tex_projector_block(nir_block *block, void *void_state)
+{
+ nir_builder *b = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
+
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ nir_builder_insert_before_instr(b, &tex->instr);
+
+ /* Find the projector in the srcs list, if present. */
+ int proj_index;
+ for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+ if (tex->src[proj_index].src_type == nir_tex_src_projector)
+ break;
+ }
+ if (proj_index == tex->num_srcs)
+ continue;
+ nir_ssa_def *inv_proj =
+ nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+ /* Walk through the sources projecting the coord and comparitor arguments. */
+ for (int i = 0; i < tex->num_srcs; i++) {
+ switch (tex->src[i].src_type) {
+ case nir_tex_src_coord:
+ case nir_tex_src_comparitor:
+ break;
+ default:
+ continue;
+ }
+ nir_ssa_def *unprojected =
+ nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+ nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+ /* Array indices don't get projected, so make a new vector with the
+ * coordinate's array index untouched.
+ */
+ if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+ switch (tex->coord_components) {
+ case 4:
+ projected = nir_vec4(b,
+ channel(b, projected, 0),
+ channel(b, projected, 1),
+ channel(b, projected, 2),
+ channel(b, unprojected, 3));
+ break;
+ case 3:
+ projected = nir_vec3(b,
+ channel(b, projected, 0),
+ channel(b, projected, 1),
+ channel(b, unprojected, 2));
+ break;
+ case 2:
+ projected = nir_vec2(b,
+ channel(b, projected, 0),
+ channel(b, unprojected, 1));
+ break;
+ default:
+ unreachable("bad texture coord count for array");
+ break;
+ }
+ }
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(projected));
+ }
+
+ /* Now move the later tex sources down the array so that the projector
+ * disappears. Only num_srcs - proj_index - 1 entries follow it.
+ */
+ nir_src dead;
+ memset(&dead, 0, sizeof dead);
+ nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead);
+ memmove(&tex->src[proj_index],
+ &tex->src[proj_index + 1],
+ (tex->num_srcs - proj_index - 1) * sizeof(*tex->src));
+ tex->num_srcs--;
+ }
+
+ return true;
+}
+
+static void
+nir_lower_tex_projector_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(impl, nir_lower_tex_projector_block, &b);
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+}
+
+void
+nir_lower_tex_projector(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl)
+ nir_lower_tex_projector_impl(overload->impl);
+ }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c
index 85ebb281c..58389a7c7 100644
--- a/mesalib/src/glsl/nir/nir_lower_var_copies.c
+++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c
@@ -148,13 +148,10 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
unsigned num_components = glsl_get_vector_elements(src_tail->type);
- nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref);
- nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref);
-
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
load->num_components = num_components;
- load->variables[0] = nir_deref_as_var(src_deref);
+ load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
nir_instr_insert_before(&copy_instr->instr, &load->instr);
@@ -162,7 +159,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
store->num_components = num_components;
- store->variables[0] = nir_deref_as_var(dest_deref);
+ store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
+
store->src[0].is_ssa = true;
store->src[0].ssa = &load->dest.ssa;
diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
index 86e6ab416..2ca74d71b 100644
--- a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -642,7 +642,7 @@ add_phi_sources(nir_block *block, nir_block *pred,
struct deref_node *node = entry->data;
- nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src);
+ nir_phi_src *src = ralloc(phi, nir_phi_src);
src->pred = pred;
src->src.is_ssa = true;
src->src.ssa = get_ssa_def_for_block(node, pred, state);
diff --git a/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c
new file mode 100644
index 000000000..0da8447ac
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand <jason@jlekstrand.net>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * This file implements a NIR lowering pass to perform the normalization of
+ * the cubemap coordinates to have the largest magnitude component be -1.0
+ * or 1.0. This is based on the old GLSL IR based pass by Eric.
+ */
+
+static nir_ssa_def *
+channel(nir_builder *b, nir_ssa_def *def, int c)
+{
+ return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
+}
+
+static bool
+normalize_cubemap_coords_block(nir_block *block, void *void_state)
+{
+ nir_builder *b = void_state;
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
+
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
+ continue;
+
+ nir_builder_insert_before_instr(b, &tex->instr);
+
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ if (tex->src[i].src_type != nir_tex_src_coord)
+ continue;
+
+ nir_ssa_def *orig_coord =
+ nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+ assert(orig_coord->num_components >= 3);
+
+ nir_ssa_def *abs = nir_fabs(b, orig_coord);
+ nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0),
+ nir_fmax(b, channel(b, abs, 1),
+ channel(b, abs, 2)));
+
+ nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm));
+
+ /* Array indices don't have to be normalized, so make a new vector
+ * with the coordinate's array index untouched.
+ */
+ if (tex->coord_components == 4) {
+ normalized = nir_vec4(b,
+ channel(b, normalized, 0),
+ channel(b, normalized, 1),
+ channel(b, normalized, 2),
+ channel(b, orig_coord, 3));
+ }
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(normalized));
+ }
+ }
+
+ return true;
+}
+
+static void
+normalize_cubemap_coords_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(impl, normalize_cubemap_coords_block, &b);
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+}
+
+void
+nir_normalize_cubemap_coords(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload)
+ if (overload->impl)
+ normalize_cubemap_coords_impl(overload->impl);
+}
diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py
index 062cd628b..264806f5d 100644
--- a/mesalib/src/glsl/nir/nir_opcodes.py
+++ b/mesalib/src/glsl/nir/nir_opcodes.py
@@ -161,12 +161,12 @@ unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion
-unop_convert("f2b", tfloat, tbool, "src0 == 0.0f")
+unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
# Boolean-to-float conversion
unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
-unop_convert("i2b", tint, tbool, "src0 == 0")
-unop_convert("b2i", tbool, tint, "src0 ? 0 : -1") # Boolean-to-int conversion
+unop_convert("i2b", tint, tbool, "src0 != 0")
+unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
@@ -191,8 +191,6 @@ unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
unop("fsin", tfloat, "sinf(src0)")
unop("fcos", tfloat, "cosf(src0)")
-unop("fsin_reduced", tfloat, "sinf(src0)")
-unop("fcos_reduced", tfloat, "cosf(src0)")
# Partial derivatives.
diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py
index ef855aa77..cdb19241c 100644
--- a/mesalib/src/glsl/nir/nir_opt_algebraic.py
+++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py
@@ -75,6 +75,9 @@ optimizations = [
(('flrp', a, b, 1.0), b),
(('flrp', a, a, b), a),
(('flrp', 0.0, a, b), ('fmul', a, b)),
+ (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+ (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
# Comparison simplifications
@@ -82,10 +85,6 @@ optimizations = [
(('inot', ('fge', a, b)), ('flt', a, b)),
(('inot', ('ilt', a, b)), ('ige', a, b)),
(('inot', ('ige', a, b)), ('ilt', a, b)),
- (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
- (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
- (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
(('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
@@ -95,6 +94,18 @@ optimizations = [
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
(('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+ (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+ (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
+ (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
+ (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
+ (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
+ (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
+ # Emulating booleans
+ (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
+ (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
+ (('iand', 'a@bool', 1.0), ('b2f', a)),
+ (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+ (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
# Comparison with the same args. Note that these are not done for
# the float versions because NaN always returns false on float
# inequalities.
@@ -122,7 +133,7 @@ optimizations = [
(('ishr', 0, a), 0),
(('ishr', a, 0), a),
(('ushr', 0, a), 0),
- (('ushr', a, 0), 0),
+ (('ushr', a, 0), a),
# Exponential/logarithmic identities
(('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
(('fexp', ('flog', a)), a), # e^ln(a) = a
@@ -134,6 +145,26 @@ optimizations = [
(('fpow', a, 1.0), a),
(('fpow', a, 2.0), ('fmul', a, a)),
(('fpow', 2.0, a), ('fexp2', a)),
+ (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+ (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))),
+ (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+ (('frcp', ('fexp', a)), ('fexp', ('fneg', a))),
+ (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+ (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))),
+ (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+ (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))),
+ (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+ (('flog', ('frcp', a)), ('fneg', ('flog', a))),
+ (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+ (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))),
+ (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+ (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))),
+ (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+ (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))),
+ (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+ (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))),
+ (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+ (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))),
# Division and reciprocal
(('fdiv', 1.0, a), ('frcp', a)),
(('frcp', ('frcp', a)), a),
@@ -154,18 +185,21 @@ optimizations = [
(('bcsel', a, b, b), b),
(('fcsel', a, b, b), b),
+ # Conversions
+ (('f2i', ('ftrunc', a)), ('f2i', a)),
+ (('f2u', ('ftrunc', a)), ('f2u', a)),
+
# Subtracts
(('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
(('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+ (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
+ (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
(('ineg', a), ('isub', 0, a), 'options->lower_negate'),
(('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
(('iadd', a, ('isub', 0, b)), ('isub', a, b)),
(('fabs', ('fsub', 0.0, a)), ('fabs', a)),
(('iabs', ('isub', 0, a)), ('iabs', a)),
-
-# This one may not be exact
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
]
# Add optimizations to handle the case where the result of a ternary is
@@ -189,4 +223,17 @@ for op in ['flt', 'fge', 'feq', 'fne',
('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
]
+# This section contains "late" optimizations that should be run after the
+# regular optimizations have finished. Optimizations should go here if
+# they help code generation but do not necessarily produce code that is
+# more easily optimizable.
+late_optimizations = [
+ (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+ (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+ (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+ (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+]
+
print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+ late_optimizations).render()
diff --git a/mesalib/src/glsl/nir/nir_opt_cse.c b/mesalib/src/glsl/nir/nir_opt_cse.c
index 9b383202d..553906e12 100644
--- a/mesalib/src/glsl/nir/nir_opt_cse.c
+++ b/mesalib/src/glsl/nir/nir_opt_cse.c
@@ -37,20 +37,19 @@ struct cse_state {
};
static bool
-nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask)
+nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1,
+ unsigned src2)
{
- if (src1.abs != src2.abs || src1.negate != src2.negate)
+ if (alu1->src[src1].abs != alu2->src[src2].abs ||
+ alu1->src[src1].negate != alu2->src[src2].negate)
return false;
- for (int i = 0; i < 4; ++i) {
- if (!(read_mask & (1 << i)))
- continue;
-
- if (src1.swizzle[i] != src2.swizzle[i])
+ for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
+ if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
return false;
}
- return nir_srcs_equal(src1.src, src2.src);
+ return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
}
static bool
@@ -73,10 +72,17 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
return false;
- for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
- if (!nir_alu_srcs_equal(alu1->src[i], alu2->src[i],
- (1 << alu1->dest.dest.ssa.num_components) - 1))
- return false;
+ if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[alu1->op].num_inputs == 2);
+ return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
+ (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 0));
+ } else {
+ for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
+ if (!nir_alu_srcs_equal(alu1, alu2, i, i))
+ return false;
+ }
}
return true;
}
@@ -154,12 +160,14 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
static bool
src_is_ssa(nir_src *src, void *data)
{
+ (void) data;
return src->is_ssa;
}
static bool
dest_is_ssa(nir_dest *dest, void *data)
{
+ (void) data;
return dest->is_ssa;
}
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
new file mode 100644
index 000000000..9d5646fe6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a small peephole optimization that looks for a multiply that
+ * is only ever used in an add and replaces both with an fma.
+ */
+
+/* State handed to the per-block callback of the ffma peephole pass. */
+struct peephole_ffma_state {
+ /* Memory context passed to every instruction-creation call of the pass. */
+ void *mem_ctx;
+ /* The function implementation currently being processed. */
+ nir_function_impl *impl;
+ /* Set to true once at least one fadd has been replaced by an ffma. */
+ bool progress;
+};
+
+/*
+ * Returns true when every use of `def` is an fadd, either directly or
+ * through a chain of imov/fmov/fneg/fabs instructions whose own results
+ * satisfy the same condition.
+ */
+static inline bool
+are_all_uses_fadd(nir_ssa_def *def)
+{
+   /* Any use as an if-condition is, by definition, not an fadd. */
+   if (def->if_uses->entries > 0)
+      return false;
+
+   struct set_entry *entry;
+   set_foreach(def->uses, entry) {
+      nir_instr *user = (nir_instr *)entry->key;
+      if (user->type != nir_instr_type_alu)
+         return false;
+
+      nir_alu_instr *alu = nir_instr_as_alu(user);
+
+      /* A direct fadd use needs no further checking. */
+      if (alu->op == nir_op_fadd)
+         continue;
+
+      /* Moves and source-modifier ops are looked through; anything else
+       * disqualifies the value.
+       */
+      const bool looked_through = alu->op == nir_op_imov ||
+                                  alu->op == nir_op_fmov ||
+                                  alu->op == nir_op_fneg ||
+                                  alu->op == nir_op_fabs;
+      if (!looked_through)
+         return false;
+
+      assert(alu->dest.dest.is_ssa);
+      if (!are_all_uses_fadd(&alu->dest.dest.ssa))
+         return false;
+   }
+
+   return true;
+}
+
+/*
+ * Walks backwards from `src` looking for the fmul that produces it.
+ *
+ * imov/fmov/fneg/fabs producers are looked through recursively.  An fmul is
+ * only returned when are_all_uses_fadd() accepts its result; every other
+ * producer yields NULL.
+ *
+ * swizzle: in/out.  On success, the leading channels that are set in the
+ *          returned fmul's write mask are composed with src->swizzle.
+ * negate:  in/out.  Toggled for every fneg looked through, cleared by an fabs.
+ * abs:     in/out.  Set once an fabs has been looked through.
+ *
+ * The out-parameters may already have been modified when NULL is returned,
+ * so the caller has to reset them before trying another source.
+ */
+static nir_alu_instr *
+get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
+{
+   assert(src->src.is_ssa && !src->abs && !src->negate);
+
+   nir_instr *instr = src->src.ssa->parent_instr;
+   if (instr->type != nir_instr_type_alu)
+      return NULL;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   switch (alu->op) {
+   case nir_op_imov:
+   case nir_op_fmov:
+      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      break;
+
+   case nir_op_fneg:
+      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      *negate = !*negate;
+      break;
+
+   case nir_op_fabs:
+      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      /* |x| discards any negation collected further down the chain. */
+      *negate = false;
+      *abs = true;
+      break;
+
+   case nir_op_fmul:
+      /* Only absorb a fmul into a ffma if the fmul is only used in fadd
+       * operations.  This prevents us from being too aggressive with our
+       * fusing which can actually lead to more instructions.
+       */
+      if (!are_all_uses_fadd(&alu->dest.dest.ssa))
+         return NULL;
+      break;
+
+   default:
+      return NULL;
+   }
+
+   if (!alu)
+      return NULL;
+
+   /* Compose from a snapshot of the incoming swizzle.  Composing in place
+    * would read entries that this very loop has already overwritten: with
+    * an incoming xyzw and src->swizzle == zyxx the result must be zyxx, but
+    * an in-place update produces zyzz.
+    */
+   uint8_t inner[4];
+   for (unsigned i = 0; i < 4; i++)
+      inner[i] = swizzle[i];
+
+   for (unsigned i = 0; i < 4; i++) {
+      if (!(alu->dest.write_mask & (1 << i)))
+         break;
+
+      swizzle[i] = inner[src->swizzle[i]];
+   }
+
+   return alu;
+}
+
+/*
+ * Creates the single-source ALU instruction `op(def)`, inserts it right
+ * before `before`, and returns the SSA value it defines.
+ */
+static nir_ssa_def *
+ffma_emit_unary_before(void *mem_ctx, nir_op op, nir_ssa_def *def,
+                       nir_instr *before)
+{
+   nir_alu_instr *unary = nir_alu_instr_create(mem_ctx, op);
+
+   unary->src[0].src = nir_src_for_ssa(def);
+   nir_ssa_dest_init(&unary->instr, &unary->dest.dest,
+                     def->num_components, NULL);
+   unary->dest.write_mask = (1 << def->num_components) - 1;
+   nir_instr_insert_before(before, &unary->instr);
+
+   return &unary->dest.dest.ssa;
+}
+
+/*
+ * Block callback of the pass: for each fadd in `block` that has an eligible
+ * fmul behind one of its sources (see get_mul_for_src()), builds an ffma
+ * from the fmul's operands and the other fadd source, redirects the fadd's
+ * uses to it and removes the fadd.  `void_state` is a
+ * struct peephole_ffma_state.  Always returns true.
+ */
+static bool
+nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
+{
+   struct peephole_ffma_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *add = nir_instr_as_alu(instr);
+      if (add->op != nir_op_fadd)
+         continue;
+
+      /* TODO: Maybe bail if this expression is considered "precise"? */
+
+      assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
+
+      /* This is the a + a case.  We would rather handle it with an
+       * algebraic reduction than fuse it.  Also, we only want to fuse when
+       * the multiply is used once and, here, it would be used twice by the
+       * same instruction.
+       */
+      if (add->src[0].src.ssa == add->src[1].src.ssa)
+         continue;
+
+      /* Try each fadd source in turn as the multiply operand. */
+      nir_alu_instr *mul = NULL;
+      unsigned mul_idx;
+      uint8_t swizzle[4];
+      bool negate = false;
+      bool take_abs = false;
+
+      for (mul_idx = 0; mul_idx < 2; mul_idx++) {
+         /* get_mul_for_src() may touch these even when it fails, so start
+          * every attempt from a clean slate.
+          */
+         for (unsigned chan = 0; chan < 4; chan++)
+            swizzle[chan] = chan;
+         negate = false;
+         take_abs = false;
+
+         mul = get_mul_for_src(&add->src[mul_idx], swizzle,
+                               &negate, &take_abs);
+         if (mul != NULL)
+            break;
+      }
+
+      if (mul == NULL)
+         continue;
+
+      nir_ssa_def *factor[2] = {
+         mul->src[0].src.ssa,
+         mul->src[1].src.ssa,
+      };
+
+      /* |a * b| == |a| * |b|, so the absolute value goes on both factors. */
+      if (take_abs) {
+         for (unsigned i = 0; i < 2; i++) {
+            factor[i] = ffma_emit_unary_before(state->mem_ctx, nir_op_fabs,
+                                               factor[i], &add->instr);
+         }
+      }
+
+      /* -(a * b) == (-a) * b, so negating one factor is enough. */
+      if (negate) {
+         factor[0] = ffma_emit_unary_before(state->mem_ctx, nir_op_fneg,
+                                            factor[0], &add->instr);
+      }
+
+      nir_alu_instr *ffma = nir_alu_instr_create(state->mem_ctx, nir_op_ffma);
+      ffma->dest.saturate = add->dest.saturate;
+      ffma->dest.write_mask = add->dest.write_mask;
+
+      for (unsigned i = 0; i < 2; i++) {
+         ffma->src[i].src = nir_src_for_ssa(factor[i]);
+         for (unsigned chan = 0; chan < add->dest.dest.ssa.num_components;
+              chan++) {
+            ffma->src[i].swizzle[chan] = mul->src[i].swizzle[swizzle[chan]];
+         }
+      }
+
+      /* The addend is whichever fadd source did not lead to the multiply. */
+      nir_alu_src_copy(&ffma->src[2], &add->src[1 - mul_idx],
+                       state->mem_ctx);
+
+      assert(add->dest.dest.is_ssa);
+
+      nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
+                        add->dest.dest.ssa.num_components,
+                        add->dest.dest.ssa.name);
+      nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
+                               nir_src_for_ssa(&ffma->dest.dest.ssa),
+                               state->mem_ctx);
+
+      nir_instr_insert_before(&add->instr, &ffma->instr);
+      assert(add->dest.dest.ssa.uses->entries == 0);
+      nir_instr_remove(&add->instr);
+
+      state->progress = true;
+   }
+
+   return true;
+}
+
+/*
+ * Runs the ffma peephole over every block of `impl`.  Returns true when at
+ * least one fadd was replaced.
+ */
+static bool
+nir_opt_peephole_ffma_impl(nir_function_impl *impl)
+{
+   struct peephole_ffma_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .impl = impl,
+      .progress = false,
+   };
+
+   nir_foreach_block(impl, nir_opt_peephole_ffma_block, &state);
+
+   if (!state.progress)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+   return true;
+}
+
+/*
+ * Entry point of the pass: applies the ffma peephole to every overload of
+ * `shader` that has an implementation.  Returns true if anything changed.
+ */
+bool
+nir_opt_peephole_ffma(nir_shader *shader)
+{
+   bool changed = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+
+      if (nir_opt_peephole_ffma_impl(overload->impl))
+         changed = true;
+   }
+
+   return changed;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
index b89451b09..f400cfd66 100644
--- a/mesalib/src/glsl/nir/nir_opt_peephole_select.c
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
@@ -84,7 +84,9 @@ block_check_for_allowed_instrs(nir_block *block)
case nir_instr_type_alu: {
/* It must be a move operation */
nir_alu_instr *mov = nir_instr_as_alu(instr);
- if (mov->op != nir_op_fmov && mov->op != nir_op_imov)
+ if (mov->op != nir_op_fmov && mov->op != nir_op_imov &&
+ mov->op != nir_op_fneg && mov->op != nir_op_ineg &&
+ mov->op != nir_op_fabs && mov->op != nir_op_iabs)
return false;
/* Can't handle saturate */
diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c
index fa11a312e..fb8c9344c 100644
--- a/mesalib/src/glsl/nir/nir_print.c
+++ b/mesalib/src/glsl/nir/nir_print.c
@@ -137,25 +137,37 @@ print_dest(nir_dest *dest, FILE *fp)
}
static void
-print_alu_src(nir_alu_src *src, FILE *fp)
+print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp)
{
- if (src->negate)
+ if (instr->src[src].negate)
fprintf(fp, "-");
- if (src->abs)
+ if (instr->src[src].abs)
fprintf(fp, "abs(");
- print_src(&src->src, fp);
+ print_src(&instr->src[src].src, fp);
- if (src->swizzle[0] != 0 ||
- src->swizzle[1] != 1 ||
- src->swizzle[2] != 2 ||
- src->swizzle[3] != 3) {
+ bool print_swizzle = false;
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(instr, src, i))
+ continue;
+
+ if (instr->src[src].swizzle[i] != i) {
+ print_swizzle = true;
+ break;
+ }
+ }
+
+ if (print_swizzle) {
fprintf(fp, ".");
- for (unsigned i = 0; i < 4; i++)
- fprintf(fp, "%c", "xyzw"[src->swizzle[i]]);
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(instr, src, i))
+ continue;
+
+ fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]);
+ }
}
- if (src->abs)
+ if (instr->src[src].abs)
fprintf(fp, ")");
}
@@ -189,7 +201,7 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp)
if (i != 0)
fprintf(fp, ", ");
- print_alu_src(&instr->src[i], fp);
+ print_alu_src(instr, i, fp);
}
}
diff --git a/mesalib/src/glsl/nir/nir_remove_dead_variables.c b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
index e7f8aeacb..4417e2a48 100644
--- a/mesalib/src/glsl/nir/nir_remove_dead_variables.c
+++ b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
@@ -98,22 +98,14 @@ add_var_use_shader(nir_shader *shader, struct set *live)
}
static void
-remove_dead_local_vars(nir_function_impl *impl, struct set *live)
+remove_dead_vars(struct exec_list *var_list, struct set *live)
{
- foreach_list_typed_safe(nir_variable, var, node, &impl->locals) {
+ foreach_list_typed_safe(nir_variable, var, node, var_list) {
struct set_entry *entry = _mesa_set_search(live, var);
- if (entry == NULL)
- exec_node_remove(&var->node);
- }
-}
-
-static void
-remove_dead_global_vars(nir_shader *shader, struct set *live)
-{
- foreach_list_typed_safe(nir_variable, var, node, &shader->globals) {
- struct set_entry *entry = _mesa_set_search(live, var);
- if (entry == NULL)
+ if (entry == NULL) {
exec_node_remove(&var->node);
+ ralloc_free(var);
+ }
}
}
@@ -125,11 +117,11 @@ nir_remove_dead_variables(nir_shader *shader)
add_var_use_shader(shader, live);
- remove_dead_global_vars(shader, live);
+ remove_dead_vars(&shader->globals, live);
nir_foreach_overload(shader, overload) {
if (overload->impl)
- remove_dead_local_vars(overload->impl, live);
+ remove_dead_vars(&overload->impl->locals, live);
}
_mesa_set_destroy(live, NULL);
diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c
index 73a802be7..5ba016085 100644
--- a/mesalib/src/glsl/nir/nir_search.c
+++ b/mesalib/src/glsl/nir/nir_search.c
@@ -218,8 +218,8 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
if (matched)
return true;
- if (nir_op_infos[instr->op].num_inputs == 2 &&
- (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE)) {
+ if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[instr->op].num_inputs == 2);
if (!match_value(expr->srcs[0], instr, 1, num_components,
swizzle, state))
return false;
diff --git a/mesalib/src/glsl/nir/nir_split_var_copies.c b/mesalib/src/glsl/nir/nir_split_var_copies.c
index 4d663b51b..fc72c078c 100644
--- a/mesalib/src/glsl/nir/nir_split_var_copies.c
+++ b/mesalib/src/glsl/nir/nir_split_var_copies.c
@@ -188,8 +188,8 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy,
* belongs to the copy instruction and b) the deref chains may
* have some of the same links due to the way we constructed them
*/
- nir_deref *src = nir_copy_deref(state->mem_ctx, src_head);
- nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head);
+ nir_deref *src = nir_copy_deref(new_copy, src_head);
+ nir_deref *dest = nir_copy_deref(new_copy, dest_head);
new_copy->variables[0] = nir_deref_as_var(dest);
new_copy->variables[1] = nir_deref_as_var(src);
diff --git a/mesalib/src/glsl/nir/nir_sweep.c b/mesalib/src/glsl/nir/nir_sweep.c
new file mode 100644
index 000000000..d3549756a
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_sweep.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/**
+ * \file nir_sweep.c
+ *
+ * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
+ * memory - anything still connected to the program will be kept, and any dead memory
+ * we dropped on the floor will be freed.
+ *
+ * The expectation is that drivers should call this when finished compiling the shader
+ * (after any optimization, lowering, and so on). However, it's also fine to call it
+ * earlier, and even many times, trading CPU cycles for memory savings.
+ */
+
+/*
+ * Calls ralloc_steal(mem_ctx, obj) on every `type` object linked into `list`
+ * through its `node` member.  Expands to a foreach_list_typed() loop whose
+ * loop variable is named `obj`.
+ */
+#define steal_list(mem_ctx, type, list) \
+ foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
+
+static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
+
+/*
+ * nir_foreach_src() callback: for a register source with an indirect,
+ * steals that indirect back to `nir`.  Always returns true so iteration
+ * continues.
+ */
+static bool
+sweep_src_indirect(nir_src *src, void *nir)
+{
+   /* SSA sources have no indirect to keep alive. */
+   if (src->is_ssa)
+      return true;
+
+   if (src->reg.indirect)
+      ralloc_steal(nir, src->reg.indirect);
+
+   return true;
+}
+
+/*
+ * nir_foreach_dest() callback: for a register destination with an indirect,
+ * steals that indirect back to `nir`.  Always returns true so iteration
+ * continues.
+ */
+static bool
+sweep_dest_indirect(nir_dest *dest, void *nir)
+{
+   /* SSA destinations have no indirect to keep alive. */
+   if (dest->is_ssa)
+      return true;
+
+   if (dest->reg.indirect)
+      ralloc_steal(nir, dest->reg.indirect);
+
+   return true;
+}
+
+/*
+ * Steals `block`, each of its instructions, and any indirects hanging off
+ * those instructions' register sources and destinations back to `nir`.
+ */
+static void
+sweep_block(nir_shader *nir, nir_block *block)
+{
+   ralloc_steal(nir, block);
+
+   nir_foreach_instr(block, cur) {
+      ralloc_steal(nir, cur);
+
+      nir_foreach_src(cur, sweep_src_indirect, nir);
+      nir_foreach_dest(cur, sweep_dest_indirect, nir);
+   }
+}
+
+/* Steals `iff` back to `nir`, then sweeps its then-list and else-list. */
+static void
+sweep_if(nir_shader *nir, nir_if *iff)
+{
+   ralloc_steal(nir, iff);
+
+   foreach_list_typed(nir_cf_node, then_node, node, &iff->then_list)
+      sweep_cf_node(nir, then_node);
+
+   foreach_list_typed(nir_cf_node, else_node, node, &iff->else_list)
+      sweep_cf_node(nir, else_node);
+}
+
+/* Steals `loop` back to `nir`, then sweeps every CF node of its body. */
+static void
+sweep_loop(nir_shader *nir, nir_loop *loop)
+{
+   ralloc_steal(nir, loop);
+
+   foreach_list_typed(nir_cf_node, body_node, node, &loop->body)
+      sweep_cf_node(nir, body_node);
+}
+
+/*
+ * Dispatches to the block, if or loop sweeper according to the type of
+ * `cf_node`.  Any other node type is treated as unreachable.
+ */
+static void
+sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
+{
+   if (cf_node->type == nir_cf_node_block) {
+      sweep_block(nir, nir_cf_node_as_block(cf_node));
+   } else if (cf_node->type == nir_cf_node_if) {
+      sweep_if(nir, nir_cf_node_as_if(cf_node));
+   } else if (cf_node->type == nir_cf_node_loop) {
+      sweep_loop(nir, nir_cf_node_as_loop(cf_node));
+   } else {
+      unreachable("Invalid CF node type");
+   }
+}
+
+/*
+ * Steals a function implementation and everything reachable from it back
+ * to `nir`: parameters, return variable, locals, registers, the body's
+ * control-flow nodes and the end block.  Metadata is reset afterwards.
+ */
+static void
+sweep_impl(nir_shader *nir, nir_function_impl *impl)
+{
+   /* The impl itself and the objects it points at directly. */
+   ralloc_steal(nir, impl);
+   ralloc_steal(nir, impl->params);
+   ralloc_steal(nir, impl->return_var);
+
+   /* Per-impl variable and register lists. */
+   steal_list(nir, nir_variable, &impl->locals);
+   steal_list(nir, nir_register, &impl->registers);
+
+   /* Control flow: the body first, then the end block. */
+   foreach_list_typed(nir_cf_node, body_node, node, &impl->body)
+      sweep_cf_node(nir, body_node);
+
+   sweep_block(nir, impl->end_block);
+
+   /* Wipe out all the metadata, if any. */
+   nir_metadata_preserve(impl, nir_metadata_none);
+}
+
+/*
+ * Steals function `f` back to `nir` together with each of its overloads,
+ * their parameter arrays and, where present, their implementations.
+ */
+static void
+sweep_function(nir_shader *nir, nir_function *f)
+{
+   ralloc_steal(nir, f);
+
+   foreach_list_typed(nir_function_overload, ovl, node, &f->overload_list) {
+      ralloc_steal(nir, ovl);
+      ralloc_steal(nir, ovl->params);
+
+      /* Overloads without an implementation have nothing more to keep. */
+      if (!ovl->impl)
+         continue;
+
+      sweep_impl(nir, ovl->impl);
+   }
+}
+
+/*
+ * Mark-and-sweep over the memory owned by `nir`: everything is first handed
+ * to a temporary context, whatever is still linked into the shader is then
+ * stolen back, and the remainder is freed together with that context.
+ */
+void
+nir_sweep(nir_shader *nir)
+{
+   /* Shader-level variable lists whose members are always kept. */
+   struct exec_list *const live_var_lists[] = {
+      &nir->uniforms,
+      &nir->inputs,
+      &nir->outputs,
+      &nir->globals,
+      &nir->system_values,
+   };
+   const unsigned num_var_lists =
+      sizeof(live_var_lists) / sizeof(live_var_lists[0]);
+
+   /* Mark: assume everything is dead by parking it under a scratch context. */
+   void *rubbish = ralloc_context(NULL);
+   ralloc_adopt(rubbish, nir);
+
+   /* Variables and registers are not dead.  Steal them back. */
+   for (unsigned i = 0; i < num_var_lists; i++) {
+      steal_list(nir, nir_variable, live_var_lists[i]);
+   }
+   steal_list(nir, nir_register, &nir->registers);
+
+   /* Functions pull their own contents back recursively. */
+   foreach_list_typed(nir_function, func, node, &nir->functions)
+      sweep_function(nir, func);
+
+   /* Sweep: whatever is still parked under the scratch context goes away. */
+   ralloc_free(rubbish);
+}
diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c
index 47cf45393..53ff54766 100644
--- a/mesalib/src/glsl/nir/nir_to_ssa.c
+++ b/mesalib/src/glsl/nir/nir_to_ssa.c
@@ -47,7 +47,7 @@ insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
set_foreach(block->predecessors, entry) {
nir_block *pred = (nir_block *) entry->key;
- nir_phi_src *src = ralloc(mem_ctx, nir_phi_src);
+ nir_phi_src *src = ralloc(instr, nir_phi_src);
src->pred = pred;
src->src.is_ssa = false;
src->src.reg.base_offset = 0;
diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp
index a13c3e12a..f0d0b46d2 100644
--- a/mesalib/src/glsl/nir/nir_types.cpp
+++ b/mesalib/src/glsl/nir/nir_types.cpp
@@ -143,6 +143,12 @@ glsl_void_type(void)
}
const glsl_type *
+glsl_float_type(void)
+{
+ return glsl_type::float_type;
+}
+
+const glsl_type *
glsl_vec4_type(void)
{
return glsl_type::vec4_type;
diff --git a/mesalib/src/glsl/nir/nir_types.h b/mesalib/src/glsl/nir/nir_types.h
index 494051a67..276d4ad62 100644
--- a/mesalib/src/glsl/nir/nir_types.h
+++ b/mesalib/src/glsl/nir/nir_types.h
@@ -69,6 +69,7 @@ bool glsl_type_is_scalar(const struct glsl_type *type);
bool glsl_type_is_matrix(const struct glsl_type *type);
const struct glsl_type *glsl_void_type(void);
+const struct glsl_type *glsl_float_type(void);
const struct glsl_type *glsl_vec4_type(void);
const struct glsl_type *glsl_array_type(const struct glsl_type *base,
unsigned elements);
diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c
index f247ae069..a7aa79837 100644
--- a/mesalib/src/glsl/nir/nir_validate.c
+++ b/mesalib/src/glsl/nir/nir_validate.c
@@ -295,6 +295,8 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
static void
validate_deref_chain(nir_deref *deref, validate_state *state)
{
+ assert(deref->child == NULL || ralloc_parent(deref->child) == deref);
+
nir_deref *parent = NULL;
while (deref != NULL) {
switch (deref->deref_type) {
@@ -336,9 +338,10 @@ validate_var_use(nir_variable *var, validate_state *state)
}
static void
-validate_deref_var(nir_deref_var *deref, validate_state *state)
+validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state)
{
assert(deref != NULL);
+ assert(ralloc_parent(deref) == parent_mem_ctx);
assert(deref->deref.type == deref->var->type);
validate_var_use(deref->var, state);
@@ -386,7 +389,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
for (unsigned i = 0; i < num_vars; i++) {
- validate_deref_var(instr->variables[i], state);
+ validate_deref_var(instr, instr->variables[i], state);
}
switch (instr->intrinsic) {
@@ -423,7 +426,7 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
}
if (instr->sampler != NULL)
- validate_deref_var(instr->sampler, state);
+ validate_deref_var(instr, instr->sampler, state);
}
static void
@@ -438,10 +441,10 @@ validate_call_instr(nir_call_instr *instr, validate_state *state)
for (unsigned i = 0; i < instr->num_params; i++) {
assert(instr->callee->params[i].type == instr->params[i]->deref.type);
- validate_deref_var(instr->params[i], state);
+ validate_deref_var(instr, instr->params[i], state);
}
- validate_deref_var(instr->return_deref, state);
+ validate_deref_var(instr, instr->return_deref, state);
}
static void
@@ -680,8 +683,7 @@ validate_cf_node(nir_cf_node *node, validate_state *state)
break;
default:
- assert(!"Invalid ALU instruction type");
- break;
+ unreachable("Invalid CF node type");
}
}
diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp
index 69c03ea8b..3d2f2ca0b 100644
--- a/mesalib/src/glsl/opt_algebraic.cpp
+++ b/mesalib/src/glsl/opt_algebraic.cpp
@@ -290,6 +290,20 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
unsigned int i;
+ if (ir->operation == ir_binop_mul &&
+ ir->operands[0]->type->is_matrix() &&
+ ir->operands[1]->type->is_vector()) {
+ ir_expression *matrix_mul = ir->operands[0]->as_expression();
+
+ if (matrix_mul && matrix_mul->operation == ir_binop_mul &&
+ matrix_mul->operands[0]->type->is_matrix() &&
+ matrix_mul->operands[1]->type->is_matrix()) {
+
+ return mul(matrix_mul->operands[0],
+ mul(matrix_mul->operands[1], ir->operands[1]));
+ }
+ }
+
assert(ir->get_num_operands() <= 4);
for (i = 0; i < ir->get_num_operands(); i++) {
if (ir->operands[i]->type->is_matrix())
@@ -421,6 +435,18 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
break;
}
+ case ir_unop_saturate:
+ if (op_expr[0] && op_expr[0]->operation == ir_binop_add) {
+ ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression();
+ ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression();
+
+ if (b2f_0 && b2f_0->operation == ir_unop_b2f &&
+ b2f_1 && b2f_1->operation == ir_unop_b2f) {
+ return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0]));
+ }
+ }
+ break;
+
case ir_binop_add:
if (is_vec_zero(op_const[0]))
return ir->operands[1];
@@ -518,6 +544,10 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
if (is_vec_negative_one(op_const[1]))
return neg(ir->operands[0]);
+ if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f &&
+ op_expr[1] && op_expr[1]->operation == ir_unop_b2f) {
+ return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0]));
+ }
/* Reassociate multiplication of constants so that we can do
* constant folding.
@@ -544,6 +574,8 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
continue;
ir_expression *add_expr = floor_expr->operands[0]->as_expression();
+ if (!add_expr)
+ continue;
for (int j = 0; j < 2; j++) {
ir_expression *abs_expr = add_expr->operands[j]->as_expression();
diff --git a/mesalib/src/glsl/opt_cse.cpp b/mesalib/src/glsl/opt_cse.cpp
index b0b67f496..4b8e9a07b 100644
--- a/mesalib/src/glsl/opt_cse.cpp
+++ b/mesalib/src/glsl/opt_cse.cpp
@@ -63,6 +63,17 @@ public:
var = NULL;
}
+   /**
+    * Re-initialize this entry for a new expression, clearing any variable
+    * recorded for a previous one.
+    */
+   void init(ir_instruction *base_ir, ir_rvalue **val)
+   {
+      this->var = NULL;
+      this->base_ir = base_ir;
+      this->val = val;
+
+      assert(val);
+      assert(*val);
+      assert(base_ir);
+   }
+
/**
* The pointer to the expression that we might be able to reuse
*
@@ -116,6 +127,18 @@ private:
ir_rvalue *try_cse(ir_rvalue *rvalue);
void add_to_ae(ir_rvalue **rvalue);
+ /**
+ * Move all nodes from the ae list to the free list
+ */
+ void empty_ae_list();
+
+ /**
+ * Get and initialize a new ae_entry
+ *
+ * This will either come from the free list or be freshly allocated.
+ */
+ ae_entry *get_ae_entry(ir_rvalue **rvalue);
+
/** List of ae_entry: The available expressions to reuse */
exec_list *ae;
@@ -126,6 +149,11 @@ private:
* right.
*/
exec_list *validate_instructions;
+
+ /**
+ * List of available-for-use ae_entry objects.
+ */
+ exec_list free_ae_entries;
};
/**
@@ -322,6 +350,25 @@ cse_visitor::try_cse(ir_rvalue *rvalue)
return NULL;
}
+/* Hand every entry of the ae list over to the free list for later reuse. */
+void
+cse_visitor::empty_ae_list()
+{
+   this->free_ae_entries.append_list(this->ae);
+}
+
+/*
+ * Returns an ae_entry initialized for `rvalue` and the current base_ir,
+ * recycling one from the free list when available and allocating a new one
+ * from mem_ctx otherwise.
+ */
+ae_entry *
+cse_visitor::get_ae_entry(ir_rvalue **rvalue)
+{
+   ae_entry *recycled = (ae_entry *) free_ae_entries.pop_head();
+
+   /* Free list exhausted: fall back to a fresh allocation. */
+   if (!recycled)
+      return new(mem_ctx) ae_entry(base_ir, rvalue);
+
+   recycled->init(base_ir, rvalue);
+   return recycled;
+}
+
/** Add the rvalue to the list of available expressions for CSE. */
void
cse_visitor::add_to_ae(ir_rvalue **rvalue)
@@ -332,7 +379,7 @@ cse_visitor::add_to_ae(ir_rvalue **rvalue)
printf("\n");
}
- ae->push_tail(new(mem_ctx) ae_entry(base_ir, rvalue));
+ ae->push_tail(get_ae_entry(rvalue));
if (debug)
dump_ae(ae);
@@ -370,33 +417,33 @@ cse_visitor::visit_enter(ir_if *ir)
{
handle_rvalue(&ir->condition);
- ae->make_empty();
+ empty_ae_list();
visit_list_elements(this, &ir->then_instructions);
- ae->make_empty();
+ empty_ae_list();
visit_list_elements(this, &ir->else_instructions);
- ae->make_empty();
+ empty_ae_list();
return visit_continue_with_parent;
}
ir_visitor_status
cse_visitor::visit_enter(ir_function_signature *ir)
{
- ae->make_empty();
+ empty_ae_list();
visit_list_elements(this, &ir->body);
- ae->make_empty();
+ empty_ae_list();
return visit_continue_with_parent;
}
ir_visitor_status
cse_visitor::visit_enter(ir_loop *ir)
{
- ae->make_empty();
+ empty_ae_list();
visit_list_elements(this, &ir->body_instructions);
- ae->make_empty();
+ empty_ae_list();
return visit_continue_with_parent;
}
diff --git a/mesalib/src/glsl/s_expression.cpp b/mesalib/src/glsl/s_expression.cpp
index 7eaa491e2..f82e155a6 100644
--- a/mesalib/src/glsl/s_expression.cpp
+++ b/mesalib/src/glsl/s_expression.cpp
@@ -23,8 +23,8 @@
*/
#include <assert.h>
-#include <limits>
#include <stdio.h>
+#include <math.h>
#include "s_expression.h"
s_symbol::s_symbol(const char *str, size_t n)
@@ -70,7 +70,7 @@ read_atom(void *ctx, const char *&src, char *&symbol_buffer)
// requires strtof to parse '+INF' as +Infinity, but we still support some
// non-C99-compliant compilers (e.g. MSVC).
if (n == 4 && strncmp(src, "+INF", 4) == 0) {
- expr = new(ctx) s_float(std::numeric_limits<float>::infinity());
+ expr = new(ctx) s_float(INFINITY);
} else {
// Check if the atom is a number.
char *float_end = NULL;
diff --git a/mesalib/src/glsl/shader_enums.h b/mesalib/src/glsl/shader_enums.h
new file mode 100644
index 000000000..79e0f6b5f
--- /dev/null
+++ b/mesalib/src/glsl/shader_enums.h
@@ -0,0 +1,187 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SHADER_ENUMS_H
+#define SHADER_ENUMS_H
+
+/**
+ * Shader stages. Note that more stages will be added with tessellation.
+ *
+ * The order must match how shaders are ordered in the pipeline.
+ * The GLSL linker assumes that if i<j, then the j-th shader is
+ * executed later than the i-th shader.
+ */
+typedef enum
+{
+ MESA_SHADER_VERTEX = 0,
+ MESA_SHADER_GEOMETRY = 1,
+ MESA_SHADER_FRAGMENT = 2,
+ MESA_SHADER_COMPUTE = 3,
+} gl_shader_stage;
+
+#define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
+
+/**
+ * Bitflags for system values (uint64_t from <stdint.h> must be in scope).
+ */
+#define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID)
+#define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS)
+#define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN)
+/**
+ * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be
+ * one of these values. If a NIR variable's mode is nir_var_system_value, it
+ * will be one of these values.
+ */
+typedef enum
+{
+ /**
+ * \name Vertex shader system values
+ */
+ /*@{*/
+ /**
+ * OpenGL-style vertex ID.
+ *
+ * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the
+ * OpenGL 3.3 core profile spec says:
+ *
+ * "gl_VertexID holds the integer index i implicitly passed by
+ * DrawArrays or one of the other drawing commands defined in section
+ * 2.8.3."
+ *
+ * Section 2.8.3 (Drawing Commands) of the same spec says:
+ *
+ * "The commands....are equivalent to the commands with the same base
+ * name (without the BaseVertex suffix), except that the ith element
+ * transferred by the corresponding draw call will be taken from
+ * element indices[i] + basevertex of each enabled array."
+ *
+ * Additionally, the overview in the GL_ARB_shader_draw_parameters spec
+ * says:
+ *
+ * "In unextended GL, vertex shaders have inputs named gl_VertexID and
+ * gl_InstanceID, which contain, respectively the index of the vertex
+ * and instance. The value of gl_VertexID is the implicitly passed
+ * index of the vertex being processed, which includes the value of
+ * baseVertex, for those commands that accept it."
+ *
+ * gl_VertexID gets basevertex added in. This differs from DirectX where
+ * SV_VertexID does \b not get basevertex added in.
+ *
+ * \note
+ * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be
+ * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus
+ * \c SYSTEM_VALUE_BASE_VERTEX.
+ *
+ * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX
+ */
+ SYSTEM_VALUE_VERTEX_ID,
+
+ /**
+ * Instance ID as supplied to gl_InstanceID
+ *
+ * Values assigned to gl_InstanceID always begin with zero, regardless of
+ * the value of baseinstance.
+ *
+ * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec
+ * says:
+ *
+ * "gl_InstanceID holds the integer instance number of the current
+ * primitive in an instanced draw call (see section 10.5)."
+ *
+ * Through a big chain of pseudocode, section 10.5 describes that
+ * baseinstance is not counted by gl_InstanceID. In that section, notice
+ *
+ * "If an enabled vertex attribute array is instanced (it has a
+ * non-zero divisor as specified by VertexAttribDivisor), the element
+ * index that is transferred to the GL, for all vertices, is given by
+ *
+ * floor(instance/divisor) + baseinstance
+ *
+ * If an array corresponding to an attribute required by a vertex
+ * shader is not enabled, then the corresponding element is taken from
+ * the current attribute state (see section 10.2)."
+ *
+ * Note that baseinstance is \b not included in the value of instance.
+ */
+ SYSTEM_VALUE_INSTANCE_ID,
+
+ /**
+ * DirectX-style vertex ID.
+ *
+ * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include
+ * the value of basevertex.
+ *
+ * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX
+ */
+ SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+
+ /**
+ * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar
+ * functions.
+ *
+ * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
+ */
+ SYSTEM_VALUE_BASE_VERTEX,
+ /*@}*/
+
+ /**
+ * \name Geometry shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_INVOCATION_ID,
+ /*@}*/
+
+ /**
+ * \name Fragment shader system values
+ */
+ /*@{*/
+ SYSTEM_VALUE_FRONT_FACE, /**< (not done yet) */
+ SYSTEM_VALUE_SAMPLE_ID,
+ SYSTEM_VALUE_SAMPLE_POS,
+ SYSTEM_VALUE_SAMPLE_MASK_IN,
+ /*@}*/
+
+ SYSTEM_VALUE_MAX /**< Number of values */
+} gl_system_value;
+
+
+/**
+ * The possible interpolation qualifiers that can be applied to a fragment
+ * shader input in GLSL.
+ *
+ * Note: INTERP_QUALIFIER_NONE must be 0 so that memsetting the
+ * gl_fragment_program data structure to 0 causes the default behavior.
+ */
+enum glsl_interp_qualifier
+{
+ INTERP_QUALIFIER_NONE = 0,
+ INTERP_QUALIFIER_SMOOTH,
+ INTERP_QUALIFIER_FLAT,
+ INTERP_QUALIFIER_NOPERSPECTIVE,
+ INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */
+};
+
+
+#endif /* SHADER_ENUMS_H */