aboutsummaryrefslogtreecommitdiff
path: root/mesalib/src/glsl/nir
diff options
context:
space:
mode:
Diffstat (limited to 'mesalib/src/glsl/nir')
-rw-r--r--mesalib/src/glsl/nir/glsl_to_nir.cpp92
-rw-r--r--mesalib/src/glsl/nir/nir.c64
-rw-r--r--mesalib/src/glsl/nir/nir.h51
-rw-r--r--mesalib/src/glsl/nir/nir_algebraic.py17
-rw-r--r--mesalib/src/glsl/nir/nir_builder.h135
-rw-r--r--mesalib/src/glsl/nir/nir_from_ssa.c9
-rw-r--r--mesalib/src/glsl/nir/nir_lower_idiv.c155
-rw-r--r--mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c2
-rw-r--r--mesalib/src/glsl/nir/nir_lower_samplers.cpp45
-rw-r--r--mesalib/src/glsl/nir/nir_lower_tex_projector.c143
-rw-r--r--mesalib/src/glsl/nir/nir_lower_var_copies.c8
-rw-r--r--mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c2
-rw-r--r--mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c110
-rw-r--r--mesalib/src/glsl/nir/nir_opcodes.py8
-rw-r--r--mesalib/src/glsl/nir/nir_opt_algebraic.py63
-rw-r--r--mesalib/src/glsl/nir/nir_opt_cse.c32
-rw-r--r--mesalib/src/glsl/nir/nir_opt_peephole_ffma.c261
-rw-r--r--mesalib/src/glsl/nir/nir_opt_peephole_select.c4
-rw-r--r--mesalib/src/glsl/nir/nir_print.c36
-rw-r--r--mesalib/src/glsl/nir/nir_remove_dead_variables.c22
-rw-r--r--mesalib/src/glsl/nir/nir_search.c4
-rw-r--r--mesalib/src/glsl/nir/nir_split_var_copies.c4
-rw-r--r--mesalib/src/glsl/nir/nir_sweep.c172
-rw-r--r--mesalib/src/glsl/nir/nir_to_ssa.c2
-rw-r--r--mesalib/src/glsl/nir/nir_types.cpp6
-rw-r--r--mesalib/src/glsl/nir/nir_types.h1
-rw-r--r--mesalib/src/glsl/nir/nir_validate.c16
27 files changed, 1264 insertions, 200 deletions
diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp
index 357944da6..f6b8331d4 100644
--- a/mesalib/src/glsl/nir/glsl_to_nir.cpp
+++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp
@@ -88,6 +88,8 @@ private:
exec_list *cf_node_list;
nir_instr *result; /* result of the expression tree last visited */
+ nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
+
/* the head of the dereference chain we're creating */
nir_deref_var *deref_head;
/* the tail of the dereference chain we're creating */
@@ -156,6 +158,14 @@ nir_visitor::~nir_visitor()
_mesa_hash_table_destroy(this->overload_table, NULL);
}
+nir_deref_var *
+nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir)
+{
+ ir->accept(this);
+ ralloc_steal(mem_ctx, this->deref_head);
+ return this->deref_head;
+}
+
static nir_constant *
constant_copy(ir_constant *ir, void *mem_ctx)
{
@@ -582,13 +592,11 @@ void
nir_visitor::visit(ir_return *ir)
{
if (ir->value != NULL) {
- ir->value->accept(this);
nir_intrinsic_instr *copy =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
- copy->variables[0] = nir_deref_var_create(this->shader,
- this->impl->return_var);
- copy->variables[1] = this->deref_head;
+ copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var);
+ copy->variables[1] = evaluate_deref(&copy->instr, ir->value);
}
nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
@@ -613,8 +621,7 @@ nir_visitor::visit(ir_call *ir)
nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
ir_dereference *param =
(ir_dereference *) ir->actual_parameters.get_head();
- param->accept(this);
- instr->variables[0] = this->deref_head;
+ instr->variables[0] = evaluate_deref(&instr->instr, param);
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
@@ -623,8 +630,7 @@ nir_visitor::visit(ir_call *ir)
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store_instr->num_components = 1;
- ir->return_deref->accept(this);
- store_instr->variables[0] = this->deref_head;
+ store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref);
store_instr->src[0].is_ssa = true;
store_instr->src[0].ssa = &instr->dest.ssa;
@@ -642,13 +648,11 @@ nir_visitor::visit(ir_call *ir)
unsigned i = 0;
foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
- param->accept(this);
- instr->params[i] = this->deref_head;
+ instr->params[i] = evaluate_deref(&instr->instr, param);
i++;
}
- ir->return_deref->accept(this);
- instr->return_deref = this->deref_head;
+ instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
}
@@ -663,12 +667,8 @@ nir_visitor::visit(ir_assignment *ir)
nir_intrinsic_instr *copy =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
- ir->lhs->accept(this);
- copy->variables[0] = this->deref_head;
-
- ir->rhs->accept(this);
- copy->variables[1] = this->deref_head;
-
+ copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs);
+ copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs);
if (ir->condition) {
nir_if *if_stmt = nir_if_create(this->shader);
@@ -700,6 +700,7 @@ nir_visitor::visit(ir_assignment *ir)
load->num_components = ir->lhs->type->vector_elements;
nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
load->variables[0] = lhs_deref;
+ ralloc_steal(load, load->variables[0]);
nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr);
nir_op vec_op;
@@ -741,7 +742,7 @@ nir_visitor::visit(ir_assignment *ir)
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
store->num_components = ir->lhs->type->vector_elements;
- nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref);
+ nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
store->variables[0] = nir_deref_as_var(store_deref);
store->src[0] = src;
@@ -816,6 +817,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
load_instr->num_components = ir->type->vector_elements;
load_instr->variables[0] = this->deref_head;
+ ralloc_steal(load_instr, load_instr->variables[0]);
add_instr(&load_instr->instr, ir->type->vector_elements);
}
@@ -959,6 +961,7 @@ nir_visitor::visit(ir_expression *ir)
nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
intrin->num_components = deref->type->vector_elements;
intrin->variables[0] = this->deref_head;
+ ralloc_steal(intrin, intrin->variables[0]);
if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
@@ -1087,12 +1090,6 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break;
case ir_unop_sin: emit(nir_op_fsin, dest_size, srcs); break;
case ir_unop_cos: emit(nir_op_fcos, dest_size, srcs); break;
- case ir_unop_sin_reduced:
- emit(nir_op_fsin_reduced, dest_size, srcs);
- break;
- case ir_unop_cos_reduced:
- emit(nir_op_fcos_reduced, dest_size, srcs);
- break;
case ir_unop_dFdx: emit(nir_op_fddx, dest_size, srcs); break;
case ir_unop_dFdy: emit(nir_op_fddy, dest_size, srcs); break;
case ir_unop_dFdx_fine: emit(nir_op_fddx_fine, dest_size, srcs); break;
@@ -1210,6 +1207,9 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_bit_and:
case ir_binop_bit_or:
case ir_binop_bit_xor:
+ case ir_binop_logic_and:
+ case ir_binop_logic_or:
+ case ir_binop_logic_xor:
case ir_binop_lshift:
case ir_binop_rshift:
switch (ir->operation) {
@@ -1270,6 +1270,24 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_bit_xor:
op = nir_op_ixor;
break;
+ case ir_binop_logic_and:
+ if (supports_ints)
+ op = nir_op_iand;
+ else
+ op = nir_op_fand;
+ break;
+ case ir_binop_logic_or:
+ if (supports_ints)
+ op = nir_op_ior;
+ else
+ op = nir_op_for;
+ break;
+ case ir_binop_logic_xor:
+ if (supports_ints)
+ op = nir_op_ixor;
+ else
+ op = nir_op_fxor;
+ break;
case ir_binop_lshift:
op = nir_op_ishl;
break;
@@ -1444,24 +1462,6 @@ nir_visitor::visit(ir_expression *ir)
}
}
break;
- case ir_binop_logic_and:
- if (supports_ints)
- emit(nir_op_iand, dest_size, srcs);
- else
- emit(nir_op_fand, dest_size, srcs);
- break;
- case ir_binop_logic_or:
- if (supports_ints)
- emit(nir_op_ior, dest_size, srcs);
- else
- emit(nir_op_for, dest_size, srcs);
- break;
- case ir_binop_logic_xor:
- if (supports_ints)
- emit(nir_op_ixor, dest_size, srcs);
- else
- emit(nir_op_fxor, dest_size, srcs);
- break;
case ir_binop_dot:
switch (ir->operands[0]->type->vector_elements) {
case 2: emit(nir_op_fdot2, dest_size, srcs); break;
@@ -1633,8 +1633,7 @@ nir_visitor::visit(ir_texture *ir)
unreachable("not reached");
}
- ir->sampler->accept(this);
- instr->sampler = this->deref_head;
+ instr->sampler = evaluate_deref(&instr->instr, ir->sampler);
unsigned src_number = 0;
@@ -1759,7 +1758,7 @@ nir_visitor::visit(ir_dereference_record *ir)
int field_index = this->deref_tail->type->field_index(ir->field);
assert(field_index >= 0);
- nir_deref_struct *deref = nir_deref_struct_create(this->shader, field_index);
+ nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index);
deref->deref.type = ir->type;
this->deref_tail->child = &deref->deref;
this->deref_tail = &deref->deref;
@@ -1783,5 +1782,6 @@ nir_visitor::visit(ir_dereference_array *ir)
ir->array->accept(this);
this->deref_tail->child = &deref->deref;
+ ralloc_steal(this->deref_tail, deref);
this->deref_tail = &deref->deref;
}
diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c
index 6459d5108..c6e53612b 100644
--- a/mesalib/src/glsl/nir/nir.c
+++ b/mesalib/src/glsl/nir/nir.c
@@ -58,11 +58,11 @@ reg_create(void *mem_ctx, struct exec_list *list)
nir_register *reg = ralloc(mem_ctx, nir_register);
reg->parent_instr = NULL;
- reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ reg->uses = _mesa_set_create(reg, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ reg->defs = _mesa_set_create(reg, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer,
_mesa_key_pointer_equal);
reg->num_components = 0;
@@ -108,7 +108,7 @@ nir_function_create(nir_shader *shader, const char *name)
exec_list_push_tail(&shader->functions, &func->node);
exec_list_make_empty(&func->overload_list);
- func->name = name;
+ func->name = ralloc_strdup(func, name);
func->shader = shader;
return func;
@@ -285,10 +285,10 @@ nir_block_create(void *mem_ctx)
cf_init(&block->cf_node, nir_cf_node_block);
block->successors[0] = block->successors[1] = NULL;
- block->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
block->imm_dom = NULL;
- block->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
exec_list_make_empty(&block->instr_list);
@@ -381,11 +381,11 @@ alu_src_init(nir_alu_src *src)
}
nir_alu_instr *
-nir_alu_instr_create(void *mem_ctx, nir_op op)
+nir_alu_instr_create(nir_shader *shader, nir_op op)
{
unsigned num_srcs = nir_op_infos[op].num_inputs;
nir_alu_instr *instr =
- ralloc_size(mem_ctx,
+ ralloc_size(shader,
sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
instr_init(&instr->instr, nir_instr_type_alu);
@@ -398,18 +398,18 @@ nir_alu_instr_create(void *mem_ctx, nir_op op)
}
nir_jump_instr *
-nir_jump_instr_create(void *mem_ctx, nir_jump_type type)
+nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
- nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr);
+ nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
instr_init(&instr->instr, nir_instr_type_jump);
instr->type = type;
return instr;
}
nir_load_const_instr *
-nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
+nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
{
- nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr);
+ nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
instr_init(&instr->instr, nir_instr_type_load_const);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
@@ -418,11 +418,11 @@ nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
}
nir_intrinsic_instr *
-nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
+nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
nir_intrinsic_instr *instr =
- ralloc_size(mem_ctx,
+ ralloc_size(shader,
sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
instr_init(&instr->instr, nir_instr_type_intrinsic);
@@ -438,29 +438,29 @@ nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
}
nir_call_instr *
-nir_call_instr_create(void *mem_ctx, nir_function_overload *callee)
+nir_call_instr_create(nir_shader *shader, nir_function_overload *callee)
{
- nir_call_instr *instr = ralloc(mem_ctx, nir_call_instr);
+ nir_call_instr *instr = ralloc(shader, nir_call_instr);
instr_init(&instr->instr, nir_instr_type_call);
instr->callee = callee;
instr->num_params = callee->num_params;
- instr->params = ralloc_array(mem_ctx, nir_deref_var *, instr->num_params);
+ instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params);
instr->return_deref = NULL;
return instr;
}
nir_tex_instr *
-nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
+nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
- nir_tex_instr *instr = ralloc(mem_ctx, nir_tex_instr);
+ nir_tex_instr *instr = ralloc(shader, nir_tex_instr);
instr_init(&instr->instr, nir_instr_type_tex);
dest_init(&instr->dest);
instr->num_srcs = num_srcs;
- instr->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs);
+ instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
for (unsigned i = 0; i < num_srcs; i++)
src_init(&instr->src[i].src);
@@ -472,9 +472,9 @@ nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
}
nir_phi_instr *
-nir_phi_instr_create(void *mem_ctx)
+nir_phi_instr_create(nir_shader *shader)
{
- nir_phi_instr *instr = ralloc(mem_ctx, nir_phi_instr);
+ nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
instr_init(&instr->instr, nir_instr_type_phi);
dest_init(&instr->dest);
@@ -483,9 +483,9 @@ nir_phi_instr_create(void *mem_ctx)
}
nir_parallel_copy_instr *
-nir_parallel_copy_instr_create(void *mem_ctx)
+nir_parallel_copy_instr_create(nir_shader *shader)
{
- nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr);
+ nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
instr_init(&instr->instr, nir_instr_type_parallel_copy);
exec_list_make_empty(&instr->entries);
@@ -494,9 +494,9 @@ nir_parallel_copy_instr_create(void *mem_ctx)
}
nir_ssa_undef_instr *
-nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components)
+nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
{
- nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr);
+ nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
instr_init(&instr->instr, nir_instr_type_ssa_undef);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
@@ -543,7 +543,7 @@ copy_deref_var(void *mem_ctx, nir_deref_var *deref)
nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
ret->deref.type = deref->deref.type;
if (deref->deref.child)
- ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
return ret;
}
@@ -558,7 +558,7 @@ copy_deref_array(void *mem_ctx, nir_deref_array *deref)
}
ret->deref.type = deref->deref.type;
if (deref->deref.child)
- ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
return ret;
}
@@ -568,7 +568,7 @@ copy_deref_struct(void *mem_ctx, nir_deref_struct *deref)
nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index);
ret->deref.type = deref->deref.type;
if (deref->deref.child)
- ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
return ret;
}
@@ -1834,13 +1834,11 @@ void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components, const char *name)
{
- void *mem_ctx = ralloc_parent(instr);
-
def->name = name;
def->parent_instr = instr;
- def->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ def->uses = _mesa_set_create(instr, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- def->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ def->if_uses = _mesa_set_create(instr, _mesa_hash_pointer,
_mesa_key_pointer_equal);
def->num_components = num_components;
diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h
index 29fe94243..74772c798 100644
--- a/mesalib/src/glsl/nir/nir.h
+++ b/mesalib/src/glsl/nir/nir.h
@@ -34,6 +34,7 @@
#include "util/set.h"
#include "util/bitset.h"
#include "nir_types.h"
+#include "glsl/shader_enums.h"
#include <stdio.h>
#include "nir_opcodes.h"
@@ -529,6 +530,16 @@ nir_src_for_reg(nir_register *reg)
return src;
}
+static inline nir_instr *
+nir_src_get_parent_instr(const nir_src *src)
+{
+ if (src->is_ssa) {
+ return src->ssa->parent_instr;
+ } else {
+ return src->reg.reg->parent_instr;
+ }
+}
+
static inline nir_dest
nir_dest_for_reg(nir_register *reg)
{
@@ -1365,11 +1376,17 @@ typedef struct nir_function {
typedef struct nir_shader_compiler_options {
bool lower_ffma;
+ bool lower_flrp;
bool lower_fpow;
bool lower_fsat;
bool lower_fsqrt;
/** lowers fneg and ineg to fsub and isub. */
bool lower_negate;
+ /** lowers fsub and isub to fadd+fneg and iadd+ineg. */
+ bool lower_sub;
+
+ /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
+ bool lower_scmp;
/**
* Does the driver support real 32-bit integers? (Otherwise, integers
@@ -1414,6 +1431,9 @@ typedef struct nir_shader {
* access plus one
*/
unsigned num_inputs, num_uniforms, num_outputs;
+
+ /** the number of uniforms that are only accessed directly */
+ unsigned num_direct_uniforms;
} nir_shader;
#define nir_foreach_overload(shader, overload) \
@@ -1466,26 +1486,26 @@ void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
-nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op);
+nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
-nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type);
+nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
-nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx,
+nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
unsigned num_components);
-nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx,
+nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
nir_intrinsic_op op);
-nir_call_instr *nir_call_instr_create(void *mem_ctx,
+nir_call_instr *nir_call_instr_create(nir_shader *shader,
nir_function_overload *callee);
-nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs);
+nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
-nir_phi_instr *nir_phi_instr_create(void *mem_ctx);
+nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
-nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx);
+nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
-nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx,
+nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
unsigned num_components);
nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
@@ -1550,7 +1570,7 @@ void nir_print_instr(const nir_instr *instr, FILE *fp);
#ifdef DEBUG
void nir_validate_shader(nir_shader *shader);
#else
-static inline void nir_validate_shader(nir_shader *shader) { }
+static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
#endif /* DEBUG */
void nir_calc_dominance_impl(nir_function_impl *impl);
@@ -1596,14 +1616,18 @@ void nir_lower_alu_to_scalar(nir_shader *shader);
void nir_lower_phis_to_scalar(nir_shader *shader);
void nir_lower_samplers(nir_shader *shader,
- struct gl_shader_program *shader_program,
- struct gl_program *prog);
+ const struct gl_shader_program *shader_program,
+ gl_shader_stage stage);
void nir_lower_system_values(nir_shader *shader);
+void nir_lower_tex_projector(nir_shader *shader);
+void nir_lower_idiv(nir_shader *shader);
void nir_lower_atomics(nir_shader *shader);
void nir_lower_to_source_mods(nir_shader *shader);
+void nir_normalize_cubemap_coords(nir_shader *shader);
+
void nir_live_variables_impl(nir_function_impl *impl);
bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
@@ -1612,6 +1636,7 @@ void nir_convert_to_ssa(nir_shader *shader);
void nir_convert_from_ssa(nir_shader *shader);
bool nir_opt_algebraic(nir_shader *shader);
+bool nir_opt_algebraic_late(nir_shader *shader);
bool nir_opt_constant_folding(nir_shader *shader);
bool nir_opt_global_to_local(nir_shader *shader);
@@ -1631,6 +1656,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader);
bool nir_opt_remove_phis(nir_shader *shader);
+void nir_sweep(nir_shader *shader);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/mesalib/src/glsl/nir/nir_algebraic.py b/mesalib/src/glsl/nir/nir_algebraic.py
index afab1a008..bbf4f08ef 100644
--- a/mesalib/src/glsl/nir/nir_algebraic.py
+++ b/mesalib/src/glsl/nir/nir_algebraic.py
@@ -181,12 +181,23 @@ _algebraic_pass_template = mako.template.Template("""
#include "nir.h"
#include "nir_search.h"
+#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+
struct transform {
const nir_search_expression *search;
const nir_search_value *replace;
unsigned condition_offset;
};
+struct opt_state {
+ void *mem_ctx;
+ bool progress;
+ const bool *condition_flags;
+};
+
+#endif
+
% for (opcode, xform_list) in xform_dict.iteritems():
% for xform in xform_list:
${xform.search.render()}
@@ -200,12 +211,6 @@ static const struct transform ${pass_name}_${opcode}_xforms[] = {
};
% endfor
-struct opt_state {
- void *mem_ctx;
- bool progress;
- const bool *condition_flags;
-};
-
static bool
${pass_name}_block(nir_block *block, void *void_state)
{
diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h
index 7c4f7fd96..d1419ee21 100644
--- a/mesalib/src/glsl/nir/nir_builder.h
+++ b/mesalib/src/glsl/nir/nir_builder.h
@@ -28,6 +28,9 @@ struct exec_list;
typedef struct nir_builder {
struct exec_list *cf_node_list;
+ nir_instr *before_instr;
+ nir_instr *after_instr;
+
nir_shader *shader;
nir_function_impl *impl;
} nir_builder;
@@ -45,8 +48,75 @@ nir_builder_insert_after_cf_list(nir_builder *build,
struct exec_list *cf_node_list)
{
build->cf_node_list = cf_node_list;
+ build->before_instr = NULL;
+ build->after_instr = NULL;
+}
+
+static inline void
+nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr)
+{
+ build->cf_node_list = NULL;
+ build->before_instr = before_instr;
+ build->after_instr = NULL;
}
+static inline void
+nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr)
+{
+ build->cf_node_list = NULL;
+ build->before_instr = NULL;
+ build->after_instr = after_instr;
+}
+
+static inline void
+nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
+{
+ if (build->cf_node_list) {
+ nir_instr_insert_after_cf_list(build->cf_node_list, instr);
+ } else if (build->before_instr) {
+ nir_instr_insert_before(build->before_instr, instr);
+ } else {
+ assert(build->after_instr);
+ nir_instr_insert_after(build->after_instr, instr);
+ build->after_instr = instr;
+ }
+}
+
+static inline nir_ssa_def *
+nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value)
+{
+ nir_load_const_instr *load_const =
+ nir_load_const_instr_create(build->shader, num_components);
+ if (!load_const)
+ return NULL;
+
+ load_const->value = value;
+
+ nir_builder_instr_insert(build, &load_const->instr);
+
+ return &load_const->def;
+}
+
+static inline nir_ssa_def *
+nir_imm_float(nir_builder *build, float x)
+{
+ nir_const_value v = { { .f = {x, 0, 0, 0} } };
+ return nir_build_imm(build, 1, v);
+}
+
+static inline nir_ssa_def *
+nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
+{
+ nir_const_value v = { { .f = {x, y, z, w} } };
+ return nir_build_imm(build, 4, v);
+}
+
+static inline nir_ssa_def *
+nir_imm_int(nir_builder *build, int x)
+{
+ nir_const_value v = { { .i = {x, 0, 0, 0} } };
+ return nir_build_imm(build, 1, v);
+}
static inline nir_ssa_def *
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
@@ -90,7 +160,7 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
instr->dest.write_mask = (1 << num_components) - 1;
- nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr);
+ nir_builder_instr_insert(build, &instr->instr);
return &instr->dest.dest.ssa;
}
@@ -127,4 +197,67 @@ nir_##op(nir_builder *build, nir_ssa_def *src0, \
#include "nir_builder_opcodes.h"
+/**
+ * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
+ */
+static inline nir_ssa_def *
+nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+ nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ mov->dest.write_mask = (1 << num_components) - 1;
+ mov->src[0] = src;
+ nir_builder_instr_insert(build, &mov->instr);
+
+ return &mov->dest.dest.ssa;
+}
+
+static inline nir_ssa_def *
+nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+ nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ mov->dest.write_mask = (1 << num_components) - 1;
+ mov->src[0] = src;
+ nir_builder_instr_insert(build, &mov->instr);
+
+ return &mov->dest.dest.ssa;
+}
+
+/**
+ * Construct an fmov or imov that reswizzles the source's components.
+ */
+static inline nir_ssa_def *
+nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
+ unsigned num_components, bool use_fmov)
+{
+ nir_alu_src alu_src;
+ memset(&alu_src, 0, sizeof(alu_src));
+ alu_src.src = nir_src_for_ssa(src);
+ for (int i = 0; i < 4; i++)
+ alu_src.swizzle[i] = swiz[i];
+
+ return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :
+ nir_imov_alu(build, alu_src, num_components);
+}
+
+/**
+ * Turns a nir_src into a nir_ssa_def * so it can be passed to
+ * nir_build_alu()-based builder calls.
+ */
+static inline nir_ssa_def *
+nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
+{
+ if (src.is_ssa && src.ssa->num_components == num_components)
+ return src.ssa;
+
+ nir_alu_src alu;
+ memset(&alu, 0, sizeof(alu));
+ alu.src = src;
+ for (int j = 0; j < 4; j++)
+ alu.swizzle[j] = j;
+
+ return nir_imov_alu(build, alu, num_components);
+}
+
#endif /* NIR_BUILDER_H */
diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c
index c3090fb06..184698abd 100644
--- a/mesalib/src/glsl/nir/nir_from_ssa.c
+++ b/mesalib/src/glsl/nir/nir_from_ssa.c
@@ -509,12 +509,13 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
reg->num_components = def->num_components;
reg->num_array_elems = 0;
- /* This register comes from an SSA definition that was not part of a
- * phi-web. Therefore, we know it has a single unique definition
- * that dominates all of its uses. Therefore, we can copy the
+ /* This register comes from an SSA definition that is defined and not
+ * part of a phi-web. Therefore, we know it has a single unique
+ * definition that dominates all of its uses; we can copy the
* parent_instr from the SSA def safely.
*/
- reg->parent_instr = def->parent_instr;
+ if (def->parent_instr->type != nir_instr_type_ssa_undef)
+ reg->parent_instr = def->parent_instr;
_mesa_hash_table_insert(state->ssa_table, def, reg);
return reg;
diff --git a/mesalib/src/glsl/nir/nir_lower_idiv.c b/mesalib/src/glsl/nir/nir_lower_idiv.c
new file mode 100644
index 000000000..7b6803207
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_idiv.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Lowers idiv/udiv/umod
+ * Based on NV50LegalizeSSA::handleDIV()
+ *
+ * Note that this is probably not enough precision for compute shaders.
+ * Perhaps we want a second higher precision (looping) version of this?
+ * Or perhaps we assume if you can do compute shaders you can also
+ * branch out to a pre-optimized shader library routine..
+ */
+
+static void
+convert_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
+ nir_op op = alu->op;
+ bool is_signed;
+
+ if ((op != nir_op_idiv) &&
+ (op != nir_op_udiv) &&
+ (op != nir_op_umod))
+ return;
+
+ is_signed = (op == nir_op_idiv);
+
+ nir_builder_insert_before_instr(bld, &alu->instr);
+
+ numer = nir_ssa_for_src(bld, alu->src[0].src,
+ nir_ssa_alu_instr_src_components(alu, 0));
+ denom = nir_ssa_for_src(bld, alu->src[1].src,
+ nir_ssa_alu_instr_src_components(alu, 1));
+
+ if (is_signed) {
+ af = nir_i2f(bld, numer);
+ bf = nir_i2f(bld, denom);
+ af = nir_fabs(bld, af);
+ bf = nir_fabs(bld, bf);
+ a = nir_iabs(bld, numer);
+ b = nir_iabs(bld, denom);
+ } else {
+ af = nir_u2f(bld, numer);
+ bf = nir_u2f(bld, denom);
+ a = numer;
+ b = denom;
+ }
+
+ /* get first result: */
+ bf = nir_frcp(bld, bf);
+ bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */
+ q = nir_fmul(bld, af, bf);
+
+ if (is_signed) {
+ q = nir_f2i(bld, q);
+ } else {
+ q = nir_f2u(bld, q);
+ }
+
+ /* get error of first result: */
+ r = nir_imul(bld, q, b);
+ r = nir_isub(bld, a, r);
+ r = nir_u2f(bld, r);
+ r = nir_fmul(bld, r, bf);
+ r = nir_f2u(bld, r);
+
+ /* add quotients: */
+ q = nir_iadd(bld, q, r);
+
+ /* correction: if modulus >= divisor, add 1 */
+ r = nir_imul(bld, q, b);
+ r = nir_isub(bld, a, r);
+
+ r = nir_ige(bld, r, b);
+ r = nir_b2i(bld, r);
+
+ q = nir_iadd(bld, q, r);
+ if (is_signed) {
+ /* fix the sign: */
+ r = nir_ixor(bld, numer, denom);
+ r = nir_ushr(bld, r, nir_imm_int(bld, 31));
+ r = nir_i2b(bld, r);
+ b = nir_ineg(bld, q);
+ q = nir_bcsel(bld, r, b, q);
+ }
+
+ if (op == nir_op_umod) {
+ /* division result in q */
+ r = nir_imul(bld, q, b);
+ q = nir_isub(bld, a, r);
+ }
+
+ assert(alu->dest.dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+ nir_src_for_ssa(q),
+ ralloc_parent(alu));
+}
+
+static bool
+convert_block(nir_block *block, void *state)
+{
+ nir_builder *b = state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_alu)
+ convert_instr(b, nir_instr_as_alu(instr));
+ }
+
+ return true;
+}
+
+static void
+convert_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(impl, convert_block, &b);
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+}
+
+void
+nir_lower_idiv(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl)
+ convert_impl(overload->impl);
+ }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
index 7cd93ea0a..4bdb80072 100644
--- a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -223,7 +223,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
else
nir_instr_insert_after_block(src->pred, &mov->instr);
- nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src);
+ nir_phi_src *new_src = ralloc(new_phi, nir_phi_src);
new_src->pred = src->pred;
new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa);
diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
index 3015dbd09..cf8ab8325 100644
--- a/mesalib/src/glsl/nir/nir_lower_samplers.cpp
+++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
@@ -36,33 +36,26 @@ extern "C" {
}
static unsigned
-get_sampler_index(struct gl_shader_program *shader_program, const char *name,
- const struct gl_program *prog)
+get_sampler_index(const struct gl_shader_program *shader_program,
+ gl_shader_stage stage, const char *name)
{
- GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
-
unsigned location;
if (!shader_program->UniformHash->get(location, name)) {
- linker_error(shader_program,
- "failed to find sampler named %s.\n", name);
+ assert(!"failed to find sampler");
return 0;
}
- if (!shader_program->UniformStorage[location].sampler[shader].active) {
- assert(0 && "cannot return a sampler");
- linker_error(shader_program,
- "cannot return a sampler named %s, because it is not "
- "used in this shader stage. This is a driver bug.\n",
- name);
+ if (!shader_program->UniformStorage[location].sampler[stage].active) {
+ assert(!"cannot return a sampler");
return 0;
}
- return shader_program->UniformStorage[location].sampler[shader].index;
+ return shader_program->UniformStorage[location].sampler[stage].index;
}
static void
-lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
- const struct gl_program *prog, void *mem_ctx)
+lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage, void *mem_ctx)
{
if (instr->sampler == NULL)
return;
@@ -90,7 +83,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset);
break;
case nir_deref_array_type_indirect: {
- instr->src = reralloc(mem_ctx, instr->src, nir_tex_src,
+ instr->src = reralloc(instr, instr->src, nir_tex_src,
instr->num_srcs + 1);
memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src);
instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
@@ -133,15 +126,15 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
}
}
- instr->sampler_index += get_sampler_index(shader_program, name, prog);
+ instr->sampler_index += get_sampler_index(shader_program, stage, name);
instr->sampler = NULL;
}
typedef struct {
void *mem_ctx;
- struct gl_shader_program *shader_program;
- struct gl_program *prog;
+ const struct gl_shader_program *shader_program;
+ gl_shader_stage stage;
} lower_state;
static bool
@@ -152,7 +145,7 @@ lower_block_cb(nir_block *block, void *_state)
nir_foreach_instr(block, instr) {
if (instr->type == nir_instr_type_tex) {
nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
- lower_sampler(tex_instr, state->shader_program, state->prog,
+ lower_sampler(tex_instr, state->shader_program, state->stage,
state->mem_ctx);
}
}
@@ -161,24 +154,24 @@ lower_block_cb(nir_block *block, void *_state)
}
static void
-lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program,
- struct gl_program *prog)
+lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage)
{
lower_state state;
state.mem_ctx = ralloc_parent(impl);
state.shader_program = shader_program;
- state.prog = prog;
+ state.stage = stage;
nir_foreach_block(impl, lower_block_cb, &state);
}
extern "C" void
-nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program,
- struct gl_program *prog)
+nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage)
{
nir_foreach_overload(shader, overload) {
if (overload->impl)
- lower_impl(overload->impl, shader_program, prog);
+ lower_impl(overload->impl, shader_program, stage);
}
}
diff --git a/mesalib/src/glsl/nir/nir_lower_tex_projector.c b/mesalib/src/glsl/nir/nir_lower_tex_projector.c
new file mode 100644
index 000000000..6b0e9c340
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_tex_projector.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This lowering pass converts the coordinate division for texture projection
+ * to be done in ALU instructions instead of asking the texture operation to
+ * do so.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static nir_ssa_def *
+channel(nir_builder *b, nir_ssa_def *def, int c)
+{
+ return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
+}
+
+static bool
+nir_lower_tex_projector_block(nir_block *block, void *void_state)
+{
+ nir_builder *b = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
+
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ nir_builder_insert_before_instr(b, &tex->instr);
+
+ /* Find the projector in the srcs list, if present. */
+ int proj_index;
+ for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+ if (tex->src[proj_index].src_type == nir_tex_src_projector)
+ break;
+ }
+ if (proj_index == tex->num_srcs)
+ continue;
+ nir_ssa_def *inv_proj =
+ nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+ /* Walk through the sources projecting the arguments. */
+ for (int i = 0; i < tex->num_srcs; i++) {
+ switch (tex->src[i].src_type) {
+ case nir_tex_src_coord:
+ case nir_tex_src_comparitor:
+ break;
+ default:
+ continue;
+ }
+ nir_ssa_def *unprojected =
+ nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+ nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+ /* Array indices don't get projected, so make an new vector with the
+ * coordinate's array index untouched.
+ */
+ if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+ switch (tex->coord_components) {
+ case 4:
+ projected = nir_vec4(b,
+ channel(b, projected, 0),
+ channel(b, projected, 1),
+ channel(b, projected, 2),
+ channel(b, unprojected, 3));
+ break;
+ case 3:
+ projected = nir_vec3(b,
+ channel(b, projected, 0),
+ channel(b, projected, 1),
+ channel(b, unprojected, 2));
+ break;
+ case 2:
+ projected = nir_vec2(b,
+ channel(b, projected, 0),
+ channel(b, unprojected, 1));
+ break;
+ default:
+ unreachable("bad texture coord count for array");
+ break;
+ }
+ }
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(projected));
+ }
+
+ /* Now move the later tex sources down the array so that the projector
+ * disappears.
+ */
+ nir_src dead;
+ memset(&dead, 0, sizeof dead);
+ nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead);
+ memmove(&tex->src[proj_index],
+ &tex->src[proj_index + 1],
+ (tex->num_srcs - proj_index) * sizeof(*tex->src));
+ tex->num_srcs--;
+ }
+
+ return true;
+}
+
+static void
+nir_lower_tex_projector_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(impl, nir_lower_tex_projector_block, &b);
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+}
+
+void
+nir_lower_tex_projector(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl)
+ nir_lower_tex_projector_impl(overload->impl);
+ }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c
index 85ebb281c..58389a7c7 100644
--- a/mesalib/src/glsl/nir/nir_lower_var_copies.c
+++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c
@@ -148,13 +148,10 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
unsigned num_components = glsl_get_vector_elements(src_tail->type);
- nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref);
- nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref);
-
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
load->num_components = num_components;
- load->variables[0] = nir_deref_as_var(src_deref);
+ load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
nir_instr_insert_before(&copy_instr->instr, &load->instr);
@@ -162,7 +159,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
store->num_components = num_components;
- store->variables[0] = nir_deref_as_var(dest_deref);
+ store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
+
store->src[0].is_ssa = true;
store->src[0].ssa = &load->dest.ssa;
diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
index 86e6ab416..2ca74d71b 100644
--- a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -642,7 +642,7 @@ add_phi_sources(nir_block *block, nir_block *pred,
struct deref_node *node = entry->data;
- nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src);
+ nir_phi_src *src = ralloc(phi, nir_phi_src);
src->pred = pred;
src->src.is_ssa = true;
src->src.ssa = get_ssa_def_for_block(node, pred, state);
diff --git a/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c
new file mode 100644
index 000000000..0da8447ac
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand <jason@jlekstrand.net>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * This file implements a NIR lowering pass to perform the normalization of
+ * the cubemap coordinates to have the largest magnitude component be -1.0
+ * or 1.0. This is based on the old GLSL IR based pass by Eric.
+ */
+
+static nir_ssa_def *
+channel(nir_builder *b, nir_ssa_def *def, int c)
+{
+ return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
+}
+
+static bool
+normalize_cubemap_coords_block(nir_block *block, void *void_state)
+{
+ nir_builder *b = void_state;
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
+
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
+ continue;
+
+ nir_builder_insert_before_instr(b, &tex->instr);
+
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ if (tex->src[i].src_type != nir_tex_src_coord)
+ continue;
+
+ nir_ssa_def *orig_coord =
+ nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+ assert(orig_coord->num_components >= 3);
+
+ nir_ssa_def *abs = nir_fabs(b, orig_coord);
+ nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0),
+ nir_fmax(b, channel(b, abs, 1),
+ channel(b, abs, 2)));
+
+ nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm));
+
+ /* Array indices don't have to be normalized, so make a new vector
+ * with the coordinate's array index untouched.
+ */
+ if (tex->coord_components == 4) {
+ normalized = nir_vec4(b,
+ channel(b, normalized, 0),
+ channel(b, normalized, 1),
+ channel(b, normalized, 2),
+ channel(b, orig_coord, 3));
+ }
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(normalized));
+ }
+ }
+
+ return true;
+}
+
+static void
+normalize_cubemap_coords_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(impl, normalize_cubemap_coords_block, &b);
+
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+}
+
+void
+nir_normalize_cubemap_coords(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload)
+ if (overload->impl)
+ normalize_cubemap_coords_impl(overload->impl);
+}
diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py
index 062cd628b..264806f5d 100644
--- a/mesalib/src/glsl/nir/nir_opcodes.py
+++ b/mesalib/src/glsl/nir/nir_opcodes.py
@@ -161,12 +161,12 @@ unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion
-unop_convert("f2b", tfloat, tbool, "src0 == 0.0f")
+unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
# Boolean-to-float conversion
unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
-unop_convert("i2b", tint, tbool, "src0 == 0")
-unop_convert("b2i", tbool, tint, "src0 ? 0 : -1") # Boolean-to-int conversion
+unop_convert("i2b", tint, tbool, "src0 != 0")
+unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
@@ -191,8 +191,6 @@ unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
unop("fsin", tfloat, "sinf(src0)")
unop("fcos", tfloat, "cosf(src0)")
-unop("fsin_reduced", tfloat, "sinf(src0)")
-unop("fcos_reduced", tfloat, "cosf(src0)")
# Partial derivatives.
diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py
index ef855aa77..cdb19241c 100644
--- a/mesalib/src/glsl/nir/nir_opt_algebraic.py
+++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py
@@ -75,6 +75,9 @@ optimizations = [
(('flrp', a, b, 1.0), b),
(('flrp', a, a, b), a),
(('flrp', 0.0, a, b), ('fmul', a, b)),
+ (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+ (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
# Comparison simplifications
@@ -82,10 +85,6 @@ optimizations = [
(('inot', ('fge', a, b)), ('flt', a, b)),
(('inot', ('ilt', a, b)), ('ige', a, b)),
(('inot', ('ige', a, b)), ('ilt', a, b)),
- (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
- (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
- (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
(('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
@@ -95,6 +94,18 @@ optimizations = [
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
(('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+ (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+ (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
+ (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
+ (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
+ (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
+ (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
+ # Emulating booleans
+ (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
+ (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
+ (('iand', 'a@bool', 1.0), ('b2f', a)),
+ (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+ (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
# Comparison with the same args. Note that these are not done for
# the float versions because NaN always returns false on float
# inequalities.
@@ -122,7 +133,7 @@ optimizations = [
(('ishr', 0, a), 0),
(('ishr', a, 0), a),
(('ushr', 0, a), 0),
- (('ushr', a, 0), 0),
+ (('ushr', a, 0), a),
# Exponential/logarithmic identities
(('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
(('fexp', ('flog', a)), a), # e^ln(a) = a
@@ -134,6 +145,26 @@ optimizations = [
(('fpow', a, 1.0), a),
(('fpow', a, 2.0), ('fmul', a, a)),
(('fpow', 2.0, a), ('fexp2', a)),
+ (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+ (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))),
+ (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+ (('frcp', ('fexp', a)), ('fexp', ('fneg', a))),
+ (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+ (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))),
+ (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+ (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))),
+ (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+ (('flog', ('frcp', a)), ('fneg', ('flog', a))),
+ (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+ (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))),
+ (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+ (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))),
+ (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+ (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))),
+ (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+ (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))),
+ (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+ (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))),
# Division and reciprocal
(('fdiv', 1.0, a), ('frcp', a)),
(('frcp', ('frcp', a)), a),
@@ -154,18 +185,21 @@ optimizations = [
(('bcsel', a, b, b), b),
(('fcsel', a, b, b), b),
+ # Conversions
+ (('f2i', ('ftrunc', a)), ('f2i', a)),
+ (('f2u', ('ftrunc', a)), ('f2u', a)),
+
# Subtracts
(('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
(('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+ (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
+ (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
(('ineg', a), ('isub', 0, a), 'options->lower_negate'),
(('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
(('iadd', a, ('isub', 0, b)), ('isub', a, b)),
(('fabs', ('fsub', 0.0, a)), ('fabs', a)),
(('iabs', ('isub', 0, a)), ('iabs', a)),
-
-# This one may not be exact
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
]
# Add optimizations to handle the case where the result of a ternary is
@@ -189,4 +223,17 @@ for op in ['flt', 'fge', 'feq', 'fne',
('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
]
+# This section contains "late" optimizations that should be run after the
+# regular optimizations have finished. Optimizations should go here if
+# they help code generation but do not necessarily produce code that is
+# more easily optimizable.
+late_optimizations = [
+ (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+ (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+ (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+ (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+]
+
print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+ late_optimizations).render()
diff --git a/mesalib/src/glsl/nir/nir_opt_cse.c b/mesalib/src/glsl/nir/nir_opt_cse.c
index 9b383202d..553906e12 100644
--- a/mesalib/src/glsl/nir/nir_opt_cse.c
+++ b/mesalib/src/glsl/nir/nir_opt_cse.c
@@ -37,20 +37,19 @@ struct cse_state {
};
static bool
-nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask)
+nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1,
+ unsigned src2)
{
- if (src1.abs != src2.abs || src1.negate != src2.negate)
+ if (alu1->src[src1].abs != alu2->src[src2].abs ||
+ alu1->src[src1].negate != alu2->src[src2].negate)
return false;
- for (int i = 0; i < 4; ++i) {
- if (!(read_mask & (1 << i)))
- continue;
-
- if (src1.swizzle[i] != src2.swizzle[i])
+ for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
+ if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
return false;
}
- return nir_srcs_equal(src1.src, src2.src);
+ return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
}
static bool
@@ -73,10 +72,17 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
return false;
- for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
- if (!nir_alu_srcs_equal(alu1->src[i], alu2->src[i],
- (1 << alu1->dest.dest.ssa.num_components) - 1))
- return false;
+ if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[alu1->op].num_inputs == 2);
+ return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
+ (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 0));
+ } else {
+ for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
+ if (!nir_alu_srcs_equal(alu1, alu2, i, i))
+ return false;
+ }
}
return true;
}
@@ -154,12 +160,14 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
static bool
src_is_ssa(nir_src *src, void *data)
{
+ (void) data;
return src->is_ssa;
}
static bool
dest_is_ssa(nir_dest *dest, void *data)
{
+ (void) data;
return dest->is_ssa;
}
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
new file mode 100644
index 000000000..9d5646fe6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a small peephole optimization that looks for a multiply that
+ * is only ever used in an add and replaces both with an fma.
+ */
+
+struct peephole_ffma_state {
+ void *mem_ctx;
+ nir_function_impl *impl;
+ bool progress;
+};
+
+static inline bool
+are_all_uses_fadd(nir_ssa_def *def)
+{
+ if (def->if_uses->entries > 0)
+ return false;
+
+ struct set_entry *use_iter;
+ set_foreach(def->uses, use_iter) {
+ nir_instr *use_instr = (nir_instr *)use_iter->key;
+
+ if (use_instr->type != nir_instr_type_alu)
+ return false;
+
+ nir_alu_instr *use_alu = nir_instr_as_alu(use_instr);
+ switch (use_alu->op) {
+ case nir_op_fadd:
+ break; /* This one's ok */
+
+ case nir_op_imov:
+ case nir_op_fmov:
+ case nir_op_fneg:
+ case nir_op_fabs:
+ assert(use_alu->dest.dest.is_ssa);
+ if (!are_all_uses_fadd(&use_alu->dest.dest.ssa))
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static nir_alu_instr *
+get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
+{
+ assert(src->src.is_ssa && !src->abs && !src->negate);
+
+ nir_instr *instr = src->src.ssa->parent_instr;
+ if (instr->type != nir_instr_type_alu)
+ return NULL;
+
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ switch (alu->op) {
+ case nir_op_imov:
+ case nir_op_fmov:
+ alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ break;
+
+ case nir_op_fneg:
+ alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ *negate = !*negate;
+ break;
+
+ case nir_op_fabs:
+ alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ *negate = false;
+ *abs = true;
+ break;
+
+ case nir_op_fmul:
+ /* Only absorb a fmul into a ffma if the fmul is is only used in fadd
+ * operations. This prevents us from being too aggressive with our
+ * fusing which can actually lead to more instructions.
+ */
+ if (!are_all_uses_fadd(&alu->dest.dest.ssa))
+ return NULL;
+ break;
+
+ default:
+ return NULL;
+ }
+
+ if (!alu)
+ return NULL;
+
+ for (unsigned i = 0; i < 4; i++) {
+ if (!(alu->dest.write_mask & (1 << i)))
+ break;
+
+ swizzle[i] = swizzle[src->swizzle[i]];
+ }
+
+ return alu;
+}
+
+static bool
+nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
+{
+ struct peephole_ffma_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *add = nir_instr_as_alu(instr);
+ if (add->op != nir_op_fadd)
+ continue;
+
+ /* TODO: Maybe bail if this expression is considered "precise"? */
+
+ assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
+
+ /* This, is the case a + a. We would rather handle this with an
+ * algebraic reduction than fuse it. Also, we want to only fuse
+ * things where the multiply is used only once and, in this case,
+ * it would be used twice by the same instruction.
+ */
+ if (add->src[0].src.ssa == add->src[1].src.ssa)
+ continue;
+
+ nir_alu_instr *mul;
+ uint8_t add_mul_src, swizzle[4];
+ bool negate, abs;
+ for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) {
+ for (unsigned i = 0; i < 4; i++)
+ swizzle[i] = i;
+
+ negate = false;
+ abs = false;
+
+ mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs);
+
+ if (mul != NULL)
+ break;
+ }
+
+ if (mul == NULL)
+ continue;
+
+ nir_ssa_def *mul_src[2];
+ mul_src[0] = mul->src[0].src.ssa;
+ mul_src[1] = mul->src[1].src.ssa;
+
+ if (abs) {
+ for (unsigned i = 0; i < 2; i++) {
+ nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx,
+ nir_op_fabs);
+ abs->src[0].src = nir_src_for_ssa(mul_src[i]);
+ nir_ssa_dest_init(&abs->instr, &abs->dest.dest,
+ mul_src[i]->num_components, NULL);
+ abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1;
+ nir_instr_insert_before(&add->instr, &abs->instr);
+ mul_src[i] = &abs->dest.dest.ssa;
+ }
+ }
+
+ if (negate) {
+ nir_alu_instr *neg = nir_alu_instr_create(state->mem_ctx,
+ nir_op_fneg);
+ neg->src[0].src = nir_src_for_ssa(mul_src[0]);
+ nir_ssa_dest_init(&neg->instr, &neg->dest.dest,
+ mul_src[0]->num_components, NULL);
+ neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1;
+ nir_instr_insert_before(&add->instr, &neg->instr);
+ mul_src[0] = &neg->dest.dest.ssa;
+ }
+
+ nir_alu_instr *ffma = nir_alu_instr_create(state->mem_ctx, nir_op_ffma);
+ ffma->dest.saturate = add->dest.saturate;
+ ffma->dest.write_mask = add->dest.write_mask;
+
+ for (unsigned i = 0; i < 2; i++) {
+ ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
+ for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
+ ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
+ }
+ nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src],
+ state->mem_ctx);
+
+ assert(add->dest.dest.is_ssa);
+
+ nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
+ add->dest.dest.ssa.num_components,
+ add->dest.dest.ssa.name);
+ nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
+ nir_src_for_ssa(&ffma->dest.dest.ssa),
+ state->mem_ctx);
+
+ nir_instr_insert_before(&add->instr, &ffma->instr);
+ assert(add->dest.dest.ssa.uses->entries == 0);
+ nir_instr_remove(&add->instr);
+
+ state->progress = true;
+ }
+
+ return true;
+}
+
+static bool
+nir_opt_peephole_ffma_impl(nir_function_impl *impl)
+{
+ struct peephole_ffma_state state;
+
+ state.mem_ctx = ralloc_parent(impl);
+ state.impl = impl;
+ state.progress = false;
+
+ nir_foreach_block(impl, nir_opt_peephole_ffma_block, &state);
+
+ if (state.progress)
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return state.progress;
+}
+
+bool
+nir_opt_peephole_ffma(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl)
+ progress |= nir_opt_peephole_ffma_impl(overload->impl);
+ }
+
+ return progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
index b89451b09..f400cfd66 100644
--- a/mesalib/src/glsl/nir/nir_opt_peephole_select.c
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
@@ -84,7 +84,9 @@ block_check_for_allowed_instrs(nir_block *block)
case nir_instr_type_alu: {
/* It must be a move operation */
nir_alu_instr *mov = nir_instr_as_alu(instr);
- if (mov->op != nir_op_fmov && mov->op != nir_op_imov)
+ if (mov->op != nir_op_fmov && mov->op != nir_op_imov &&
+ mov->op != nir_op_fneg && mov->op != nir_op_ineg &&
+ mov->op != nir_op_fabs && mov->op != nir_op_iabs)
return false;
/* Can't handle saturate */
diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c
index fa11a312e..fb8c9344c 100644
--- a/mesalib/src/glsl/nir/nir_print.c
+++ b/mesalib/src/glsl/nir/nir_print.c
@@ -137,25 +137,37 @@ print_dest(nir_dest *dest, FILE *fp)
}
static void
-print_alu_src(nir_alu_src *src, FILE *fp)
+print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp)
{
- if (src->negate)
+ if (instr->src[src].negate)
fprintf(fp, "-");
- if (src->abs)
+ if (instr->src[src].abs)
fprintf(fp, "abs(");
- print_src(&src->src, fp);
+ print_src(&instr->src[src].src, fp);
- if (src->swizzle[0] != 0 ||
- src->swizzle[1] != 1 ||
- src->swizzle[2] != 2 ||
- src->swizzle[3] != 3) {
+ bool print_swizzle = false;
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(instr, src, i))
+ continue;
+
+ if (instr->src[src].swizzle[i] != i) {
+ print_swizzle = true;
+ break;
+ }
+ }
+
+ if (print_swizzle) {
fprintf(fp, ".");
- for (unsigned i = 0; i < 4; i++)
- fprintf(fp, "%c", "xyzw"[src->swizzle[i]]);
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(instr, src, i))
+ continue;
+
+ fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]);
+ }
}
- if (src->abs)
+ if (instr->src[src].abs)
fprintf(fp, ")");
}
@@ -189,7 +201,7 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp)
if (i != 0)
fprintf(fp, ", ");
- print_alu_src(&instr->src[i], fp);
+ print_alu_src(instr, i, fp);
}
}
diff --git a/mesalib/src/glsl/nir/nir_remove_dead_variables.c b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
index e7f8aeacb..4417e2a48 100644
--- a/mesalib/src/glsl/nir/nir_remove_dead_variables.c
+++ b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
@@ -98,22 +98,14 @@ add_var_use_shader(nir_shader *shader, struct set *live)
}
static void
-remove_dead_local_vars(nir_function_impl *impl, struct set *live)
+remove_dead_vars(struct exec_list *var_list, struct set *live)
{
- foreach_list_typed_safe(nir_variable, var, node, &impl->locals) {
+ foreach_list_typed_safe(nir_variable, var, node, var_list) {
struct set_entry *entry = _mesa_set_search(live, var);
- if (entry == NULL)
- exec_node_remove(&var->node);
- }
-}
-
-static void
-remove_dead_global_vars(nir_shader *shader, struct set *live)
-{
- foreach_list_typed_safe(nir_variable, var, node, &shader->globals) {
- struct set_entry *entry = _mesa_set_search(live, var);
- if (entry == NULL)
+ if (entry == NULL) {
exec_node_remove(&var->node);
+ ralloc_free(var);
+ }
}
}
@@ -125,11 +117,11 @@ nir_remove_dead_variables(nir_shader *shader)
add_var_use_shader(shader, live);
- remove_dead_global_vars(shader, live);
+ remove_dead_vars(&shader->globals, live);
nir_foreach_overload(shader, overload) {
if (overload->impl)
- remove_dead_local_vars(overload->impl, live);
+ remove_dead_vars(&overload->impl->locals, live);
}
_mesa_set_destroy(live, NULL);
diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c
index 73a802be7..5ba016085 100644
--- a/mesalib/src/glsl/nir/nir_search.c
+++ b/mesalib/src/glsl/nir/nir_search.c
@@ -218,8 +218,8 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
if (matched)
return true;
- if (nir_op_infos[instr->op].num_inputs == 2 &&
- (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE)) {
+ if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[instr->op].num_inputs == 2);
if (!match_value(expr->srcs[0], instr, 1, num_components,
swizzle, state))
return false;
diff --git a/mesalib/src/glsl/nir/nir_split_var_copies.c b/mesalib/src/glsl/nir/nir_split_var_copies.c
index 4d663b51b..fc72c078c 100644
--- a/mesalib/src/glsl/nir/nir_split_var_copies.c
+++ b/mesalib/src/glsl/nir/nir_split_var_copies.c
@@ -188,8 +188,8 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy,
* belongs to the copy instruction and b) the deref chains may
* have some of the same links due to the way we constructed them
*/
- nir_deref *src = nir_copy_deref(state->mem_ctx, src_head);
- nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head);
+ nir_deref *src = nir_copy_deref(new_copy, src_head);
+ nir_deref *dest = nir_copy_deref(new_copy, dest_head);
new_copy->variables[0] = nir_deref_as_var(dest);
new_copy->variables[1] = nir_deref_as_var(src);
diff --git a/mesalib/src/glsl/nir/nir_sweep.c b/mesalib/src/glsl/nir/nir_sweep.c
new file mode 100644
index 000000000..d3549756a
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_sweep.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/**
+ * \file nir_sweep.c
+ *
+ * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
+ * memory - anything still connected to the program will be kept, and any dead memory
+ * we dropped on the floor will be freed.
+ *
+ * The expectation is that drivers should call this when finished compiling the shader
+ * (after any optimization, lowering, and so on). However, it's also fine to call it
+ * earlier, and even many times, trading CPU cycles for memory savings.
+ */
+
+#define steal_list(mem_ctx, type, list) \
+ foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
+
+static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
+
+static bool
+sweep_src_indirect(nir_src *src, void *nir)
+{
+ if (!src->is_ssa && src->reg.indirect)
+ ralloc_steal(nir, src->reg.indirect);
+
+ return true;
+}
+
+static bool
+sweep_dest_indirect(nir_dest *dest, void *nir)
+{
+ if (!dest->is_ssa && dest->reg.indirect)
+ ralloc_steal(nir, dest->reg.indirect);
+
+ return true;
+}
+
+static void
+sweep_block(nir_shader *nir, nir_block *block)
+{
+ ralloc_steal(nir, block);
+
+ nir_foreach_instr(block, instr) {
+ ralloc_steal(nir, instr);
+
+ nir_foreach_src(instr, sweep_src_indirect, nir);
+ nir_foreach_dest(instr, sweep_dest_indirect, nir);
+ }
+}
+
+static void
+sweep_if(nir_shader *nir, nir_if *iff)
+{
+ ralloc_steal(nir, iff);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
+ sweep_cf_node(nir, cf_node);
+ }
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
+ sweep_cf_node(nir, cf_node);
+ }
+}
+
+static void
+sweep_loop(nir_shader *nir, nir_loop *loop)
+{
+ ralloc_steal(nir, loop);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+ sweep_cf_node(nir, cf_node);
+ }
+}
+
+static void
+sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
+{
+ switch (cf_node->type) {
+ case nir_cf_node_block:
+ sweep_block(nir, nir_cf_node_as_block(cf_node));
+ break;
+ case nir_cf_node_if:
+ sweep_if(nir, nir_cf_node_as_if(cf_node));
+ break;
+ case nir_cf_node_loop:
+ sweep_loop(nir, nir_cf_node_as_loop(cf_node));
+ break;
+ default:
+ unreachable("Invalid CF node type");
+ }
+}
+
+static void
+sweep_impl(nir_shader *nir, nir_function_impl *impl)
+{
+ ralloc_steal(nir, impl);
+
+ ralloc_steal(nir, impl->params);
+ ralloc_steal(nir, impl->return_var);
+ steal_list(nir, nir_variable, &impl->locals);
+ steal_list(nir, nir_register, &impl->registers);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
+ sweep_cf_node(nir, cf_node);
+ }
+
+ sweep_block(nir, impl->end_block);
+
+ /* Wipe out all the metadata, if any. */
+ nir_metadata_preserve(impl, nir_metadata_none);
+}
+
+static void
+sweep_function(nir_shader *nir, nir_function *f)
+{
+ ralloc_steal(nir, f);
+
+ foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) {
+ ralloc_steal(nir, overload);
+ ralloc_steal(nir, overload->params);
+ if (overload->impl)
+ sweep_impl(nir, overload->impl);
+ }
+}
+
+void
+nir_sweep(nir_shader *nir)
+{
+ void *rubbish = ralloc_context(NULL);
+
+ /* First, move ownership of all the memory to a temporary context; assume dead. */
+ ralloc_adopt(rubbish, nir);
+
+ /* Variables and registers are not dead. Steal them back. */
+ steal_list(nir, nir_variable, &nir->uniforms);
+ steal_list(nir, nir_variable, &nir->inputs);
+ steal_list(nir, nir_variable, &nir->outputs);
+ steal_list(nir, nir_variable, &nir->globals);
+ steal_list(nir, nir_variable, &nir->system_values);
+ steal_list(nir, nir_register, &nir->registers);
+
+ /* Recurse into functions, stealing their contents back. */
+ foreach_list_typed(nir_function, func, node, &nir->functions) {
+ sweep_function(nir, func);
+ }
+
+ /* Free everything we didn't steal back. */
+ ralloc_free(rubbish);
+}
diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c
index 47cf45393..53ff54766 100644
--- a/mesalib/src/glsl/nir/nir_to_ssa.c
+++ b/mesalib/src/glsl/nir/nir_to_ssa.c
@@ -47,7 +47,7 @@ insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
set_foreach(block->predecessors, entry) {
nir_block *pred = (nir_block *) entry->key;
- nir_phi_src *src = ralloc(mem_ctx, nir_phi_src);
+ nir_phi_src *src = ralloc(instr, nir_phi_src);
src->pred = pred;
src->src.is_ssa = false;
src->src.reg.base_offset = 0;
diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp
index a13c3e12a..f0d0b46d2 100644
--- a/mesalib/src/glsl/nir/nir_types.cpp
+++ b/mesalib/src/glsl/nir/nir_types.cpp
@@ -143,6 +143,12 @@ glsl_void_type(void)
}
const glsl_type *
+glsl_float_type(void)
+{
+ return glsl_type::float_type;
+}
+
+const glsl_type *
glsl_vec4_type(void)
{
return glsl_type::vec4_type;
diff --git a/mesalib/src/glsl/nir/nir_types.h b/mesalib/src/glsl/nir/nir_types.h
index 494051a67..276d4ad62 100644
--- a/mesalib/src/glsl/nir/nir_types.h
+++ b/mesalib/src/glsl/nir/nir_types.h
@@ -69,6 +69,7 @@ bool glsl_type_is_scalar(const struct glsl_type *type);
bool glsl_type_is_matrix(const struct glsl_type *type);
const struct glsl_type *glsl_void_type(void);
+const struct glsl_type *glsl_float_type(void);
const struct glsl_type *glsl_vec4_type(void);
const struct glsl_type *glsl_array_type(const struct glsl_type *base,
unsigned elements);
diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c
index f247ae069..a7aa79837 100644
--- a/mesalib/src/glsl/nir/nir_validate.c
+++ b/mesalib/src/glsl/nir/nir_validate.c
@@ -295,6 +295,8 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
static void
validate_deref_chain(nir_deref *deref, validate_state *state)
{
+ assert(deref->child == NULL || ralloc_parent(deref->child) == deref);
+
nir_deref *parent = NULL;
while (deref != NULL) {
switch (deref->deref_type) {
@@ -336,9 +338,10 @@ validate_var_use(nir_variable *var, validate_state *state)
}
static void
-validate_deref_var(nir_deref_var *deref, validate_state *state)
+validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state)
{
assert(deref != NULL);
+ assert(ralloc_parent(deref) == parent_mem_ctx);
assert(deref->deref.type == deref->var->type);
validate_var_use(deref->var, state);
@@ -386,7 +389,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
for (unsigned i = 0; i < num_vars; i++) {
- validate_deref_var(instr->variables[i], state);
+ validate_deref_var(instr, instr->variables[i], state);
}
switch (instr->intrinsic) {
@@ -423,7 +426,7 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
}
if (instr->sampler != NULL)
- validate_deref_var(instr->sampler, state);
+ validate_deref_var(instr, instr->sampler, state);
}
static void
@@ -438,10 +441,10 @@ validate_call_instr(nir_call_instr *instr, validate_state *state)
for (unsigned i = 0; i < instr->num_params; i++) {
assert(instr->callee->params[i].type == instr->params[i]->deref.type);
- validate_deref_var(instr->params[i], state);
+ validate_deref_var(instr, instr->params[i], state);
}
- validate_deref_var(instr->return_deref, state);
+ validate_deref_var(instr, instr->return_deref, state);
}
static void
@@ -680,8 +683,7 @@ validate_cf_node(nir_cf_node *node, validate_state *state)
break;
default:
- assert(!"Invalid ALU instruction type");
- break;
+ unreachable("Invalid CF node type");
}
}