about | summary | refs | log | tree | commit | diff
path: root/mesalib/src/glsl/nir
diff options
context:
space:
mode:
author: marha <marha@users.sourceforge.net> 2015-04-20 21:25:25 +0200
committer: marha <marha@users.sourceforge.net> 2015-04-20 21:25:25 +0200
commit: 4ba9be2882d9f1567809edb0a31fcdf11320d41f (patch)
tree: f796ab7a5044f9dd99aac7cb9a7c836857987635 /mesalib/src/glsl/nir
parent: 82c8df11062f72a7d467e26cedbbd8b322ff7a70 (diff)
download: vcxsrv-4ba9be2882d9f1567809edb0a31fcdf11320d41f.tar.gz
vcxsrv-4ba9be2882d9f1567809edb0a31fcdf11320d41f.tar.bz2
vcxsrv-4ba9be2882d9f1567809edb0a31fcdf11320d41f.zip
randrproto xkeyboard-config fontconfig libX11 libXdmcp libXmu pixman xkbcomp xserver mesa git update 20 Apr 2015
xserver commit b1029716e41e252f149b82124a149da180607c96 xkeyboard-config commit 7d00bcc2d9c3944bbdfcbe472ee3299729dc7687 libX11 commit 748d47e69f5c12d8557d56a8a8ec166588da7b93 libXdmcp commit b10f382e3aa2e86cd5a2bc27d6758da55f0ab1f6 xkbcomp commit 1ae525b3d236b59e6437b2b5433d460e18370973 pixman commit 58e21d3e45c5227c2ca9ac00cf044f22a7975180 randrproto commit 98da0d6e48b7d124d6788ea568e9f9e3dc204322 libXmu commit 4459e6940fe3fdf26a8d5d4c71989498bc400a62 fontconfig commit 07be485a0a84995ce69bf60e3b1bb22cb35f6b0e mesa commit c1485f4b7d044724b3dbc1011f3c3a8a53132010
Diffstat (limited to 'mesalib/src/glsl/nir')
-rw-r--r-- mesalib/src/glsl/nir/glsl_to_nir.cpp | 92
-rw-r--r-- mesalib/src/glsl/nir/nir.c | 64
-rw-r--r-- mesalib/src/glsl/nir/nir.h | 51
-rw-r--r-- mesalib/src/glsl/nir/nir_algebraic.py | 17
-rw-r--r-- mesalib/src/glsl/nir/nir_builder.h | 135
-rw-r--r-- mesalib/src/glsl/nir/nir_from_ssa.c | 9
-rw-r--r-- mesalib/src/glsl/nir/nir_lower_idiv.c | 155
-rw-r--r-- mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c | 2
-rw-r--r-- mesalib/src/glsl/nir/nir_lower_samplers.cpp | 45
-rw-r--r-- mesalib/src/glsl/nir/nir_lower_tex_projector.c | 143
-rw-r--r-- mesalib/src/glsl/nir/nir_lower_var_copies.c | 8
-rw-r--r-- mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c | 2
-rw-r--r-- mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c | 110
-rw-r--r-- mesalib/src/glsl/nir/nir_opcodes.py | 8
-rw-r--r-- mesalib/src/glsl/nir/nir_opt_algebraic.py | 63
-rw-r--r-- mesalib/src/glsl/nir/nir_opt_cse.c | 32
-rw-r--r-- mesalib/src/glsl/nir/nir_opt_peephole_ffma.c | 261
-rw-r--r-- mesalib/src/glsl/nir/nir_opt_peephole_select.c | 4
-rw-r--r-- mesalib/src/glsl/nir/nir_print.c | 36
-rw-r--r-- mesalib/src/glsl/nir/nir_remove_dead_variables.c | 22
-rw-r--r-- mesalib/src/glsl/nir/nir_search.c | 4
-rw-r--r-- mesalib/src/glsl/nir/nir_split_var_copies.c | 4
-rw-r--r-- mesalib/src/glsl/nir/nir_sweep.c | 172
-rw-r--r-- mesalib/src/glsl/nir/nir_to_ssa.c | 2
-rw-r--r-- mesalib/src/glsl/nir/nir_types.cpp | 6
-rw-r--r-- mesalib/src/glsl/nir/nir_types.h | 1
-rw-r--r-- mesalib/src/glsl/nir/nir_validate.c | 16
27 files changed, 1264 insertions, 200 deletions
diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp
index 357944da6..f6b8331d4 100644
--- a/mesalib/src/glsl/nir/glsl_to_nir.cpp
+++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp
@@ -88,6 +88,8 @@ private:
exec_list *cf_node_list;
nir_instr *result; /* result of the expression tree last visited */
+ nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
+
/* the head of the dereference chain we're creating */
nir_deref_var *deref_head;
/* the tail of the dereference chain we're creating */
@@ -156,6 +158,14 @@ nir_visitor::~nir_visitor()
_mesa_hash_table_destroy(this->overload_table, NULL);
}
+nir_deref_var *
+nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir)
+{
+ ir->accept(this);
+ ralloc_steal(mem_ctx, this->deref_head);
+ return this->deref_head;
+}
+
static nir_constant *
constant_copy(ir_constant *ir, void *mem_ctx)
{
@@ -582,13 +592,11 @@ void
nir_visitor::visit(ir_return *ir)
{
if (ir->value != NULL) {
- ir->value->accept(this);
nir_intrinsic_instr *copy =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
- copy->variables[0] = nir_deref_var_create(this->shader,
- this->impl->return_var);
- copy->variables[1] = this->deref_head;
+ copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var);
+ copy->variables[1] = evaluate_deref(&copy->instr, ir->value);
}
nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
@@ -613,8 +621,7 @@ nir_visitor::visit(ir_call *ir)
nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
ir_dereference *param =
(ir_dereference *) ir->actual_parameters.get_head();
- param->accept(this);
- instr->variables[0] = this->deref_head;
+ instr->variables[0] = evaluate_deref(&instr->instr, param);
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
@@ -623,8 +630,7 @@ nir_visitor::visit(ir_call *ir)
nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
store_instr->num_components = 1;
- ir->return_deref->accept(this);
- store_instr->variables[0] = this->deref_head;
+ store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref);
store_instr->src[0].is_ssa = true;
store_instr->src[0].ssa = &instr->dest.ssa;
@@ -642,13 +648,11 @@ nir_visitor::visit(ir_call *ir)
unsigned i = 0;
foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
- param->accept(this);
- instr->params[i] = this->deref_head;
+ instr->params[i] = evaluate_deref(&instr->instr, param);
i++;
}
- ir->return_deref->accept(this);
- instr->return_deref = this->deref_head;
+ instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
}
@@ -663,12 +667,8 @@ nir_visitor::visit(ir_assignment *ir)
nir_intrinsic_instr *copy =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
- ir->lhs->accept(this);
- copy->variables[0] = this->deref_head;
-
- ir->rhs->accept(this);
- copy->variables[1] = this->deref_head;
-
+ copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs);
+ copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs);
if (ir->condition) {
nir_if *if_stmt = nir_if_create(this->shader);
@@ -700,6 +700,7 @@ nir_visitor::visit(ir_assignment *ir)
load->num_components = ir->lhs->type->vector_elements;
nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
load->variables[0] = lhs_deref;
+ ralloc_steal(load, load->variables[0]);
nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr);
nir_op vec_op;
@@ -741,7 +742,7 @@ nir_visitor::visit(ir_assignment *ir)
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
store->num_components = ir->lhs->type->vector_elements;
- nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref);
+ nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
store->variables[0] = nir_deref_as_var(store_deref);
store->src[0] = src;
@@ -816,6 +817,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
load_instr->num_components = ir->type->vector_elements;
load_instr->variables[0] = this->deref_head;
+ ralloc_steal(load_instr, load_instr->variables[0]);
add_instr(&load_instr->instr, ir->type->vector_elements);
}
@@ -959,6 +961,7 @@ nir_visitor::visit(ir_expression *ir)
nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
intrin->num_components = deref->type->vector_elements;
intrin->variables[0] = this->deref_head;
+ ralloc_steal(intrin, intrin->variables[0]);
if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
@@ -1087,12 +1090,6 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break;
case ir_unop_sin: emit(nir_op_fsin, dest_size, srcs); break;
case ir_unop_cos: emit(nir_op_fcos, dest_size, srcs); break;
- case ir_unop_sin_reduced:
- emit(nir_op_fsin_reduced, dest_size, srcs);
- break;
- case ir_unop_cos_reduced:
- emit(nir_op_fcos_reduced, dest_size, srcs);
- break;
case ir_unop_dFdx: emit(nir_op_fddx, dest_size, srcs); break;
case ir_unop_dFdy: emit(nir_op_fddy, dest_size, srcs); break;
case ir_unop_dFdx_fine: emit(nir_op_fddx_fine, dest_size, srcs); break;
@@ -1210,6 +1207,9 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_bit_and:
case ir_binop_bit_or:
case ir_binop_bit_xor:
+ case ir_binop_logic_and:
+ case ir_binop_logic_or:
+ case ir_binop_logic_xor:
case ir_binop_lshift:
case ir_binop_rshift:
switch (ir->operation) {
@@ -1270,6 +1270,24 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_bit_xor:
op = nir_op_ixor;
break;
+ case ir_binop_logic_and:
+ if (supports_ints)
+ op = nir_op_iand;
+ else
+ op = nir_op_fand;
+ break;
+ case ir_binop_logic_or:
+ if (supports_ints)
+ op = nir_op_ior;
+ else
+ op = nir_op_for;
+ break;
+ case ir_binop_logic_xor:
+ if (supports_ints)
+ op = nir_op_ixor;
+ else
+ op = nir_op_fxor;
+ break;
case ir_binop_lshift:
op = nir_op_ishl;
break;
@@ -1444,24 +1462,6 @@ nir_visitor::visit(ir_expression *ir)
}
}
break;
- case ir_binop_logic_and:
- if (supports_ints)
- emit(nir_op_iand, dest_size, srcs);
- else
- emit(nir_op_fand, dest_size, srcs);
- break;
- case ir_binop_logic_or:
- if (supports_ints)
- emit(nir_op_ior, dest_size, srcs);
- else
- emit(nir_op_for, dest_size, srcs);
- break;
- case ir_binop_logic_xor:
- if (supports_ints)
- emit(nir_op_ixor, dest_size, srcs);
- else
- emit(nir_op_fxor, dest_size, srcs);
- break;
case ir_binop_dot:
switch (ir->operands[0]->type->vector_elements) {
case 2: emit(nir_op_fdot2, dest_size, srcs); break;
@@ -1633,8 +1633,7 @@ nir_visitor::visit(ir_texture *ir)
unreachable("not reached");
}
- ir->sampler->accept(this);
- instr->sampler = this->deref_head;
+ instr->sampler = evaluate_deref(&instr->instr, ir->sampler);
unsigned src_number = 0;
@@ -1759,7 +1758,7 @@ nir_visitor::visit(ir_dereference_record *ir)
int field_index = this->deref_tail->type->field_index(ir->field);
assert(field_index >= 0);
- nir_deref_struct *deref = nir_deref_struct_create(this->shader, field_index);
+ nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index);
deref->deref.type = ir->type;
this->deref_tail->child = &deref->deref;
this->deref_tail = &deref->deref;
@@ -1783,5 +1782,6 @@ nir_visitor::visit(ir_dereference_array *ir)
ir->array->accept(this);
this->deref_tail->child = &deref->deref;
+ ralloc_steal(this->deref_tail, deref);
this->deref_tail = &deref->deref;
}
diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c
index 6459d5108..c6e53612b 100644
--- a/mesalib/src/glsl/nir/nir.c
+++ b/mesalib/src/glsl/nir/nir.c
@@ -58,11 +58,11 @@ reg_create(void *mem_ctx, struct exec_list *list)
nir_register *reg = ralloc(mem_ctx, nir_register);
reg->parent_instr = NULL;
- reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ reg->uses = _mesa_set_create(reg, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ reg->defs = _mesa_set_create(reg, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer,
_mesa_key_pointer_equal);
reg->num_components = 0;
@@ -108,7 +108,7 @@ nir_function_create(nir_shader *shader, const char *name)
exec_list_push_tail(&shader->functions, &func->node);
exec_list_make_empty(&func->overload_list);
- func->name = name;
+ func->name = ralloc_strdup(func, name);
func->shader = shader;
return func;
@@ -285,10 +285,10 @@ nir_block_create(void *mem_ctx)
cf_init(&block->cf_node, nir_cf_node_block);
block->successors[0] = block->successors[1] = NULL;
- block->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
block->imm_dom = NULL;
- block->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
exec_list_make_empty(&block->instr_list);
@@ -381,11 +381,11 @@ alu_src_init(nir_alu_src *src)
}
nir_alu_instr *
-nir_alu_instr_create(void *mem_ctx, nir_op op)
+nir_alu_instr_create(nir_shader *shader, nir_op op)
{
unsigned num_srcs = nir_op_infos[op].num_inputs;
nir_alu_instr *instr =
- ralloc_size(mem_ctx,
+ ralloc_size(shader,
sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
instr_init(&instr->instr, nir_instr_type_alu);
@@ -398,18 +398,18 @@ nir_alu_instr_create(void *mem_ctx, nir_op op)
}
nir_jump_instr *
-nir_jump_instr_create(void *mem_ctx, nir_jump_type type)
+nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
- nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr);
+ nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
instr_init(&instr->instr, nir_instr_type_jump);
instr->type = type;
return instr;
}
nir_load_const_instr *
-nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
+nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
{
- nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr);
+ nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
instr_init(&instr->instr, nir_instr_type_load_const);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
@@ -418,11 +418,11 @@ nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
}
nir_intrinsic_instr *
-nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
+nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
nir_intrinsic_instr *instr =
- ralloc_size(mem_ctx,
+ ralloc_size(shader,
sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
instr_init(&instr->instr, nir_instr_type_intrinsic);
@@ -438,29 +438,29 @@ nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
}
nir_call_instr *
-nir_call_instr_create(void *mem_ctx, nir_function_overload *callee)
+nir_call_instr_create(nir_shader *shader, nir_function_overload *callee)
{
- nir_call_instr *instr = ralloc(mem_ctx, nir_call_instr);
+ nir_call_instr *instr = ralloc(shader, nir_call_instr);
instr_init(&instr->instr, nir_instr_type_call);
instr->callee = callee;
instr->num_params = callee->num_params;
- instr->params = ralloc_array(mem_ctx, nir_deref_var *, instr->num_params);
+ instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params);
instr->return_deref = NULL;
return instr;
}
nir_tex_instr *
-nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
+nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
- nir_tex_instr *instr = ralloc(mem_ctx, nir_tex_instr);
+ nir_tex_instr *instr = ralloc(shader, nir_tex_instr);
instr_init(&instr->instr, nir_instr_type_tex);
dest_init(&instr->dest);
instr->num_srcs = num_srcs;
- instr->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs);
+ instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
for (unsigned i = 0; i < num_srcs; i++)
src_init(&instr->src[i].src);
@@ -472,9 +472,9 @@ nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
}
nir_phi_instr *
-nir_phi_instr_create(void *mem_ctx)
+nir_phi_instr_create(nir_shader *shader)
{
- nir_phi_instr *instr = ralloc(mem_ctx, nir_phi_instr);
+ nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
instr_init(&instr->instr, nir_instr_type_phi);
dest_init(&instr->dest);
@@ -483,9 +483,9 @@ nir_phi_instr_create(void *mem_ctx)
}
nir_parallel_copy_instr *
-nir_parallel_copy_instr_create(void *mem_ctx)
+nir_parallel_copy_instr_create(nir_shader *shader)
{
- nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr);
+ nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
instr_init(&instr->instr, nir_instr_type_parallel_copy);
exec_list_make_empty(&instr->entries);
@@ -494,9 +494,9 @@ nir_parallel_copy_instr_create(void *mem_ctx)
}
nir_ssa_undef_instr *
-nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components)
+nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
{
- nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr);
+ nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
instr_init(&instr->instr, nir_instr_type_ssa_undef);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
@@ -543,7 +543,7 @@ copy_deref_var(void *mem_ctx, nir_deref_var *deref)
nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
ret->deref.type = deref->deref.type;
if (deref->deref.child)
- ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
return ret;
}
@@ -558,7 +558,7 @@ copy_deref_array(void *mem_ctx, nir_deref_array *deref)
}
ret->deref.type = deref->deref.type;
if (deref->deref.child)
- ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
return ret;
}
@@ -568,7 +568,7 @@ copy_deref_struct(void *mem_ctx, nir_deref_struct *deref)
nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index);
ret->deref.type = deref->deref.type;
if (deref->deref.child)
- ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+ ret->deref.child = nir_copy_deref(ret, deref->deref.child);
return ret;
}
@@ -1834,13 +1834,11 @@ void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components, const char *name)
{
- void *mem_ctx = ralloc_parent(instr);
-
def->name = name;
def->parent_instr = instr;
- def->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ def->uses = _mesa_set_create(instr, _mesa_hash_pointer,
_mesa_key_pointer_equal);
- def->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+ def->if_uses = _mesa_set_create(instr, _mesa_hash_pointer,
_mesa_key_pointer_equal);
def->num_components = num_components;
diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h
index 29fe94243..74772c798 100644
--- a/mesalib/src/glsl/nir/nir.h
+++ b/mesalib/src/glsl/nir/nir.h
@@ -34,6 +34,7 @@
#include "util/set.h"
#include "util/bitset.h"
#include "nir_types.h"
+#include "glsl/shader_enums.h"
#include <stdio.h>
#include "nir_opcodes.h"
@@ -529,6 +530,16 @@ nir_src_for_reg(nir_register *reg)
return src;
}
+static inline nir_instr *
+nir_src_get_parent_instr(const nir_src *src)
+{
+ if (src->is_ssa) {
+ return src->ssa->parent_instr;
+ } else {
+ return src->reg.reg->parent_instr;
+ }
+}
+
static inline nir_dest
nir_dest_for_reg(nir_register *reg)
{
@@ -1365,11 +1376,17 @@ typedef struct nir_function {
typedef struct nir_shader_compiler_options {
bool lower_ffma;
+ bool lower_flrp;
bool lower_fpow;
bool lower_fsat;
bool lower_fsqrt;
/** lowers fneg and ineg to fsub and isub. */
bool lower_negate;
+ /** lowers fsub and isub to fadd+fneg and iadd+ineg. */
+ bool lower_sub;
+
+ /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
+ bool lower_scmp;
/**
* Does the driver support real 32-bit integers? (Otherwise, integers
@@ -1414,6 +1431,9 @@ typedef struct nir_shader {
* access plus one
*/
unsigned num_inputs, num_uniforms, num_outputs;
+
+ /** the number of uniforms that are only accessed directly */
+ unsigned num_direct_uniforms;
} nir_shader;
#define nir_foreach_overload(shader, overload) \
@@ -1466,26 +1486,26 @@ void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
-nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op);
+nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
-nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type);
+nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
-nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx,
+nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
unsigned num_components);
-nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx,
+nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
nir_intrinsic_op op);
-nir_call_instr *nir_call_instr_create(void *mem_ctx,
+nir_call_instr *nir_call_instr_create(nir_shader *shader,
nir_function_overload *callee);
-nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs);
+nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
-nir_phi_instr *nir_phi_instr_create(void *mem_ctx);
+nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
-nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx);
+nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
-nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx,
+nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
unsigned num_components);
nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
@@ -1550,7 +1570,7 @@ void nir_print_instr(const nir_instr *instr, FILE *fp);
#ifdef DEBUG
void nir_validate_shader(nir_shader *shader);
#else
-static inline void nir_validate_shader(nir_shader *shader) { }
+static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
#endif /* DEBUG */
void nir_calc_dominance_impl(nir_function_impl *impl);
@@ -1596,14 +1616,18 @@ void nir_lower_alu_to_scalar(nir_shader *shader);
void nir_lower_phis_to_scalar(nir_shader *shader);
void nir_lower_samplers(nir_shader *shader,
- struct gl_shader_program *shader_program,
- struct gl_program *prog);
+ const struct gl_shader_program *shader_program,
+ gl_shader_stage stage);
void nir_lower_system_values(nir_shader *shader);
+void nir_lower_tex_projector(nir_shader *shader);
+void nir_lower_idiv(nir_shader *shader);
void nir_lower_atomics(nir_shader *shader);
void nir_lower_to_source_mods(nir_shader *shader);
+void nir_normalize_cubemap_coords(nir_shader *shader);
+
void nir_live_variables_impl(nir_function_impl *impl);
bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
@@ -1612,6 +1636,7 @@ void nir_convert_to_ssa(nir_shader *shader);
void nir_convert_from_ssa(nir_shader *shader);
bool nir_opt_algebraic(nir_shader *shader);
+bool nir_opt_algebraic_late(nir_shader *shader);
bool nir_opt_constant_folding(nir_shader *shader);
bool nir_opt_global_to_local(nir_shader *shader);
@@ -1631,6 +1656,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader);
bool nir_opt_remove_phis(nir_shader *shader);
+void nir_sweep(nir_shader *shader);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/mesalib/src/glsl/nir/nir_algebraic.py b/mesalib/src/glsl/nir/nir_algebraic.py
index afab1a008..bbf4f08ef 100644
--- a/mesalib/src/glsl/nir/nir_algebraic.py
+++ b/mesalib/src/glsl/nir/nir_algebraic.py
@@ -181,12 +181,23 @@ _algebraic_pass_template = mako.template.Template("""
#include "nir.h"
#include "nir_search.h"
+#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+
struct transform {
const nir_search_expression *search;
const nir_search_value *replace;
unsigned condition_offset;
};
+struct opt_state {
+ void *mem_ctx;
+ bool progress;
+ const bool *condition_flags;
+};
+
+#endif
+
% for (opcode, xform_list) in xform_dict.iteritems():
% for xform in xform_list:
${xform.search.render()}
@@ -200,12 +211,6 @@ static const struct transform ${pass_name}_${opcode}_xforms[] = {
};
% endfor
-struct opt_state {
- void *mem_ctx;
- bool progress;
- const bool *condition_flags;
-};
-
static bool
${pass_name}_block(nir_block *block, void *void_state)
{
diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h
index 7c4f7fd96..d1419ee21 100644
--- a/mesalib/src/glsl/nir/nir_builder.h
+++ b/mesalib/src/glsl/nir/nir_builder.h
@@ -28,6 +28,9 @@ struct exec_list;
typedef struct nir_builder {
struct exec_list *cf_node_list;
+ nir_instr *before_instr;
+ nir_instr *after_instr;
+
nir_shader *shader;
nir_function_impl *impl;
} nir_builder;
@@ -45,8 +48,75 @@ nir_builder_insert_after_cf_list(nir_builder *build,
struct exec_list *cf_node_list)
{
build->cf_node_list = cf_node_list;
+ build->before_instr = NULL;
+ build->after_instr = NULL;
+}
+
+static inline void
+nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr)
+{
+ build->cf_node_list = NULL;
+ build->before_instr = before_instr;
+ build->after_instr = NULL;
}
+static inline void
+nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr)
+{
+ build->cf_node_list = NULL;
+ build->before_instr = NULL;
+ build->after_instr = after_instr;
+}
+
+static inline void
+nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
+{
+ if (build->cf_node_list) {
+ nir_instr_insert_after_cf_list(build->cf_node_list, instr);
+ } else if (build->before_instr) {
+ nir_instr_insert_before(build->before_instr, instr);
+ } else {
+ assert(build->after_instr);
+ nir_instr_insert_after(build->after_instr, instr);
+ build->after_instr = instr;
+ }
+}
+
+static inline nir_ssa_def *
+nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value)
+{
+ nir_load_const_instr *load_const =
+ nir_load_const_instr_create(build->shader, num_components);
+ if (!load_const)
+ return NULL;
+
+ load_const->value = value;
+
+ nir_builder_instr_insert(build, &load_const->instr);
+
+ return &load_const->def;
+}
+
+static inline nir_ssa_def *
+nir_imm_float(nir_builder *build, float x)
+{
+ nir_const_value v = { { .f = {x, 0, 0, 0} } };
+ return nir_build_imm(build, 1, v);
+}
+
+static inline nir_ssa_def *
+nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
+{
+ nir_const_value v = { { .f = {x, y, z, w} } };
+ return nir_build_imm(build, 4, v);
+}
+
+static inline nir_ssa_def *
+nir_imm_int(nir_builder *build, int x)
+{
+ nir_const_value v = { { .i = {x, 0, 0, 0} } };
+ return nir_build_imm(build, 1, v);
+}
static inline nir_ssa_def *
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
@@ -90,7 +160,7 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
instr->dest.write_mask = (1 << num_components) - 1;
- nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr);
+ nir_builder_instr_insert(build, &instr->instr);
return &instr->dest.dest.ssa;
}
@@ -127,4 +197,67 @@ nir_##op(nir_builder *build, nir_ssa_def *src0, \
#include "nir_builder_opcodes.h"
+/**
+ * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
+ */
+static inline nir_ssa_def *
+nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+ nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ mov->dest.write_mask = (1 << num_components) - 1;
+ mov->src[0] = src;
+ nir_builder_instr_insert(build, &mov->instr);
+
+ return &mov->dest.dest.ssa;
+}
+
+static inline nir_ssa_def *
+nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+ nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ mov->dest.write_mask = (1 << num_components) - 1;
+ mov->src[0] = src;
+ nir_builder_instr_insert(build, &mov->instr);
+
+ return &mov->dest.dest.ssa;
+}
+
+/**
+ * Construct an fmov or imov that reswizzles the source's components.
+ */
+static inline nir_ssa_def *
+nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
+ unsigned num_components, bool use_fmov)
+{
+ nir_alu_src alu_src;
+ memset(&alu_src, 0, sizeof(alu_src));
+ alu_src.src = nir_src_for_ssa(src);
+ for (int i = 0; i < 4; i++)
+ alu_src.swizzle[i] = swiz[i];
+
+ return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :
+ nir_imov_alu(build, alu_src, num_components);
+}
+
+/**
+ * Turns a nir_src into a nir_ssa_def * so it can be passed to
+ * nir_build_alu()-based builder calls.
+ */
+static inline nir_ssa_def *
+nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
+{
+ if (src.is_ssa && src.ssa->num_components == num_components)
+ return src.ssa;
+
+ nir_alu_src alu;
+ memset(&alu, 0, sizeof(alu));
+ alu.src = src;
+ for (int j = 0; j < 4; j++)
+ alu.swizzle[j] = j;
+
+ return nir_imov_alu(build, alu, num_components);
+}
+
#endif /* NIR_BUILDER_H */
diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c
index c3090fb06..184698abd 100644
--- a/mesalib/src/glsl/nir/nir_from_ssa.c
+++ b/mesalib/src/glsl/nir/nir_from_ssa.c
@@ -509,12 +509,13 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
reg->num_components = def->num_components;
reg->num_array_elems = 0;
- /* This register comes from an SSA definition that was not part of a
- * phi-web. Therefore, we know it has a single unique definition
- * that dominates all of its uses. Therefore, we can copy the
+ /* This register comes from an SSA definition that is defined and not
+ * part of a phi-web. Therefore, we know it has a single unique
+ * definition that dominates all of its uses; we can copy the
* parent_instr from the SSA def safely.
*/
- reg->parent_instr = def->parent_instr;
+ if (def->parent_instr->type != nir_instr_type_ssa_undef)
+ reg->parent_instr = def->parent_instr;
_mesa_hash_table_insert(state->ssa_table, def, reg);
return reg;
diff --git a/mesalib/src/glsl/nir/nir_lower_idiv.c b/mesalib/src/glsl/nir/nir_lower_idiv.c
new file mode 100644
index 000000000..7b6803207
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_idiv.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Lowers idiv/udiv/umod
+ * Based on NV50LegalizeSSA::handleDIV()
+ *
+ * Note that this is probably not enough precision for compute shaders.
+ * Perhaps we want a second higher precision (looping) version of this?
+ * Or perhaps we assume if you can do compute shaders you can also
+ * branch out to a pre-optimized shader library routine..
+ */
+
+static void
+convert_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+ nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
+ nir_op op = alu->op;
+ bool is_signed;
+
+ if ((op != nir_op_idiv) &&
+ (op != nir_op_udiv) &&
+ (op != nir_op_umod))
+ return;
+
+ is_signed = (op == nir_op_idiv);
+
+ nir_builder_insert_before_instr(bld, &alu->instr);
+
+ numer = nir_ssa_for_src(bld, alu->src[0].src,
+ nir_ssa_alu_instr_src_components(alu, 0));
+ denom = nir_ssa_for_src(bld, alu->src[1].src,
+ nir_ssa_alu_instr_src_components(alu, 1));
+
+ if (is_signed) {
+ af = nir_i2f(bld, numer);
+ bf = nir_i2f(bld, denom);
+ af = nir_fabs(bld, af);
+ bf = nir_fabs(bld, bf);
+ a = nir_iabs(bld, numer);
+ b = nir_iabs(bld, denom);
+ } else {
+ af = nir_u2f(bld, numer);
+ bf = nir_u2f(bld, denom);
+ a = numer;
+ b = denom;
+ }
+
+ /* get first result: */
+ bf = nir_frcp(bld, bf);
+ bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */
+ q = nir_fmul(bld, af, bf);
+
+ if (is_signed) {
+ q = nir_f2i(bld, q);
+ } else {
+ q = nir_f2u(bld, q);
+ }
+
+ /* get error of first result: */
+ r = nir_imul(bld, q, b);
+ r = nir_isub(bld, a, r);
+ r = nir_u2f(bld, r);
+ r = nir_fmul(bld, r, bf);
+ r = nir_f2u(bld, r);
+
+ /* add quotients: */
+ q = nir_iadd(bld, q, r);
+
+ /* correction: if modulus >= divisor, add 1 */
+ r = nir_imul(bld, q, b);
+ r = nir_isub(bld, a, r);
+
+ r = nir_ige(bld, r, b);
+ r = nir_b2i(bld, r);
+
+ q = nir_iadd(bld, q, r);
+ if (is_signed) {
+ /* fix the sign: */
+ r = nir_ixor(bld, numer, denom);
+ r = nir_ushr(bld, r, nir_imm_int(bld, 31));
+ r = nir_i2b(bld, r);
+ b = nir_ineg(bld, q);
+ q = nir_bcsel(bld, r, b, q);
+ }
+
+ if (op == nir_op_umod) {
+ /* division result in q */
+ r = nir_imul(bld, q, b);
+ q = nir_isub(bld, a, r);
+ }
+
+ assert(alu->dest.dest.is_ssa);
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+ nir_src_for_ssa(q),
+ ralloc_parent(alu));
+}
+
+static bool
+convert_block(nir_block *block, void *state)
+{
+ nir_builder *b = state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type == nir_instr_type_alu)
+ convert_instr(b, nir_instr_as_alu(instr));
+ }
+
+ return true;
+}
+
+static void
+convert_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(impl, convert_block, &b);
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+}
+
+void
+nir_lower_idiv(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl)
+ convert_impl(overload->impl);
+ }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
index 7cd93ea0a..4bdb80072 100644
--- a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -223,7 +223,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
else
nir_instr_insert_after_block(src->pred, &mov->instr);
- nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src);
+ nir_phi_src *new_src = ralloc(new_phi, nir_phi_src);
new_src->pred = src->pred;
new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa);
diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
index 3015dbd09..cf8ab8325 100644
--- a/mesalib/src/glsl/nir/nir_lower_samplers.cpp
+++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
@@ -36,33 +36,26 @@ extern "C" {
}
static unsigned
-get_sampler_index(struct gl_shader_program *shader_program, const char *name,
- const struct gl_program *prog)
+get_sampler_index(const struct gl_shader_program *shader_program,
+ gl_shader_stage stage, const char *name)
{
- GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
-
unsigned location;
if (!shader_program->UniformHash->get(location, name)) {
- linker_error(shader_program,
- "failed to find sampler named %s.\n", name);
+ assert(!"failed to find sampler");
return 0;
}
- if (!shader_program->UniformStorage[location].sampler[shader].active) {
- assert(0 && "cannot return a sampler");
- linker_error(shader_program,
- "cannot return a sampler named %s, because it is not "
- "used in this shader stage. This is a driver bug.\n",
- name);
+ if (!shader_program->UniformStorage[location].sampler[stage].active) {
+ assert(!"cannot return a sampler");
return 0;
}
- return shader_program->UniformStorage[location].sampler[shader].index;
+ return shader_program->UniformStorage[location].sampler[stage].index;
}
static void
-lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
- const struct gl_program *prog, void *mem_ctx)
+lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage, void *mem_ctx)
{
if (instr->sampler == NULL)
return;
@@ -90,7 +83,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset);
break;
case nir_deref_array_type_indirect: {
- instr->src = reralloc(mem_ctx, instr->src, nir_tex_src,
+ instr->src = reralloc(instr, instr->src, nir_tex_src,
instr->num_srcs + 1);
memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src);
instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
@@ -133,15 +126,15 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
}
}
- instr->sampler_index += get_sampler_index(shader_program, name, prog);
+ instr->sampler_index += get_sampler_index(shader_program, stage, name);
instr->sampler = NULL;
}
typedef struct {
void *mem_ctx;
- struct gl_shader_program *shader_program;
- struct gl_program *prog;
+ const struct gl_shader_program *shader_program;
+ gl_shader_stage stage;
} lower_state;
static bool
@@ -152,7 +145,7 @@ lower_block_cb(nir_block *block, void *_state)
nir_foreach_instr(block, instr) {
if (instr->type == nir_instr_type_tex) {
nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
- lower_sampler(tex_instr, state->shader_program, state->prog,
+ lower_sampler(tex_instr, state->shader_program, state->stage,
state->mem_ctx);
}
}
@@ -161,24 +154,24 @@ lower_block_cb(nir_block *block, void *_state)
}
static void
-lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program,
- struct gl_program *prog)
+lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage)
{
lower_state state;
state.mem_ctx = ralloc_parent(impl);
state.shader_program = shader_program;
- state.prog = prog;
+ state.stage = stage;
nir_foreach_block(impl, lower_block_cb, &state);
}
extern "C" void
-nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program,
- struct gl_program *prog)
+nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program,
+ gl_shader_stage stage)
{
nir_foreach_overload(shader, overload) {
if (overload->impl)
- lower_impl(overload->impl, shader_program, prog);
+ lower_impl(overload->impl, shader_program, stage);
}
}
diff --git a/mesalib/src/glsl/nir/nir_lower_tex_projector.c b/mesalib/src/glsl/nir/nir_lower_tex_projector.c
new file mode 100644
index 000000000..6b0e9c340
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_tex_projector.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This lowering pass converts the coordinate division for texture projection
+ * to be done in ALU instructions instead of asking the texture operation to
+ * do so.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static nir_ssa_def * /* helper: swizzles def with every swizzle entry set to channel c */
+channel(nir_builder *b, nir_ssa_def *def, int c)
+{
+ return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false); /* presumably a 1-component result; confirm against nir_swizzle() */
+}
+
+static bool /* per-block callback: folds each tex projector into ALU multiplies */
+nir_lower_tex_projector_block(nir_block *block, void *void_state)
+{
+ nir_builder *b = void_state; /* nir_builder supplied by nir_lower_tex_projector_impl() */
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
+
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ nir_builder_insert_before_instr(b, &tex->instr);
+
+ /* Find the projector in the srcs list, if present. */
+ int proj_index;
+ for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+ if (tex->src[proj_index].src_type == nir_tex_src_projector)
+ break;
+ }
+ if (proj_index == tex->num_srcs)
+ continue; /* no projector source: nothing to lower */
+ nir_ssa_def *inv_proj =
+ nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+ /* Walk through the sources projecting the arguments. */
+ for (int i = 0; i < tex->num_srcs; i++) {
+ switch (tex->src[i].src_type) {
+ case nir_tex_src_coord:
+ case nir_tex_src_comparitor:
+ break; /* only these two source kinds are multiplied by 1/projector */
+ default:
+ continue;
+ }
+ nir_ssa_def *unprojected =
+ nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+ nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+ /* Array indices don't get projected, so make a new vector with the
+ * coordinate's array index untouched.
+ */
+ if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+ switch (tex->coord_components) {
+ case 4:
+ projected = nir_vec4(b,
+ channel(b, projected, 0),
+ channel(b, projected, 1),
+ channel(b, projected, 2),
+ channel(b, unprojected, 3));
+ break;
+ case 3:
+ projected = nir_vec3(b,
+ channel(b, projected, 0),
+ channel(b, projected, 1),
+ channel(b, unprojected, 2));
+ break;
+ case 2:
+ projected = nir_vec2(b,
+ channel(b, projected, 0),
+ channel(b, unprojected, 1));
+ break;
+ default:
+ unreachable("bad texture coord count for array");
+ break;
+ }
+ }
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(projected));
+ }
+
+ /* Now move the later tex sources down the array so that the projector
+ * disappears. Exactly num_srcs - proj_index - 1 entries follow it.
+ */
+ nir_src dead;
+ memset(&dead, 0, sizeof dead);
+ nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead);
+ memmove(&tex->src[proj_index],
+ &tex->src[proj_index + 1],
+ (tex->num_srcs - proj_index - 1) * sizeof(*tex->src));
+ tex->num_srcs--;
+ }
+
+ return true;
+}
+
+static void /* runs nir_lower_tex_projector_block() over every block of impl */
+nir_lower_tex_projector_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(impl, nir_lower_tex_projector_block, &b); /* &b arrives as void_state */
+
+ nir_metadata_preserve(impl, nir_metadata_block_index | /* declared preserved: block indices ... */
+ nir_metadata_dominance); /* ... and dominance information */
+}
+
+void /* pass entry point: lowers texture projectors in each overload of the shader */
+nir_lower_tex_projector(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl) /* overloads with a NULL impl are skipped */
+ nir_lower_tex_projector_impl(overload->impl);
+ }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c
index 85ebb281c..58389a7c7 100644
--- a/mesalib/src/glsl/nir/nir_lower_var_copies.c
+++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c
@@ -148,13 +148,10 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
unsigned num_components = glsl_get_vector_elements(src_tail->type);
- nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref);
- nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref);
-
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
load->num_components = num_components;
- load->variables[0] = nir_deref_as_var(src_deref);
+ load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
nir_instr_insert_before(&copy_instr->instr, &load->instr);
@@ -162,7 +159,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
store->num_components = num_components;
- store->variables[0] = nir_deref_as_var(dest_deref);
+ store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
+
store->src[0].is_ssa = true;
store->src[0].ssa = &load->dest.ssa;
diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
index 86e6ab416..2ca74d71b 100644
--- a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -642,7 +642,7 @@ add_phi_sources(nir_block *block, nir_block *pred,
struct deref_node *node = entry->data;
- nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src);
+ nir_phi_src *src = ralloc(phi, nir_phi_src);
src->pred = pred;
src->src.is_ssa = true;
src->src.ssa = get_ssa_def_for_block(node, pred, state);
diff --git a/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c
new file mode 100644
index 000000000..0da8447ac
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand <jason@jlekstrand.net>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * This file implements a NIR lowering pass to perform the normalization of
+ * the cubemap coordinates to have the largest magnitude component be -1.0
+ * or 1.0. This is based on the old GLSL IR based pass by Eric.
+ */
+
+static nir_ssa_def * /* helper: swizzles def with every swizzle entry set to channel c */
+channel(nir_builder *b, nir_ssa_def *def, int c)
+{
+ return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false); /* presumably a 1-component result; confirm against nir_swizzle() */
+}
+
+static bool /* per-block callback: rescales the coordinate source of cube-map tex instructions */
+normalize_cubemap_coords_block(nir_block *block, void *void_state)
+{
+ nir_builder *b = void_state; /* nir_builder supplied by normalize_cubemap_coords_impl() */
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
+
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
+ continue; /* only cube samplers are touched */
+
+ nir_builder_insert_before_instr(b, &tex->instr);
+
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ if (tex->src[i].src_type != nir_tex_src_coord)
+ continue;
+
+ nir_ssa_def *orig_coord =
+ nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+ assert(orig_coord->num_components >= 3);
+
+ nir_ssa_def *abs = nir_fabs(b, orig_coord);
+ nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0), /* largest of |x|, |y|, |z| */
+ nir_fmax(b, channel(b, abs, 1),
+ channel(b, abs, 2)));
+
+ nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm)); /* coord * (1 / norm) */
+
+ /* Array indices don't have to be normalized, so make a new vector
+ * with the coordinate's array index untouched.
+ */
+ if (tex->coord_components == 4) {
+ normalized = nir_vec4(b,
+ channel(b, normalized, 0),
+ channel(b, normalized, 1),
+ channel(b, normalized, 2),
+ channel(b, orig_coord, 3)); /* fourth channel is taken unscaled from the original */
+ }
+
+ nir_instr_rewrite_src(&tex->instr,
+ &tex->src[i].src,
+ nir_src_for_ssa(normalized));
+ }
+ }
+
+ return true;
+}
+
+static void /* runs normalize_cubemap_coords_block() over every block of impl */
+normalize_cubemap_coords_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block(impl, normalize_cubemap_coords_block, &b); /* &b arrives as void_state */
+
+ nir_metadata_preserve(impl, nir_metadata_block_index | /* declared preserved: block indices ... */
+ nir_metadata_dominance); /* ... and dominance information */
+}
+
+void /* pass entry point: normalizes cube-map coordinates in each overload of the shader */
+nir_normalize_cubemap_coords(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload)
+ if (overload->impl) /* overloads with a NULL impl are skipped */
+ normalize_cubemap_coords_impl(overload->impl);
+}
diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py
index 062cd628b..264806f5d 100644
--- a/mesalib/src/glsl/nir/nir_opcodes.py
+++ b/mesalib/src/glsl/nir/nir_opcodes.py
@@ -161,12 +161,12 @@ unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion
-unop_convert("f2b", tfloat, tbool, "src0 == 0.0f")
+unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
# Boolean-to-float conversion
unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
-unop_convert("i2b", tint, tbool, "src0 == 0")
-unop_convert("b2i", tbool, tint, "src0 ? 0 : -1") # Boolean-to-int conversion
+unop_convert("i2b", tint, tbool, "src0 != 0")
+unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
@@ -191,8 +191,6 @@ unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
unop("fsin", tfloat, "sinf(src0)")
unop("fcos", tfloat, "cosf(src0)")
-unop("fsin_reduced", tfloat, "sinf(src0)")
-unop("fcos_reduced", tfloat, "cosf(src0)")
# Partial derivatives.
diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py
index ef855aa77..cdb19241c 100644
--- a/mesalib/src/glsl/nir/nir_opt_algebraic.py
+++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py
@@ -75,6 +75,9 @@ optimizations = [
(('flrp', a, b, 1.0), b),
(('flrp', a, a, b), a),
(('flrp', 0.0, a, b), ('fmul', a, b)),
+ (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+ (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
# Comparison simplifications
@@ -82,10 +85,6 @@ optimizations = [
(('inot', ('fge', a, b)), ('flt', a, b)),
(('inot', ('ilt', a, b)), ('ige', a, b)),
(('inot', ('ige', a, b)), ('ilt', a, b)),
- (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
- (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
- (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
(('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
@@ -95,6 +94,18 @@ optimizations = [
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
(('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+ (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+ (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
+ (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
+ (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
+ (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
+ (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
+ # Emulating booleans
+ (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
+ (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
+ (('iand', 'a@bool', 1.0), ('b2f', a)),
+ (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+ (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
# Comparison with the same args. Note that these are not done for
# the float versions because NaN always returns false on float
# inequalities.
@@ -122,7 +133,7 @@ optimizations = [
(('ishr', 0, a), 0),
(('ishr', a, 0), a),
(('ushr', 0, a), 0),
- (('ushr', a, 0), 0),
+ (('ushr', a, 0), a),
# Exponential/logarithmic identities
(('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
(('fexp', ('flog', a)), a), # e^ln(a) = a
@@ -134,6 +145,26 @@ optimizations = [
(('fpow', a, 1.0), a),
(('fpow', a, 2.0), ('fmul', a, a)),
(('fpow', 2.0, a), ('fexp2', a)),
+ (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+ (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))),
+ (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+ (('frcp', ('fexp', a)), ('fexp', ('fneg', a))),
+ (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+ (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))),
+ (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+ (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))),
+ (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+ (('flog', ('frcp', a)), ('fneg', ('flog', a))),
+ (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+ (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))),
+ (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+ (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))),
+ (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+ (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))),
+ (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+ (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))),
+ (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+ (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))),
# Division and reciprocal
(('fdiv', 1.0, a), ('frcp', a)),
(('frcp', ('frcp', a)), a),
@@ -154,18 +185,21 @@ optimizations = [
(('bcsel', a, b, b), b),
(('fcsel', a, b, b), b),
+ # Conversions
+ (('f2i', ('ftrunc', a)), ('f2i', a)),
+ (('f2u', ('ftrunc', a)), ('f2u', a)),
+
# Subtracts
(('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
(('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+ (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
+ (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
(('ineg', a), ('isub', 0, a), 'options->lower_negate'),
(('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
(('iadd', a, ('isub', 0, b)), ('isub', a, b)),
(('fabs', ('fsub', 0.0, a)), ('fabs', a)),
(('iabs', ('isub', 0, a)), ('iabs', a)),
-
-# This one may not be exact
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
]
# Add optimizations to handle the case where the result of a ternary is
@@ -189,4 +223,17 @@ for op in ['flt', 'fge', 'feq', 'fne',
('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
]
+# This section contains "late" optimizations that should be run after the
+# regular optimizations have finished. Optimizations should go here if
+# they help code generation but do not necessarily produce code that is
+# more easily optimizable.
+late_optimizations = [
+ (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+ (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+ (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+ (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+]
+
print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+ late_optimizations).render()
diff --git a/mesalib/src/glsl/nir/nir_opt_cse.c b/mesalib/src/glsl/nir/nir_opt_cse.c
index 9b383202d..553906e12 100644
--- a/mesalib/src/glsl/nir/nir_opt_cse.c
+++ b/mesalib/src/glsl/nir/nir_opt_cse.c
@@ -37,20 +37,19 @@ struct cse_state {
};
static bool
-nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask)
+nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1,
+ unsigned src2)
{
- if (src1.abs != src2.abs || src1.negate != src2.negate)
+ if (alu1->src[src1].abs != alu2->src[src2].abs ||
+ alu1->src[src1].negate != alu2->src[src2].negate)
return false;
- for (int i = 0; i < 4; ++i) {
- if (!(read_mask & (1 << i)))
- continue;
-
- if (src1.swizzle[i] != src2.swizzle[i])
+ for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
+ if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
return false;
}
- return nir_srcs_equal(src1.src, src2.src);
+ return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
}
static bool
@@ -73,10 +72,17 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
return false;
- for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
- if (!nir_alu_srcs_equal(alu1->src[i], alu2->src[i],
- (1 << alu1->dest.dest.ssa.num_components) - 1))
- return false;
+ if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[alu1->op].num_inputs == 2);
+ return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
+ (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
+ nir_alu_srcs_equal(alu1, alu2, 1, 0));
+ } else {
+ for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
+ if (!nir_alu_srcs_equal(alu1, alu2, i, i))
+ return false;
+ }
}
return true;
}
@@ -154,12 +160,14 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
static bool
src_is_ssa(nir_src *src, void *data)
{
+ (void) data;
return src->is_ssa;
}
static bool
dest_is_ssa(nir_dest *dest, void *data)
{
+ (void) data;
return dest->is_ssa;
}
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
new file mode 100644
index 000000000..9d5646fe6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a small peephole optimization that looks for a multiply that
+ * is only ever used in an add and replaces both with an fma.
+ */
+
+struct peephole_ffma_state { /* state shared by the per-block callback of this pass */
+ void *mem_ctx; /* set to ralloc_parent(impl); passed when creating new instructions */
+ nir_function_impl *impl; /* the function implementation being processed */
+ bool progress; /* set to true once any fadd has been replaced by an ffma */
+};
+
+static inline bool /* true iff def has no if-uses and every use ends, via movs/neg/abs, in an fadd */
+are_all_uses_fadd(nir_ssa_def *def)
+{
+ if (def->if_uses->entries > 0)
+ return false; /* any use recorded in if_uses disqualifies the value */
+
+ struct set_entry *use_iter;
+ set_foreach(def->uses, use_iter) {
+ nir_instr *use_instr = (nir_instr *)use_iter->key; /* set keys are the using instructions */
+
+ if (use_instr->type != nir_instr_type_alu)
+ return false;
+
+ nir_alu_instr *use_alu = nir_instr_as_alu(use_instr);
+ switch (use_alu->op) {
+ case nir_op_fadd:
+ break; /* This one's ok */
+
+ case nir_op_imov:
+ case nir_op_fmov:
+ case nir_op_fneg:
+ case nir_op_fabs:
+ assert(use_alu->dest.dest.is_ssa);
+ if (!are_all_uses_fadd(&use_alu->dest.dest.ssa)) /* look through to the users of this result */
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static nir_alu_instr * /* finds the fmul feeding src through imov/fmov/fneg/fabs, or returns NULL */
+get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
+{
+ assert(src->src.is_ssa && !src->abs && !src->negate);
+
+ nir_instr *instr = src->src.ssa->parent_instr;
+ if (instr->type != nir_instr_type_alu)
+ return NULL;
+
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ switch (alu->op) {
+ case nir_op_imov:
+ case nir_op_fmov:
+ alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); /* moves are looked through */
+ break;
+
+ case nir_op_fneg:
+ alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ *negate = !*negate; /* each fneg on the way flips the collected sign */
+ break;
+
+ case nir_op_fabs:
+ alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ *negate = false; /* an fabs drops any negate collected from deeper in the chain */
+ *abs = true;
+ break;
+
+ case nir_op_fmul:
+ /* Only absorb a fmul into a ffma if the fmul is only used in fadd
+ * operations. This prevents us from being too aggressive with our
+ * fusing which can actually lead to more instructions.
+ */
+ if (!are_all_uses_fadd(&alu->dest.dest.ssa))
+ return NULL;
+ break;
+
+ default:
+ return NULL;
+ }
+
+ if (!alu)
+ return NULL; /* the recursive search found no fmul */
+
+ for (unsigned i = 0; i < 4; i++) { /* fold this source's swizzle into the running swizzle[] */
+ if (!(alu->dest.write_mask & (1 << i)))
+ break; /* stops at the first channel absent from the fmul's write mask */
+
+ swizzle[i] = swizzle[src->swizzle[i]]; /* NOTE(review): updated in place while still indexed; confirm for src->swizzle[i] < i */
+ }
+
+ return alu;
+}
+
+static bool /* per-block callback: replaces fadd(fmul(a, b), c) patterns with ffma(a, b, c) */
+nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
+{
+ struct peephole_ffma_state *state = void_state;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_alu)
+ continue;
+
+ nir_alu_instr *add = nir_instr_as_alu(instr);
+ if (add->op != nir_op_fadd)
+ continue;
+
+ /* TODO: Maybe bail if this expression is considered "precise"? */
+
+ assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
+
+ /* This is the case a + a. We would rather handle this with an
+ * algebraic reduction than fuse it. Also, we want to only fuse
+ * things where the multiply is used only once and, in this case,
+ * it would be used twice by the same instruction.
+ */
+ if (add->src[0].src.ssa == add->src[1].src.ssa)
+ continue;
+
+ nir_alu_instr *mul;
+ uint8_t add_mul_src, swizzle[4];
+ bool negate, abs;
+ for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) { /* try each fadd operand as the multiply */
+ for (unsigned i = 0; i < 4; i++)
+ swizzle[i] = i; /* start every attempt from the identity swizzle */
+
+ negate = false;
+ abs = false;
+
+ mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs);
+
+ if (mul != NULL)
+ break; /* add_mul_src now names the operand that holds the multiply */
+ }
+
+ if (mul == NULL)
+ continue;
+
+ nir_ssa_def *mul_src[2];
+ mul_src[0] = mul->src[0].src.ssa;
+ mul_src[1] = mul->src[1].src.ssa;
+
+ if (abs) {
+ for (unsigned i = 0; i < 2; i++) { /* |a * b| == |a| * |b|: wrap both factors in fabs */
+ nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx, /* shadows the outer bool abs in this body */
+ nir_op_fabs);
+ abs->src[0].src = nir_src_for_ssa(mul_src[i]);
+ nir_ssa_dest_init(&abs->instr, &abs->dest.dest,
+ mul_src[i]->num_components, NULL);
+ abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1;
+ nir_instr_insert_before(&add->instr, &abs->instr);
+ mul_src[i] = &abs->dest.dest.ssa;
+ }
+ }
+
+ if (negate) { /* negating one factor negates the product, so only mul_src[0] is wrapped */
+ nir_alu_instr *neg = nir_alu_instr_create(state->mem_ctx,
+ nir_op_fneg);
+ neg->src[0].src = nir_src_for_ssa(mul_src[0]);
+ nir_ssa_dest_init(&neg->instr, &neg->dest.dest,
+ mul_src[0]->num_components, NULL);
+ neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1;
+ nir_instr_insert_before(&add->instr, &neg->instr);
+ mul_src[0] = &neg->dest.dest.ssa;
+ }
+
+ nir_alu_instr *ffma = nir_alu_instr_create(state->mem_ctx, nir_op_ffma);
+ ffma->dest.saturate = add->dest.saturate;
+ ffma->dest.write_mask = add->dest.write_mask;
+
+ for (unsigned i = 0; i < 2; i++) {
+ ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
+ for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
+ ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]]; /* the fmul's own swizzle seen through the collected one */
+ }
+ nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], /* third operand: the fadd source that was not the multiply */
+ state->mem_ctx);
+
+ assert(add->dest.dest.is_ssa);
+
+ nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
+ add->dest.dest.ssa.num_components,
+ add->dest.dest.ssa.name);
+ nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
+ nir_src_for_ssa(&ffma->dest.dest.ssa),
+ state->mem_ctx);
+
+ nir_instr_insert_before(&add->instr, &ffma->instr);
+ assert(add->dest.dest.ssa.uses->entries == 0);
+ nir_instr_remove(&add->instr); /* the fadd is removed once the ffma has taken over its uses */
+
+ state->progress = true;
+ }
+
+ return true;
+}
+
+static bool /* runs the peephole over one function implementation; returns whether anything changed */
+nir_opt_peephole_ffma_impl(nir_function_impl *impl)
+{
+ struct peephole_ffma_state state;
+
+ state.mem_ctx = ralloc_parent(impl); /* new instructions are created with the impl's ralloc parent as context */
+ state.impl = impl;
+ state.progress = false;
+
+ nir_foreach_block(impl, nir_opt_peephole_ffma_block, &state);
+
+ if (state.progress) /* nir_metadata_preserve() is only called when something was rewritten */
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+
+ return state.progress;
+}
+
+bool /* pass entry point: returns true if any overload's implementation was changed */
+nir_opt_peephole_ffma(nir_shader *shader)
+{
+ bool progress = false;
+
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl) /* overloads with a NULL impl are skipped */
+ progress |= nir_opt_peephole_ffma_impl(overload->impl);
+ }
+
+ return progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
index b89451b09..f400cfd66 100644
--- a/mesalib/src/glsl/nir/nir_opt_peephole_select.c
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
@@ -84,7 +84,9 @@ block_check_for_allowed_instrs(nir_block *block)
case nir_instr_type_alu: {
/* It must be a move operation */
nir_alu_instr *mov = nir_instr_as_alu(instr);
- if (mov->op != nir_op_fmov && mov->op != nir_op_imov)
+ if (mov->op != nir_op_fmov && mov->op != nir_op_imov &&
+ mov->op != nir_op_fneg && mov->op != nir_op_ineg &&
+ mov->op != nir_op_fabs && mov->op != nir_op_iabs)
return false;
/* Can't handle saturate */
diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c
index fa11a312e..fb8c9344c 100644
--- a/mesalib/src/glsl/nir/nir_print.c
+++ b/mesalib/src/glsl/nir/nir_print.c
@@ -137,25 +137,37 @@ print_dest(nir_dest *dest, FILE *fp)
}
static void
-print_alu_src(nir_alu_src *src, FILE *fp)
+print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp)
{
- if (src->negate)
+ if (instr->src[src].negate)
fprintf(fp, "-");
- if (src->abs)
+ if (instr->src[src].abs)
fprintf(fp, "abs(");
- print_src(&src->src, fp);
+ print_src(&instr->src[src].src, fp);
- if (src->swizzle[0] != 0 ||
- src->swizzle[1] != 1 ||
- src->swizzle[2] != 2 ||
- src->swizzle[3] != 3) {
+ bool print_swizzle = false;
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(instr, src, i))
+ continue;
+
+ if (instr->src[src].swizzle[i] != i) {
+ print_swizzle = true;
+ break;
+ }
+ }
+
+ if (print_swizzle) {
fprintf(fp, ".");
- for (unsigned i = 0; i < 4; i++)
- fprintf(fp, "%c", "xyzw"[src->swizzle[i]]);
+ for (unsigned i = 0; i < 4; i++) {
+ if (!nir_alu_instr_channel_used(instr, src, i))
+ continue;
+
+ fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]);
+ }
}
- if (src->abs)
+ if (instr->src[src].abs)
fprintf(fp, ")");
}
@@ -189,7 +201,7 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp)
if (i != 0)
fprintf(fp, ", ");
- print_alu_src(&instr->src[i], fp);
+ print_alu_src(instr, i, fp);
}
}
diff --git a/mesalib/src/glsl/nir/nir_remove_dead_variables.c b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
index e7f8aeacb..4417e2a48 100644
--- a/mesalib/src/glsl/nir/nir_remove_dead_variables.c
+++ b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
@@ -98,22 +98,14 @@ add_var_use_shader(nir_shader *shader, struct set *live)
}
static void
-remove_dead_local_vars(nir_function_impl *impl, struct set *live)
+remove_dead_vars(struct exec_list *var_list, struct set *live)
{
- foreach_list_typed_safe(nir_variable, var, node, &impl->locals) {
+ foreach_list_typed_safe(nir_variable, var, node, var_list) {
struct set_entry *entry = _mesa_set_search(live, var);
- if (entry == NULL)
- exec_node_remove(&var->node);
- }
-}
-
-static void
-remove_dead_global_vars(nir_shader *shader, struct set *live)
-{
- foreach_list_typed_safe(nir_variable, var, node, &shader->globals) {
- struct set_entry *entry = _mesa_set_search(live, var);
- if (entry == NULL)
+ if (entry == NULL) {
exec_node_remove(&var->node);
+ ralloc_free(var);
+ }
}
}
@@ -125,11 +117,11 @@ nir_remove_dead_variables(nir_shader *shader)
add_var_use_shader(shader, live);
- remove_dead_global_vars(shader, live);
+ remove_dead_vars(&shader->globals, live);
nir_foreach_overload(shader, overload) {
if (overload->impl)
- remove_dead_local_vars(overload->impl, live);
+ remove_dead_vars(&overload->impl->locals, live);
}
_mesa_set_destroy(live, NULL);
diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c
index 73a802be7..5ba016085 100644
--- a/mesalib/src/glsl/nir/nir_search.c
+++ b/mesalib/src/glsl/nir/nir_search.c
@@ -218,8 +218,8 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
if (matched)
return true;
- if (nir_op_infos[instr->op].num_inputs == 2 &&
- (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE)) {
+ if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+ assert(nir_op_infos[instr->op].num_inputs == 2);
if (!match_value(expr->srcs[0], instr, 1, num_components,
swizzle, state))
return false;
diff --git a/mesalib/src/glsl/nir/nir_split_var_copies.c b/mesalib/src/glsl/nir/nir_split_var_copies.c
index 4d663b51b..fc72c078c 100644
--- a/mesalib/src/glsl/nir/nir_split_var_copies.c
+++ b/mesalib/src/glsl/nir/nir_split_var_copies.c
@@ -188,8 +188,8 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy,
* belongs to the copy instruction and b) the deref chains may
* have some of the same links due to the way we constructed them
*/
- nir_deref *src = nir_copy_deref(state->mem_ctx, src_head);
- nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head);
+ nir_deref *src = nir_copy_deref(new_copy, src_head);
+ nir_deref *dest = nir_copy_deref(new_copy, dest_head);
new_copy->variables[0] = nir_deref_as_var(dest);
new_copy->variables[1] = nir_deref_as_var(src);
diff --git a/mesalib/src/glsl/nir/nir_sweep.c b/mesalib/src/glsl/nir/nir_sweep.c
new file mode 100644
index 000000000..d3549756a
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_sweep.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/**
+ * \file nir_sweep.c
+ *
+ * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
+ * memory - anything still connected to the program will be kept, and any dead memory
+ * we dropped on the floor will be freed.
+ *
+ * The expectation is that drivers should call this when finished compiling the shader
+ * (after any optimization, lowering, and so on). However, it's also fine to call it
+ * earlier, and even many times, trading CPU cycles for memory savings.
+ */
+/* Call ralloc_steal(mem_ctx, obj) on each `type` object that foreach_list_typed visits in `list`. */
+#define steal_list(mem_ctx, type, list) \
+ foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
+/* Forward declaration: sweep_if() and sweep_loop() below call back into sweep_cf_node(). */
+static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
+/* nir_foreach_src() callback: steal a register source's indirect back onto `nir`. */
+static bool
+sweep_src_indirect(nir_src *src, void *nir)
+{
+ if (!src->is_ssa && src->reg.indirect) /* only non-SSA sources that have an indirect */
+ ralloc_steal(nir, src->reg.indirect);
+
+ return true; /* presumably tells nir_foreach_src to keep iterating -- confirm */
+}
+/* nir_foreach_dest() callback: steal a register destination's indirect back onto `nir`. */
+static bool
+sweep_dest_indirect(nir_dest *dest, void *nir)
+{
+ if (!dest->is_ssa && dest->reg.indirect) /* only non-SSA dests that have an indirect */
+ ralloc_steal(nir, dest->reg.indirect);
+
+ return true; /* presumably tells nir_foreach_dest to keep iterating -- confirm */
+}
+/* Steal a block, each of its instructions, and their register indirects back onto `nir`. */
+static void
+sweep_block(nir_shader *nir, nir_block *block)
+{
+ ralloc_steal(nir, block);
+
+ nir_foreach_instr(block, instr) {
+ ralloc_steal(nir, instr);
+
+ nir_foreach_src(instr, sweep_src_indirect, nir); /* shader doubles as the callback's void* state */
+ nir_foreach_dest(instr, sweep_dest_indirect, nir);
+ }
+}
+/* Steal an if node back onto `nir`, then sweep every CF node in its then and else lists. */
+static void
+sweep_if(nir_shader *nir, nir_if *iff)
+{
+ ralloc_steal(nir, iff);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
+ sweep_cf_node(nir, cf_node);
+ }
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
+ sweep_cf_node(nir, cf_node);
+ }
+}
+/* Steal a loop node back onto `nir`, then sweep every CF node in its body. */
+static void
+sweep_loop(nir_shader *nir, nir_loop *loop)
+{
+ ralloc_steal(nir, loop);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+ sweep_cf_node(nir, cf_node);
+ }
+}
+/* Dispatch on the control-flow node type to the matching block / if / loop sweeper. */
+static void
+sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
+{
+ switch (cf_node->type) {
+ case nir_cf_node_block:
+ sweep_block(nir, nir_cf_node_as_block(cf_node));
+ break;
+ case nir_cf_node_if:
+ sweep_if(nir, nir_cf_node_as_if(cf_node));
+ break;
+ case nir_cf_node_loop:
+ sweep_loop(nir, nir_cf_node_as_loop(cf_node));
+ break;
+ default: /* any other node type is treated as a programming error */
+ unreachable("Invalid CF node type");
+ }
+}
+/* Steal a function impl and what hangs off it (params, locals, registers, CF tree) back. */
+static void
+sweep_impl(nir_shader *nir, nir_function_impl *impl)
+{
+ ralloc_steal(nir, impl);
+
+ ralloc_steal(nir, impl->params);
+ ralloc_steal(nir, impl->return_var); /* no NULL check here; relies on ralloc_steal accepting NULL -- confirm */
+ steal_list(nir, nir_variable, &impl->locals);
+ steal_list(nir, nir_register, &impl->registers);
+
+ foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
+ sweep_cf_node(nir, cf_node);
+ }
+
+ sweep_block(nir, impl->end_block); /* swept explicitly; presumably not linked into impl->body -- confirm */
+
+ /* Wipe out all the metadata, if any. */
+ nir_metadata_preserve(impl, nir_metadata_none);
+}
+/* Steal a function, each of its overloads and their params back; sweep any impl present. */
+static void
+sweep_function(nir_shader *nir, nir_function *f)
+{
+ ralloc_steal(nir, f);
+
+ foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) {
+ ralloc_steal(nir, overload);
+ ralloc_steal(nir, overload->params);
+ if (overload->impl) /* overloads without an impl have nothing more to sweep */
+ sweep_impl(nir, overload->impl);
+ }
+}
+/* Public entry point: free every allocation under `nir` that the walk below does not steal back. */
+void
+nir_sweep(nir_shader *nir)
+{
+ void *rubbish = ralloc_context(NULL); /* temporary context with no parent */
+
+ /* First, move ownership of all the memory to a temporary context; assume dead. */
+ ralloc_adopt(rubbish, nir);
+
+ /* Variables and registers are not dead. Steal them back. */
+ steal_list(nir, nir_variable, &nir->uniforms);
+ steal_list(nir, nir_variable, &nir->inputs);
+ steal_list(nir, nir_variable, &nir->outputs);
+ steal_list(nir, nir_variable, &nir->globals);
+ steal_list(nir, nir_variable, &nir->system_values);
+ steal_list(nir, nir_register, &nir->registers);
+
+ /* Recurse into functions, stealing their contents back. */
+ foreach_list_typed(nir_function, func, node, &nir->functions) {
+ sweep_function(nir, func);
+ }
+
+ /* Free everything we didn't steal back. */
+ ralloc_free(rubbish);
+}
diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c
index 47cf45393..53ff54766 100644
--- a/mesalib/src/glsl/nir/nir_to_ssa.c
+++ b/mesalib/src/glsl/nir/nir_to_ssa.c
@@ -47,7 +47,7 @@ insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
set_foreach(block->predecessors, entry) {
nir_block *pred = (nir_block *) entry->key;
- nir_phi_src *src = ralloc(mem_ctx, nir_phi_src);
+ nir_phi_src *src = ralloc(instr, nir_phi_src);
src->pred = pred;
src->src.is_ssa = false;
src->src.reg.base_offset = 0;
diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp
index a13c3e12a..f0d0b46d2 100644
--- a/mesalib/src/glsl/nir/nir_types.cpp
+++ b/mesalib/src/glsl/nir/nir_types.cpp
@@ -143,6 +143,12 @@ glsl_void_type(void)
}
const glsl_type *
+glsl_float_type(void)
+{
+ return glsl_type::float_type;
+}
+
+const glsl_type *
glsl_vec4_type(void)
{
return glsl_type::vec4_type;
diff --git a/mesalib/src/glsl/nir/nir_types.h b/mesalib/src/glsl/nir/nir_types.h
index 494051a67..276d4ad62 100644
--- a/mesalib/src/glsl/nir/nir_types.h
+++ b/mesalib/src/glsl/nir/nir_types.h
@@ -69,6 +69,7 @@ bool glsl_type_is_scalar(const struct glsl_type *type);
bool glsl_type_is_matrix(const struct glsl_type *type);
const struct glsl_type *glsl_void_type(void);
+const struct glsl_type *glsl_float_type(void);
const struct glsl_type *glsl_vec4_type(void);
const struct glsl_type *glsl_array_type(const struct glsl_type *base,
unsigned elements);
diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c
index f247ae069..a7aa79837 100644
--- a/mesalib/src/glsl/nir/nir_validate.c
+++ b/mesalib/src/glsl/nir/nir_validate.c
@@ -295,6 +295,8 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
static void
validate_deref_chain(nir_deref *deref, validate_state *state)
{
+ assert(deref->child == NULL || ralloc_parent(deref->child) == deref);
+
nir_deref *parent = NULL;
while (deref != NULL) {
switch (deref->deref_type) {
@@ -336,9 +338,10 @@ validate_var_use(nir_variable *var, validate_state *state)
}
static void
-validate_deref_var(nir_deref_var *deref, validate_state *state)
+validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state)
{
assert(deref != NULL);
+ assert(ralloc_parent(deref) == parent_mem_ctx);
assert(deref->deref.type == deref->var->type);
validate_var_use(deref->var, state);
@@ -386,7 +389,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
for (unsigned i = 0; i < num_vars; i++) {
- validate_deref_var(instr->variables[i], state);
+ validate_deref_var(instr, instr->variables[i], state);
}
switch (instr->intrinsic) {
@@ -423,7 +426,7 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
}
if (instr->sampler != NULL)
- validate_deref_var(instr->sampler, state);
+ validate_deref_var(instr, instr->sampler, state);
}
static void
@@ -438,10 +441,10 @@ validate_call_instr(nir_call_instr *instr, validate_state *state)
for (unsigned i = 0; i < instr->num_params; i++) {
assert(instr->callee->params[i].type == instr->params[i]->deref.type);
- validate_deref_var(instr->params[i], state);
+ validate_deref_var(instr, instr->params[i], state);
}
- validate_deref_var(instr->return_deref, state);
+ validate_deref_var(instr, instr->return_deref, state);
}
static void
@@ -680,8 +683,7 @@ validate_cf_node(nir_cf_node *node, validate_state *state)
break;
default:
- assert(!"Invalid ALU instruction type");
- break;
+ unreachable("Invalid CF node type");
}
}