From 4ba9be2882d9f1567809edb0a31fcdf11320d41f Mon Sep 17 00:00:00 2001 From: marha Date: Mon, 20 Apr 2015 21:25:25 +0200 Subject: randrproto xkeyboard-config fontconfig libX11 libXdmcp libXmu pixman xkbcomp xserver mesa git update 20 Apr 2015 xserver commit b1029716e41e252f149b82124a149da180607c96 xkeyboard-config commit 7d00bcc2d9c3944bbdfcbe472ee3299729dc7687 libX11 commit 748d47e69f5c12d8557d56a8a8ec166588da7b93 libXdmcp commit b10f382e3aa2e86cd5a2bc27d6758da55f0ab1f6 xkbcomp commit 1ae525b3d236b59e6437b2b5433d460e18370973 pixman commit 58e21d3e45c5227c2ca9ac00cf044f22a7975180 randrproto commit 98da0d6e48b7d124d6788ea568e9f9e3dc204322 libXmu commit 4459e6940fe3fdf26a8d5d4c71989498bc400a62 fontconfig commit 07be485a0a84995ce69bf60e3b1bb22cb35f6b0e mesa commit c1485f4b7d044724b3dbc1011f3c3a8a53132010 --- mesalib/src/glsl/nir/glsl_to_nir.cpp | 92 ++++---- mesalib/src/glsl/nir/nir.c | 64 +++-- mesalib/src/glsl/nir/nir.h | 51 +++- mesalib/src/glsl/nir/nir_algebraic.py | 17 +- mesalib/src/glsl/nir/nir_builder.h | 135 ++++++++++- mesalib/src/glsl/nir/nir_from_ssa.c | 9 +- mesalib/src/glsl/nir/nir_lower_idiv.c | 155 ++++++++++++ mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c | 2 +- mesalib/src/glsl/nir/nir_lower_samplers.cpp | 45 ++-- mesalib/src/glsl/nir/nir_lower_tex_projector.c | 143 +++++++++++ mesalib/src/glsl/nir/nir_lower_var_copies.c | 8 +- mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c | 2 +- .../src/glsl/nir/nir_normalize_cubemap_coords.c | 110 +++++++++ mesalib/src/glsl/nir/nir_opcodes.py | 8 +- mesalib/src/glsl/nir/nir_opt_algebraic.py | 63 ++++- mesalib/src/glsl/nir/nir_opt_cse.c | 32 ++- mesalib/src/glsl/nir/nir_opt_peephole_ffma.c | 261 +++++++++++++++++++++ mesalib/src/glsl/nir/nir_opt_peephole_select.c | 4 +- mesalib/src/glsl/nir/nir_print.c | 36 ++- mesalib/src/glsl/nir/nir_remove_dead_variables.c | 22 +- mesalib/src/glsl/nir/nir_search.c | 4 +- mesalib/src/glsl/nir/nir_split_var_copies.c | 4 +- mesalib/src/glsl/nir/nir_sweep.c | 172 ++++++++++++++ 
mesalib/src/glsl/nir/nir_to_ssa.c | 2 +- mesalib/src/glsl/nir/nir_types.cpp | 6 + mesalib/src/glsl/nir/nir_types.h | 1 + mesalib/src/glsl/nir/nir_validate.c | 16 +- 27 files changed, 1264 insertions(+), 200 deletions(-) create mode 100644 mesalib/src/glsl/nir/nir_lower_idiv.c create mode 100644 mesalib/src/glsl/nir/nir_lower_tex_projector.c create mode 100644 mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c create mode 100644 mesalib/src/glsl/nir/nir_opt_peephole_ffma.c create mode 100644 mesalib/src/glsl/nir/nir_sweep.c (limited to 'mesalib/src/glsl/nir') diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp index 357944da6..f6b8331d4 100644 --- a/mesalib/src/glsl/nir/glsl_to_nir.cpp +++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp @@ -88,6 +88,8 @@ private: exec_list *cf_node_list; nir_instr *result; /* result of the expression tree last visited */ + nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir); + /* the head of the dereference chain we're creating */ nir_deref_var *deref_head; /* the tail of the dereference chain we're creating */ @@ -156,6 +158,14 @@ nir_visitor::~nir_visitor() _mesa_hash_table_destroy(this->overload_table, NULL); } +nir_deref_var * +nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir) +{ + ir->accept(this); + ralloc_steal(mem_ctx, this->deref_head); + return this->deref_head; +} + static nir_constant * constant_copy(ir_constant *ir, void *mem_ctx) { @@ -582,13 +592,11 @@ void nir_visitor::visit(ir_return *ir) { if (ir->value != NULL) { - ir->value->accept(this); nir_intrinsic_instr *copy = nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_var_create(this->shader, - this->impl->return_var); - copy->variables[1] = this->deref_head; + copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var); + copy->variables[1] = evaluate_deref(&copy->instr, ir->value); } nir_jump_instr *instr = nir_jump_instr_create(this->shader, 
nir_jump_return); @@ -613,8 +621,7 @@ nir_visitor::visit(ir_call *ir) nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); ir_dereference *param = (ir_dereference *) ir->actual_parameters.get_head(); - param->accept(this); - instr->variables[0] = this->deref_head; + instr->variables[0] = evaluate_deref(&instr->instr, param); nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); @@ -623,8 +630,7 @@ nir_visitor::visit(ir_call *ir) nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); store_instr->num_components = 1; - ir->return_deref->accept(this); - store_instr->variables[0] = this->deref_head; + store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref); store_instr->src[0].is_ssa = true; store_instr->src[0].ssa = &instr->dest.ssa; @@ -642,13 +648,11 @@ nir_visitor::visit(ir_call *ir) unsigned i = 0; foreach_in_list(ir_dereference, param, &ir->actual_parameters) { - param->accept(this); - instr->params[i] = this->deref_head; + instr->params[i] = evaluate_deref(&instr->instr, param); i++; } - ir->return_deref->accept(this); - instr->return_deref = this->deref_head; + instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref); nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); } @@ -663,12 +667,8 @@ nir_visitor::visit(ir_assignment *ir) nir_intrinsic_instr *copy = nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); - ir->lhs->accept(this); - copy->variables[0] = this->deref_head; - - ir->rhs->accept(this); - copy->variables[1] = this->deref_head; - + copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs); + copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs); if (ir->condition) { nir_if *if_stmt = nir_if_create(this->shader); @@ -700,6 +700,7 @@ nir_visitor::visit(ir_assignment *ir) load->num_components = ir->lhs->type->vector_elements; nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); 
load->variables[0] = lhs_deref; + ralloc_steal(load, load->variables[0]); nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr); nir_op vec_op; @@ -741,7 +742,7 @@ nir_visitor::visit(ir_assignment *ir) nir_intrinsic_instr *store = nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); store->num_components = ir->lhs->type->vector_elements; - nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref); + nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref); store->variables[0] = nir_deref_as_var(store_deref); store->src[0] = src; @@ -816,6 +817,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir) nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var); load_instr->num_components = ir->type->vector_elements; load_instr->variables[0] = this->deref_head; + ralloc_steal(load_instr, load_instr->variables[0]); add_instr(&load_instr->instr, ir->type->vector_elements); } @@ -959,6 +961,7 @@ nir_visitor::visit(ir_expression *ir) nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op); intrin->num_components = deref->type->vector_elements; intrin->variables[0] = this->deref_head; + ralloc_steal(intrin, intrin->variables[0]); if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset || intrin->intrinsic == nir_intrinsic_interp_var_at_sample) @@ -1087,12 +1090,6 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break; case ir_unop_sin: emit(nir_op_fsin, dest_size, srcs); break; case ir_unop_cos: emit(nir_op_fcos, dest_size, srcs); break; - case ir_unop_sin_reduced: - emit(nir_op_fsin_reduced, dest_size, srcs); - break; - case ir_unop_cos_reduced: - emit(nir_op_fcos_reduced, dest_size, srcs); - break; case ir_unop_dFdx: emit(nir_op_fddx, dest_size, srcs); break; case ir_unop_dFdy: emit(nir_op_fddy, dest_size, srcs); break; case ir_unop_dFdx_fine: emit(nir_op_fddx_fine, dest_size, srcs); break; @@ -1210,6 +1207,9 @@ nir_visitor::visit(ir_expression *ir) 
case ir_binop_bit_and: case ir_binop_bit_or: case ir_binop_bit_xor: + case ir_binop_logic_and: + case ir_binop_logic_or: + case ir_binop_logic_xor: case ir_binop_lshift: case ir_binop_rshift: switch (ir->operation) { @@ -1270,6 +1270,24 @@ nir_visitor::visit(ir_expression *ir) case ir_binop_bit_xor: op = nir_op_ixor; break; + case ir_binop_logic_and: + if (supports_ints) + op = nir_op_iand; + else + op = nir_op_fand; + break; + case ir_binop_logic_or: + if (supports_ints) + op = nir_op_ior; + else + op = nir_op_for; + break; + case ir_binop_logic_xor: + if (supports_ints) + op = nir_op_ixor; + else + op = nir_op_fxor; + break; case ir_binop_lshift: op = nir_op_ishl; break; @@ -1444,24 +1462,6 @@ nir_visitor::visit(ir_expression *ir) } } break; - case ir_binop_logic_and: - if (supports_ints) - emit(nir_op_iand, dest_size, srcs); - else - emit(nir_op_fand, dest_size, srcs); - break; - case ir_binop_logic_or: - if (supports_ints) - emit(nir_op_ior, dest_size, srcs); - else - emit(nir_op_for, dest_size, srcs); - break; - case ir_binop_logic_xor: - if (supports_ints) - emit(nir_op_ixor, dest_size, srcs); - else - emit(nir_op_fxor, dest_size, srcs); - break; case ir_binop_dot: switch (ir->operands[0]->type->vector_elements) { case 2: emit(nir_op_fdot2, dest_size, srcs); break; @@ -1633,8 +1633,7 @@ nir_visitor::visit(ir_texture *ir) unreachable("not reached"); } - ir->sampler->accept(this); - instr->sampler = this->deref_head; + instr->sampler = evaluate_deref(&instr->instr, ir->sampler); unsigned src_number = 0; @@ -1759,7 +1758,7 @@ nir_visitor::visit(ir_dereference_record *ir) int field_index = this->deref_tail->type->field_index(ir->field); assert(field_index >= 0); - nir_deref_struct *deref = nir_deref_struct_create(this->shader, field_index); + nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index); deref->deref.type = ir->type; this->deref_tail->child = &deref->deref; this->deref_tail = &deref->deref; @@ -1783,5 +1782,6 @@ 
nir_visitor::visit(ir_dereference_array *ir) ir->array->accept(this); this->deref_tail->child = &deref->deref; + ralloc_steal(this->deref_tail, deref); this->deref_tail = &deref->deref; } diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c index 6459d5108..c6e53612b 100644 --- a/mesalib/src/glsl/nir/nir.c +++ b/mesalib/src/glsl/nir/nir.c @@ -58,11 +58,11 @@ reg_create(void *mem_ctx, struct exec_list *list) nir_register *reg = ralloc(mem_ctx, nir_register); reg->parent_instr = NULL; - reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + reg->uses = _mesa_set_create(reg, _mesa_hash_pointer, _mesa_key_pointer_equal); - reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + reg->defs = _mesa_set_create(reg, _mesa_hash_pointer, _mesa_key_pointer_equal); - reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer, _mesa_key_pointer_equal); reg->num_components = 0; @@ -108,7 +108,7 @@ nir_function_create(nir_shader *shader, const char *name) exec_list_push_tail(&shader->functions, &func->node); exec_list_make_empty(&func->overload_list); - func->name = name; + func->name = ralloc_strdup(func, name); func->shader = shader; return func; @@ -285,10 +285,10 @@ nir_block_create(void *mem_ctx) cf_init(&block->cf_node, nir_cf_node_block); block->successors[0] = block->successors[1] = NULL; - block->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); block->imm_dom = NULL; - block->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); exec_list_make_empty(&block->instr_list); @@ -381,11 +381,11 @@ alu_src_init(nir_alu_src *src) } nir_alu_instr * -nir_alu_instr_create(void *mem_ctx, nir_op op) +nir_alu_instr_create(nir_shader *shader, nir_op op) { unsigned num_srcs = 
nir_op_infos[op].num_inputs; nir_alu_instr *instr = - ralloc_size(mem_ctx, + ralloc_size(shader, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); instr_init(&instr->instr, nir_instr_type_alu); @@ -398,18 +398,18 @@ nir_alu_instr_create(void *mem_ctx, nir_op op) } nir_jump_instr * -nir_jump_instr_create(void *mem_ctx, nir_jump_type type) +nir_jump_instr_create(nir_shader *shader, nir_jump_type type) { - nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr); + nir_jump_instr *instr = ralloc(shader, nir_jump_instr); instr_init(&instr->instr, nir_instr_type_jump); instr->type = type; return instr; } nir_load_const_instr * -nir_load_const_instr_create(void *mem_ctx, unsigned num_components) +nir_load_const_instr_create(nir_shader *shader, unsigned num_components) { - nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr); + nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr); instr_init(&instr->instr, nir_instr_type_load_const); nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); @@ -418,11 +418,11 @@ nir_load_const_instr_create(void *mem_ctx, unsigned num_components) } nir_intrinsic_instr * -nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op) +nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) { unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; nir_intrinsic_instr *instr = - ralloc_size(mem_ctx, + ralloc_size(shader, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); instr_init(&instr->instr, nir_instr_type_intrinsic); @@ -438,29 +438,29 @@ nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op) } nir_call_instr * -nir_call_instr_create(void *mem_ctx, nir_function_overload *callee) +nir_call_instr_create(nir_shader *shader, nir_function_overload *callee) { - nir_call_instr *instr = ralloc(mem_ctx, nir_call_instr); + nir_call_instr *instr = ralloc(shader, nir_call_instr); instr_init(&instr->instr, nir_instr_type_call); instr->callee = callee; instr->num_params = 
callee->num_params; - instr->params = ralloc_array(mem_ctx, nir_deref_var *, instr->num_params); + instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params); instr->return_deref = NULL; return instr; } nir_tex_instr * -nir_tex_instr_create(void *mem_ctx, unsigned num_srcs) +nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) { - nir_tex_instr *instr = ralloc(mem_ctx, nir_tex_instr); + nir_tex_instr *instr = ralloc(shader, nir_tex_instr); instr_init(&instr->instr, nir_instr_type_tex); dest_init(&instr->dest); instr->num_srcs = num_srcs; - instr->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs); + instr->src = ralloc_array(instr, nir_tex_src, num_srcs); for (unsigned i = 0; i < num_srcs; i++) src_init(&instr->src[i].src); @@ -472,9 +472,9 @@ nir_tex_instr_create(void *mem_ctx, unsigned num_srcs) } nir_phi_instr * -nir_phi_instr_create(void *mem_ctx) +nir_phi_instr_create(nir_shader *shader) { - nir_phi_instr *instr = ralloc(mem_ctx, nir_phi_instr); + nir_phi_instr *instr = ralloc(shader, nir_phi_instr); instr_init(&instr->instr, nir_instr_type_phi); dest_init(&instr->dest); @@ -483,9 +483,9 @@ nir_phi_instr_create(void *mem_ctx) } nir_parallel_copy_instr * -nir_parallel_copy_instr_create(void *mem_ctx) +nir_parallel_copy_instr_create(nir_shader *shader) { - nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr); + nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr); instr_init(&instr->instr, nir_instr_type_parallel_copy); exec_list_make_empty(&instr->entries); @@ -494,9 +494,9 @@ nir_parallel_copy_instr_create(void *mem_ctx) } nir_ssa_undef_instr * -nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components) +nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components) { - nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr); + nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr); instr_init(&instr->instr, nir_instr_type_ssa_undef); 
nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); @@ -543,7 +543,7 @@ copy_deref_var(void *mem_ctx, nir_deref_var *deref) nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var); ret->deref.type = deref->deref.type; if (deref->deref.child) - ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + ret->deref.child = nir_copy_deref(ret, deref->deref.child); return ret; } @@ -558,7 +558,7 @@ copy_deref_array(void *mem_ctx, nir_deref_array *deref) } ret->deref.type = deref->deref.type; if (deref->deref.child) - ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + ret->deref.child = nir_copy_deref(ret, deref->deref.child); return ret; } @@ -568,7 +568,7 @@ copy_deref_struct(void *mem_ctx, nir_deref_struct *deref) nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index); ret->deref.type = deref->deref.type; if (deref->deref.child) - ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + ret->deref.child = nir_copy_deref(ret, deref->deref.child); return ret; } @@ -1834,13 +1834,11 @@ void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, unsigned num_components, const char *name) { - void *mem_ctx = ralloc_parent(instr); - def->name = name; def->parent_instr = instr; - def->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + def->uses = _mesa_set_create(instr, _mesa_hash_pointer, _mesa_key_pointer_equal); - def->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + def->if_uses = _mesa_set_create(instr, _mesa_hash_pointer, _mesa_key_pointer_equal); def->num_components = num_components; diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h index 29fe94243..74772c798 100644 --- a/mesalib/src/glsl/nir/nir.h +++ b/mesalib/src/glsl/nir/nir.h @@ -34,6 +34,7 @@ #include "util/set.h" #include "util/bitset.h" #include "nir_types.h" +#include "glsl/shader_enums.h" #include <stdio.h> #include "nir_opcodes.h" @@ -529,6 +530,16 @@ nir_src_for_reg(nir_register *reg) return src; } +static inline 
nir_instr * +nir_src_get_parent_instr(const nir_src *src) +{ + if (src->is_ssa) { + return src->ssa->parent_instr; + } else { + return src->reg.reg->parent_instr; + } +} + static inline nir_dest nir_dest_for_reg(nir_register *reg) { @@ -1365,11 +1376,17 @@ typedef struct nir_function { typedef struct nir_shader_compiler_options { bool lower_ffma; + bool lower_flrp; bool lower_fpow; bool lower_fsat; bool lower_fsqrt; /** lowers fneg and ineg to fsub and isub. */ bool lower_negate; + /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ + bool lower_sub; + + /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ + bool lower_scmp; /** * Does the driver support real 32-bit integers? (Otherwise, integers @@ -1414,6 +1431,9 @@ typedef struct nir_shader { * access plus one */ unsigned num_inputs, num_uniforms, num_outputs; + + /** the number of uniforms that are only accessed directly */ + unsigned num_direct_uniforms; } nir_shader; #define nir_foreach_overload(shader, overload) \ @@ -1466,26 +1486,26 @@ void nir_metadata_require(nir_function_impl *impl, nir_metadata required); void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); /** creates an instruction with default swizzle/writemask/etc. 
with NULL registers */ -nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op); +nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); -nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type); +nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); -nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx, +nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, unsigned num_components); -nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx, +nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op); -nir_call_instr *nir_call_instr_create(void *mem_ctx, +nir_call_instr *nir_call_instr_create(nir_shader *shader, nir_function_overload *callee); -nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs); +nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); -nir_phi_instr *nir_phi_instr_create(void *mem_ctx); +nir_phi_instr *nir_phi_instr_create(nir_shader *shader); -nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx); +nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); -nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx, +nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components); nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var); @@ -1550,7 +1570,7 @@ void nir_print_instr(const nir_instr *instr, FILE *fp); #ifdef DEBUG void nir_validate_shader(nir_shader *shader); #else -static inline void nir_validate_shader(nir_shader *shader) { } +static inline void nir_validate_shader(nir_shader *shader) { (void) shader; } #endif /* DEBUG */ void nir_calc_dominance_impl(nir_function_impl *impl); @@ -1596,14 +1616,18 @@ void nir_lower_alu_to_scalar(nir_shader *shader); void nir_lower_phis_to_scalar(nir_shader *shader); void nir_lower_samplers(nir_shader *shader, - struct gl_shader_program *shader_program, - struct 
gl_program *prog); + const struct gl_shader_program *shader_program, + gl_shader_stage stage); void nir_lower_system_values(nir_shader *shader); +void nir_lower_tex_projector(nir_shader *shader); +void nir_lower_idiv(nir_shader *shader); void nir_lower_atomics(nir_shader *shader); void nir_lower_to_source_mods(nir_shader *shader); +void nir_normalize_cubemap_coords(nir_shader *shader); + void nir_live_variables_impl(nir_function_impl *impl); bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); @@ -1612,6 +1636,7 @@ void nir_convert_to_ssa(nir_shader *shader); void nir_convert_from_ssa(nir_shader *shader); bool nir_opt_algebraic(nir_shader *shader); +bool nir_opt_algebraic_late(nir_shader *shader); bool nir_opt_constant_folding(nir_shader *shader); bool nir_opt_global_to_local(nir_shader *shader); @@ -1631,6 +1656,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader); bool nir_opt_remove_phis(nir_shader *shader); +void nir_sweep(nir_shader *shader); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/mesalib/src/glsl/nir/nir_algebraic.py b/mesalib/src/glsl/nir/nir_algebraic.py index afab1a008..bbf4f08ef 100644 --- a/mesalib/src/glsl/nir/nir_algebraic.py +++ b/mesalib/src/glsl/nir/nir_algebraic.py @@ -181,12 +181,23 @@ _algebraic_pass_template = mako.template.Template(""" #include "nir.h" #include "nir_search.h" +#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS +#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS + struct transform { const nir_search_expression *search; const nir_search_value *replace; unsigned condition_offset; }; +struct opt_state { + void *mem_ctx; + bool progress; + const bool *condition_flags; +}; + +#endif + % for (opcode, xform_list) in xform_dict.iteritems(): % for xform in xform_list: ${xform.search.render()} @@ -200,12 +211,6 @@ static const struct transform ${pass_name}_${opcode}_xforms[] = { }; % endfor -struct opt_state { - void *mem_ctx; - bool progress; - const bool *condition_flags; -}; - static bool ${pass_name}_block(nir_block *block, void 
*void_state) { diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h index 7c4f7fd96..d1419ee21 100644 --- a/mesalib/src/glsl/nir/nir_builder.h +++ b/mesalib/src/glsl/nir/nir_builder.h @@ -28,6 +28,9 @@ struct exec_list; typedef struct nir_builder { struct exec_list *cf_node_list; + nir_instr *before_instr; + nir_instr *after_instr; + nir_shader *shader; nir_function_impl *impl; } nir_builder; @@ -45,8 +48,75 @@ nir_builder_insert_after_cf_list(nir_builder *build, struct exec_list *cf_node_list) { build->cf_node_list = cf_node_list; + build->before_instr = NULL; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr) +{ + build->cf_node_list = NULL; + build->before_instr = before_instr; + build->after_instr = NULL; } +static inline void +nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr) +{ + build->cf_node_list = NULL; + build->before_instr = NULL; + build->after_instr = after_instr; +} + +static inline void +nir_builder_instr_insert(nir_builder *build, nir_instr *instr) +{ + if (build->cf_node_list) { + nir_instr_insert_after_cf_list(build->cf_node_list, instr); + } else if (build->before_instr) { + nir_instr_insert_before(build->before_instr, instr); + } else { + assert(build->after_instr); + nir_instr_insert_after(build->after_instr, instr); + build->after_instr = instr; + } +} + +static inline nir_ssa_def * +nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value) +{ + nir_load_const_instr *load_const = + nir_load_const_instr_create(build->shader, num_components); + if (!load_const) + return NULL; + + load_const->value = value; + + nir_builder_instr_insert(build, &load_const->instr); + + return &load_const->def; +} + +static inline nir_ssa_def * +nir_imm_float(nir_builder *build, float x) +{ + nir_const_value v = { { .f = {x, 0, 0, 0} } }; + return nir_build_imm(build, 1, v); +} + +static inline nir_ssa_def 
* +nir_imm_vec4(nir_builder *build, float x, float y, float z, float w) +{ + nir_const_value v = { { .f = {x, y, z, w} } }; + return nir_build_imm(build, 4, v); +} + +static inline nir_ssa_def * +nir_imm_int(nir_builder *build, int x) +{ + nir_const_value v = { { .i = {x, 0, 0, 0} } }; + return nir_build_imm(build, 1, v); +} static inline nir_ssa_def * nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, @@ -90,7 +160,7 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); instr->dest.write_mask = (1 << num_components) - 1; - nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr); + nir_builder_instr_insert(build, &instr->instr); return &instr->dest.dest.ssa; } @@ -127,4 +197,67 @@ nir_##op(nir_builder *build, nir_ssa_def *src0, \ #include "nir_builder_opcodes.h" +/** + * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def. + */ +static inline nir_ssa_def * +nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) +{ + nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); + mov->dest.write_mask = (1 << num_components) - 1; + mov->src[0] = src; + nir_builder_instr_insert(build, &mov->instr); + + return &mov->dest.dest.ssa; +} + +static inline nir_ssa_def * +nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) +{ + nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); + mov->dest.write_mask = (1 << num_components) - 1; + mov->src[0] = src; + nir_builder_instr_insert(build, &mov->instr); + + return &mov->dest.dest.ssa; +} + +/** + * Construct an fmov or imov that reswizzles the source's components. 
+ */ +static inline nir_ssa_def * +nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], + unsigned num_components, bool use_fmov) +{ + nir_alu_src alu_src; + memset(&alu_src, 0, sizeof(alu_src)); + alu_src.src = nir_src_for_ssa(src); + for (int i = 0; i < 4; i++) + alu_src.swizzle[i] = swiz[i]; + + return use_fmov ? nir_fmov_alu(build, alu_src, num_components) : + nir_imov_alu(build, alu_src, num_components); +} + +/** + * Turns a nir_src into a nir_ssa_def * so it can be passed to + * nir_build_alu()-based builder calls. + */ +static inline nir_ssa_def * +nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) +{ + if (src.is_ssa && src.ssa->num_components == num_components) + return src.ssa; + + nir_alu_src alu; + memset(&alu, 0, sizeof(alu)); + alu.src = src; + for (int j = 0; j < 4; j++) + alu.swizzle[j] = j; + + return nir_imov_alu(build, alu, num_components); +} + #endif /* NIR_BUILDER_H */ diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c index c3090fb06..184698abd 100644 --- a/mesalib/src/glsl/nir/nir_from_ssa.c +++ b/mesalib/src/glsl/nir/nir_from_ssa.c @@ -509,12 +509,13 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state) reg->num_components = def->num_components; reg->num_array_elems = 0; - /* This register comes from an SSA definition that was not part of a - * phi-web. Therefore, we know it has a single unique definition - * that dominates all of its uses. Therefore, we can copy the + /* This register comes from an SSA definition that is defined and not + * part of a phi-web. Therefore, we know it has a single unique + * definition that dominates all of its uses; we can copy the * parent_instr from the SSA def safely. 
*/ - reg->parent_instr = def->parent_instr; + if (def->parent_instr->type != nir_instr_type_ssa_undef) + reg->parent_instr = def->parent_instr; _mesa_hash_table_insert(state->ssa_table, def, reg); return reg; diff --git a/mesalib/src/glsl/nir/nir_lower_idiv.c b/mesalib/src/glsl/nir/nir_lower_idiv.c new file mode 100644 index 000000000..7b6803207 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_idiv.c @@ -0,0 +1,155 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "nir.h" +#include "nir_builder.h" + +/* Lowers idiv/udiv/umod + * Based on NV50LegalizeSSA::handleDIV() + * + * Note that this is probably not enough precision for compute shaders. + * Perhaps we want a second higher precision (looping) version of this? 
+ * Or perhaps we assume if you can do compute shaders you can also + * branch out to a pre-optimized shader library routine.. + */ + +static void +convert_instr(nir_builder *bld, nir_alu_instr *alu) +{ + nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r; + nir_op op = alu->op; + bool is_signed; + + if ((op != nir_op_idiv) && + (op != nir_op_udiv) && + (op != nir_op_umod)) + return; + + is_signed = (op == nir_op_idiv); + + nir_builder_insert_before_instr(bld, &alu->instr); + + numer = nir_ssa_for_src(bld, alu->src[0].src, + nir_ssa_alu_instr_src_components(alu, 0)); + denom = nir_ssa_for_src(bld, alu->src[1].src, + nir_ssa_alu_instr_src_components(alu, 1)); + + if (is_signed) { + af = nir_i2f(bld, numer); + bf = nir_i2f(bld, denom); + af = nir_fabs(bld, af); + bf = nir_fabs(bld, bf); + a = nir_iabs(bld, numer); + b = nir_iabs(bld, denom); + } else { + af = nir_u2f(bld, numer); + bf = nir_u2f(bld, denom); + a = numer; + b = denom; + } + + /* get first result: */ + bf = nir_frcp(bld, bf); + bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */ + q = nir_fmul(bld, af, bf); + + if (is_signed) { + q = nir_f2i(bld, q); + } else { + q = nir_f2u(bld, q); + } + + /* get error of first result: */ + r = nir_imul(bld, q, b); + r = nir_isub(bld, a, r); + r = nir_u2f(bld, r); + r = nir_fmul(bld, r, bf); + r = nir_f2u(bld, r); + + /* add quotients: */ + q = nir_iadd(bld, q, r); + + /* correction: if modulus >= divisor, add 1 */ + r = nir_imul(bld, q, b); + r = nir_isub(bld, a, r); + + r = nir_ige(bld, r, b); + r = nir_b2i(bld, r); + + q = nir_iadd(bld, q, r); + if (is_signed) { + /* fix the sign: */ + r = nir_ixor(bld, numer, denom); + r = nir_ushr(bld, r, nir_imm_int(bld, 31)); + r = nir_i2b(bld, r); + b = nir_ineg(bld, q); + q = nir_bcsel(bld, r, b, q); + } + + if (op == nir_op_umod) { + /* division result in q */ + r = nir_imul(bld, q, b); + q = nir_isub(bld, a, r); + } + + assert(alu->dest.dest.is_ssa); + nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, + 
nir_src_for_ssa(q), + ralloc_parent(alu)); +} + +static bool +convert_block(nir_block *block, void *state) +{ + nir_builder *b = state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_alu) + convert_instr(b, nir_instr_as_alu(instr)); + } + + return true; +} + +static void +convert_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(impl, convert_block, &b); + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_lower_idiv(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + convert_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c index 7cd93ea0a..4bdb80072 100644 --- a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c +++ b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c @@ -223,7 +223,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state) else nir_instr_insert_after_block(src->pred, &mov->instr); - nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src); + nir_phi_src *new_src = ralloc(new_phi, nir_phi_src); new_src->pred = src->pred; new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa); diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp index 3015dbd09..cf8ab8325 100644 --- a/mesalib/src/glsl/nir/nir_lower_samplers.cpp +++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp @@ -36,33 +36,26 @@ extern "C" { } static unsigned -get_sampler_index(struct gl_shader_program *shader_program, const char *name, - const struct gl_program *prog) +get_sampler_index(const struct gl_shader_program *shader_program, + gl_shader_stage stage, const char *name) { - GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); - unsigned location; if (!shader_program->UniformHash->get(location, name)) { - linker_error(shader_program, - "failed to find sampler named %s.\n", 
name); + assert(!"failed to find sampler"); return 0; } - if (!shader_program->UniformStorage[location].sampler[shader].active) { - assert(0 && "cannot return a sampler"); - linker_error(shader_program, - "cannot return a sampler named %s, because it is not " - "used in this shader stage. This is a driver bug.\n", - name); + if (!shader_program->UniformStorage[location].sampler[stage].active) { + assert(!"cannot return a sampler"); return 0; } - return shader_program->UniformStorage[location].sampler[shader].index; + return shader_program->UniformStorage[location].sampler[stage].index; } static void -lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, - const struct gl_program *prog, void *mem_ctx) +lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program, + gl_shader_stage stage, void *mem_ctx) { if (instr->sampler == NULL) return; @@ -90,7 +83,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset); break; case nir_deref_array_type_indirect: { - instr->src = reralloc(mem_ctx, instr->src, nir_tex_src, + instr->src = reralloc(instr, instr->src, nir_tex_src, instr->num_srcs + 1); memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src); instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; @@ -133,15 +126,15 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, } } - instr->sampler_index += get_sampler_index(shader_program, name, prog); + instr->sampler_index += get_sampler_index(shader_program, stage, name); instr->sampler = NULL; } typedef struct { void *mem_ctx; - struct gl_shader_program *shader_program; - struct gl_program *prog; + const struct gl_shader_program *shader_program; + gl_shader_stage stage; } lower_state; static bool @@ -152,7 +145,7 @@ lower_block_cb(nir_block *block, void *_state) nir_foreach_instr(block, instr) { if (instr->type == nir_instr_type_tex) { 
nir_tex_instr *tex_instr = nir_instr_as_tex(instr); - lower_sampler(tex_instr, state->shader_program, state->prog, + lower_sampler(tex_instr, state->shader_program, state->stage, state->mem_ctx); } } @@ -161,24 +154,24 @@ lower_block_cb(nir_block *block, void *_state) } static void -lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program, - struct gl_program *prog) +lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program, + gl_shader_stage stage) { lower_state state; state.mem_ctx = ralloc_parent(impl); state.shader_program = shader_program; - state.prog = prog; + state.stage = stage; nir_foreach_block(impl, lower_block_cb, &state); } extern "C" void -nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program, - struct gl_program *prog) +nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program, + gl_shader_stage stage) { nir_foreach_overload(shader, overload) { if (overload->impl) - lower_impl(overload->impl, shader_program, prog); + lower_impl(overload->impl, shader_program, stage); } } diff --git a/mesalib/src/glsl/nir/nir_lower_tex_projector.c b/mesalib/src/glsl/nir/nir_lower_tex_projector.c new file mode 100644 index 000000000..6b0e9c340 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_tex_projector.c @@ -0,0 +1,143 @@ +/* + * Copyright © 2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + 
* Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * This lowering pass converts the coordinate division for texture projection + * to be done in ALU instructions instead of asking the texture operation to + * do so. + */ + +#include "nir.h" +#include "nir_builder.h" + +static nir_ssa_def * +channel(nir_builder *b, nir_ssa_def *def, int c) +{ + return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false); +} + +static bool +nir_lower_tex_projector_block(nir_block *block, void *void_state) +{ + nir_builder *b = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + nir_builder_insert_before_instr(b, &tex->instr); + + /* Find the projector in the srcs list, if present. */ + int proj_index; + for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) { + if (tex->src[proj_index].src_type == nir_tex_src_projector) + break; + } + if (proj_index == tex->num_srcs) + continue; + nir_ssa_def *inv_proj = + nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1)); + + /* Walk through the sources projecting the arguments. 
*/ + for (int i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + case nir_tex_src_comparitor: + break; + default: + continue; + } + nir_ssa_def *unprojected = + nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); + nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj); + + /* Array indices don't get projected, so make an new vector with the + * coordinate's array index untouched. + */ + if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) { + switch (tex->coord_components) { + case 4: + projected = nir_vec4(b, + channel(b, projected, 0), + channel(b, projected, 1), + channel(b, projected, 2), + channel(b, unprojected, 3)); + break; + case 3: + projected = nir_vec3(b, + channel(b, projected, 0), + channel(b, projected, 1), + channel(b, unprojected, 2)); + break; + case 2: + projected = nir_vec2(b, + channel(b, projected, 0), + channel(b, unprojected, 1)); + break; + default: + unreachable("bad texture coord count for array"); + break; + } + } + + nir_instr_rewrite_src(&tex->instr, + &tex->src[i].src, + nir_src_for_ssa(projected)); + } + + /* Now move the later tex sources down the array so that the projector + * disappears. 
+ */ + nir_src dead; + memset(&dead, 0, sizeof dead); + nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead); + memmove(&tex->src[proj_index], + &tex->src[proj_index + 1], + (tex->num_srcs - proj_index) * sizeof(*tex->src)); + tex->num_srcs--; + } + + return true; +} + +static void +nir_lower_tex_projector_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(impl, nir_lower_tex_projector_block, &b); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_lower_tex_projector(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_lower_tex_projector_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c index 85ebb281c..58389a7c7 100644 --- a/mesalib/src/glsl/nir/nir_lower_var_copies.c +++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c @@ -148,13 +148,10 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, unsigned num_components = glsl_get_vector_elements(src_tail->type); - nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref); - nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref); - nir_intrinsic_instr *load = nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var); load->num_components = num_components; - load->variables[0] = nir_deref_as_var(src_deref); + load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref)); nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); nir_instr_insert_before(©_instr->instr, &load->instr); @@ -162,7 +159,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, nir_intrinsic_instr *store = nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var); store->num_components = num_components; - store->variables[0] = nir_deref_as_var(dest_deref); + store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref)); + 
store->src[0].is_ssa = true; store->src[0].ssa = &load->dest.ssa; diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c index 86e6ab416..2ca74d71b 100644 --- a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -642,7 +642,7 @@ add_phi_sources(nir_block *block, nir_block *pred, struct deref_node *node = entry->data; - nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src); + nir_phi_src *src = ralloc(phi, nir_phi_src); src->pred = pred; src->src.is_ssa = true; src->src.ssa = get_ssa_def_for_block(node, pred, state); diff --git a/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c new file mode 100644 index 000000000..0da8447ac --- /dev/null +++ b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c @@ -0,0 +1,110 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand + */ + +#include "nir.h" +#include "nir_builder.h" + +/** + * This file implements a NIR lowering pass to perform the normalization of + * the cubemap coordinates to have the largest magnitude component be -1.0 + * or 1.0. This is based on the old GLSL IR based pass by Eric. + */ + +static nir_ssa_def * +channel(nir_builder *b, nir_ssa_def *def, int c) +{ + return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false); +} + +static bool +normalize_cubemap_coords_block(nir_block *block, void *void_state) +{ + nir_builder *b = void_state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) + continue; + + nir_builder_insert_before_instr(b, &tex->instr); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type != nir_tex_src_coord) + continue; + + nir_ssa_def *orig_coord = + nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); + assert(orig_coord->num_components >= 3); + + nir_ssa_def *abs = nir_fabs(b, orig_coord); + nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0), + nir_fmax(b, channel(b, abs, 1), + channel(b, abs, 2))); + + nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm)); + + /* Array indices don't have to be normalized, so make a new vector + * with the coordinate's array index untouched. 
+ */ + if (tex->coord_components == 4) { + normalized = nir_vec4(b, + channel(b, normalized, 0), + channel(b, normalized, 1), + channel(b, normalized, 2), + channel(b, orig_coord, 3)); + } + + nir_instr_rewrite_src(&tex->instr, + &tex->src[i].src, + nir_src_for_ssa(normalized)); + } + } + + return true; +} + +static void +normalize_cubemap_coords_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(impl, normalize_cubemap_coords_block, &b); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_normalize_cubemap_coords(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) + if (overload->impl) + normalize_cubemap_coords_impl(overload->impl); +} diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py index 062cd628b..264806f5d 100644 --- a/mesalib/src/glsl/nir/nir_opcodes.py +++ b/mesalib/src/glsl/nir/nir_opcodes.py @@ -161,12 +161,12 @@ unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. # Float-to-boolean conversion -unop_convert("f2b", tfloat, tbool, "src0 == 0.0f") +unop_convert("f2b", tfloat, tbool, "src0 != 0.0f") # Boolean-to-float conversion unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f") # Int-to-boolean conversion -unop_convert("i2b", tint, tbool, "src0 == 0") -unop_convert("b2i", tbool, tint, "src0 ? 0 : -1") # Boolean-to-int conversion +unop_convert("i2b", tint, tbool, "src0 != 0") +unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion. 
unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}") @@ -191,8 +191,6 @@ unop("fround_even", tfloat, "_mesa_roundevenf(src0)") unop("fsin", tfloat, "sinf(src0)") unop("fcos", tfloat, "cosf(src0)") -unop("fsin_reduced", tfloat, "sinf(src0)") -unop("fcos_reduced", tfloat, "cosf(src0)") # Partial derivatives. diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py index ef855aa77..cdb19241c 100644 --- a/mesalib/src/glsl/nir/nir_opt_algebraic.py +++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py @@ -75,6 +75,9 @@ optimizations = [ (('flrp', a, b, 1.0), b), (('flrp', a, a, b), a), (('flrp', 0.0, a, b), ('fmul', a, b)), + (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'), + (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'), + (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'), (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'), # Comparison simplifications @@ -82,10 +85,6 @@ optimizations = [ (('inot', ('fge', a, b)), ('flt', a, b)), (('inot', ('ilt', a, b)), ('ige', a, b)), (('inot', ('ige', a, b)), ('ilt', a, b)), - (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))), - (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))), - (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), - (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)), (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)), (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)), @@ -95,6 +94,18 @@ optimizations = [ (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), (('fsat', ('fsat', a)), ('fsat', a)), (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)), + (('ior', ('flt', a, b), ('flt', a, 
c)), ('flt', a, ('fmax', b, c))), + (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))), + (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'), + (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'), + (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'), + (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'), + # Emulating booleans + (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))), + (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), + (('iand', 'a@bool', 1.0), ('b2f', a)), + (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. + (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. # Comparison with the same args. Note that these are not done for # the float versions because NaN always returns false on float # inequalities. @@ -122,7 +133,7 @@ optimizations = [ (('ishr', 0, a), 0), (('ishr', a, 0), a), (('ushr', 0, a), 0), - (('ushr', a, 0), 0), + (('ushr', a, 0), a), # Exponential/logarithmic identities (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a (('fexp', ('flog', a)), a), # e^ln(a) = a @@ -134,6 +145,26 @@ optimizations = [ (('fpow', a, 1.0), a), (('fpow', a, 2.0), ('fmul', a, a)), (('fpow', 2.0, a), ('fexp2', a)), + (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), + (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))), + (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), + (('frcp', ('fexp', a)), ('fexp', ('fneg', a))), + (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), + (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))), + (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))), + (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))), + (('flog2', ('frcp', a)), ('fneg', ('flog2', a))), + (('flog', ('frcp', a)), ('fneg', ('flog', a))), + (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), + (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))), + (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), + (('flog', ('fpow', 
a, b)), ('fmul', b, ('flog', a))), + (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), + (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))), + (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), + (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))), + (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), + (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))), # Division and reciprocal (('fdiv', 1.0, a), ('frcp', a)), (('frcp', ('frcp', a)), a), @@ -154,18 +185,21 @@ optimizations = [ (('bcsel', a, b, b), b), (('fcsel', a, b, b), b), + # Conversions + (('f2i', ('ftrunc', a)), ('f2i', a)), + (('f2u', ('ftrunc', a)), ('f2u', a)), + # Subtracts (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), (('isub', a, ('isub', 0, b)), ('iadd', a, b)), + (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'), + (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'), (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), (('ineg', a), ('isub', 0, a), 'options->lower_negate'), (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)), (('iadd', a, ('isub', 0, b)), ('isub', a, b)), (('fabs', ('fsub', 0.0, a)), ('fabs', a)), (('iabs', ('isub', 0, a)), ('iabs', a)), - -# This one may not be exact - (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), ] # Add optimizations to handle the case where the result of a ternary is @@ -189,4 +223,17 @@ for op in ['flt', 'fge', 'feq', 'fne', ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))), ] +# This section contains "late" optimizations that should be run after the +# regular optimizations have finished. Optimizations should go here if +# they help code generation but do not necessarily produce code that is +# more easily optimizable. 
+late_optimizations = [ + (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))), + (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))), + (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), + (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), +] + print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render() +print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late", + late_optimizations).render() diff --git a/mesalib/src/glsl/nir/nir_opt_cse.c b/mesalib/src/glsl/nir/nir_opt_cse.c index 9b383202d..553906e12 100644 --- a/mesalib/src/glsl/nir/nir_opt_cse.c +++ b/mesalib/src/glsl/nir/nir_opt_cse.c @@ -37,20 +37,19 @@ struct cse_state { }; static bool -nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask) +nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1, + unsigned src2) { - if (src1.abs != src2.abs || src1.negate != src2.negate) + if (alu1->src[src1].abs != alu2->src[src2].abs || + alu1->src[src1].negate != alu2->src[src2].negate) return false; - for (int i = 0; i < 4; ++i) { - if (!(read_mask & (1 << i))) - continue; - - if (src1.swizzle[i] != src2.swizzle[i]) + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) { + if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i]) return false; } - return nir_srcs_equal(src1.src, src2.src); + return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src); } static bool @@ -73,10 +72,17 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2) if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components) return false; - for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { - if (!nir_alu_srcs_equal(alu1->src[i], alu2->src[i], - (1 << alu1->dest.dest.ssa.num_components) - 1)) - return false; + if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[alu1->op].num_inputs == 2); + return (nir_alu_srcs_equal(alu1, alu2, 0, 0) && + 
nir_alu_srcs_equal(alu1, alu2, 1, 1)) || + (nir_alu_srcs_equal(alu1, alu2, 0, 1) && + nir_alu_srcs_equal(alu1, alu2, 1, 0)); + } else { + for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { + if (!nir_alu_srcs_equal(alu1, alu2, i, i)) + return false; + } } return true; } @@ -154,12 +160,14 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2) static bool src_is_ssa(nir_src *src, void *data) { + (void) data; return src->is_ssa; } static bool dest_is_ssa(nir_dest *dest, void *data) { + (void) data; return dest->is_ssa; } diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c new file mode 100644 index 000000000..9d5646fe6 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c @@ -0,0 +1,261 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements a small peephole optimization that looks for a multiply that + * is only ever used in an add and replaces both with an fma. + */ + +struct peephole_ffma_state { + void *mem_ctx; + nir_function_impl *impl; + bool progress; +}; + +static inline bool +are_all_uses_fadd(nir_ssa_def *def) +{ + if (def->if_uses->entries > 0) + return false; + + struct set_entry *use_iter; + set_foreach(def->uses, use_iter) { + nir_instr *use_instr = (nir_instr *)use_iter->key; + + if (use_instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *use_alu = nir_instr_as_alu(use_instr); + switch (use_alu->op) { + case nir_op_fadd: + break; /* This one's ok */ + + case nir_op_imov: + case nir_op_fmov: + case nir_op_fneg: + case nir_op_fabs: + assert(use_alu->dest.dest.is_ssa); + if (!are_all_uses_fadd(&use_alu->dest.dest.ssa)) + return false; + break; + + default: + return false; + } + } + + return true; +} + +static nir_alu_instr * +get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) +{ + assert(src->src.is_ssa && !src->abs && !src->negate); + + nir_instr *instr = src->src.ssa->parent_instr; + if (instr->type != nir_instr_type_alu) + return NULL; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_imov: + case nir_op_fmov: + alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + break; + + case nir_op_fneg: + alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + *negate = !*negate; + break; + + case nir_op_fabs: + alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + *negate = false; + *abs = true; + break; + + case nir_op_fmul: + /* Only absorb a fmul into a ffma if the fmul is is only used in fadd + * operations. This prevents us from being too aggressive with our + * fusing which can actually lead to more instructions. 
+ */ + if (!are_all_uses_fadd(&alu->dest.dest.ssa)) + return NULL; + break; + + default: + return NULL; + } + + if (!alu) + return NULL; + + for (unsigned i = 0; i < 4; i++) { + if (!(alu->dest.write_mask & (1 << i))) + break; + + swizzle[i] = swizzle[src->swizzle[i]]; + } + + return alu; +} + +static bool +nir_opt_peephole_ffma_block(nir_block *block, void *void_state) +{ + struct peephole_ffma_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *add = nir_instr_as_alu(instr); + if (add->op != nir_op_fadd) + continue; + + /* TODO: Maybe bail if this expression is considered "precise"? */ + + assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa); + + /* This, is the case a + a. We would rather handle this with an + * algebraic reduction than fuse it. Also, we want to only fuse + * things where the multiply is used only once and, in this case, + * it would be used twice by the same instruction. + */ + if (add->src[0].src.ssa == add->src[1].src.ssa) + continue; + + nir_alu_instr *mul; + uint8_t add_mul_src, swizzle[4]; + bool negate, abs; + for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) { + for (unsigned i = 0; i < 4; i++) + swizzle[i] = i; + + negate = false; + abs = false; + + mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs); + + if (mul != NULL) + break; + } + + if (mul == NULL) + continue; + + nir_ssa_def *mul_src[2]; + mul_src[0] = mul->src[0].src.ssa; + mul_src[1] = mul->src[1].src.ssa; + + if (abs) { + for (unsigned i = 0; i < 2; i++) { + nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx, + nir_op_fabs); + abs->src[0].src = nir_src_for_ssa(mul_src[i]); + nir_ssa_dest_init(&abs->instr, &abs->dest.dest, + mul_src[i]->num_components, NULL); + abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1; + nir_instr_insert_before(&add->instr, &abs->instr); + mul_src[i] = &abs->dest.dest.ssa; + } + } + + if (negate) { + nir_alu_instr *neg = 
nir_alu_instr_create(state->mem_ctx, + nir_op_fneg); + neg->src[0].src = nir_src_for_ssa(mul_src[0]); + nir_ssa_dest_init(&neg->instr, &neg->dest.dest, + mul_src[0]->num_components, NULL); + neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1; + nir_instr_insert_before(&add->instr, &neg->instr); + mul_src[0] = &neg->dest.dest.ssa; + } + + nir_alu_instr *ffma = nir_alu_instr_create(state->mem_ctx, nir_op_ffma); + ffma->dest.saturate = add->dest.saturate; + ffma->dest.write_mask = add->dest.write_mask; + + for (unsigned i = 0; i < 2; i++) { + ffma->src[i].src = nir_src_for_ssa(mul_src[i]); + for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++) + ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]]; + } + nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], + state->mem_ctx); + + assert(add->dest.dest.is_ssa); + + nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest, + add->dest.dest.ssa.num_components, + add->dest.dest.ssa.name); + nir_ssa_def_rewrite_uses(&add->dest.dest.ssa, + nir_src_for_ssa(&ffma->dest.dest.ssa), + state->mem_ctx); + + nir_instr_insert_before(&add->instr, &ffma->instr); + assert(add->dest.dest.ssa.uses->entries == 0); + nir_instr_remove(&add->instr); + + state->progress = true; + } + + return true; +} + +static bool +nir_opt_peephole_ffma_impl(nir_function_impl *impl) +{ + struct peephole_ffma_state state; + + state.mem_ctx = ralloc_parent(impl); + state.impl = impl; + state.progress = false; + + nir_foreach_block(impl, nir_opt_peephole_ffma_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return state.progress; +} + +bool +nir_opt_peephole_ffma(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) { + if (overload->impl) + progress |= nir_opt_peephole_ffma_impl(overload->impl); + } + + return progress; +} diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c 
b/mesalib/src/glsl/nir/nir_opt_peephole_select.c index b89451b09..f400cfd66 100644 --- a/mesalib/src/glsl/nir/nir_opt_peephole_select.c +++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c @@ -84,7 +84,9 @@ block_check_for_allowed_instrs(nir_block *block) case nir_instr_type_alu: { /* It must be a move operation */ nir_alu_instr *mov = nir_instr_as_alu(instr); - if (mov->op != nir_op_fmov && mov->op != nir_op_imov) + if (mov->op != nir_op_fmov && mov->op != nir_op_imov && + mov->op != nir_op_fneg && mov->op != nir_op_ineg && + mov->op != nir_op_fabs && mov->op != nir_op_iabs) return false; /* Can't handle saturate */ diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c index fa11a312e..fb8c9344c 100644 --- a/mesalib/src/glsl/nir/nir_print.c +++ b/mesalib/src/glsl/nir/nir_print.c @@ -137,25 +137,37 @@ print_dest(nir_dest *dest, FILE *fp) } static void -print_alu_src(nir_alu_src *src, FILE *fp) +print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp) { - if (src->negate) + if (instr->src[src].negate) fprintf(fp, "-"); - if (src->abs) + if (instr->src[src].abs) fprintf(fp, "abs("); - print_src(&src->src, fp); + print_src(&instr->src[src].src, fp); - if (src->swizzle[0] != 0 || - src->swizzle[1] != 1 || - src->swizzle[2] != 2 || - src->swizzle[3] != 3) { + bool print_swizzle = false; + for (unsigned i = 0; i < 4; i++) { + if (!nir_alu_instr_channel_used(instr, src, i)) + continue; + + if (instr->src[src].swizzle[i] != i) { + print_swizzle = true; + break; + } + } + + if (print_swizzle) { fprintf(fp, "."); - for (unsigned i = 0; i < 4; i++) - fprintf(fp, "%c", "xyzw"[src->swizzle[i]]); + for (unsigned i = 0; i < 4; i++) { + if (!nir_alu_instr_channel_used(instr, src, i)) + continue; + + fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]); + } } - if (src->abs) + if (instr->src[src].abs) fprintf(fp, ")"); } @@ -189,7 +201,7 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp) if (i != 0) fprintf(fp, ", "); - print_alu_src(&instr->src[i], 
fp); + print_alu_src(instr, i, fp); } } diff --git a/mesalib/src/glsl/nir/nir_remove_dead_variables.c b/mesalib/src/glsl/nir/nir_remove_dead_variables.c index e7f8aeacb..4417e2a48 100644 --- a/mesalib/src/glsl/nir/nir_remove_dead_variables.c +++ b/mesalib/src/glsl/nir/nir_remove_dead_variables.c @@ -98,22 +98,14 @@ add_var_use_shader(nir_shader *shader, struct set *live) } static void -remove_dead_local_vars(nir_function_impl *impl, struct set *live) +remove_dead_vars(struct exec_list *var_list, struct set *live) { - foreach_list_typed_safe(nir_variable, var, node, &impl->locals) { + foreach_list_typed_safe(nir_variable, var, node, var_list) { struct set_entry *entry = _mesa_set_search(live, var); - if (entry == NULL) - exec_node_remove(&var->node); - } -} - -static void -remove_dead_global_vars(nir_shader *shader, struct set *live) -{ - foreach_list_typed_safe(nir_variable, var, node, &shader->globals) { - struct set_entry *entry = _mesa_set_search(live, var); - if (entry == NULL) + if (entry == NULL) { exec_node_remove(&var->node); + ralloc_free(var); + } } } @@ -125,11 +117,11 @@ nir_remove_dead_variables(nir_shader *shader) add_var_use_shader(shader, live); - remove_dead_global_vars(shader, live); + remove_dead_vars(&shader->globals, live); nir_foreach_overload(shader, overload) { if (overload->impl) - remove_dead_local_vars(overload->impl, live); + remove_dead_vars(&overload->impl->locals, live); } _mesa_set_destroy(live, NULL); diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c index 73a802be7..5ba016085 100644 --- a/mesalib/src/glsl/nir/nir_search.c +++ b/mesalib/src/glsl/nir/nir_search.c @@ -218,8 +218,8 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, if (matched) return true; - if (nir_op_infos[instr->op].num_inputs == 2 && - (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE)) { + if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + 
assert(nir_op_infos[instr->op].num_inputs == 2); if (!match_value(expr->srcs[0], instr, 1, num_components, swizzle, state)) return false; diff --git a/mesalib/src/glsl/nir/nir_split_var_copies.c b/mesalib/src/glsl/nir/nir_split_var_copies.c index 4d663b51b..fc72c078c 100644 --- a/mesalib/src/glsl/nir/nir_split_var_copies.c +++ b/mesalib/src/glsl/nir/nir_split_var_copies.c @@ -188,8 +188,8 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy, * belongs to the copy instruction and b) the deref chains may * have some of the same links due to the way we constructed them */ - nir_deref *src = nir_copy_deref(state->mem_ctx, src_head); - nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head); + nir_deref *src = nir_copy_deref(new_copy, src_head); + nir_deref *dest = nir_copy_deref(new_copy, dest_head); new_copy->variables[0] = nir_deref_as_var(dest); new_copy->variables[1] = nir_deref_as_var(src); diff --git a/mesalib/src/glsl/nir/nir_sweep.c b/mesalib/src/glsl/nir/nir_sweep.c new file mode 100644 index 000000000..d3549756a --- /dev/null +++ b/mesalib/src/glsl/nir/nir_sweep.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" + +/** + * \file nir_sweep.c + * + * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated + * memory - anything still connected to the program will be kept, and any dead memory + * we dropped on the floor will be freed. + * + * The expectation is that drivers should call this when finished compiling the shader + * (after any optimization, lowering, and so on). However, it's also fine to call it + * earlier, and even many times, trading CPU cycles for memory savings. 
+ */ + +#define steal_list(mem_ctx, type, list) \ + foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); } + +static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node); + +static bool +sweep_src_indirect(nir_src *src, void *nir) +{ + if (!src->is_ssa && src->reg.indirect) + ralloc_steal(nir, src->reg.indirect); + + return true; +} + +static bool +sweep_dest_indirect(nir_dest *dest, void *nir) +{ + if (!dest->is_ssa && dest->reg.indirect) + ralloc_steal(nir, dest->reg.indirect); + + return true; +} + +static void +sweep_block(nir_shader *nir, nir_block *block) +{ + ralloc_steal(nir, block); + + nir_foreach_instr(block, instr) { + ralloc_steal(nir, instr); + + nir_foreach_src(instr, sweep_src_indirect, nir); + nir_foreach_dest(instr, sweep_dest_indirect, nir); + } +} + +static void +sweep_if(nir_shader *nir, nir_if *iff) +{ + ralloc_steal(nir, iff); + + foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) { + sweep_cf_node(nir, cf_node); + } + + foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) { + sweep_cf_node(nir, cf_node); + } +} + +static void +sweep_loop(nir_shader *nir, nir_loop *loop) +{ + ralloc_steal(nir, loop); + + foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { + sweep_cf_node(nir, cf_node); + } +} + +static void +sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node) +{ + switch (cf_node->type) { + case nir_cf_node_block: + sweep_block(nir, nir_cf_node_as_block(cf_node)); + break; + case nir_cf_node_if: + sweep_if(nir, nir_cf_node_as_if(cf_node)); + break; + case nir_cf_node_loop: + sweep_loop(nir, nir_cf_node_as_loop(cf_node)); + break; + default: + unreachable("Invalid CF node type"); + } +} + +static void +sweep_impl(nir_shader *nir, nir_function_impl *impl) +{ + ralloc_steal(nir, impl); + + ralloc_steal(nir, impl->params); + ralloc_steal(nir, impl->return_var); + steal_list(nir, nir_variable, &impl->locals); + steal_list(nir, nir_register, &impl->registers); + + 
foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) { + sweep_cf_node(nir, cf_node); + } + + sweep_block(nir, impl->end_block); + + /* Wipe out all the metadata, if any. */ + nir_metadata_preserve(impl, nir_metadata_none); +} + +static void +sweep_function(nir_shader *nir, nir_function *f) +{ + ralloc_steal(nir, f); + + foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) { + ralloc_steal(nir, overload); + ralloc_steal(nir, overload->params); + if (overload->impl) + sweep_impl(nir, overload->impl); + } +} + +void +nir_sweep(nir_shader *nir) +{ + void *rubbish = ralloc_context(NULL); + + /* First, move ownership of all the memory to a temporary context; assume dead. */ + ralloc_adopt(rubbish, nir); + + /* Variables and registers are not dead. Steal them back. */ + steal_list(nir, nir_variable, &nir->uniforms); + steal_list(nir, nir_variable, &nir->inputs); + steal_list(nir, nir_variable, &nir->outputs); + steal_list(nir, nir_variable, &nir->globals); + steal_list(nir, nir_variable, &nir->system_values); + steal_list(nir, nir_register, &nir->registers); + + /* Recurse into functions, stealing their contents back. */ + foreach_list_typed(nir_function, func, node, &nir->functions) { + sweep_function(nir, func); + } + + /* Free everything we didn't steal back. 
*/ + ralloc_free(rubbish); +} diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c index 47cf45393..53ff54766 100644 --- a/mesalib/src/glsl/nir/nir_to_ssa.c +++ b/mesalib/src/glsl/nir/nir_to_ssa.c @@ -47,7 +47,7 @@ insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx) set_foreach(block->predecessors, entry) { nir_block *pred = (nir_block *) entry->key; - nir_phi_src *src = ralloc(mem_ctx, nir_phi_src); + nir_phi_src *src = ralloc(instr, nir_phi_src); src->pred = pred; src->src.is_ssa = false; src->src.reg.base_offset = 0; diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp index a13c3e12a..f0d0b46d2 100644 --- a/mesalib/src/glsl/nir/nir_types.cpp +++ b/mesalib/src/glsl/nir/nir_types.cpp @@ -142,6 +142,12 @@ glsl_void_type(void) return glsl_type::void_type; } +const glsl_type * +glsl_float_type(void) +{ + return glsl_type::float_type; +} + const glsl_type * glsl_vec4_type(void) { diff --git a/mesalib/src/glsl/nir/nir_types.h b/mesalib/src/glsl/nir/nir_types.h index 494051a67..276d4ad62 100644 --- a/mesalib/src/glsl/nir/nir_types.h +++ b/mesalib/src/glsl/nir/nir_types.h @@ -69,6 +69,7 @@ bool glsl_type_is_scalar(const struct glsl_type *type); bool glsl_type_is_matrix(const struct glsl_type *type); const struct glsl_type *glsl_void_type(void); +const struct glsl_type *glsl_float_type(void); const struct glsl_type *glsl_vec4_type(void); const struct glsl_type *glsl_array_type(const struct glsl_type *base, unsigned elements); diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c index f247ae069..a7aa79837 100644 --- a/mesalib/src/glsl/nir/nir_validate.c +++ b/mesalib/src/glsl/nir/nir_validate.c @@ -295,6 +295,8 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state) static void validate_deref_chain(nir_deref *deref, validate_state *state) { + assert(deref->child == NULL || ralloc_parent(deref->child) == deref); + nir_deref *parent = NULL; while 
(deref != NULL) { switch (deref->deref_type) { @@ -336,9 +338,10 @@ validate_var_use(nir_variable *var, validate_state *state) } static void -validate_deref_var(nir_deref_var *deref, validate_state *state) +validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state) { assert(deref != NULL); + assert(ralloc_parent(deref) == parent_mem_ctx); assert(deref->deref.type == deref->var->type); validate_var_use(deref->var, state); @@ -386,7 +389,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; for (unsigned i = 0; i < num_vars; i++) { - validate_deref_var(instr->variables[i], state); + validate_deref_var(instr, instr->variables[i], state); } switch (instr->intrinsic) { @@ -423,7 +426,7 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state) } if (instr->sampler != NULL) - validate_deref_var(instr->sampler, state); + validate_deref_var(instr, instr->sampler, state); } static void @@ -438,10 +441,10 @@ validate_call_instr(nir_call_instr *instr, validate_state *state) for (unsigned i = 0; i < instr->num_params; i++) { assert(instr->callee->params[i].type == instr->params[i]->deref.type); - validate_deref_var(instr->params[i], state); + validate_deref_var(instr, instr->params[i], state); } - validate_deref_var(instr->return_deref, state); + validate_deref_var(instr, instr->return_deref, state); } static void @@ -680,8 +683,7 @@ validate_cf_node(nir_cf_node *node, validate_state *state) break; default: - assert(!"Invalid ALU instruction type"); - break; + unreachable("Invalid CF node type"); } } -- cgit v1.2.3