diff options
Diffstat (limited to 'mesalib/src/glsl/nir')
-rw-r--r-- | mesalib/src/glsl/nir/nir.c | 112 | ||||
-rw-r--r-- | mesalib/src/glsl/nir/nir.h | 17 | ||||
-rw-r--r-- | mesalib/src/glsl/nir/nir_array.h | 96 | ||||
-rw-r--r-- | mesalib/src/glsl/nir/nir_from_ssa.c | 2 | ||||
-rw-r--r-- | mesalib/src/glsl/nir/nir_lower_locals_to_regs.c | 148 | ||||
-rw-r--r-- | mesalib/src/glsl/nir/nir_lower_to_source_mods.c | 7 | ||||
-rw-r--r-- | mesalib/src/glsl/nir/nir_lower_var_copies.c | 24 | ||||
-rw-r--r-- | mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c | 120 | ||||
-rw-r--r-- | mesalib/src/glsl/nir/nir_opt_algebraic.py | 1 | ||||
-rw-r--r-- | mesalib/src/glsl/nir/nir_print.c | 2 | ||||
-rw-r--r-- | mesalib/src/glsl/nir/nir_types.cpp | 2 |
11 files changed, 359 insertions, 172 deletions
diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c index c6e53612b..4cc074b80 100644 --- a/mesalib/src/glsl/nir/nir.c +++ b/mesalib/src/glsl/nir/nir.c @@ -589,6 +589,66 @@ nir_copy_deref(void *mem_ctx, nir_deref *deref) return NULL; } +/* Returns a load_const instruction that represents the constant + * initializer for the given deref chain. The caller is responsible for + * ensuring that there actually is a constant initializer. + */ +nir_load_const_instr * +nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref) +{ + nir_constant *constant = deref->var->constant_initializer; + assert(constant); + + const nir_deref *tail = &deref->deref; + unsigned matrix_offset = 0; + while (tail->child) { + switch (tail->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(tail->child); + assert(arr->deref_array_type == nir_deref_array_type_direct); + if (glsl_type_is_matrix(tail->type)) { + assert(arr->deref.child == NULL); + matrix_offset = arr->base_offset; + } else { + constant = constant->elements[arr->base_offset]; + } + break; + } + + case nir_deref_type_struct: { + constant = constant->elements[nir_deref_as_struct(tail->child)->index]; + break; + } + + default: + unreachable("Invalid deref child type"); + } + + tail = tail->child; + } + + nir_load_const_instr *load = + nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type)); + + matrix_offset *= load->def.num_components; + for (unsigned i = 0; i < load->def.num_components; i++) { + switch (glsl_get_base_type(tail->type)) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + load->value.u[i] = constant->value.u[matrix_offset + i]; + break; + case GLSL_TYPE_BOOL: + load->value.u[i] = constant->value.b[matrix_offset + i] ? + NIR_TRUE : NIR_FALSE; + break; + default: + unreachable("Invalid immediate type"); + } + } + + return load; +} /** * \name Control flow modification @@ -1800,33 +1860,37 @@ src_does_not_use_reg(nir_src *src, void *void_reg) void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src) { - if (src->is_ssa) { - nir_ssa_def *old_ssa = src->ssa; - *src = new_src; - if (old_ssa && nir_foreach_src(instr, src_does_not_use_def, old_ssa)) { - struct set_entry *entry = _mesa_set_search(old_ssa->uses, instr); - assert(entry); - _mesa_set_remove(old_ssa->uses, entry); - } - } else { - if (src->reg.indirect) - nir_instr_rewrite_src(instr, src->reg.indirect, new_src); - - nir_register *old_reg = src->reg.reg; - *src = new_src; - if (old_reg && nir_foreach_src(instr, src_does_not_use_reg, old_reg)) { - struct set_entry *entry = _mesa_set_search(old_reg->uses, instr); - assert(entry); - _mesa_set_remove(old_reg->uses, entry); + nir_src old_src = *src; + *src = new_src; + + for (nir_src *iter_src = &old_src; iter_src; + iter_src = iter_src->is_ssa ? NULL : iter_src->reg.indirect) { + if (iter_src->is_ssa) { + nir_ssa_def *ssa = iter_src->ssa; + if (ssa && nir_foreach_src(instr, src_does_not_use_def, ssa)) { + struct set_entry *entry = _mesa_set_search(ssa->uses, instr); + assert(entry); + _mesa_set_remove(ssa->uses, entry); + } + } else { + nir_register *reg = iter_src->reg.reg; + if (reg && nir_foreach_src(instr, src_does_not_use_reg, reg)) { + struct set_entry *entry = _mesa_set_search(reg->uses, instr); + assert(entry); + _mesa_set_remove(reg->uses, entry); + } } } - if (new_src.is_ssa) { - if (new_src.ssa) - _mesa_set_add(new_src.ssa->uses, instr); - } else { - if (new_src.reg.reg) - _mesa_set_add(new_src.reg.reg->uses, instr); + for (nir_src *iter_src = &new_src; iter_src; + iter_src = iter_src->is_ssa ? NULL : iter_src->reg.indirect) { + if (iter_src->is_ssa) { + if (iter_src->ssa) + _mesa_set_add(iter_src->ssa->uses, instr); + } else { + if (iter_src->reg.reg) + _mesa_set_add(iter_src->reg.reg->uses, instr); + } } } diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h index 74772c798..98b0ec328 100644 --- a/mesalib/src/glsl/nir/nir.h +++ b/mesalib/src/glsl/nir/nir.h @@ -960,7 +960,8 @@ typedef struct { static inline unsigned nir_tex_instr_dest_size(nir_tex_instr *instr) { - if (instr->op == nir_texop_txs) { + switch (instr->op) { + case nir_texop_txs: { unsigned ret; switch (instr->sampler_dim) { case GLSL_SAMPLER_DIM_1D: @@ -985,13 +986,18 @@ nir_tex_instr_dest_size(nir_tex_instr *instr) return ret; } - if (instr->op == nir_texop_query_levels) + case nir_texop_lod: return 2; - if (instr->is_shadow && instr->is_new_style_shadow) + case nir_texop_query_levels: return 1; - return 4; + default: + if (instr->is_shadow && instr->is_new_style_shadow) + return 1; + + return 4; + } } static inline unsigned @@ -1514,6 +1520,9 @@ nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index); nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref); +nir_load_const_instr * +nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref); + void nir_instr_insert_before(nir_instr *instr, nir_instr *before); void nir_instr_insert_after(nir_instr *instr, nir_instr *after); diff --git a/mesalib/src/glsl/nir/nir_array.h b/mesalib/src/glsl/nir/nir_array.h new file mode 100644 index 000000000..1db4e8cea --- /dev/null +++ b/mesalib/src/glsl/nir/nir_array.h @@ -0,0 +1,96 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + void *mem_ctx; + size_t size; + size_t alloc; + void *data; +} nir_array; + +static inline void +nir_array_init(nir_array *arr, void *mem_ctx) +{ + arr->mem_ctx = mem_ctx; + arr->size = 0; + arr->alloc = 0; + arr->data = NULL; +} + +static inline void +nir_array_fini(nir_array *arr) +{ + if (arr->mem_ctx) + ralloc_free(arr->data); + else + free(arr->data); +} + +#define NIR_ARRAY_INITIAL_SIZE 64 + +/* Increments the size of the array by the given ammount and returns a + * pointer to the beginning of the newly added space. + */ +static inline void * +nir_array_grow(nir_array *arr, size_t additional) +{ + size_t new_size = arr->size + additional; + if (new_size > arr->alloc) { + if (arr->alloc == 0) + arr->alloc = NIR_ARRAY_INITIAL_SIZE; + + while (new_size > arr->alloc) + arr->alloc *= 2; + + if (arr->mem_ctx) + arr->data = reralloc_size(arr->mem_ctx, arr->data, arr->alloc); + else + arr->data = realloc(arr->data, arr->alloc); + } + + void *ptr = (void *)((char *)arr->data + arr->size); + arr->size = new_size; + + return ptr; +} + +#define nir_array_add(arr, type, elem) \ + *(type *)nir_array_grow(arr, sizeof(type)) = (elem) + +#define nir_array_foreach(arr, type, elem) \ + for (type *elem = (type *)(arr)->data; \ + elem < (type *)((char *)(arr)->data + (arr)->size); elem++) + +#ifdef __cplusplus +} /* extern "C" */ +#endif diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c index 184698abd..6a3b141bd 100644 --- a/mesalib/src/glsl/nir/nir_from_ssa.c +++ b/mesalib/src/glsl/nir/nir_from_ssa.c @@ -642,7 +642,7 @@ emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src, * * The algorithm works by playing this little shell game with the values. * We start by recording where every source value is and which source value - * each destination value should recieve. We then grab any copy whose + * each destination value should receive. We then grab any copy whose * destination is "empty", i.e. not used as a source, and do the following: * - Find where its source value currently lives * - Emit the move instruction diff --git a/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c index 8c5df7be6..bc6a3d320 100644 --- a/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c +++ b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c @@ -26,13 +26,20 @@ */ #include "nir.h" +#include "nir_array.h" struct locals_to_regs_state { - void *mem_ctx; + nir_shader *shader; nir_function_impl *impl; /* A hash table mapping derefs to registers */ struct hash_table *regs_table; + + /* A growing array of derefs that we have encountered. There is exactly + * one element of this array per element in the hash table. This is + * used to make adding register initialization code deterministic. + */ + nir_array derefs_array; }; /* The following two functions implement a hash and equality check for @@ -100,15 +107,8 @@ get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state) unsigned array_size = 1; nir_deref *tail = &deref->deref; while (tail->child) { - if (tail->child->deref_type == nir_deref_type_array) { - /* Multiply by the parent's type. */ - if (glsl_type_is_matrix(tail->type)) { - array_size *= glsl_get_matrix_columns(tail->type); - } else { - assert(glsl_get_length(tail->type) > 0); - array_size *= glsl_get_length(tail->type); - } - } + if (tail->child->deref_type == nir_deref_type_array) + array_size *= glsl_get_length(tail->type); tail = tail->child; } @@ -119,6 +119,7 @@ get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state) reg->num_array_elems = array_size > 1 ? array_size : 0; _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg); + nir_array_add(&state->derefs_array, nir_deref_var *, deref); return reg; } @@ -134,6 +135,14 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, src.reg.base_offset = 0; src.reg.indirect = NULL; + /* It is possible for a user to create a shader that has an array with a + * single element and then proceed to access it indirectly. Indirectly + * accessing a non-array register is not allowed in NIR. In order to + * handle this case we just convert it to a direct reference. + */ + if (src.reg.reg->num_array_elems == 0) + return src; + nir_deref *tail = &deref->deref; while (tail->child != NULL) { const struct glsl_type *parent_type = tail->type; @@ -149,11 +158,11 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, if (src.reg.indirect) { nir_load_const_instr *load_const = - nir_load_const_instr_create(state->mem_ctx, 1); + nir_load_const_instr_create(state->shader, 1); load_const->value.u[0] = glsl_get_length(parent_type); nir_instr_insert_before(instr, &load_const->instr); - nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx, nir_op_imul); + nir_alu_instr *mul = nir_alu_instr_create(state->shader, nir_op_imul); mul->src[0].src = *src.reg.indirect; mul->src[1].src.is_ssa = true; mul->src[1].src.ssa = &load_const->def; @@ -167,15 +176,15 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, if (deref_array->deref_array_type == nir_deref_array_type_indirect) { if (src.reg.indirect == NULL) { - src.reg.indirect = ralloc(state->mem_ctx, nir_src); + src.reg.indirect = ralloc(state->shader, nir_src); nir_src_copy(src.reg.indirect, &deref_array->indirect, - state->mem_ctx); + state->shader); } else { - nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx, + nir_alu_instr *add = nir_alu_instr_create(state->shader, nir_op_iadd); add->src[0].src = *src.reg.indirect; nir_src_copy(&add->src[1].src, &deref_array->indirect, - state->mem_ctx); + state->shader); add->dest.write_mask = 1; nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); nir_instr_insert_before(instr, &add->instr); @@ -205,7 +214,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) if (intrin->variables[0]->var->data.mode != nir_var_local) continue; - nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov); + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); mov->src[0].src = get_deref_reg_src(intrin->variables[0], &intrin->instr, state); mov->dest.write_mask = (1 << intrin->num_components) - 1; @@ -214,9 +223,9 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) intrin->num_components, NULL); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(&mov->dest.dest.ssa), - state->mem_ctx); + state->shader); } else { - nir_dest_copy(&mov->dest.dest, &intrin->dest, state->mem_ctx); + nir_dest_copy(&mov->dest.dest, &intrin->dest, state->shader); } nir_instr_insert_before(&intrin->instr, &mov->instr); @@ -231,8 +240,8 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) nir_src reg_src = get_deref_reg_src(intrin->variables[0], &intrin->instr, state); - nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov); - nir_src_copy(&mov->src[0].src, &intrin->src[0], state->mem_ctx); + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); + nir_src_copy(&mov->src[0].src, &intrin->src[0], state->shader); mov->dest.write_mask = (1 << intrin->num_components) - 1; mov->dest.dest.is_ssa = false; mov->dest.dest.reg.reg = reg_src.reg.reg; @@ -257,20 +266,115 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) return true; } +static nir_block * +compute_reg_usedef_lca(nir_register *reg) +{ + struct set_entry *entry; + nir_block *lca = NULL; + + set_foreach(reg->defs, entry) + lca = nir_dominance_lca(lca, ((nir_instr *)entry->key)->block); + + set_foreach(reg->uses, entry) + lca = nir_dominance_lca(lca, ((nir_instr *)entry->key)->block); + + set_foreach(reg->if_uses, entry) { + nir_if *if_stmt = (nir_if *)entry->key; + nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node); + assert(prev_node->type == nir_cf_node_block); + lca = nir_dominance_lca(lca, nir_cf_node_as_block(prev_node)); + } + + return lca; +} + +static void +insert_constant_initializer(nir_deref_var *deref_head, nir_deref *deref_tail, + nir_block *block, + struct locals_to_regs_state *state) +{ + if (deref_tail->child) { + switch (deref_tail->child->deref_type) { + case nir_deref_type_array: { + unsigned array_elems = glsl_get_length(deref_tail->type); + + nir_deref_array arr_deref; + arr_deref.deref = *deref_tail->child; + arr_deref.deref_array_type = nir_deref_array_type_direct; + + nir_deref *old_child = deref_tail->child; + deref_tail->child = &arr_deref.deref; + for (unsigned i = 0; i < array_elems; i++) { + arr_deref.base_offset = i; + insert_constant_initializer(deref_head, &arr_deref.deref, + block, state); + } + deref_tail->child = old_child; + return; + } + + case nir_deref_type_struct: + insert_constant_initializer(deref_head, deref_tail->child, + block, state); + return; + + default: + unreachable("Invalid deref child type"); + } + } + + assert(deref_tail->child == NULL); + + nir_load_const_instr *load = + nir_deref_get_const_initializer_load(state->shader, deref_head); + nir_instr_insert_before_block(block, &load->instr); + + nir_src reg_src = get_deref_reg_src(deref_head, &load->instr, state); + + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); + mov->src[0].src = nir_src_for_ssa(&load->def); + mov->dest.write_mask = (1 << load->def.num_components) - 1; + mov->dest.dest.is_ssa = false; + mov->dest.dest.reg.reg = reg_src.reg.reg; + mov->dest.dest.reg.base_offset = reg_src.reg.base_offset; + mov->dest.dest.reg.indirect = reg_src.reg.indirect; + + nir_instr_insert_after(&load->instr, &mov->instr); +} + static void nir_lower_locals_to_regs_impl(nir_function_impl *impl) { struct locals_to_regs_state state; - state.mem_ctx = ralloc_parent(impl); + state.shader = impl->overload->function->shader; state.impl = impl; state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal); + nir_array_init(&state.derefs_array, NULL); + + nir_metadata_require(impl, nir_metadata_dominance); nir_foreach_block(impl, lower_locals_to_regs_block, &state); + nir_array_foreach(&state.derefs_array, nir_deref_var *, deref_ptr) { + nir_deref_var *deref = *deref_ptr; + struct hash_entry *deref_entry = + _mesa_hash_table_search(state.regs_table, deref); + assert(deref_entry && deref_entry->key == deref); + nir_register *reg = (nir_register *)deref_entry->data; + + if (deref->var->constant_initializer == NULL) + continue; + + nir_block *usedef_lca = compute_reg_usedef_lca(reg); + + insert_constant_initializer(deref, &deref->deref, usedef_lca, &state); + } + nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); + nir_array_fini(&state.derefs_array); _mesa_hash_table_destroy(state.regs_table, NULL); } diff --git a/mesalib/src/glsl/nir/nir_lower_to_source_mods.c b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c index d6bf77f17..7b4a0f657 100644 --- a/mesalib/src/glsl/nir/nir_lower_to_source_mods.c +++ b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c @@ -67,6 +67,13 @@ nir_lower_to_source_mods_block(nir_block *block, void *state) continue; } + /* We can only do a rewrite if the source we are copying is SSA. + * Otherwise, moving the read might invalidly reorder reads/writes + * on a register. + */ + if (!parent->src[0].src.is_ssa) + continue; + nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src); if (alu->src[i].abs) { /* abs trumps both neg and abs, do nothing */ diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c index 58389a7c7..21672901f 100644 --- a/mesalib/src/glsl/nir/nir_lower_var_copies.c +++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c @@ -64,26 +64,6 @@ get_deref_tail(nir_deref *deref) return deref; } -static int -type_get_length(const struct glsl_type *type) -{ - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_ARRAY: - return glsl_get_length(type); - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(type)) - return glsl_get_matrix_columns(type); - else - return glsl_get_vector_elements(type); - default: - unreachable("Invalid deref base type"); - } -} - /* This function recursively walks the given deref chain and replaces the * given copy instruction with an equivalent sequence load/store * operations. @@ -121,9 +101,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child); nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child); - unsigned length = type_get_length(src_arr_parent->type); + unsigned length = glsl_get_length(src_arr_parent->type); /* The wildcards should represent the same number of elements */ - assert(length == type_get_length(dest_arr_parent->type)); + assert(length == glsl_get_length(dest_arr_parent->type)); assert(length > 0); /* Walk over all of the elements that this wildcard refers to and diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c index 2ca74d71b..bb60f4601 100644 --- a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -55,7 +55,7 @@ struct deref_node { }; struct lower_variables_state { - void *mem_ctx; + nir_shader *shader; void *dead_ctx; nir_function_impl *impl; @@ -90,34 +90,14 @@ struct lower_variables_state { struct hash_table *phi_table; }; -static int -type_get_length(const struct glsl_type *type) -{ - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_ARRAY: - return glsl_get_length(type); - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(type)) - return glsl_get_matrix_columns(type); - else - return glsl_get_vector_elements(type); - default: - unreachable("Invalid deref base type"); - } -} - static struct deref_node * deref_node_create(struct deref_node *parent, - const struct glsl_type *type, void *mem_ctx) + const struct glsl_type *type, nir_shader *shader) { size_t size = sizeof(struct deref_node) + - type_get_length(type) * sizeof(struct deref_node *); + glsl_get_length(type) * sizeof(struct deref_node *); - struct deref_node *node = rzalloc_size(mem_ctx, size); + struct deref_node *node = rzalloc_size(shader, size); node->type = type; node->parent = parent; node->deref = NULL; @@ -165,7 +145,7 @@ get_deref_node(nir_deref_var *deref, struct lower_variables_state *state) case nir_deref_type_struct: { nir_deref_struct *deref_struct = nir_deref_as_struct(tail); - assert(deref_struct->index < type_get_length(node->type)); + assert(deref_struct->index < glsl_get_length(node->type)); if (node->children[deref_struct->index] == NULL) node->children[deref_struct->index] = @@ -184,7 +164,7 @@ get_deref_node(nir_deref_var *deref, struct lower_variables_state *state) * out-of-bounds offset. We need to handle this at least * somewhat gracefully. */ - if (arr->base_offset >= type_get_length(node->type)) + if (arr->base_offset >= glsl_get_length(node->type)) return NULL; if (node->children[arr->base_offset] == NULL) @@ -317,6 +297,10 @@ deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref, if (arr->deref_array_type == nir_deref_array_type_indirect) return true; + /* If there is an indirect at this level, we're aliased. */ + if (node->indirect) + return true; + assert(arr->deref_array_type == nir_deref_array_type_direct); if (node->children[arr->base_offset] && @@ -465,7 +449,7 @@ lower_copies_to_load_store(struct deref_node *node, set_foreach(node->copies, copy_entry) { nir_intrinsic_instr *copy = (void *)copy_entry->key; - nir_lower_var_copy_instr(copy, state->mem_ctx); + nir_lower_var_copy_instr(copy, state->shader); for (unsigned i = 0; i < 2; ++i) { struct deref_node *arg_node = @@ -485,67 +469,6 @@ lower_copies_to_load_store(struct deref_node *node, return true; } -/* Returns a load_const instruction that represents the constant - * initializer for the given deref chain. The caller is responsible for - * ensuring that there actually is a constant initializer. - */ -static nir_load_const_instr * -get_const_initializer_load(const nir_deref_var *deref, - struct lower_variables_state *state) -{ - nir_constant *constant = deref->var->constant_initializer; - const nir_deref *tail = &deref->deref; - unsigned matrix_offset = 0; - while (tail->child) { - switch (tail->child->deref_type) { - case nir_deref_type_array: { - nir_deref_array *arr = nir_deref_as_array(tail->child); - assert(arr->deref_array_type == nir_deref_array_type_direct); - if (glsl_type_is_matrix(tail->type)) { - assert(arr->deref.child == NULL); - matrix_offset = arr->base_offset; - } else { - constant = constant->elements[arr->base_offset]; - } - break; - } - - case nir_deref_type_struct: { - constant = constant->elements[nir_deref_as_struct(tail->child)->index]; - break; - } - - default: - unreachable("Invalid deref child type"); - } - - tail = tail->child; - } - - nir_load_const_instr *load = - nir_load_const_instr_create(state->mem_ctx, - glsl_get_vector_elements(tail->type)); - - matrix_offset *= load->def.num_components; - for (unsigned i = 0; i < load->def.num_components; i++) { - switch (glsl_get_base_type(tail->type)) { - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - load->value.u[i] = constant->value.u[matrix_offset + i]; - break; - case GLSL_TYPE_BOOL: - load->value.u[i] = constant->value.b[matrix_offset + i] ? - NIR_TRUE : NIR_FALSE; - break; - default: - unreachable("Invalid immediate type"); - } - } - - return load; -} - /** Pushes an SSA def onto the def stack for the given node * * Each node is potentially associated with a stack of SSA definitions. @@ -614,7 +537,7 @@ get_ssa_def_for_block(struct deref_node *node, nir_block *block, * given block. This means that we need to add an undef and use that. */ nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->mem_ctx, + nir_ssa_undef_instr_create(state->shader, glsl_get_vector_elements(node->type)); nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr); def_stack_push(node, &undef->def, state); @@ -694,7 +617,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) * should result in an undefined value. */ nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->mem_ctx, + nir_ssa_undef_instr_create(state->shader, intrin->num_components); nir_instr_insert_before(&intrin->instr, &undef->instr); @@ -702,14 +625,14 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(&undef->def), - state->mem_ctx); + state->shader); continue; } if (!node->lower_to_ssa) continue; - nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); mov->src[0].src.is_ssa = true; mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state); @@ -727,7 +650,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(&mov->dest.dest.ssa), - state->mem_ctx); + state->shader); break; } @@ -750,7 +673,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) assert(intrin->src[0].is_ssa); - nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); mov->src[0].src.is_ssa = true; mov->src[0].src.ssa = intrin->src[0].ssa; @@ -887,7 +810,7 @@ insert_phi_nodes(struct lower_variables_state *state) continue; if (has_already[next->index] < iter_count) { - nir_phi_instr *phi = nir_phi_instr_create(state->mem_ctx); + nir_phi_instr *phi = nir_phi_instr_create(state->shader); nir_ssa_dest_init(&phi->instr, &phi->dest, glsl_get_vector_elements(node->type), NULL); nir_instr_insert_before_block(next, &phi->instr); @@ -938,8 +861,8 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) { struct lower_variables_state state; - state.mem_ctx = ralloc_parent(impl); - state.dead_ctx = ralloc_context(state.mem_ctx); + state.shader = impl->overload->function->shader; + state.dead_ctx = ralloc_context(state.shader); state.impl = impl; state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx, @@ -983,7 +906,8 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) progress = true; if (deref->var->constant_initializer) { - nir_load_const_instr *load = get_const_initializer_load(deref, &state); + nir_load_const_instr *load = + nir_deref_get_const_initializer_load(state.shader, deref); nir_ssa_def_init(&load->instr, &load->def, glsl_get_vector_elements(node->type), NULL); nir_instr_insert_before_cf_list(&impl->body, &load->instr); diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py index cdb19241c..2a2b9561e 100644 --- a/mesalib/src/glsl/nir/nir_opt_algebraic.py +++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py @@ -144,6 +144,7 @@ optimizations = [ (('fexp', ('fmul', ('flog', a), b)), ('fpow', a, b), '!options->lower_fpow'), # e^(ln(a)*b) = a^b (('fpow', a, 1.0), a), (('fpow', a, 2.0), ('fmul', a, a)), + (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), (('fpow', 2.0, a), ('fexp2', a)), (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))), diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c index fb8c9344c..eb4045cec 100644 --- a/mesalib/src/glsl/nir/nir_print.c +++ b/mesalib/src/glsl/nir/nir_print.c @@ -533,6 +533,8 @@ print_load_const_instr(nir_load_const_instr *instr, unsigned tabs, FILE *fp) fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]); } + + fprintf(fp, ")"); } static void diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp index f0d0b46d2..62176f508 100644 --- a/mesalib/src/glsl/nir/nir_types.cpp +++ b/mesalib/src/glsl/nir/nir_types.cpp @@ -103,7 +103,7 @@ glsl_get_matrix_columns(const struct glsl_type *type) unsigned glsl_get_length(const struct glsl_type *type) { - return type->length; + return type->is_matrix() ? type->matrix_columns : type->length; } const char * |