diff options
Diffstat (limited to 'mesalib/src/glsl')
38 files changed, 872 insertions, 624 deletions
diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources index c471eca23..d784a8107 100644 --- a/mesalib/src/glsl/Makefile.sources +++ b/mesalib/src/glsl/Makefile.sources @@ -22,6 +22,7 @@ NIR_FILES = \ nir/glsl_to_nir.h \ nir/nir.c \ nir/nir.h \ + nir/nir_array.h \ nir/nir_builder.h \ nir/nir_constant_expressions.h \ nir/nir_dominance.c \ diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp index 18b82e3be..14e630905 100644 --- a/mesalib/src/glsl/ast_to_hir.cpp +++ b/mesalib/src/glsl/ast_to_hir.cpp @@ -1558,6 +1558,18 @@ ast_expression::do_hir(exec_list *instructions, error_emitted = true; } + /* From section 4.1.7 of the GLSL 4.50 spec (Opaque Types): + * + * "Except for array indexing, structure member selection, and + * parentheses, opaque variables are not allowed to be operands in + * expressions; such use results in a compile-time error." + */ + if (type->contains_opaque()) { + _mesa_glsl_error(&loc, state, "opaque variables cannot be operands " + "of the ?: operator"); + error_emitted = true; + } + ir_constant *cond_val = op[0]->constant_expression_value(); if (then_instructions.is_empty() @@ -2357,6 +2369,14 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual, var->data.image_format = GL_NONE; } + } else if (qual->flags.q.read_only || + qual->flags.q.write_only || + qual->flags.q.coherent || + qual->flags.q._volatile || + qual->flags.q.restrict_flag || + qual->flags.q.explicit_image_format) { + _mesa_glsl_error(loc, state, "memory qualifiers may only be applied to " + "images"); } } @@ -2781,8 +2801,21 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, validate_matrix_layout_for_type(state, loc, var->type, var); } - if (var->type->contains_image()) - apply_image_qualifier_to_variable(qual, var, state, loc); + apply_image_qualifier_to_variable(qual, var, state, loc); + + /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader + * Inputs): + * + 
* "Fragment shaders also allow the following layout qualifier on in only + * (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (qual->flags.q.early_fragment_tests) { + _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " + "valid in fragment shader input layout declaration."); + } } /** @@ -3541,6 +3574,9 @@ ast_declarator_list::hir(exec_list *instructions, case GLSL_TYPE_INT: if (state->is_version(120, 300)) break; + case GLSL_TYPE_DOUBLE: + if (check_type->base_type == GLSL_TYPE_DOUBLE && (state->is_version(410, 0) || state->ARB_vertex_attrib_64bit_enable)) + break; /* FALLTHROUGH */ default: _mesa_glsl_error(& loc, state, diff --git a/mesalib/src/glsl/ast_type.cpp b/mesalib/src/glsl/ast_type.cpp index b596cd59e..1bcf6a2e8 100644 --- a/mesalib/src/glsl/ast_type.cpp +++ b/mesalib/src/glsl/ast_type.cpp @@ -293,11 +293,7 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, valid_in_mask.flags.q.invocations = 1; break; case MESA_SHADER_FRAGMENT: - if (q.flags.q.early_fragment_tests) { - state->early_fragment_tests = true; - } else { - _mesa_glsl_error(loc, state, "invalid input layout qualifier"); - } + valid_in_mask.flags.q.early_fragment_tests = 1; break; case MESA_SHADER_COMPUTE: create_cs_ast |= @@ -345,6 +341,10 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, this->invocations = q.invocations; } + if (q.flags.q.early_fragment_tests) { + state->fs_early_fragment_tests = true; + } + if (create_gs_ast) { node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type); } else if (create_cs_ast) { diff --git a/mesalib/src/glsl/builtin_functions.cpp b/mesalib/src/glsl/builtin_functions.cpp index 524b8d6e8..97055d85d 100644 --- a/mesalib/src/glsl/builtin_functions.cpp +++ b/mesalib/src/glsl/builtin_functions.cpp @@ -136,6 +136,12 @@ v140(const _mesa_glsl_parse_state *state) } static bool +es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(0, 310); +} + +static bool 
texture_rectangle(const _mesa_glsl_parse_state *state) { return state->ARB_texture_rectangle_enable; @@ -194,7 +200,8 @@ shader_bit_encoding(const _mesa_glsl_parse_state *state) static bool shader_integer_mix(const _mesa_glsl_parse_state *state) { - return v130(state) && state->EXT_shader_integer_mix_enable; + return state->is_version(450, 310) || + (v130(state) && state->EXT_shader_integer_mix_enable); } static bool @@ -219,10 +226,17 @@ gpu_shader5(const _mesa_glsl_parse_state *state) } static bool -shader_packing_or_gpu_shader5(const _mesa_glsl_parse_state *state) +gpu_shader5_or_es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 310) || state->ARB_gpu_shader5_enable; +} + +static bool +shader_packing_or_es31_or_gpu_shader5(const _mesa_glsl_parse_state *state) { return state->ARB_shading_language_packing_enable || - gpu_shader5(state); + state->ARB_gpu_shader5_enable || + state->is_version(400, 310); } static bool @@ -297,15 +311,24 @@ texture_gather(const _mesa_glsl_parse_state *state) state->ARB_gpu_shader5_enable; } +static bool +texture_gather_or_es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 310) || + state->ARB_texture_gather_enable || + state->ARB_gpu_shader5_enable; +} + /* Only ARB_texture_gather but not GLSL 4.0 or ARB_gpu_shader5. * used for relaxation of const offset requirements. 
*/ static bool -texture_gather_only(const _mesa_glsl_parse_state *state) +texture_gather_only_or_es31(const _mesa_glsl_parse_state *state) { return !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable && - state->ARB_texture_gather_enable; + (state->ARB_texture_gather_enable || + state->is_version(0, 310)); } /* Desktop GL or OES_standard_derivatives + fragment shader only */ @@ -359,7 +382,7 @@ tex3d_lod(const _mesa_glsl_parse_state *state) static bool shader_atomic_counters(const _mesa_glsl_parse_state *state) { - return state->ARB_shader_atomic_counters_enable; + return state->has_atomic_counters(); } static bool @@ -1161,12 +1184,12 @@ builtin_builder::create_builtins() add_function("packUnorm2x16", _packUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); add_function("packSnorm2x16", _packSnorm2x16(shader_packing_or_es3), NULL); - add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_gpu_shader5), NULL); - add_function("packSnorm4x8", _packSnorm4x8(shader_packing_or_gpu_shader5), NULL); + add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("packSnorm4x8", _packSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); add_function("unpackUnorm2x16", _unpackUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); add_function("unpackSnorm2x16", _unpackSnorm2x16(shader_packing_or_es3), NULL); - add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing_or_gpu_shader5), NULL); - add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_gpu_shader5), NULL); + add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); add_function("packHalf2x16", _packHalf2x16(shader_packing_or_es3), NULL); add_function("unpackHalf2x16", _unpackHalf2x16(shader_packing_or_es3), NULL); add_function("packDouble2x32", _packDouble2x32(fp64), NULL); @@ -2202,61 +2225,69 @@ 
builtin_builder::create_builtins() NULL); add_function("textureGather", - _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), - _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), - _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), - _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), - 
_texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, 
glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), - _texture(ir_tg4, gpu_shader5, 
glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec4_type), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type), NULL); add_function("textureGatherOffset", - _texture(ir_tg4, texture_gather_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, 
glsl_type::vec3_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), @@ -2285,6 +2316,9 @@ builtin_builder::create_builtins() _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, 
glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), NULL); add_function("textureGatherOffsets", @@ -4445,7 +4479,7 @@ builtin_builder::_bitfieldExtract(const glsl_type *type) ir_variable *value = in_var(type, "value"); ir_variable *offset = in_var(glsl_type::int_type, "offset"); ir_variable *bits = in_var(glsl_type::int_type, "bits"); - MAKE_SIG(type, gpu_shader5, 3, value, offset, bits); + MAKE_SIG(type, gpu_shader5_or_es31, 3, value, offset, bits); body.emit(ret(expr(ir_triop_bitfield_extract, value, offset, bits))); @@ -4459,33 +4493,33 @@ builtin_builder::_bitfieldInsert(const glsl_type *type) ir_variable *insert = in_var(type, "insert"); ir_variable *offset = in_var(glsl_type::int_type, "offset"); ir_variable *bits = in_var(glsl_type::int_type, "bits"); - MAKE_SIG(type, gpu_shader5, 4, base, insert, offset, bits); + MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits); body.emit(ret(bitfield_insert(base, insert, offset, bits))); return sig; } -UNOP(bitfieldReverse, ir_unop_bitfield_reverse, gpu_shader5) +UNOP(bitfieldReverse, ir_unop_bitfield_reverse, gpu_shader5_or_es31) ir_function_signature * builtin_builder::_bitCount(const glsl_type *type) { - return unop(gpu_shader5, ir_unop_bit_count, + return unop(gpu_shader5_or_es31, ir_unop_bit_count, glsl_type::ivec(type->vector_elements), type); } ir_function_signature * builtin_builder::_findLSB(const glsl_type *type) { - return unop(gpu_shader5, ir_unop_find_lsb, + return unop(gpu_shader5_or_es31, ir_unop_find_lsb, glsl_type::ivec(type->vector_elements), type); } ir_function_signature * builtin_builder::_findMSB(const glsl_type *type) { - return unop(gpu_shader5, ir_unop_find_msb, + return unop(gpu_shader5_or_es31, ir_unop_find_msb, glsl_type::ivec(type->vector_elements), type); } @@ -4505,7 +4539,7 @@ builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type) 
ir_function_signature * builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type) { - return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5, x_type, x_type, exp_type); + return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5_or_es31, x_type, x_type, exp_type); } ir_function_signature * @@ -4526,7 +4560,7 @@ builtin_builder::_frexp(const glsl_type *x_type, const glsl_type *exp_type) { ir_variable *x = in_var(x_type, "x"); ir_variable *exponent = out_var(exp_type, "exp"); - MAKE_SIG(x_type, gpu_shader5, 2, x, exponent); + MAKE_SIG(x_type, gpu_shader5_or_es31, 2, x, exponent); const unsigned vec_elem = x_type->vector_elements; const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); @@ -4575,7 +4609,7 @@ builtin_builder::_uaddCarry(const glsl_type *type) ir_variable *x = in_var(type, "x"); ir_variable *y = in_var(type, "y"); ir_variable *carry = out_var(type, "carry"); - MAKE_SIG(type, gpu_shader5, 3, x, y, carry); + MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, carry); body.emit(assign(carry, ir_builder::carry(x, y))); body.emit(ret(add(x, y))); @@ -4589,7 +4623,7 @@ builtin_builder::_usubBorrow(const glsl_type *type) ir_variable *x = in_var(type, "x"); ir_variable *y = in_var(type, "y"); ir_variable *borrow = out_var(type, "borrow"); - MAKE_SIG(type, gpu_shader5, 3, x, y, borrow); + MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, borrow); body.emit(assign(borrow, ir_builder::borrow(x, y))); body.emit(ret(sub(x, y))); @@ -4607,7 +4641,7 @@ builtin_builder::_mulExtended(const glsl_type *type) ir_variable *y = in_var(type, "y"); ir_variable *msb = out_var(type, "msb"); ir_variable *lsb = out_var(type, "lsb"); - MAKE_SIG(glsl_type::void_type, gpu_shader5, 4, x, y, msb, lsb); + MAKE_SIG(glsl_type::void_type, gpu_shader5_or_es31, 4, x, y, msb, lsb); body.emit(assign(msb, imul_high(x, y))); body.emit(assign(lsb, mul(x, y))); diff --git a/mesalib/src/glsl/builtin_types.cpp 
b/mesalib/src/glsl/builtin_types.cpp index fef86df28..d92e2eb30 100644 --- a/mesalib/src/glsl/builtin_types.cpp +++ b/mesalib/src/glsl/builtin_types.cpp @@ -372,7 +372,7 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) add_type(symbols, glsl_type::uimage2DMSArray_type); } - if (state->ARB_shader_atomic_counters_enable) { + if (state->has_atomic_counters()) { add_type(symbols, glsl_type::atomic_uint_type); } diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp index 21e7331c7..6806aa1f9 100644 --- a/mesalib/src/glsl/builtin_variables.cpp +++ b/mesalib/src/glsl/builtin_variables.cpp @@ -653,19 +653,46 @@ builtin_variable_generator::generate_constants() add_const("gl_MaxTextureCoords", state->Const.MaxTextureCoords); } - if (state->ARB_shader_atomic_counters_enable) { + if (state->has_atomic_counters()) { add_const("gl_MaxVertexAtomicCounters", state->Const.MaxVertexAtomicCounters); - add_const("gl_MaxGeometryAtomicCounters", - state->Const.MaxGeometryAtomicCounters); add_const("gl_MaxFragmentAtomicCounters", state->Const.MaxFragmentAtomicCounters); add_const("gl_MaxCombinedAtomicCounters", state->Const.MaxCombinedAtomicCounters); add_const("gl_MaxAtomicCounterBindings", state->Const.MaxAtomicBufferBindings); - add_const("gl_MaxTessControlAtomicCounters", 0); - add_const("gl_MaxTessEvaluationAtomicCounters", 0); + + /* When Mesa adds support for GL_OES_geometry_shader and + * GL_OES_tessellation_shader, this will need to change. 
+ */ + if (!state->es_shader) { + add_const("gl_MaxGeometryAtomicCounters", + state->Const.MaxGeometryAtomicCounters); + add_const("gl_MaxTessControlAtomicCounters", 0); + add_const("gl_MaxTessEvaluationAtomicCounters", 0); + } + } + + if (state->is_version(420, 310)) { + add_const("gl_MaxVertexAtomicCounterBuffers", + state->Const.MaxVertexAtomicCounterBuffers); + add_const("gl_MaxFragmentAtomicCounterBuffers", + state->Const.MaxFragmentAtomicCounterBuffers); + add_const("gl_MaxCombinedAtomicCounterBuffers", + state->Const.MaxCombinedAtomicCounterBuffers); + add_const("gl_MaxAtomicCounterBufferSize", + state->Const.MaxAtomicCounterBufferSize); + + /* When Mesa adds support for GL_OES_geometry_shader and + * GL_OES_tessellation_shader, this will need to change. + */ + if (!state->es_shader) { + add_const("gl_MaxGeometryAtomicCounterBuffers", + state->Const.MaxGeometryAtomicCounterBuffers); + add_const("gl_MaxTessControlAtomicCounterBuffers", 0); + add_const("gl_MaxTessEvaluationAtomicCounterBuffers", 0); + } } if (state->is_version(430, 0) || state->ARB_compute_shader_enable) { diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y index cfceca66b..a11b6b2c7 100644 --- a/mesalib/src/glsl/glcpp/glcpp-parse.y +++ b/mesalib/src/glsl/glcpp/glcpp-parse.y @@ -2448,6 +2448,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions->ARB_gpu_shader_fp64) add_builtin_define(parser, "GL_ARB_gpu_shader_fp64", 1); + if (extensions->ARB_vertex_attrib_64bit) + add_builtin_define(parser, "GL_ARB_vertex_attrib_64bit", 1); + if (extensions->AMD_vertex_shader_layer) add_builtin_define(parser, "GL_AMD_vertex_shader_layer", 1); diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll index 2785ed168..10db5b8b6 100644 --- a/mesalib/src/glsl/glsl_lexer.ll +++ b/mesalib/src/glsl/glsl_lexer.ll @@ -409,7 +409,7 @@ restrict KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store 
readonly KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, READONLY); writeonly KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, WRITEONLY); -atomic_uint KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT); +atomic_uint KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT); struct return STRUCT; void return VOID_TOK; diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy index aceb3b916..3ce9e103f 100644 --- a/mesalib/src/glsl/glsl_parser.yy +++ b/mesalib/src/glsl/glsl_parser.yy @@ -214,6 +214,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2, %type <type_qualifier> layout_qualifier %type <type_qualifier> layout_qualifier_id_list layout_qualifier_id %type <type_qualifier> interface_block_layout_qualifier +%type <type_qualifier> memory_qualifier %type <type_qualifier> interface_qualifier %type <type_specifier> type_specifier %type <type_specifier> type_specifier_nonarray @@ -1000,6 +1001,11 @@ parameter_qualifier: $$ = $2; $$.precision = $1; } + | memory_qualifier parameter_qualifier + { + $$ = $1; + $$.merge_qualifier(&@1, state, $2); + } parameter_direction_qualifier: IN_TOK @@ -1360,6 +1366,21 @@ layout_qualifier_id: if (!$$.flags.i && match_layout_qualifier($1, "early_fragment_tests", state) == 0) { + /* From section 4.4.1.3 of the GLSL 4.50 specification + * (Fragment Shader Inputs): + * + * "Fragment shaders also allow the following layout + * qualifier on in only (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (state->stage != MESA_SHADER_FRAGMENT) { + _mesa_glsl_error(& @1, state, + "early_fragment_tests layout qualifier only " + "valid in fragment shaders"); + } + $$.flags.q.early_fragment_tests = 1; } } @@ -1404,13 +1425,13 @@ layout_qualifier_id: } if ((state->ARB_shading_language_420pack_enable || - 
state->ARB_shader_atomic_counters_enable) && + state->has_atomic_counters()) && match_layout_qualifier("binding", $1, state) == 0) { $$.flags.q.explicit_binding = 1; $$.binding = $3; } - if (state->ARB_shader_atomic_counters_enable && + if (state->has_atomic_counters() && match_layout_qualifier("offset", $1, state) == 0) { $$.flags.q.explicit_offset = 1; $$.offset = $3; @@ -1581,6 +1602,7 @@ type_qualifier: | storage_qualifier | interpolation_qualifier | layout_qualifier + | memory_qualifier | precision_qualifier { memset(&$$, 0, sizeof($$)); @@ -1718,6 +1740,11 @@ type_qualifier: $$ = $2; $$.precision = $1; } + | memory_qualifier type_qualifier + { + $$ = $1; + $$.merge_qualifier(&@1, state, $2); + } ; auxiliary_storage_qualifier: @@ -1778,7 +1805,10 @@ storage_qualifier: memset(& $$, 0, sizeof($$)); $$.flags.q.uniform = 1; } - | COHERENT + ; + +memory_qualifier: + COHERENT { memset(& $$, 0, sizeof($$)); $$.flags.q.coherent = 1; diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp index 0aa3c54fc..be6713c46 100644 --- a/mesalib/src/glsl/glsl_parser_extras.cpp +++ b/mesalib/src/glsl/glsl_parser_extras.cpp @@ -117,6 +117,16 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters; this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters; this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings; + this->Const.MaxVertexAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers; + this->Const.MaxGeometryAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers; + this->Const.MaxFragmentAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + this->Const.MaxCombinedAtomicCounterBuffers = + ctx->Const.MaxCombinedAtomicBuffers; + this->Const.MaxAtomicCounterBufferSize = + ctx->Const.MaxAtomicBufferSize; /* 
Compute shader constants */ for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupCount); i++) @@ -143,9 +153,9 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->num_user_structures = 0; /* supported_versions should be large enough to support the known desktop - * GLSL versions plus 2 GLES versions (ES2 & ES3) + * GLSL versions plus 3 GLES versions (ES 1.00, ES 3.00, and ES 3.10)) */ - STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 2) == + STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 3) == ARRAY_SIZE(this->supported_versions)); /* Populate the list of supported GLSL versions */ @@ -175,6 +185,11 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->supported_versions[this->num_supported_versions].es = true; this->num_supported_versions++; } + if (_mesa_is_gles31(ctx)) { + this->supported_versions[this->num_supported_versions].ver = 310; + this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } /* Create a string for use in error messages to tell the user which GLSL * versions are supported. 
@@ -212,7 +227,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->gs_input_size = 0; this->in_qualifier = new(this) ast_type_qualifier(); this->out_qualifier = new(this) ast_type_qualifier(); - this->early_fragment_tests = false; + this->fs_early_fragment_tests = false; memset(this->atomic_counter_offsets, 0, sizeof(this->atomic_counter_offsets)); this->allow_extension_directive_midshader = @@ -565,6 +580,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_texture_query_lod, true, false, ARB_texture_query_lod), EXT(ARB_texture_rectangle, true, false, dummy_true), EXT(ARB_uniform_buffer_object, true, false, ARB_uniform_buffer_object), + EXT(ARB_vertex_attrib_64bit, true, false, ARB_vertex_attrib_64bit), EXT(ARB_viewport_array, true, false, ARB_viewport_array), /* KHR extensions go here, sorted alphabetically. @@ -1418,6 +1434,7 @@ set_shader_inout_layout(struct gl_shader *shader, assert(!state->fs_redeclares_gl_fragcoord); assert(!state->fs_pixel_center_integer); assert(!state->fs_origin_upper_left); + assert(!state->fs_early_fragment_tests); } switch (shader->Stage) { @@ -1460,6 +1477,7 @@ set_shader_inout_layout(struct gl_shader *shader, shader->origin_upper_left = state->fs_origin_upper_left; shader->ARB_fragment_coord_conventions_enable = state->ARB_fragment_coord_conventions_enable; + shader->EarlyFragmentTests = state->fs_early_fragment_tests; break; default: diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h index dae7864fd..9a0c24e67 100644 --- a/mesalib/src/glsl/glsl_parser_extras.h +++ b/mesalib/src/glsl/glsl_parser_extras.h @@ -160,7 +160,7 @@ struct _mesa_glsl_parse_state { { if (!this->has_separate_shader_objects()) { const char *const requirement = this->es_shader - ? "GL_EXT_separate_shader_objects extension" + ? 
"GL_EXT_separate_shader_objects extension or GLSL ES 310" : "GL_ARB_separate_shader_objects extension or GLSL 420"; _mesa_glsl_error(locp, this, "%s explicit location requires %s", @@ -175,17 +175,26 @@ struct _mesa_glsl_parse_state { const ir_variable *) { if (!this->has_explicit_attrib_location() || - !this->ARB_explicit_uniform_location_enable) { + !this->has_explicit_uniform_location()) { + const char *const requirement = this->es_shader + ? "GLSL ES 310" + : "GL_ARB_explicit_uniform_location and either " + "GL_ARB_explicit_attrib_location or GLSL 330."; + _mesa_glsl_error(locp, this, - "uniform explicit location requires " - "GL_ARB_explicit_uniform_location and either " - "GL_ARB_explicit_attrib_location or GLSL 330."); + "uniform explicit location requires %s", + requirement); return false; } return true; } + bool has_atomic_counters() const + { + return ARB_shader_atomic_counters_enable || is_version(420, 310); + } + bool has_explicit_attrib_stream() const { return ARB_gpu_shader5_enable || is_version(400, 0); @@ -196,6 +205,11 @@ struct _mesa_glsl_parse_state { return ARB_explicit_attrib_location_enable || is_version(330, 300); } + bool has_explicit_uniform_location() const + { + return ARB_explicit_uniform_location_enable || is_version(430, 310); + } + bool has_uniform_buffer_objects() const { return ARB_uniform_buffer_object_enable || is_version(140, 300); @@ -203,7 +217,7 @@ struct _mesa_glsl_parse_state { bool has_separate_shader_objects() const { - return ARB_separate_shader_objects_enable || is_version(410, 0) + return ARB_separate_shader_objects_enable || is_version(410, 310) || EXT_separate_shader_objects_enable; } @@ -224,7 +238,7 @@ struct _mesa_glsl_parse_state { struct { unsigned ver; bool es; - } supported_versions[14]; + } supported_versions[15]; bool es_shader; unsigned language_version; @@ -339,6 +353,16 @@ struct _mesa_glsl_parse_state { unsigned MaxCombinedAtomicCounters; unsigned MaxAtomicBufferBindings; + /* These are also atomic 
counter related, but they weren't added + * until atomic counters were added to core in GLSL 4.20 and GLSL ES + * 3.10. + */ + unsigned MaxVertexAtomicCounterBuffers; + unsigned MaxGeometryAtomicCounterBuffers; + unsigned MaxFragmentAtomicCounterBuffers; + unsigned MaxCombinedAtomicCounterBuffers; + unsigned MaxAtomicCounterBufferSize; + /* ARB_compute_shader */ unsigned MaxComputeWorkGroupCount[3]; unsigned MaxComputeWorkGroupSize[3]; @@ -458,6 +482,8 @@ struct _mesa_glsl_parse_state { bool ARB_texture_rectangle_warn; bool ARB_uniform_buffer_object_enable; bool ARB_uniform_buffer_object_warn; + bool ARB_vertex_attrib_64bit_enable; + bool ARB_vertex_attrib_64bit_warn; bool ARB_viewport_array_enable; bool ARB_viewport_array_warn; @@ -510,7 +536,7 @@ struct _mesa_glsl_parse_state { */ unsigned gs_input_size; - bool early_fragment_tests; + bool fs_early_fragment_tests; /** Atomic counter offsets by binding */ unsigned atomic_counter_offsets[MAX_COMBINED_ATOMIC_BUFFERS]; diff --git a/mesalib/src/glsl/ir_set_program_inouts.cpp b/mesalib/src/glsl/ir_set_program_inouts.cpp index e877a2019..b968a1efd 100644 --- a/mesalib/src/glsl/ir_set_program_inouts.cpp +++ b/mesalib/src/glsl/ir_set_program_inouts.cpp @@ -105,13 +105,10 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len, int idx = var->data.location + var->data.index + offset + i; GLbitfield64 bitfield = BITFIELD64_BIT(idx); - /* dvec3 and dvec4 take up 2 slots */ - if (dual_slot) { - idx += i; - bitfield |= bitfield << 1; - } if (var->data.mode == ir_var_shader_in) { - prog->InputsRead |= bitfield; + prog->InputsRead |= bitfield; + if (dual_slot) + prog->DoubleInputsRead |= bitfield; if (is_fragment_shader) { gl_fragment_program *fprog = (gl_fragment_program *) prog; fprog->InterpQualifier[idx] = @@ -120,13 +117,6 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len, fprog->IsCentroid |= bitfield; if (var->data.sample) fprog->IsSample |= bitfield; - - /* Set the InterpQualifier 
of the next slot to the same as the - * current one, since dvec3 and dvec4 spans 2 slots. - */ - if (dual_slot) - fprog->InterpQualifier[idx + 1] = - (glsl_interp_qualifier) var->data.interpolation; } } else if (var->data.mode == ir_var_system_value) { prog->SystemValuesRead |= bitfield; diff --git a/mesalib/src/glsl/link_uniform_blocks.cpp b/mesalib/src/glsl/link_uniform_blocks.cpp index 6ca41107e..898544bea 100644 --- a/mesalib/src/glsl/link_uniform_blocks.cpp +++ b/mesalib/src/glsl/link_uniform_blocks.cpp @@ -67,14 +67,14 @@ private: assert(!"Should not get here."); } - virtual void enter_record(const glsl_type *type, const char *name, + virtual void enter_record(const glsl_type *type, const char *, bool row_major) { assert(type->is_record()); this->offset = glsl_align( this->offset, type->std140_base_alignment(row_major)); } - virtual void leave_record(const glsl_type *type, const char *name, + virtual void leave_record(const glsl_type *type, const char *, bool row_major) { assert(type->is_record()); @@ -90,8 +90,8 @@ private: } virtual void visit_field(const glsl_type *type, const char *name, - bool row_major, const glsl_type *record_type, - bool last_field) + bool row_major, const glsl_type *, + bool /* last_field */) { assert(this->index < this->num_variables); diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp index d5ca23a38..2c928e144 100644 --- a/mesalib/src/glsl/link_uniforms.cpp +++ b/mesalib/src/glsl/link_uniforms.cpp @@ -544,7 +544,7 @@ private: assert(!"Should not get here."); } - virtual void enter_record(const glsl_type *type, const char *name, + virtual void enter_record(const glsl_type *type, const char *, bool row_major) { assert(type->is_record()); if (this->ubo_block_index == -1) @@ -553,7 +553,7 @@ private: this->ubo_byte_offset, type->std140_base_alignment(row_major)); } - virtual void leave_record(const glsl_type *type, const char *name, + virtual void leave_record(const glsl_type *type, const char *, bool 
row_major) { assert(type->is_record()); if (this->ubo_block_index == -1) @@ -564,7 +564,7 @@ private: virtual void visit_field(const glsl_type *type, const char *name, bool row_major, const glsl_type *record_type, - bool last_field) + bool /* last_field */) { assert(!type->without_array()->is_record()); assert(!type->without_array()->is_interface()); diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp index 21fde9444..ea73c6f9d 100644 --- a/mesalib/src/glsl/linker.cpp +++ b/mesalib/src/glsl/linker.cpp @@ -1413,6 +1413,8 @@ link_fs_input_layout_qualifiers(struct gl_shader_program *prog, linked_shader->origin_upper_left = shader->origin_upper_left; linked_shader->pixel_center_integer = shader->pixel_center_integer; } + + linked_shader->EarlyFragmentTests |= shader->EarlyFragmentTests; } } @@ -1975,6 +1977,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog, } to_assign[16]; unsigned num_attr = 0; + unsigned total_attribs_size = 0; foreach_in_list(ir_instruction, node, sh->ir) { ir_variable *const var = node->as_variable(); @@ -2016,12 +2019,41 @@ assign_attribute_or_color_locations(gl_shader_program *prog, } } + const unsigned slots = var->type->count_attribute_slots(); + + /* From GL4.5 core spec, section 11.1.1 (Vertex Attributes): + * + * "A program with more than the value of MAX_VERTEX_ATTRIBS active + * attribute variables may fail to link, unless device-dependent + * optimizations are able to make the program fit within available + * hardware resources. For the purposes of this test, attribute variables + * of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, + * and dmat4 may count as consuming twice as many attributes as equivalent + * single-precision types. 
While these types use the same number of + * generic attributes as their single-precision equivalents, + * implementations are permitted to consume two single-precision vectors + * of internal storage for each three- or four-component double-precision + * vector." + * Until someone has a good reason in Mesa, enforce that now. + */ + if (target_index == MESA_SHADER_VERTEX) { + total_attribs_size += slots; + if (var->type->without_array() == glsl_type::dvec3_type || + var->type->without_array() == glsl_type::dvec4_type || + var->type->without_array() == glsl_type::dmat2x3_type || + var->type->without_array() == glsl_type::dmat2x4_type || + var->type->without_array() == glsl_type::dmat3_type || + var->type->without_array() == glsl_type::dmat3x4_type || + var->type->without_array() == glsl_type::dmat4x3_type || + var->type->without_array() == glsl_type::dmat4_type) + total_attribs_size += slots; + } + /* If the variable is not a built-in and has a location statically * assigned in the shader (presumably via a layout qualifier), make sure * that it doesn't collide with other assigned locations. Otherwise, * add it to the list of variables that need linker-assigned locations. */ - const unsigned slots = var->type->count_attribute_slots(); if (var->data.location != -1) { if (var->data.location >= generic_base && var->data.index < 1) { /* From page 61 of the OpenGL 4.0 spec: @@ -2141,6 +2173,15 @@ assign_attribute_or_color_locations(gl_shader_program *prog, num_attr++; } + if (target_index == MESA_SHADER_VERTEX) { + if (total_attribs_size > max_index) { + linker_error(prog, + "attempt to use %d vertex attribute slots only %d available ", + total_attribs_size, max_index); + return false; + } + } + /* If all of the attributes were assigned locations by the application (or * are built-in attributes with fixed locations), return early. This should * be the common case. 
@@ -2556,6 +2597,7 @@ add_interface_variables(struct gl_shader_program *shProg, { foreach_in_list(ir_instruction, node, sh->ir) { ir_variable *var = node->as_variable(); + uint8_t mask = 0; if (!var) continue; @@ -2571,6 +2613,10 @@ add_interface_variables(struct gl_shader_program *shProg, var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE && var->data.location != SYSTEM_VALUE_INSTANCE_ID) continue; + /* Mark special built-in inputs referenced by the vertex stage so + * that they are considered active by the shader queries. + */ + mask = (1 << (MESA_SHADER_VERTEX)); /* FALLTHROUGH */ case ir_var_shader_in: if (programInterface != GL_PROGRAM_INPUT) @@ -2585,7 +2631,7 @@ add_interface_variables(struct gl_shader_program *shProg, }; if (!add_program_resource(shProg, programInterface, var, - build_stageref(shProg, var->name))) + build_stageref(shProg, var->name) | mask)) return false; } return true; diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp index f6b8331d4..af758ceb0 100644 --- a/mesalib/src/glsl/nir/glsl_to_nir.cpp +++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp @@ -614,27 +614,135 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_atomic_counter_inc_var; } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) { op = nir_intrinsic_atomic_counter_dec_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) { + op = nir_intrinsic_image_load; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) { + op = nir_intrinsic_image_store; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) { + op = nir_intrinsic_image_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) { + op = nir_intrinsic_image_atomic_min; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) { + op = nir_intrinsic_image_atomic_max; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) { + op = 
nir_intrinsic_image_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) { + op = nir_intrinsic_image_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) { + op = nir_intrinsic_image_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) { + op = nir_intrinsic_image_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) { + op = nir_intrinsic_image_atomic_comp_swap; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) { + op = nir_intrinsic_memory_barrier; } else { unreachable("not reached"); } nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); - ir_dereference *param = - (ir_dereference *) ir->actual_parameters.get_head(); - instr->variables[0] = evaluate_deref(&instr->instr, param); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + + switch (op) { + case nir_intrinsic_atomic_counter_read_var: + case nir_intrinsic_atomic_counter_inc_var: + case nir_intrinsic_atomic_counter_dec_var: { + ir_dereference *param = + (ir_dereference *) ir->actual_parameters.get_head(); + instr->variables[0] = evaluate_deref(&instr->instr, param); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + break; + } + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: { + nir_ssa_undef_instr *instr_undef = + nir_ssa_undef_instr_create(shader, 1); + nir_instr_insert_after_cf_list(this->cf_node_list, + &instr_undef->instr); + + /* Set the image variable dereference. 
*/ + exec_node *param = ir->actual_parameters.get_head(); + ir_dereference *image = (ir_dereference *)param; + const glsl_type *type = + image->variable_referenced()->type->without_array(); + + instr->variables[0] = evaluate_deref(&instr->instr, image); + param = param->get_next(); + + /* Set the address argument, extending the coordinate vector to four + * components. + */ + const nir_src src_addr = evaluate_rvalue((ir_dereference *)param); + nir_alu_instr *instr_addr = nir_alu_instr_create(shader, nir_op_vec4); + nir_ssa_dest_init(&instr_addr->instr, &instr_addr->dest.dest, 4, NULL); + + for (int i = 0; i < 4; i++) { + if (i < type->coordinate_components()) { + instr_addr->src[i].src = src_addr; + instr_addr->src[i].swizzle[0] = i; + } else { + instr_addr->src[i].src = nir_src_for_ssa(&instr_undef->def); + } + } + + nir_instr_insert_after_cf_list(cf_node_list, &instr_addr->instr); + instr->src[0] = nir_src_for_ssa(&instr_addr->dest.dest.ssa); + param = param->get_next(); + + /* Set the sample argument, which is undefined for single-sample + * images. + */ + if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { + instr->src[1] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } else { + instr->src[1] = nir_src_for_ssa(&instr_undef->def); + } + + /* Set the intrinsic parameters. */ + if (!param->is_tail_sentinel()) { + instr->src[2] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } + + if (!param->is_tail_sentinel()) { + instr->src[3] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } + + /* Set the intrinsic destination. 
*/ + if (ir->return_deref) + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + break; + } + case nir_intrinsic_memory_barrier: + break; + default: + unreachable("not reached"); + } nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); - nir_intrinsic_instr *store_instr = - nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); - store_instr->num_components = 1; + if (ir->return_deref) { + nir_intrinsic_instr *store_instr = + nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); + store_instr->num_components = ir->return_deref->type->vector_elements; - store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref); - store_instr->src[0].is_ssa = true; - store_instr->src[0].ssa = &instr->dest.ssa; + store_instr->variables[0] = + evaluate_deref(&store_instr->instr, ir->return_deref); + store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa); - nir_instr_insert_after_cf_list(this->cf_node_list, &store_instr->instr); + nir_instr_insert_after_cf_list(this->cf_node_list, + &store_instr->instr); + } return; } @@ -824,7 +932,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir) nir_dest *dest = get_instr_dest(this->result); assert(dest->is_ssa); - nir_src src; + nir_src src = NIR_SRC_INIT; src.is_ssa = true; src.ssa = &dest->ssa; @@ -1038,8 +1146,8 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_rcp: emit(nir_op_frcp, dest_size, srcs); break; case ir_unop_rsq: emit(nir_op_frsq, dest_size, srcs); break; case ir_unop_sqrt: emit(nir_op_fsqrt, dest_size, srcs); break; - case ir_unop_exp: emit(nir_op_fexp, dest_size, srcs); break; - case ir_unop_log: emit(nir_op_flog, dest_size, srcs); break; + case ir_unop_exp: unreachable("ir_unop_exp should have been lowered"); + case ir_unop_log: unreachable("ir_unop_log should have been lowered"); case ir_unop_exp2: emit(nir_op_fexp2, dest_size, srcs); break; case ir_unop_log2: emit(nir_op_flog2, dest_size, srcs); break; case ir_unop_i2f: diff 
--git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c index 4cc074b80..f03e80a4e 100644 --- a/mesalib/src/glsl/nir/nir.c +++ b/mesalib/src/glsl/nir/nir.c @@ -58,12 +58,9 @@ reg_create(void *mem_ctx, struct exec_list *list) nir_register *reg = ralloc(mem_ctx, nir_register); reg->parent_instr = NULL; - reg->uses = _mesa_set_create(reg, _mesa_hash_pointer, - _mesa_key_pointer_equal); - reg->defs = _mesa_set_create(reg, _mesa_hash_pointer, - _mesa_key_pointer_equal); - reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer, - _mesa_key_pointer_equal); + list_inithead(&reg->uses); + list_inithead(&reg->defs); + list_inithead(&reg->if_uses); reg->num_components = 0; reg->num_array_elems = 0; @@ -1070,11 +1067,14 @@ update_if_uses(nir_cf_node *node) nir_if *if_stmt = nir_cf_node_as_if(node); - struct set *if_uses_set = if_stmt->condition.is_ssa ? - if_stmt->condition.ssa->if_uses : - if_stmt->condition.reg.reg->uses; - - _mesa_set_add(if_uses_set, if_stmt); + if_stmt->condition.parent_if = if_stmt; + if (if_stmt->condition.is_ssa) { + list_addtail(&if_stmt->condition.use_link, + &if_stmt->condition.ssa->if_uses); + } else { + list_addtail(&if_stmt->condition.use_link, + &if_stmt->condition.reg.reg->if_uses); + } } void @@ -1227,16 +1227,7 @@ cleanup_cf_node(nir_cf_node *node) foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) cleanup_cf_node(child); - struct set *if_uses; - if (if_stmt->condition.is_ssa) { - if_uses = if_stmt->condition.ssa->if_uses; - } else { - if_uses = if_stmt->condition.reg.reg->if_uses; - } - - struct set_entry *entry = _mesa_set_search(if_uses, if_stmt); - assert(entry); - _mesa_set_remove(if_uses, entry); + list_del(&if_stmt->condition.use_link); break; } @@ -1293,9 +1284,9 @@ add_use_cb(nir_src *src, void *state) { nir_instr *instr = state; - struct set *uses_set = src->is_ssa ? src->ssa->uses : src->reg.reg->uses; - - _mesa_set_add(uses_set, instr); + src->parent_instr = instr; + list_addtail(&src->use_link, + src->is_ssa ? 
&src->ssa->uses : &src->reg.reg->uses); return true; } @@ -1320,8 +1311,10 @@ add_reg_def_cb(nir_dest *dest, void *state) { nir_instr *instr = state; - if (!dest->is_ssa) - _mesa_set_add(dest->reg.reg->defs, instr); + if (!dest->is_ssa) { + dest->reg.parent_instr = instr; + list_addtail(&dest->reg.def_link, &dest->reg.reg->defs); + } return true; } @@ -1436,13 +1429,7 @@ nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) static bool remove_use_cb(nir_src *src, void *state) { - nir_instr *instr = state; - - struct set *uses_set = src->is_ssa ? src->ssa->uses : src->reg.reg->uses; - - struct set_entry *entry = _mesa_set_search(uses_set, instr); - if (entry) - _mesa_set_remove(uses_set, entry); + list_del(&src->use_link); return true; } @@ -1450,16 +1437,8 @@ remove_use_cb(nir_src *src, void *state) static bool remove_def_cb(nir_dest *dest, void *state) { - nir_instr *instr = state; - - if (dest->is_ssa) - return true; - - nir_register *reg = dest->reg.reg; - - struct set_entry *entry = _mesa_set_search(reg->defs, instr); - if (entry) - _mesa_set_remove(reg->defs, entry); + if (!dest->is_ssa) + list_del(&dest->reg.def_link); return true; } @@ -1834,64 +1813,77 @@ nir_srcs_equal(nir_src src1, nir_src src2) } static bool -src_does_not_use_def(nir_src *src, void *void_def) +src_is_valid(const nir_src *src) { - nir_ssa_def *def = void_def; - - if (src->is_ssa) { - return src->ssa != def; - } else { - return true; - } + return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL); } -static bool -src_does_not_use_reg(nir_src *src, void *void_reg) +static void +src_remove_all_uses(nir_src *src) { - nir_register *reg = void_reg; + for (; src; src = src->is_ssa ? 
NULL : src->reg.indirect) { + if (!src_is_valid(src)) + continue; - if (src->is_ssa) { - return true; - } else { - return src->reg.reg != reg; + list_del(&src->use_link); + } +} + +static void +src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) +{ + for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { + if (!src_is_valid(src)) + continue; + + if (parent_instr) { + src->parent_instr = parent_instr; + if (src->is_ssa) + list_addtail(&src->use_link, &src->ssa->uses); + else + list_addtail(&src->use_link, &src->reg.reg->uses); + } else { + assert(parent_if); + src->parent_if = parent_if; + if (src->is_ssa) + list_addtail(&src->use_link, &src->ssa->if_uses); + else + list_addtail(&src->use_link, &src->reg.reg->if_uses); + } } } void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src) { - nir_src old_src = *src; + assert(!src_is_valid(src) || src->parent_instr == instr); + + src_remove_all_uses(src); *src = new_src; + src_add_all_uses(src, instr, NULL); +} - for (nir_src *iter_src = &old_src; iter_src; - iter_src = iter_src->is_ssa ? NULL : iter_src->reg.indirect) { - if (iter_src->is_ssa) { - nir_ssa_def *ssa = iter_src->ssa; - if (ssa && nir_foreach_src(instr, src_does_not_use_def, ssa)) { - struct set_entry *entry = _mesa_set_search(ssa->uses, instr); - assert(entry); - _mesa_set_remove(ssa->uses, entry); - } - } else { - nir_register *reg = iter_src->reg.reg; - if (reg && nir_foreach_src(instr, src_does_not_use_reg, reg)) { - struct set_entry *entry = _mesa_set_search(reg->uses, instr); - assert(entry); - _mesa_set_remove(reg->uses, entry); - } - } - } +void +nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src) +{ + assert(!src_is_valid(dest) || dest->parent_instr == dest_instr); - for (nir_src *iter_src = &new_src; iter_src; - iter_src = iter_src->is_ssa ? 
NULL : iter_src->reg.indirect) { - if (iter_src->is_ssa) { - if (iter_src->ssa) - _mesa_set_add(iter_src->ssa->uses, instr); - } else { - if (iter_src->reg.reg) - _mesa_set_add(iter_src->reg.reg->uses, instr); - } - } + src_remove_all_uses(dest); + src_remove_all_uses(src); + *dest = *src; + *src = NIR_SRC_INIT; + src_add_all_uses(dest, dest_instr, NULL); +} + +void +nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src) +{ + nir_src *src = &if_stmt->condition; + assert(!src_is_valid(src) || src->parent_if == if_stmt); + + src_remove_all_uses(src); + *src = new_src; + src_add_all_uses(src, NULL, if_stmt); } void @@ -1900,10 +1892,8 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, { def->name = name; def->parent_instr = instr; - def->uses = _mesa_set_create(instr, _mesa_hash_pointer, - _mesa_key_pointer_equal); - def->if_uses = _mesa_set_create(instr, _mesa_hash_pointer, - _mesa_key_pointer_equal); + list_inithead(&def->uses); + list_inithead(&def->if_uses); def->num_components = num_components; if (instr->block) { @@ -1924,57 +1914,23 @@ nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, nir_ssa_def_init(instr, &dest->ssa, num_components, name); } -struct ssa_def_rewrite_state { - void *mem_ctx; - nir_ssa_def *old; - nir_src new_src; -}; - -static bool -ssa_def_rewrite_uses_src(nir_src *src, void *void_state) -{ - struct ssa_def_rewrite_state *state = void_state; - - if (src->is_ssa && src->ssa == state->old) - nir_src_copy(src, &state->new_src, state->mem_ctx); - - return true; -} - void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx) { - struct ssa_def_rewrite_state state; - state.mem_ctx = mem_ctx; - state.old = def; - state.new_src = new_src; - assert(!new_src.is_ssa || def != new_src.ssa); - struct set *new_uses, *new_if_uses; - if (new_src.is_ssa) { - new_uses = new_src.ssa->uses; - new_if_uses = new_src.ssa->if_uses; - } else { - new_uses = new_src.reg.reg->uses; - new_if_uses = new_src.reg.reg->if_uses; - } - - struct 
set_entry *entry; - set_foreach(def->uses, entry) { - nir_instr *instr = (nir_instr *)entry->key; - - _mesa_set_remove(def->uses, entry); - nir_foreach_src(instr, ssa_def_rewrite_uses_src, &state); - _mesa_set_add(new_uses, instr); + nir_foreach_use_safe(def, use_src) { + nir_instr *src_parent_instr = use_src->parent_instr; + list_del(&use_src->use_link); + nir_src_copy(use_src, &new_src, mem_ctx); + src_add_all_uses(use_src, src_parent_instr, NULL); } - set_foreach(def->if_uses, entry) { - nir_if *if_use = (nir_if *)entry->key; - - _mesa_set_remove(def->if_uses, entry); - nir_src_copy(&if_use->condition, &new_src, mem_ctx); - _mesa_set_add(new_if_uses, if_use); + nir_foreach_if_use_safe(def, use_src) { + nir_if *src_parent_if = use_src->parent_if; + list_del(&use_src->use_link); + nir_src_copy(use_src, &new_src, mem_ctx); + src_add_all_uses(use_src, NULL, src_parent_if); } } diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h index 98b0ec328..697d37e95 100644 --- a/mesalib/src/glsl/nir/nir.h +++ b/mesalib/src/glsl/nir/nir.h @@ -30,6 +30,7 @@ #include "util/hash_table.h" #include "../list.h" #include "GL/gl.h" /* GLenum */ +#include "util/list.h" #include "util/ralloc.h" #include "util/set.h" #include "util/bitset.h" @@ -397,13 +398,13 @@ typedef struct { struct nir_instr *parent_instr; /** set of nir_instr's where this register is used (read from) */ - struct set *uses; + struct list_head uses; /** set of nir_instr's where this register is defined (written to) */ - struct set *defs; + struct list_head defs; /** set of nir_if's where this register is used as a condition */ - struct set *if_uses; + struct list_head if_uses; } nir_register; typedef enum { @@ -462,10 +463,10 @@ typedef struct { nir_instr *parent_instr; /** set of nir_instr's where this register is used (read from) */ - struct set *uses; + struct list_head uses; /** set of nir_if's where this register is used as a condition */ - struct set *if_uses; + struct list_head if_uses; uint8_t 
num_components; } nir_ssa_def; @@ -481,6 +482,9 @@ typedef struct { } nir_reg_src; typedef struct { + nir_instr *parent_instr; + struct list_head def_link; + nir_register *reg; struct nir_src *indirect; /** < NULL for no indirect offset */ unsigned base_offset; @@ -488,8 +492,17 @@ typedef struct { /* TODO def-use chain goes here */ } nir_reg_dest; +struct nir_if; + typedef struct nir_src { union { + nir_instr *parent_instr; + struct nir_if *parent_if; + }; + + struct list_head use_link; + + union { nir_reg_src reg; nir_ssa_def *ssa; }; @@ -497,6 +510,20 @@ typedef struct nir_src { bool is_ssa; } nir_src; +#define NIR_SRC_INIT (nir_src) { { NULL } } + +#define nir_foreach_use(reg_or_ssa_def, src) \ + list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) + +#define nir_foreach_use_safe(reg_or_ssa_def, src) \ + list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) + +#define nir_foreach_if_use(reg_or_ssa_def, src) \ + list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) + +#define nir_foreach_if_use_safe(reg_or_ssa_def, src) \ + list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) + typedef struct { union { nir_reg_dest reg; @@ -506,10 +533,18 @@ typedef struct { bool is_ssa; } nir_dest; +#define NIR_DEST_INIT (nir_dest) { { { NULL } } } + +#define nir_foreach_def(reg, dest) \ + list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) + +#define nir_foreach_def_safe(reg, dest) \ + list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) + static inline nir_src nir_src_for_ssa(nir_ssa_def *def) { - nir_src src; + nir_src src = NIR_SRC_INIT; src.is_ssa = true; src.ssa = def; @@ -520,7 +555,7 @@ nir_src_for_ssa(nir_ssa_def *def) static inline nir_src nir_src_for_reg(nir_register *reg) { - nir_src src; + nir_src src = NIR_SRC_INIT; src.is_ssa = false; src.reg.reg = reg; @@ -543,12 +578,9 @@ nir_src_get_parent_instr(const nir_src *src) static inline nir_dest 
nir_dest_for_reg(nir_register *reg) { - nir_dest dest; + nir_dest dest = NIR_DEST_INIT; - dest.is_ssa = false; dest.reg.reg = reg; - dest.reg.indirect = NULL; - dest.reg.base_offset = 0; return dest; } @@ -1207,7 +1239,7 @@ nir_block_last_instr(nir_block *block) #define nir_foreach_instr_safe(block, instr) \ foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) -typedef struct { +typedef struct nir_if { nir_cf_node cf_node; nir_src condition; @@ -1548,6 +1580,8 @@ bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); nir_const_value *nir_src_as_const_value(nir_src src); bool nir_srcs_equal(nir_src src1, nir_src src2); void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); +void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); +void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, unsigned num_components, const char *name); diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h index d1419ee21..9223e8380 100644 --- a/mesalib/src/glsl/nir/nir_builder.h +++ b/mesalib/src/glsl/nir/nir_builder.h @@ -231,8 +231,7 @@ static inline nir_ssa_def * nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], unsigned num_components, bool use_fmov) { - nir_alu_src alu_src; - memset(&alu_src, 0, sizeof(alu_src)); + nir_alu_src alu_src = { NIR_SRC_INIT }; alu_src.src = nir_src_for_ssa(src); for (int i = 0; i < 4; i++) alu_src.swizzle[i] = swiz[i]; @@ -251,8 +250,7 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) if (src.is_ssa && src.ssa->num_components == num_components) return src.ssa; - nir_alu_src alu; - memset(&alu, 0, sizeof(alu)); + nir_alu_src alu = { NIR_SRC_INIT }; alu.src = src; for (int j = 0; j < 4; j++) alu.swizzle[j] = j; diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c index 6a3b141bd..67733e6da 100644 --- 
a/mesalib/src/glsl/nir/nir_from_ssa.c +++ b/mesalib/src/glsl/nir/nir_from_ssa.c @@ -37,7 +37,6 @@ struct from_ssa_state { void *mem_ctx; void *dead_ctx; - struct hash_table *ssa_table; struct hash_table *merge_node_table; nir_instr *instr; nir_function_impl *impl; @@ -344,45 +343,31 @@ isolate_phi_nodes_block(nir_block *block, void *void_state) get_parallel_copy_at_end_of_block(src->pred); assert(pcopy); - nir_parallel_copy_entry *entry = ralloc(state->dead_ctx, - nir_parallel_copy_entry); - exec_list_push_tail(&pcopy->entries, &entry->node); - - nir_src_copy(&entry->src, &src->src, state->dead_ctx); - _mesa_set_add(src->src.ssa->uses, &pcopy->instr); - + nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx, + nir_parallel_copy_entry); nir_ssa_dest_init(&pcopy->instr, &entry->dest, phi->dest.ssa.num_components, src->src.ssa->name); + exec_list_push_tail(&pcopy->entries, &entry->node); - struct set_entry *use_entry = - _mesa_set_search(src->src.ssa->uses, instr); - if (use_entry) - /* It is possible that a phi node can use the same source twice - * but for different basic blocks. If that happens, entry will - * be NULL because we already deleted it. This is safe - * because, by the time the loop is done, we will have deleted - * all of the sources of the phi from their respective use sets - * and moved them to the parallel copy definitions. 
- */ - _mesa_set_remove(src->src.ssa->uses, use_entry); + assert(src->src.is_ssa); + nir_instr_rewrite_src(&pcopy->instr, &entry->src, src->src); - src->src.ssa = &entry->dest.ssa; - _mesa_set_add(entry->dest.ssa.uses, instr); + nir_instr_rewrite_src(&phi->instr, &src->src, + nir_src_for_ssa(&entry->dest.ssa)); } - nir_parallel_copy_entry *entry = ralloc(state->dead_ctx, - nir_parallel_copy_entry); - exec_list_push_tail(&block_pcopy->entries, &entry->node); - + nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx, + nir_parallel_copy_entry); nir_ssa_dest_init(&block_pcopy->instr, &entry->dest, phi->dest.ssa.num_components, phi->dest.ssa.name); + exec_list_push_tail(&block_pcopy->entries, &entry->node); + nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(&entry->dest.ssa), state->mem_ctx); - entry->src.is_ssa = true; - entry->src.ssa = &phi->dest.ssa; - _mesa_set_add(phi->dest.ssa.uses, &block_pcopy->instr); + nir_instr_rewrite_src(&block_pcopy->instr, &entry->src, + nir_src_for_ssa(&phi->dest.ssa)); } return true; @@ -415,7 +400,7 @@ coalesce_phi_nodes_block(nir_block *block, void *void_state) } static void -agressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy, +aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy, struct from_ssa_state *state) { nir_foreach_parallel_copy_entry(pcopy, entry) { @@ -444,7 +429,7 @@ agressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy, } static bool -agressive_coalesce_block(nir_block *block, void *void_state) +aggressive_coalesce_block(nir_block *block, void *void_state) { struct from_ssa_state *state = void_state; @@ -457,7 +442,7 @@ agressive_coalesce_block(nir_block *block, void *void_state) start_pcopy = nir_instr_as_parallel_copy(instr); - agressive_coalesce_parallel_copy(start_pcopy, state); + aggressive_coalesce_parallel_copy(start_pcopy, state); break; } @@ -467,17 +452,21 @@ agressive_coalesce_block(nir_block *block, void *void_state) get_parallel_copy_at_end_of_block(block); if 
(end_pcopy && end_pcopy != start_pcopy) - agressive_coalesce_parallel_copy(end_pcopy, state); + aggressive_coalesce_parallel_copy(end_pcopy, state); return true; } -static nir_register * -get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state) +static bool +rewrite_ssa_def(nir_ssa_def *def, void *void_state) { + struct from_ssa_state *state = void_state; + nir_register *reg; + struct hash_entry *entry = _mesa_hash_table_search(state->merge_node_table, def); if (entry) { + /* In this case, we're part of a phi web. Use the web's register. */ merge_node *node = (merge_node *)entry->data; /* If it doesn't have a register yet, create one. Note that all of @@ -491,20 +480,15 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state) node->set->reg->num_array_elems = 0; } - return node->set->reg; - } - - entry = _mesa_hash_table_search(state->ssa_table, def); - if (entry) { - return (nir_register *)entry->data; + reg = node->set->reg; } else { /* We leave load_const SSA values alone. They act as immediates to * the backend. If it got coalesced into a phi, that's ok. 
*/ if (def->parent_instr->type == nir_instr_type_load_const) - return NULL; + return true; - nir_register *reg = nir_local_reg_create(state->impl); + reg = nir_local_reg_create(state->impl); reg->name = def->name; reg->num_components = def->num_components; reg->num_array_elems = 0; @@ -516,57 +500,24 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state) */ if (def->parent_instr->type != nir_instr_type_ssa_undef) reg->parent_instr = def->parent_instr; - - _mesa_hash_table_insert(state->ssa_table, def, reg); - return reg; } -} - -static bool -rewrite_ssa_src(nir_src *src, void *void_state) -{ - struct from_ssa_state *state = void_state; - if (src->is_ssa) { - nir_register *reg = get_register_for_ssa_def(src->ssa, state); + nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg), state->mem_ctx); + assert(list_empty(&def->uses) && list_empty(&def->if_uses)); - if (reg == NULL) { - assert(src->ssa->parent_instr->type == nir_instr_type_load_const); - return true; - } - - memset(src, 0, sizeof *src); - src->reg.reg = reg; - - /* We don't need to remove it from the uses set because that is going - * away. We just need to add it to the one for the register. */ - _mesa_set_add(reg->uses, state->instr); - } - - return true; -} - -static bool -rewrite_ssa_dest(nir_dest *dest, void *void_state) -{ - struct from_ssa_state *state = void_state; - - if (dest->is_ssa) { - nir_register *reg = get_register_for_ssa_def(&dest->ssa, state); - - if (reg == NULL) { - assert(dest->ssa.parent_instr->type == nir_instr_type_load_const); - return true; - } + if (def->parent_instr->type == nir_instr_type_ssa_undef) + return true; - _mesa_set_destroy(dest->ssa.uses, NULL); - _mesa_set_destroy(dest->ssa.if_uses, NULL); + assert(def->parent_instr->type != nir_instr_type_load_const); - memset(dest, 0, sizeof *dest); - dest->reg.reg = reg; + /* At this point we know a priori that this SSA def is part of a + * nir_dest. We can use exec_node_data to get the dest pointer. 
+ */ + nir_dest *dest = exec_node_data(nir_dest, def, ssa); - _mesa_set_add(reg->defs, state->instr); - } + *dest = nir_dest_for_reg(reg); + dest->reg.parent_instr = state->instr; + list_addtail(&dest->reg.def_link, &reg->defs); return true; } @@ -581,8 +532,7 @@ resolve_registers_block(nir_block *block, void *void_state) nir_foreach_instr_safe(block, instr) { state->instr = instr; - nir_foreach_src(instr, rewrite_ssa_src, state); - nir_foreach_dest(instr, rewrite_ssa_dest, state); + nir_foreach_ssa_def(instr, rewrite_ssa_def, state); if (instr->type == nir_instr_type_ssa_undef || instr->type == nir_instr_type_phi) { @@ -592,23 +542,6 @@ resolve_registers_block(nir_block *block, void *void_state) } state->instr = NULL; - nir_if *following_if = nir_block_get_following_if(block); - if (following_if && following_if->condition.is_ssa) { - nir_register *reg = get_register_for_ssa_def(following_if->condition.ssa, - state); - if (reg) { - memset(&following_if->condition, 0, sizeof following_if->condition); - following_if->condition.reg.reg = reg; - - _mesa_set_add(reg->if_uses, following_if); - } else { - /* FIXME: We really shouldn't hit this. We should be doing - * constant control flow propagation. 
- */ - assert(following_if->condition.ssa->parent_instr->type == nir_instr_type_load_const); - } - } - return true; } @@ -853,10 +786,8 @@ nir_convert_from_ssa_impl(nir_function_impl *impl) nir_metadata_dominance); nir_foreach_block(impl, coalesce_phi_nodes_block, &state); - nir_foreach_block(impl, agressive_coalesce_block, &state); + nir_foreach_block(impl, aggressive_coalesce_block, &state); - state.ssa_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); nir_foreach_block(impl, resolve_registers_block, &state); nir_foreach_block(impl, resolve_parallel_copies_block, &state); @@ -865,7 +796,6 @@ nir_convert_from_ssa_impl(nir_function_impl *impl) nir_metadata_dominance); /* Clean up dead instructions and the hash tables */ - _mesa_hash_table_destroy(state.ssa_table, NULL); _mesa_hash_table_destroy(state.merge_node_table, NULL); ralloc_free(state.dead_ctx); } diff --git a/mesalib/src/glsl/nir/nir_intrinsics.h b/mesalib/src/glsl/nir/nir_intrinsics.h index 8e28765c1..10192c531 100644 --- a/mesalib/src/glsl/nir/nir_intrinsics.h +++ b/mesalib/src/glsl/nir/nir_intrinsics.h @@ -68,6 +68,13 @@ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, #define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0) BARRIER(discard) + +/* + * Memory barrier with semantics analogous to the memoryBarrier() GLSL + * intrinsic. + */ +BARRIER(memory_barrier) + /** A conditional discard, with a single boolean source. */ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) @@ -89,6 +96,33 @@ ATOMIC(inc, 0) ATOMIC(dec, 0) ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE) +/* + * Image load, store and atomic intrinsics. + * + * All image intrinsics take an image target passed as a nir_variable. Image + * variables contain a number of memory and layout qualifiers that influence + * the semantics of the intrinsic. 
+ * + * All image intrinsics take a four-coordinate vector and a sample index as + * first two sources, determining the location within the image that will be + * accessed by the intrinsic. Components not applicable to the image target + * in use are undefined. Image store takes an additional four-component + * argument with the value to be written, and image atomic operations take + * either one or two additional scalar arguments with the same meaning as in + * the ARB_shader_image_load_store specification. + */ +INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0) +INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0) + #define SYSTEM_VALUE(name, components) \ INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) diff --git a/mesalib/src/glsl/nir/nir_lower_atomics.c b/mesalib/src/glsl/nir/nir_lower_atomics.c index e82df0169..f6f89020f 100644 --- a/mesalib/src/glsl/nir/nir_lower_atomics.c +++ b/mesalib/src/glsl/nir/nir_lower_atomics.c @@ -78,7 +78,8 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) nir_deref_as_array(instr->variables[0]->deref.child); assert(deref_array->deref.child == NULL); - offset_const->value.u[0] += deref_array->base_offset; + offset_const->value.u[0] += + deref_array->base_offset * ATOMIC_COUNTER_SIZE; if (deref_array->deref_array_type == nir_deref_array_type_indirect) { nir_load_const_instr *atomic_counter_size = 
diff --git a/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c index bc6a3d320..28fdec50e 100644 --- a/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c +++ b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c @@ -269,18 +269,16 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) static nir_block * compute_reg_usedef_lca(nir_register *reg) { - struct set_entry *entry; nir_block *lca = NULL; - set_foreach(reg->defs, entry) - lca = nir_dominance_lca(lca, ((nir_instr *)entry->key)->block); + list_for_each_entry(nir_dest, def_dest, &reg->defs, reg.def_link) + lca = nir_dominance_lca(lca, def_dest->reg.parent_instr->block); - set_foreach(reg->uses, entry) - lca = nir_dominance_lca(lca, ((nir_instr *)entry->key)->block); + list_for_each_entry(nir_src, use_src, &reg->uses, use_link) + lca = nir_dominance_lca(lca, use_src->parent_instr->block); - set_foreach(reg->if_uses, entry) { - nir_if *if_stmt = (nir_if *)entry->key; - nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node); + list_for_each_entry(nir_src, use_src, &reg->if_uses, use_link) { + nir_cf_node *prev_node = nir_cf_node_prev(&use_src->parent_if->cf_node); assert(prev_node->type == nir_cf_node_block); lca = nir_dominance_lca(lca, nir_cf_node_as_block(prev_node)); } diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp index cf8ab8325..7a0b0a09f 100644 --- a/mesalib/src/glsl/nir/nir_lower_samplers.cpp +++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp @@ -70,44 +70,45 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr case nir_deref_type_array: { nir_deref_array *deref_array = nir_deref_as_array(deref->child); + assert(deref_array->deref_array_type != nir_deref_array_type_wildcard); + + if (deref_array->deref.child) { + ralloc_asprintf_append(&name, "[%u]", + deref_array->deref_array_type == nir_deref_array_type_direct ? 
+ deref_array->base_offset : 0); + } else { + assert(deref->child->type->base_type == GLSL_TYPE_SAMPLER); + instr->sampler_index = deref_array->base_offset; + } + /* XXX: We're assuming here that the indirect is the last array * thing we have. This should be ok for now as we don't support * arrays_of_arrays yet. */ - - instr->sampler_index *= glsl_get_length(deref->type); - switch (deref_array->deref_array_type) { - case nir_deref_array_type_direct: - instr->sampler_index += deref_array->base_offset; - if (deref_array->deref.child) - ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset); - break; - case nir_deref_array_type_indirect: { - instr->src = reralloc(instr, instr->src, nir_tex_src, - instr->num_srcs + 1); - memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src); + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + /* First, we have to resize the array of texture sources */ + nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, + instr->num_srcs + 1); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + new_srcs[i].src_type = instr->src[i].src_type; + nir_instr_move_src(&instr->instr, &new_srcs[i].src, + &instr->src[i].src); + } + + ralloc_free(instr->src); + instr->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. 
+ */ instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; instr->num_srcs++; - - nir_instr_rewrite_src(&instr->instr, - &instr->src[instr->num_srcs - 1].src, - deref_array->indirect); + nir_instr_move_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + &deref_array->indirect); instr->sampler_array_size = glsl_get_length(deref->type); - - nir_src empty; - memset(&empty, 0, sizeof empty); - nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, empty); - - if (deref_array->deref.child) - ralloc_strcat(&name, "[0]"); - break; - } - - case nir_deref_array_type_wildcard: - unreachable("Cannot copy samplers"); - default: - unreachable("Invalid deref array type"); } break; } diff --git a/mesalib/src/glsl/nir/nir_lower_tex_projector.c b/mesalib/src/glsl/nir/nir_lower_tex_projector.c index 6b0e9c340..357131cd7 100644 --- a/mesalib/src/glsl/nir/nir_lower_tex_projector.c +++ b/mesalib/src/glsl/nir/nir_lower_tex_projector.c @@ -109,12 +109,12 @@ nir_lower_tex_projector_block(nir_block *block, void *void_state) /* Now move the later tex sources down the array so that the projector * disappears. 
*/ - nir_src dead; - memset(&dead, 0, sizeof dead); - nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead); - memmove(&tex->src[proj_index], - &tex->src[proj_index + 1], - (tex->num_srcs - proj_index) * sizeof(*tex->src)); + nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, + NIR_SRC_INIT); + for (int i = proj_index + 1; i < tex->num_srcs; i++) { + tex->src[i-1].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src); + } tex->num_srcs--; } diff --git a/mesalib/src/glsl/nir/nir_lower_to_source_mods.c b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c index 7b4a0f657..94c7e36d4 100644 --- a/mesalib/src/glsl/nir/nir_lower_to_source_mods.c +++ b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c @@ -88,8 +88,8 @@ nir_lower_to_source_mods_block(nir_block *block, void *state) alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]]; } - if (parent->dest.dest.ssa.uses->entries == 0 && - parent->dest.dest.ssa.if_uses->entries == 0) + if (list_empty(&parent->dest.dest.ssa.uses) && + list_empty(&parent->dest.dest.ssa.if_uses)) nir_instr_remove(&parent->instr); } @@ -131,13 +131,13 @@ nir_lower_to_source_mods_block(nir_block *block, void *state) if (nir_op_infos[alu->op].output_type != nir_type_float) continue; - if (alu->dest.dest.ssa.if_uses->entries != 0) + if (!list_empty(&alu->dest.dest.ssa.if_uses)) continue; bool all_children_are_sat = true; - struct set_entry *entry; - set_foreach(alu->dest.dest.ssa.uses, entry) { - const nir_instr *child = entry->key; + nir_foreach_use(&alu->dest.dest.ssa, child_src) { + assert(child_src->is_ssa); + nir_instr *child = child_src->parent_instr; if (child->type != nir_instr_type_alu) { all_children_are_sat = false; continue; @@ -161,8 +161,12 @@ nir_lower_to_source_mods_block(nir_block *block, void *state) alu->dest.saturate = true; - set_foreach(alu->dest.dest.ssa.uses, entry) { - nir_alu_instr *child_alu = nir_instr_as_alu((nir_instr *)entry->key); 
+ nir_foreach_use(&alu->dest.dest.ssa, child_src) { + assert(child_src->is_ssa); + nir_instr *child = child_src->parent_instr; + assert(child->type == nir_instr_type_alu); + nir_alu_instr *child_alu = nir_instr_as_alu(child); + child_alu->op = nir_op_fmov; child_alu->dest.saturate = false; /* We could propagate the dest of our instruction to the diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c index bb60f4601..ccb8f99df 100644 --- a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -567,10 +567,11 @@ add_phi_sources(nir_block *block, nir_block *pred, nir_phi_src *src = ralloc(phi, nir_phi_src); src->pred = pred; + src->src.parent_instr = &phi->instr; src->src.is_ssa = true; src->src.ssa = get_ssa_def_for_block(node, pred, state); - _mesa_set_add(src->src.ssa->uses, instr); + list_addtail(&src->src.use_link, &src->src.ssa->uses); exec_list_push_tail(&phi->srcs, &src->node); } diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py index 264806f5d..56e96d912 100644 --- a/mesalib/src/glsl/nir/nir_opcodes.py +++ b/mesalib/src/glsl/nir/nir_opcodes.py @@ -153,8 +153,6 @@ unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)") unop("frcp", tfloat, "1.0f / src0") unop("frsq", tfloat, "1.0f / sqrtf(src0)") unop("fsqrt", tfloat, "sqrtf(src0)") -unop("fexp", tfloat, "expf(src0)") # < e^x -unop("flog", tfloat, "logf(src0)") # log base e unop("fexp2", tfloat, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. 
diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py index 2a2b9561e..fa039222f 100644 --- a/mesalib/src/glsl/nir/nir_opt_algebraic.py +++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py @@ -83,24 +83,37 @@ optimizations = [ # Comparison simplifications (('inot', ('flt', a, b)), ('fge', a, b)), (('inot', ('fge', a, b)), ('flt', a, b)), + (('inot', ('feq', a, b)), ('fne', a, b)), + (('inot', ('fne', a, b)), ('feq', a, b)), (('inot', ('ilt', a, b)), ('ige', a, b)), (('inot', ('ige', a, b)), ('ilt', a, b)), + (('inot', ('ieq', a, b)), ('ine', a, b)), + (('inot', ('ine', a, b)), ('ieq', a, b)), (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)), (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)), (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)), (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)), (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)), + (('fmin', a, a), a), + (('fmax', a, a), a), + (('imin', a, a), a), + (('imax', a, a), a), + (('umin', a, a), a), + (('umax', a, a), a), (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'), (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), (('fsat', ('fsat', a)), ('fsat', a)), (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)), (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))), + (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)), (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))), + (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)), (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'), (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'), (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'), (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'), # Emulating booleans + (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))), (('fmul', ('b2f', a), ('b2f', b)), ('b2f', 
('iand', a, b))), (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), (('iand', 'a@bool', 1.0), ('b2f', a)), @@ -136,36 +149,23 @@ optimizations = [ (('ushr', a, 0), a), # Exponential/logarithmic identities (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a - (('fexp', ('flog', a)), a), # e^ln(a) = a (('flog2', ('fexp2', a)), a), # lg2(2^a) = a - (('flog', ('fexp', a)), a), # ln(e^a) = a (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b) (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b - (('fexp', ('fmul', ('flog', a), b)), ('fpow', a, b), '!options->lower_fpow'), # e^(ln(a)*b) = a^b (('fpow', a, 1.0), a), (('fpow', a, 2.0), ('fmul', a, a)), (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), (('fpow', 2.0, a), ('fexp2', a)), (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), - (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))), (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), - (('frcp', ('fexp', a)), ('fexp', ('fneg', a))), (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), - (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))), (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))), - (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))), (('flog2', ('frcp', a)), ('fneg', ('flog2', a))), - (('flog', ('frcp', a)), ('fneg', ('flog', a))), (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), - (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))), (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), - (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))), (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), - (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))), (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), - (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))), (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), - (('fmul', ('fexp', a), 
('fexp', b)), ('fexp', ('fadd', a, b))), # Division and reciprocal (('fdiv', 1.0, a), ('frcp', a)), (('frcp', ('frcp', a)), a), @@ -187,6 +187,7 @@ optimizations = [ (('fcsel', a, b, b), b), # Conversions + (('i2b', ('b2i', a)), a), (('f2i', ('ftrunc', a)), ('f2i', a)), (('f2u', ('ftrunc', a)), ('f2u', a)), diff --git a/mesalib/src/glsl/nir/nir_opt_copy_propagate.c b/mesalib/src/glsl/nir/nir_opt_copy_propagate.c index ee78e5aa0..71367d001 100644 --- a/mesalib/src/glsl/nir/nir_opt_copy_propagate.c +++ b/mesalib/src/glsl/nir/nir_opt_copy_propagate.c @@ -93,62 +93,6 @@ is_swizzleless_move(nir_alu_instr *instr) } } -typedef struct { - nir_ssa_def *def; - bool found; -} search_def_state; - -static bool -search_def(nir_src *src, void *_state) -{ - search_def_state *state = (search_def_state *) _state; - - if (src->is_ssa && src->ssa == state->def) - state->found = true; - - return true; -} - -static void -rewrite_src_instr(nir_src *src, nir_ssa_def *new_def, nir_instr *parent_instr) -{ - nir_ssa_def *old_def = src->ssa; - - src->ssa = new_def; - - /* - * The instruction could still use the old definition in one of its other - * sources, so only remove the instruction from the uses if there are no - * more uses left. 
- */ - - search_def_state search_state; - search_state.def = old_def; - search_state.found = false; - nir_foreach_src(parent_instr, search_def, &search_state); - if (!search_state.found) { - struct set_entry *entry = _mesa_set_search(old_def->uses, parent_instr); - assert(entry); - _mesa_set_remove(old_def->uses, entry); - } - - _mesa_set_add(new_def->uses, parent_instr); -} - -static void -rewrite_src_if(nir_if *if_stmt, nir_ssa_def *new_def) -{ - nir_ssa_def *old_def = if_stmt->condition.ssa; - - if_stmt->condition.ssa = new_def; - - struct set_entry *entry = _mesa_set_search(old_def->if_uses, if_stmt); - assert(entry); - _mesa_set_remove(old_def->if_uses, entry); - - _mesa_set_add(new_def->if_uses, if_stmt); -} - static bool copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) { @@ -178,10 +122,14 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) return false; } - if (parent_instr) - rewrite_src_instr(src, alu_instr->src[0].src.ssa, parent_instr); - else - rewrite_src_if(parent_if, alu_instr->src[0].src.ssa); + if (parent_instr) { + nir_instr_rewrite_src(parent_instr, src, + nir_src_for_ssa(alu_instr->src[0].src.ssa)); + } else { + assert(src == &parent_if->condition); + nir_if_rewrite_condition(parent_if, + nir_src_for_ssa(alu_instr->src[0].src.ssa)); + } return true; } @@ -234,7 +182,8 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) for (unsigned i = 0; i < 4; i++) src->swizzle[i] = new_swizzle[i]; - rewrite_src_instr(&src->src, def, &parent_alu_instr->instr); + nir_instr_rewrite_src(&parent_alu_instr->instr, &src->src, + nir_src_for_ssa(def)); return true; } diff --git a/mesalib/src/glsl/nir/nir_opt_gcm.c b/mesalib/src/glsl/nir/nir_opt_gcm.c index b4f5fd3d5..44068bf37 100644 --- a/mesalib/src/glsl/nir/nir_opt_gcm.c +++ b/mesalib/src/glsl/nir/nir_opt_gcm.c @@ -279,9 +279,8 @@ gcm_schedule_late_def(nir_ssa_def *def, void *void_state) nir_block *lca = NULL; - struct set_entry *entry; - 
set_foreach(def->uses, entry) { - nir_instr *use_instr = (nir_instr *)entry->key; + nir_foreach_use(def, use_src) { + nir_instr *use_instr = use_src->parent_instr; gcm_schedule_late_instr(use_instr, state); @@ -304,8 +303,8 @@ gcm_schedule_late_def(nir_ssa_def *def, void *void_state) } } - set_foreach(def->if_uses, entry) { - nir_if *if_stmt = (nir_if *)entry->key; + nir_foreach_if_use(def, use_src) { + nir_if *if_stmt = use_src->parent_if; /* For if statements, we consider the block to be the one immediately * preceding the if CF node. @@ -377,9 +376,8 @@ gcm_place_instr(nir_instr *instr, struct gcm_state *state); static bool gcm_place_instr_def(nir_ssa_def *def, void *state) { - struct set_entry *entry; - set_foreach(def->uses, entry) - gcm_place_instr((nir_instr *)entry->key, state); + nir_foreach_use(def, use_src) + gcm_place_instr(use_src->parent_instr, state); return false; } diff --git a/mesalib/src/glsl/nir/nir_opt_global_to_local.c b/mesalib/src/glsl/nir/nir_opt_global_to_local.c index 00db37ba7..bccb45b62 100644 --- a/mesalib/src/glsl/nir/nir_opt_global_to_local.c +++ b/mesalib/src/glsl/nir/nir_opt_global_to_local.c @@ -34,9 +34,8 @@ global_to_local(nir_register *reg) assert(reg->is_global); - struct set_entry *entry; - set_foreach(reg->defs, entry) { - nir_instr *instr = (nir_instr *) entry->key; + nir_foreach_def(reg, def_dest) { + nir_instr *instr = def_dest->reg.parent_instr; nir_function_impl *instr_impl = nir_cf_node_get_function(&instr->block->cf_node); if (impl != NULL) { @@ -47,8 +46,8 @@ global_to_local(nir_register *reg) } } - set_foreach(reg->uses, entry) { - nir_instr *instr = (nir_instr *) entry->key; + nir_foreach_use(reg, use_src) { + nir_instr *instr = use_src->parent_instr; nir_function_impl *instr_impl = nir_cf_node_get_function(&instr->block->cf_node); if (impl != NULL) { @@ -59,8 +58,8 @@ global_to_local(nir_register *reg) } } - set_foreach(reg->if_uses, entry) { - nir_if *if_stmt = (nir_if *) entry->key; + nir_foreach_if_use(reg, 
use_src) { + nir_if *if_stmt = use_src->parent_if; nir_function_impl *if_impl = nir_cf_node_get_function(&if_stmt->cf_node); if (impl != NULL) { if (impl != if_impl) diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c index 9d5646fe6..b430eac8e 100644 --- a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c +++ b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c @@ -41,12 +41,11 @@ struct peephole_ffma_state { static inline bool are_all_uses_fadd(nir_ssa_def *def) { - if (def->if_uses->entries > 0) + if (!list_empty(&def->if_uses)) return false; - struct set_entry *use_iter; - set_foreach(def->uses, use_iter) { - nir_instr *use_instr = (nir_instr *)use_iter->key; + nir_foreach_use(def, use_src) { + nir_instr *use_instr = use_src->parent_instr; if (use_instr->type != nir_instr_type_alu) return false; @@ -220,7 +219,7 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state) state->mem_ctx); nir_instr_insert_before(&add->instr, &ffma->instr); - assert(add->dest.dest.ssa.uses->entries == 0); + assert(list_empty(&add->dest.dest.ssa.uses)); nir_instr_remove(&add->instr); state->progress = true; diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c index f400cfd66..82c65bb44 100644 --- a/mesalib/src/glsl/nir/nir_opt_peephole_select.c +++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c @@ -98,15 +98,13 @@ block_check_for_allowed_instrs(nir_block *block) return false; /* It cannot have any if-uses */ - if (mov->dest.dest.ssa.if_uses->entries != 0) + if (!list_empty(&mov->dest.dest.ssa.if_uses)) return false; /* The only uses of this definition must be phi's in the successor */ - struct set_entry *entry; - set_foreach(mov->dest.dest.ssa.uses, entry) { - const nir_instr *dest_instr = entry->key; - if (dest_instr->type != nir_instr_type_phi || - dest_instr->block != block->successors[0]) + nir_foreach_use(&mov->dest.dest.ssa, use) { + if (use->parent_instr->type != 
nir_instr_type_phi || + use->parent_instr->block != block->successors[0]) return false; } break; diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c index 5ba016085..0c4e48ce9 100644 --- a/mesalib/src/glsl/nir/nir_search.c +++ b/mesalib/src/glsl/nir/nir_search.c @@ -73,6 +73,14 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, { uint8_t new_swizzle[4]; + /* If the source is an explicitly sized source, then we need to reset + * both the number of components and the swizzle. + */ + if (nir_op_infos[instr->op].input_sizes[src] != 0) { + num_components = nir_op_infos[instr->op].input_sizes[src]; + swizzle = identity_swizzle; + } + for (int i = 0; i < num_components; ++i) new_swizzle[i] = instr->src[src].swizzle[swizzle[i]]; @@ -90,6 +98,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, case nir_search_value_variable: { nir_search_variable *var = nir_search_value_as_variable(value); + assert(var->variable < NIR_SEARCH_MAX_VARIABLES); if (state->variables_seen & (1 << var->variable)) { if (!nir_srcs_equal(state->variables[var->variable].src, @@ -198,16 +207,13 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, } } + /* Stash off the current variables_seen bitmask. This way we can + * restore it prior to matching in the commutative case below. + */ + unsigned variables_seen_stash = state->variables_seen; + bool matched = true; for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { - /* If the source is an explicitly sized source, then we need to reset - * both the number of components and the swizzle. 
- */ - if (nir_op_infos[instr->op].input_sizes[i] != 0) { - num_components = nir_op_infos[instr->op].input_sizes[i]; - swizzle = identity_swizzle; - } - if (!match_value(expr->srcs[i], instr, i, num_components, swizzle, state)) { matched = false; @@ -220,6 +226,13 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { assert(nir_op_infos[instr->op].num_inputs == 2); + + /* Restore the variables_seen bitmask. If we don't do this, then we + * could end up with an erroneous failure due to variables found in the + * first match attempt above not matching those in the second. + */ + state->variables_seen = variables_seen_stash; + if (!match_value(expr->srcs[0], instr, 1, num_components, swizzle, state)) return false; @@ -276,7 +289,7 @@ construct_value(const nir_search_value *value, nir_alu_type type, const nir_search_variable *var = nir_search_value_as_variable(value); assert(state->variables_seen & (1 << var->variable)); - nir_alu_src val; + nir_alu_src val = { NIR_SRC_INIT }; nir_alu_src_copy(&val, &state->variables[var->variable], mem_ctx); assert(!var->is_constant); diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c index 53ff54766..a3c35fa04 100644 --- a/mesalib/src/glsl/nir/nir_to_ssa.c +++ b/mesalib/src/glsl/nir/nir_to_ssa.c @@ -89,9 +89,8 @@ insert_phi_nodes(nir_function_impl *impl) w_start = w_end = 0; iter_count++; - struct set_entry *entry; - set_foreach(reg->defs, entry) { - nir_instr *def = (nir_instr *) entry->key; + nir_foreach_def(reg, dest) { + nir_instr *def = dest->reg.parent_instr; if (work[def->block->index] < iter_count) W[w_end++] = def->block; work[def->block->index] = iter_count; @@ -99,6 +98,7 @@ insert_phi_nodes(nir_function_impl *impl) while (w_start != w_end) { nir_block *cur = W[w_start++]; + struct set_entry *entry; set_foreach(cur->dom_frontier, entry) { nir_block *next = (nir_block *) entry->key; @@ -190,13 
+190,12 @@ rewrite_use(nir_src *src, void *_state) if (state->states[index].stack == NULL) return true; - src->is_ssa = true; - src->ssa = get_ssa_src(src->reg.reg, state); - + nir_ssa_def *def = get_ssa_src(src->reg.reg, state); if (state->parent_instr) - _mesa_set_add(src->ssa->uses, state->parent_instr); + nir_instr_rewrite_src(state->parent_instr, src, nir_src_for_ssa(def)); else - _mesa_set_add(src->ssa->if_uses, state->parent_if); + nir_if_rewrite_condition(state->parent_if, nir_src_for_ssa(def)); + return true; } @@ -219,6 +218,7 @@ rewrite_def_forwards(nir_dest *dest, void *_state) name = ralloc_asprintf(state->mem_ctx, "%s_%u", dest->reg.reg->name, state->states[index].num_defs); + list_del(&dest->reg.def_link); nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name); /* push our SSA destination on the stack */ @@ -270,6 +270,7 @@ rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state) reg->name, state->states[index].num_defs); instr->dest.write_mask = (1 << num_components) - 1; + list_del(&instr->dest.dest.reg.def_link); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name); if (nir_op_infos[instr->op].output_size == 0) { @@ -484,7 +485,7 @@ init_rewrite_state(nir_function_impl *impl, rewrite_state *state) * called after phi nodes are inserted so we can count phi node * definitions too. 
*/ - unsigned stack_size = reg->defs->entries; + unsigned stack_size = list_length(&reg->defs); state->states[reg->index].stack = ralloc_array(state->states, nir_ssa_def *, diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c index a7aa79837..da92ed904 100644 --- a/mesalib/src/glsl/nir/nir_validate.c +++ b/mesalib/src/glsl/nir/nir_validate.c @@ -97,50 +97,47 @@ typedef struct { static void validate_src(nir_src *src, validate_state *state); static void -validate_reg_src(nir_reg_src *src, validate_state *state) +validate_reg_src(nir_src *src, validate_state *state) { - assert(src->reg != NULL); + assert(src->reg.reg != NULL); struct hash_entry *entry; - entry = _mesa_hash_table_search(state->regs, src->reg); + entry = _mesa_hash_table_search(state->regs, src->reg.reg); assert(entry); reg_validate_state *reg_state = (reg_validate_state *) entry->data; if (state->instr) { - _mesa_set_add(reg_state->uses, state->instr); - - assert(_mesa_set_search(src->reg->uses, state->instr)); + _mesa_set_add(reg_state->uses, src); } else { assert(state->if_stmt); - _mesa_set_add(reg_state->if_uses, state->if_stmt); - - assert(_mesa_set_search(src->reg->if_uses, state->if_stmt)); + _mesa_set_add(reg_state->if_uses, src); } - if (!src->reg->is_global) { + if (!src->reg.reg->is_global) { assert(reg_state->where_defined == state->impl && "using a register declared in a different function"); } - assert((src->reg->num_array_elems == 0 || - src->base_offset < src->reg->num_array_elems) && + assert((src->reg.reg->num_array_elems == 0 || + src->reg.base_offset < src->reg.reg->num_array_elems) && "definitely out-of-bounds array access"); - if (src->indirect) { - assert(src->reg->num_array_elems != 0); - assert((src->indirect->is_ssa || src->indirect->reg.indirect == NULL) && + if (src->reg.indirect) { + assert(src->reg.reg->num_array_elems != 0); + assert((src->reg.indirect->is_ssa || + src->reg.indirect->reg.indirect == NULL) && "only one level of indirection 
allowed"); - validate_src(src->indirect, state); + validate_src(src->reg.indirect, state); } } static void -validate_ssa_src(nir_ssa_def *def, validate_state *state) +validate_ssa_src(nir_src *src, validate_state *state) { - assert(def != NULL); + assert(src->ssa != NULL); - struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def); + struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, src->ssa); assert(entry); @@ -150,14 +147,10 @@ validate_ssa_src(nir_ssa_def *def, validate_state *state) "using an SSA value defined in a different function"); if (state->instr) { - _mesa_set_add(def_state->uses, state->instr); - - assert(_mesa_set_search(def->uses, state->instr)); + _mesa_set_add(def_state->uses, src); } else { assert(state->if_stmt); - _mesa_set_add(def_state->if_uses, state->if_stmt); - - assert(_mesa_set_search(def->if_uses, state->if_stmt)); + _mesa_set_add(def_state->if_uses, src); } /* TODO validate that the use is dominated by the definition */ @@ -166,10 +159,15 @@ validate_ssa_src(nir_ssa_def *def, validate_state *state) static void validate_src(nir_src *src, validate_state *state) { + if (state->instr) + assert(src->parent_instr == state->instr); + else + assert(src->parent_if == state->if_stmt); + if (src->is_ssa) - validate_ssa_src(src->ssa, state); + validate_ssa_src(src, state); else - validate_reg_src(&src->reg, state); + validate_reg_src(src, state); } static void @@ -201,8 +199,7 @@ validate_reg_dest(nir_reg_dest *dest, validate_state *state) { assert(dest->reg != NULL); - struct set_entry *entry = _mesa_set_search(dest->reg->defs, state->instr); - assert(entry && "definition not in nir_register.defs"); + assert(dest->parent_instr == state->instr); struct hash_entry *entry2; entry2 = _mesa_hash_table_search(state->regs, dest->reg); @@ -210,7 +207,7 @@ validate_reg_dest(nir_reg_dest *dest, validate_state *state) assert(entry2); reg_validate_state *reg_state = (reg_validate_state *) entry2->data; - 
_mesa_set_add(reg_state->defs, state->instr); + _mesa_set_add(reg_state->defs, dest); if (!dest->reg->is_global) { assert(reg_state->where_defined == state->impl && @@ -236,8 +233,13 @@ validate_ssa_def(nir_ssa_def *def, validate_state *state) assert(!BITSET_TEST(state->ssa_defs_found, def->index)); BITSET_SET(state->ssa_defs_found, def->index); + assert(def->parent_instr == state->instr); + assert(def->num_components <= 4); + list_validate(&def->uses); + list_validate(&def->if_uses); + ssa_def_validate_state *def_state = ralloc(state->ssa_defs, ssa_def_validate_state); def_state->where_defined = state->impl; @@ -699,6 +701,10 @@ prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state) assert(!BITSET_TEST(state->regs_found, reg->index)); BITSET_SET(state->regs_found, reg->index); + list_validate(&reg->uses); + list_validate(&reg->defs); + list_validate(&reg->if_uses); + reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state); reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -719,47 +725,47 @@ postvalidate_reg_decl(nir_register *reg, validate_state *state) reg_validate_state *reg_state = (reg_validate_state *) entry->data; - if (reg_state->uses->entries != reg->uses->entries) { + nir_foreach_use(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->uses, src); + assert(entry); + _mesa_set_remove(reg_state->uses, entry); + } + + if (reg_state->uses->entries != 0) { printf("extra entries in register uses:\n"); struct set_entry *entry; - set_foreach(reg->uses, entry) { - struct set_entry *entry2 = - _mesa_set_search(reg_state->uses, entry->key); - - if (entry2 == NULL) { - printf("%p\n", entry->key); - } - } + set_foreach(reg_state->uses, entry) + printf("%p\n", entry->key); abort(); } - if (reg_state->if_uses->entries != reg->if_uses->entries) { + nir_foreach_if_use(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->if_uses, src); + assert(entry); + 
_mesa_set_remove(reg_state->if_uses, entry); + } + + if (reg_state->if_uses->entries != 0) { printf("extra entries in register if_uses:\n"); struct set_entry *entry; - set_foreach(reg->if_uses, entry) { - struct set_entry *entry2 = - _mesa_set_search(reg_state->if_uses, entry->key); - - if (entry2 == NULL) { - printf("%p\n", entry->key); - } - } + set_foreach(reg_state->if_uses, entry) + printf("%p\n", entry->key); abort(); } - if (reg_state->defs->entries != reg->defs->entries) { + nir_foreach_def(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->defs, src); + assert(entry); + _mesa_set_remove(reg_state->defs, entry); + } + + if (reg_state->defs->entries != 0) { printf("extra entries in register defs:\n"); struct set_entry *entry; - set_foreach(reg->defs, entry) { - struct set_entry *entry2 = - _mesa_set_search(reg_state->defs, entry->key); - - if (entry2 == NULL) { - printf("%p\n", entry->key); - } - } + set_foreach(reg_state->defs, entry) + printf("%p\n", entry->key); abort(); } @@ -788,32 +794,32 @@ postvalidate_ssa_def(nir_ssa_def *def, void *void_state) struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def); ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; - if (def_state->uses->entries != def->uses->entries) { - printf("extra entries in SSA def uses:\n"); - struct set_entry *entry; - set_foreach(def->uses, entry) { - struct set_entry *entry2 = - _mesa_set_search(def_state->uses, entry->key); + nir_foreach_use(def, src) { + struct set_entry *entry = _mesa_set_search(def_state->uses, src); + assert(entry); + _mesa_set_remove(def_state->uses, entry); + } - if (entry2 == NULL) { - printf("%p\n", entry->key); - } - } + if (def_state->uses->entries != 0) { + printf("extra entries in register uses:\n"); + struct set_entry *entry; + set_foreach(def_state->uses, entry) + printf("%p\n", entry->key); abort(); } - if (def_state->if_uses->entries != def->if_uses->entries) { - printf("extra entries in SSA def 
uses:\n"); - struct set_entry *entry; - set_foreach(def->if_uses, entry) { - struct set_entry *entry2 = - _mesa_set_search(def_state->if_uses, entry->key); + nir_foreach_if_use(def, src) { + struct set_entry *entry = _mesa_set_search(def_state->if_uses, src); + assert(entry); + _mesa_set_remove(def_state->if_uses, entry); + } - if (entry2 == NULL) { - printf("%p\n", entry->key); - } - } + if (def_state->if_uses->entries != 0) { + printf("extra entries in register uses:\n"); + struct set_entry *entry; + set_foreach(def_state->if_uses, entry) + printf("%p\n", entry->key); abort(); } diff --git a/mesalib/src/glsl/opt_dead_builtin_varyings.cpp b/mesalib/src/glsl/opt_dead_builtin_varyings.cpp index 92f20c71d..31719d20c 100644 --- a/mesalib/src/glsl/opt_dead_builtin_varyings.cpp +++ b/mesalib/src/glsl/opt_dead_builtin_varyings.cpp @@ -99,6 +99,16 @@ public: } else { this->fragdata_usage |= 1 << index->get_uint_component(0); + /* Don't lower fragdata array if the output variable + * is not a float variable (or float vector) because it will + * generate wrong register assignments because of different + * data types. + */ + if (var->type->gl_type != GL_FLOAT && + var->type->gl_type != GL_FLOAT_VEC2 && + var->type->gl_type != GL_FLOAT_VEC3 && + var->type->gl_type != GL_FLOAT_VEC4) + this->lower_fragdata_array = false; } /* Don't visit the leaves of ir_dereference_array. */ |