diff options
author | marha <marha@users.sourceforge.net> | 2015-05-15 19:14:42 +0200 |
---|---|---|
committer | marha <marha@users.sourceforge.net> | 2015-05-15 19:14:42 +0200 |
commit | 843964ee791452b197e41dacb0146f5b456ffaa5 (patch) | |
tree | b1b0734c17ce1d488aa4d6e95f27363d80731e10 /mesalib/src/glsl | |
parent | a71d524ecad48837e0124a03124bc05f59a48be7 (diff) | |
download | vcxsrv-843964ee791452b197e41dacb0146f5b456ffaa5.tar.gz vcxsrv-843964ee791452b197e41dacb0146f5b456ffaa5.tar.bz2 vcxsrv-843964ee791452b197e41dacb0146f5b456ffaa5.zip |
randrproto fontconfig libfontenc libxcb mesalib xserver pixman xkeyboard-config git update 15 May 2015
xserver commit bf6344e1913a5d24c2d68eaca999ea3d71e1b707
libxcb commit cb621341a62e6d2233db3e337611f6fdd4f675a6
libxcb/xcb-proto commit 4c550465934164aab2449a125f75f4ca07816233
xkeyboard-config commit 5da6d510b460cad1b31288618cc364e586576826
libX11 commit d3415d1f052530760b4617db45affcb984cfe35c
libXdmcp commit b10f382e3aa2e86cd5a2bc27d6758da55f0ab1f6
libXext commit efdcbb7634501e1117d422636a0a75d7ea84b16b
libfontenc commit 42f3a39c3085afd9ef904ae39102fd49bbc2e4a5
libXinerama commit edd95182b26eb5d576d4878c559e0f17dddaa909
libXau commit 1e4635be11154dd8262f37b379511bd627defa2a
xkbcomp commit 1ae525b3d236b59e6437b2b5433d460e18370973
pixman commit 82f9b4faaf1aa63ec26b0dfd227f1a8e5e139ae2
xextproto commit 66afec3f49e8eb0d4c2e9af7088fc3116d4bafd7
randrproto commit 895ee5264524c7c239ee4ef5e39c4e295323fb51
glproto commit bd3d751e1eb17efb39f65093271bb4ac071aa9e0
mkfontscale commit 87d628f8eec170ec13bb9feefb1ce05aed07d1d6
xwininfo commit 0c49f8f2bd56b1e77721e81030ea948386dcdf4e
libXft commit e8a83226bc10afb587f6f34daac44d2ef809c85e
libXmu commit 4459e6940fe3fdf26a8d5d4c71989498bc400a62
libxtrans commit 7cbad9fe2e61cd9d5caeaf361826a6f4bd320f03
fontconfig commit 55ff8419274fd5ce59675f220b85035a3986d6cf
mesa commit 3687d752e51829b4723c9abb07ae56d2bbcda570
Diffstat (limited to 'mesalib/src/glsl')
38 files changed, 872 insertions, 624 deletions
diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources index c471eca23..d784a8107 100644 --- a/mesalib/src/glsl/Makefile.sources +++ b/mesalib/src/glsl/Makefile.sources @@ -22,6 +22,7 @@ NIR_FILES = \ nir/glsl_to_nir.h \ nir/nir.c \ nir/nir.h \ + nir/nir_array.h \ nir/nir_builder.h \ nir/nir_constant_expressions.h \ nir/nir_dominance.c \ diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp index 18b82e3be..14e630905 100644 --- a/mesalib/src/glsl/ast_to_hir.cpp +++ b/mesalib/src/glsl/ast_to_hir.cpp @@ -1558,6 +1558,18 @@ ast_expression::do_hir(exec_list *instructions, error_emitted = true; } + /* From section 4.1.7 of the GLSL 4.50 spec (Opaque Types): + * + * "Except for array indexing, structure member selection, and + * parentheses, opaque variables are not allowed to be operands in + * expressions; such use results in a compile-time error." + */ + if (type->contains_opaque()) { + _mesa_glsl_error(&loc, state, "opaque variables cannot be operands " + "of the ?: operator"); + error_emitted = true; + } + ir_constant *cond_val = op[0]->constant_expression_value(); if (then_instructions.is_empty() @@ -2357,6 +2369,14 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual, var->data.image_format = GL_NONE; } + } else if (qual->flags.q.read_only || + qual->flags.q.write_only || + qual->flags.q.coherent || + qual->flags.q._volatile || + qual->flags.q.restrict_flag || + qual->flags.q.explicit_image_format) { + _mesa_glsl_error(loc, state, "memory qualifiers may only be applied to " + "images"); } } @@ -2781,8 +2801,21 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, validate_matrix_layout_for_type(state, loc, var->type, var); } - if (var->type->contains_image()) - apply_image_qualifier_to_variable(qual, var, state, loc); + apply_image_qualifier_to_variable(qual, var, state, loc); + + /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader + * Inputs): + * + * "Fragment shaders also allow the following layout qualifier on in only + * (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (qual->flags.q.early_fragment_tests) { + _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " + "valid in fragment shader input layout declaration."); + } } /** @@ -3541,6 +3574,9 @@ ast_declarator_list::hir(exec_list *instructions, case GLSL_TYPE_INT: if (state->is_version(120, 300)) break; + case GLSL_TYPE_DOUBLE: + if (check_type->base_type == GLSL_TYPE_DOUBLE && (state->is_version(410, 0) || state->ARB_vertex_attrib_64bit_enable)) + break; /* FALLTHROUGH */ default: _mesa_glsl_error(& loc, state, diff --git a/mesalib/src/glsl/ast_type.cpp b/mesalib/src/glsl/ast_type.cpp index b596cd59e..1bcf6a2e8 100644 --- a/mesalib/src/glsl/ast_type.cpp +++ b/mesalib/src/glsl/ast_type.cpp @@ -293,11 +293,7 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, valid_in_mask.flags.q.invocations = 1; break; case MESA_SHADER_FRAGMENT: - if (q.flags.q.early_fragment_tests) { - state->early_fragment_tests = true; - } else { - _mesa_glsl_error(loc, state, "invalid input layout qualifier"); - } + valid_in_mask.flags.q.early_fragment_tests = 1; break; case MESA_SHADER_COMPUTE: create_cs_ast |= @@ -345,6 +341,10 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, this->invocations = q.invocations; } + if (q.flags.q.early_fragment_tests) { + state->fs_early_fragment_tests = true; + } + if (create_gs_ast) { node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type); } else if (create_cs_ast) { diff --git a/mesalib/src/glsl/builtin_functions.cpp b/mesalib/src/glsl/builtin_functions.cpp index 524b8d6e8..97055d85d 100644 --- a/mesalib/src/glsl/builtin_functions.cpp +++ b/mesalib/src/glsl/builtin_functions.cpp @@ -136,6 +136,12 @@ v140(const _mesa_glsl_parse_state *state) } static bool +es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(0, 310); +} + +static bool texture_rectangle(const _mesa_glsl_parse_state *state) { return state->ARB_texture_rectangle_enable; @@ -194,7 +200,8 @@ shader_bit_encoding(const _mesa_glsl_parse_state *state) static bool shader_integer_mix(const _mesa_glsl_parse_state *state) { - return v130(state) && state->EXT_shader_integer_mix_enable; + return state->is_version(450, 310) || + (v130(state) && state->EXT_shader_integer_mix_enable); } static bool @@ -219,10 +226,17 @@ gpu_shader5(const _mesa_glsl_parse_state *state) } static bool -shader_packing_or_gpu_shader5(const _mesa_glsl_parse_state *state) +gpu_shader5_or_es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 310) || state->ARB_gpu_shader5_enable; +} + +static bool +shader_packing_or_es31_or_gpu_shader5(const _mesa_glsl_parse_state *state) { return state->ARB_shading_language_packing_enable || - gpu_shader5(state); + state->ARB_gpu_shader5_enable || + state->is_version(400, 310); } static bool @@ -297,15 +311,24 @@ texture_gather(const _mesa_glsl_parse_state *state) state->ARB_gpu_shader5_enable; } +static bool +texture_gather_or_es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 310) || + state->ARB_texture_gather_enable || + state->ARB_gpu_shader5_enable; +} + /* Only ARB_texture_gather but not GLSL 4.0 or ARB_gpu_shader5. * used for relaxation of const offset requirements. */ static bool -texture_gather_only(const _mesa_glsl_parse_state *state) +texture_gather_only_or_es31(const _mesa_glsl_parse_state *state) { return !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable && - state->ARB_texture_gather_enable; + (state->ARB_texture_gather_enable || + state->is_version(0, 310)); } /* Desktop GL or OES_standard_derivatives + fragment shader only */ @@ -359,7 +382,7 @@ tex3d_lod(const _mesa_glsl_parse_state *state) static bool shader_atomic_counters(const _mesa_glsl_parse_state *state) { - return state->ARB_shader_atomic_counters_enable; + return state->has_atomic_counters(); } static bool @@ -1161,12 +1184,12 @@ builtin_builder::create_builtins() add_function("packUnorm2x16", _packUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); add_function("packSnorm2x16", _packSnorm2x16(shader_packing_or_es3), NULL); - add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_gpu_shader5), NULL); - add_function("packSnorm4x8", _packSnorm4x8(shader_packing_or_gpu_shader5), NULL); + add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("packSnorm4x8", _packSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); add_function("unpackUnorm2x16", _unpackUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); add_function("unpackSnorm2x16", _unpackSnorm2x16(shader_packing_or_es3), NULL); - add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing_or_gpu_shader5), NULL); - add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_gpu_shader5), NULL); + add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); add_function("packHalf2x16", _packHalf2x16(shader_packing_or_es3), NULL); add_function("unpackHalf2x16", _unpackHalf2x16(shader_packing_or_es3), NULL); add_function("packDouble2x32", _packDouble2x32(fp64), NULL); @@ -2202,61 +2225,69 @@ builtin_builder::create_builtins() NULL); add_function("textureGather", - _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), - _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), - _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), - _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), - _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec4_type), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type), NULL); add_function("textureGatherOffset", - _texture(ir_tg4, texture_gather_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), - _texture(ir_tg4, texture_gather_only, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), @@ -2285,6 +2316,9 @@ builtin_builder::create_builtins() _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), NULL); add_function("textureGatherOffsets", @@ -4445,7 +4479,7 @@ builtin_builder::_bitfieldExtract(const glsl_type *type) ir_variable *value = in_var(type, "value"); ir_variable *offset = in_var(glsl_type::int_type, "offset"); ir_variable *bits = in_var(glsl_type::int_type, "bits"); - MAKE_SIG(type, gpu_shader5, 3, value, offset, bits); + MAKE_SIG(type, gpu_shader5_or_es31, 3, value, offset, bits); body.emit(ret(expr(ir_triop_bitfield_extract, value, offset, bits))); @@ -4459,33 +4493,33 @@ builtin_builder::_bitfieldInsert(const glsl_type *type) ir_variable *insert = in_var(type, "insert"); ir_variable *offset = in_var(glsl_type::int_type, "offset"); ir_variable *bits = in_var(glsl_type::int_type, "bits"); - MAKE_SIG(type, gpu_shader5, 4, base, insert, offset, bits); + MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits); body.emit(ret(bitfield_insert(base, insert, offset, bits))); return sig; } -UNOP(bitfieldReverse, ir_unop_bitfield_reverse, gpu_shader5) +UNOP(bitfieldReverse, ir_unop_bitfield_reverse, gpu_shader5_or_es31) ir_function_signature * builtin_builder::_bitCount(const glsl_type *type) { - return unop(gpu_shader5, ir_unop_bit_count, + return unop(gpu_shader5_or_es31, ir_unop_bit_count, glsl_type::ivec(type->vector_elements), type); } ir_function_signature * builtin_builder::_findLSB(const glsl_type *type) { - return unop(gpu_shader5, ir_unop_find_lsb, + return unop(gpu_shader5_or_es31, ir_unop_find_lsb, glsl_type::ivec(type->vector_elements), type); } ir_function_signature * builtin_builder::_findMSB(const glsl_type *type) { - return unop(gpu_shader5, ir_unop_find_msb, + return unop(gpu_shader5_or_es31, ir_unop_find_msb, glsl_type::ivec(type->vector_elements), type); } @@ -4505,7 +4539,7 @@ builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type) ir_function_signature * builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type) { - return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5, x_type, x_type, exp_type); + return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5_or_es31, x_type, x_type, exp_type); } ir_function_signature * @@ -4526,7 +4560,7 @@ builtin_builder::_frexp(const glsl_type *x_type, const glsl_type *exp_type) { ir_variable *x = in_var(x_type, "x"); ir_variable *exponent = out_var(exp_type, "exp"); - MAKE_SIG(x_type, gpu_shader5, 2, x, exponent); + MAKE_SIG(x_type, gpu_shader5_or_es31, 2, x, exponent); const unsigned vec_elem = x_type->vector_elements; const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); @@ -4575,7 +4609,7 @@ builtin_builder::_uaddCarry(const glsl_type *type) ir_variable *x = in_var(type, "x"); ir_variable *y = in_var(type, "y"); ir_variable *carry = out_var(type, "carry"); - MAKE_SIG(type, gpu_shader5, 3, x, y, carry); + MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, carry); body.emit(assign(carry, ir_builder::carry(x, y))); body.emit(ret(add(x, y))); @@ -4589,7 +4623,7 @@ builtin_builder::_usubBorrow(const glsl_type *type) ir_variable *x = in_var(type, "x"); ir_variable *y = in_var(type, "y"); ir_variable *borrow = out_var(type, "borrow"); - MAKE_SIG(type, gpu_shader5, 3, x, y, borrow); + MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, borrow); body.emit(assign(borrow, ir_builder::borrow(x, y))); body.emit(ret(sub(x, y))); @@ -4607,7 +4641,7 @@ builtin_builder::_mulExtended(const glsl_type *type) ir_variable *y = in_var(type, "y"); ir_variable *msb = out_var(type, "msb"); ir_variable *lsb = out_var(type, "lsb"); - MAKE_SIG(glsl_type::void_type, gpu_shader5, 4, x, y, msb, lsb); + MAKE_SIG(glsl_type::void_type, gpu_shader5_or_es31, 4, x, y, msb, lsb); body.emit(assign(msb, imul_high(x, y))); body.emit(assign(lsb, mul(x, y))); diff --git a/mesalib/src/glsl/builtin_types.cpp b/mesalib/src/glsl/builtin_types.cpp index fef86df28..d92e2eb30 100644 --- a/mesalib/src/glsl/builtin_types.cpp +++ b/mesalib/src/glsl/builtin_types.cpp @@ -372,7 +372,7 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) add_type(symbols, glsl_type::uimage2DMSArray_type); } - if (state->ARB_shader_atomic_counters_enable) { + if (state->has_atomic_counters()) { add_type(symbols, glsl_type::atomic_uint_type); } diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp index 21e7331c7..6806aa1f9 100644 --- a/mesalib/src/glsl/builtin_variables.cpp +++ b/mesalib/src/glsl/builtin_variables.cpp @@ -653,19 +653,46 @@ builtin_variable_generator::generate_constants() add_const("gl_MaxTextureCoords", state->Const.MaxTextureCoords); } - if (state->ARB_shader_atomic_counters_enable) { + if (state->has_atomic_counters()) { add_const("gl_MaxVertexAtomicCounters", state->Const.MaxVertexAtomicCounters); - add_const("gl_MaxGeometryAtomicCounters", - state->Const.MaxGeometryAtomicCounters); add_const("gl_MaxFragmentAtomicCounters", state->Const.MaxFragmentAtomicCounters); add_const("gl_MaxCombinedAtomicCounters", state->Const.MaxCombinedAtomicCounters); add_const("gl_MaxAtomicCounterBindings", state->Const.MaxAtomicBufferBindings); - add_const("gl_MaxTessControlAtomicCounters", 0); - add_const("gl_MaxTessEvaluationAtomicCounters", 0); + + /* When Mesa adds support for GL_OES_geometry_shader and + * GL_OES_tessellation_shader, this will need to change. + */ + if (!state->es_shader) { + add_const("gl_MaxGeometryAtomicCounters", + state->Const.MaxGeometryAtomicCounters); + add_const("gl_MaxTessControlAtomicCounters", 0); + add_const("gl_MaxTessEvaluationAtomicCounters", 0); + } + } + + if (state->is_version(420, 310)) { + add_const("gl_MaxVertexAtomicCounterBuffers", + state->Const.MaxVertexAtomicCounterBuffers); + add_const("gl_MaxFragmentAtomicCounterBuffers", + state->Const.MaxFragmentAtomicCounterBuffers); + add_const("gl_MaxCombinedAtomicCounterBuffers", + state->Const.MaxCombinedAtomicCounterBuffers); + add_const("gl_MaxAtomicCounterBufferSize", + state->Const.MaxAtomicCounterBufferSize); + + /* When Mesa adds support for GL_OES_geometry_shader and + * GL_OES_tessellation_shader, this will need to change. + */ + if (!state->es_shader) { + add_const("gl_MaxGeometryAtomicCounterBuffers", + state->Const.MaxGeometryAtomicCounterBuffers); + add_const("gl_MaxTessControlAtomicCounterBuffers", 0); + add_const("gl_MaxTessEvaluationAtomicCounterBuffers", 0); + } } if (state->is_version(430, 0) || state->ARB_compute_shader_enable) { diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y index cfceca66b..a11b6b2c7 100644 --- a/mesalib/src/glsl/glcpp/glcpp-parse.y +++ b/mesalib/src/glsl/glcpp/glcpp-parse.y @@ -2448,6 +2448,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions->ARB_gpu_shader_fp64) add_builtin_define(parser, "GL_ARB_gpu_shader_fp64", 1); + if (extensions->ARB_vertex_attrib_64bit) + add_builtin_define(parser, "GL_ARB_vertex_attrib_64bit", 1); + if (extensions->AMD_vertex_shader_layer) add_builtin_define(parser, "GL_AMD_vertex_shader_layer", 1); diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll index 2785ed168..10db5b8b6 100644 --- a/mesalib/src/glsl/glsl_lexer.ll +++ b/mesalib/src/glsl/glsl_lexer.ll @@ -409,7 +409,7 @@ restrict KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store readonly KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, READONLY); writeonly KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, WRITEONLY); -atomic_uint KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT); +atomic_uint KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT); struct return STRUCT; void return VOID_TOK; diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy index aceb3b916..3ce9e103f 100644 --- a/mesalib/src/glsl/glsl_parser.yy +++ b/mesalib/src/glsl/glsl_parser.yy @@ -214,6 +214,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2, %type <type_qualifier> layout_qualifier %type <type_qualifier> layout_qualifier_id_list layout_qualifier_id %type <type_qualifier> interface_block_layout_qualifier +%type <type_qualifier> memory_qualifier %type <type_qualifier> interface_qualifier %type <type_specifier> type_specifier %type <type_specifier> type_specifier_nonarray @@ -1000,6 +1001,11 @@ parameter_qualifier: $$ = $2; $$.precision = $1; } + | memory_qualifier parameter_qualifier + { + $$ = $1; + $$.merge_qualifier(&@1, state, $2); + } parameter_direction_qualifier: IN_TOK @@ -1360,6 +1366,21 @@ layout_qualifier_id: if (!$$.flags.i && match_layout_qualifier($1, "early_fragment_tests", state) == 0) { + /* From section 4.4.1.3 of the GLSL 4.50 specification + * (Fragment Shader Inputs): + * + * "Fragment shaders also allow the following layout + * qualifier on in only (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (state->stage != MESA_SHADER_FRAGMENT) { + _mesa_glsl_error(& @1, state, + "early_fragment_tests layout qualifier only " + "valid in fragment shaders"); + } + $$.flags.q.early_fragment_tests = 1; } } @@ -1404,13 +1425,13 @@ layout_qualifier_id: } if ((state->ARB_shading_language_420pack_enable || - state->ARB_shader_atomic_counters_enable) && + state->has_atomic_counters()) && match_layout_qualifier("binding", $1, state) == 0) { $$.flags.q.explicit_binding = 1; $$.binding = $3; } - if (state->ARB_shader_atomic_counters_enable && + if (state->has_atomic_counters() && match_layout_qualifier("offset", $1, state) == 0) { $$.flags.q.explicit_offset = 1; $$.offset = $3; @@ -1581,6 +1602,7 @@ type_qualifier: | storage_qualifier | interpolation_qualifier | layout_qualifier + | memory_qualifier | precision_qualifier { memset(&$$, 0, sizeof($$)); @@ -1718,6 +1740,11 @@ type_qualifier: $$ = $2; $$.precision = $1; } + | memory_qualifier type_qualifier + { + $$ = $1; + $$.merge_qualifier(&@1, state, $2); + } ; auxiliary_storage_qualifier: @@ -1778,7 +1805,10 @@ storage_qualifier: memset(& $$, 0, sizeof($$)); $$.flags.q.uniform = 1; } - | COHERENT + ; + +memory_qualifier: + COHERENT { memset(& $$, 0, sizeof($$)); $$.flags.q.coherent = 1; diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp index 0aa3c54fc..be6713c46 100644 --- a/mesalib/src/glsl/glsl_parser_extras.cpp +++ b/mesalib/src/glsl/glsl_parser_extras.cpp @@ -117,6 +117,16 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters; this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters; this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings; + this->Const.MaxVertexAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers; + this->Const.MaxGeometryAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers; + this->Const.MaxFragmentAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + this->Const.MaxCombinedAtomicCounterBuffers = + ctx->Const.MaxCombinedAtomicBuffers; + this->Const.MaxAtomicCounterBufferSize = + ctx->Const.MaxAtomicBufferSize; /* Compute shader constants */ for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupCount); i++) @@ -143,9 +153,9 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->num_user_structures = 0; /* supported_versions should be large enough to support the known desktop - * GLSL versions plus 2 GLES versions (ES2 & ES3) + * GLSL versions plus 3 GLES versions (ES 1.00, ES 3.00, and ES 3.10)) */ - STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 2) == + STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 3) == ARRAY_SIZE(this->supported_versions)); /* Populate the list of supported GLSL versions */ @@ -175,6 +185,11 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->supported_versions[this->num_supported_versions].es = true; this->num_supported_versions++; } + if (_mesa_is_gles31(ctx)) { + this->supported_versions[this->num_supported_versions].ver = 310; + this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } /* Create a string for use in error messages to tell the user which GLSL * versions are supported. @@ -212,7 +227,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->gs_input_size = 0; this->in_qualifier = new(this) ast_type_qualifier(); this->out_qualifier = new(this) ast_type_qualifier(); - this->early_fragment_tests = false; + this->fs_early_fragment_tests = false; memset(this->atomic_counter_offsets, 0, sizeof(this->atomic_counter_offsets)); this->allow_extension_directive_midshader = @@ -565,6 +580,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_texture_query_lod, true, false, ARB_texture_query_lod), EXT(ARB_texture_rectangle, true, false, dummy_true), EXT(ARB_uniform_buffer_object, true, false, ARB_uniform_buffer_object), + EXT(ARB_vertex_attrib_64bit, true, false, ARB_vertex_attrib_64bit), EXT(ARB_viewport_array, true, false, ARB_viewport_array), /* KHR extensions go here, sorted alphabetically. @@ -1418,6 +1434,7 @@ set_shader_inout_layout(struct gl_shader *shader, assert(!state->fs_redeclares_gl_fragcoord); assert(!state->fs_pixel_center_integer); assert(!state->fs_origin_upper_left); + assert(!state->fs_early_fragment_tests); } switch (shader->Stage) { @@ -1460,6 +1477,7 @@ set_shader_inout_layout(struct gl_shader *shader, shader->origin_upper_left = state->fs_origin_upper_left; shader->ARB_fragment_coord_conventions_enable = state->ARB_fragment_coord_conventions_enable; + shader->EarlyFragmentTests = state->fs_early_fragment_tests; break; default: diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h index dae7864fd..9a0c24e67 100644 --- a/mesalib/src/glsl/glsl_parser_extras.h +++ b/mesalib/src/glsl/glsl_parser_extras.h @@ -160,7 +160,7 @@ struct _mesa_glsl_parse_state { { if (!this->has_separate_shader_objects()) { const char *const requirement = this->es_shader - ? "GL_EXT_separate_shader_objects extension" + ? "GL_EXT_separate_shader_objects extension or GLSL ES 310" : "GL_ARB_separate_shader_objects extension or GLSL 420"; _mesa_glsl_error(locp, this, "%s explicit location requires %s", @@ -175,17 +175,26 @@ struct _mesa_glsl_parse_state { const ir_variable *) { if (!this->has_explicit_attrib_location() || - !this->ARB_explicit_uniform_location_enable) { + !this->has_explicit_uniform_location()) { + const char *const requirement = this->es_shader + ? "GLSL ES 310" + : "GL_ARB_explicit_uniform_location and either " + "GL_ARB_explicit_attrib_location or GLSL 330."; + _mesa_glsl_error(locp, this, - "uniform explicit location requires " - "GL_ARB_explicit_uniform_location and either " - "GL_ARB_explicit_attrib_location or GLSL 330."); + "uniform explicit location requires %s", + requirement); return false; } return true; } + bool has_atomic_counters() const + { + return ARB_shader_atomic_counters_enable || is_version(420, 310); + } + bool has_explicit_attrib_stream() const { return ARB_gpu_shader5_enable || is_version(400, 0); @@ -196,6 +205,11 @@ struct _mesa_glsl_parse_state { return ARB_explicit_attrib_location_enable || is_version(330, 300); } + bool has_explicit_uniform_location() const + { + return ARB_explicit_uniform_location_enable || is_version(430, 310); + } + bool has_uniform_buffer_objects() const { return ARB_uniform_buffer_object_enable || is_version(140, 300); @@ -203,7 +217,7 @@ struct _mesa_glsl_parse_state { bool has_separate_shader_objects() const { - return ARB_separate_shader_objects_enable || is_version(410, 0) + return ARB_separate_shader_objects_enable || is_version(410, 310) || EXT_separate_shader_objects_enable; } @@ -224,7 +238,7 @@ struct _mesa_glsl_parse_state { struct { unsigned ver; bool es; - } supported_versions[14]; + } supported_versions[15]; bool es_shader; unsigned language_version; @@ -339,6 +353,16 @@ struct _mesa_glsl_parse_state { unsigned MaxCombinedAtomicCounters; unsigned MaxAtomicBufferBindings; + /* These are also atomic counter related, but they weren't added to + * until atomic counters were added to core in GLSL 4.20 and GLSL ES + * 3.10. + */ + unsigned MaxVertexAtomicCounterBuffers; + unsigned MaxGeometryAtomicCounterBuffers; + unsigned MaxFragmentAtomicCounterBuffers; + unsigned MaxCombinedAtomicCounterBuffers; + unsigned MaxAtomicCounterBufferSize; + /* ARB_compute_shader */ unsigned MaxComputeWorkGroupCount[3]; unsigned MaxComputeWorkGroupSize[3]; @@ -458,6 +482,8 @@ struct _mesa_glsl_parse_state { bool ARB_texture_rectangle_warn; bool ARB_uniform_buffer_object_enable; bool ARB_uniform_buffer_object_warn; + bool ARB_vertex_attrib_64bit_enable; + bool ARB_vertex_attrib_64bit_warn; bool ARB_viewport_array_enable; bool ARB_viewport_array_warn; @@ -510,7 +536,7 @@ struct _mesa_glsl_parse_state { */ unsigned gs_input_size; - bool early_fragment_tests; + bool fs_early_fragment_tests; /** Atomic counter offsets by binding */ unsigned atomic_counter_offsets[MAX_COMBINED_ATOMIC_BUFFERS]; diff --git a/mesalib/src/glsl/ir_set_program_inouts.cpp b/mesalib/src/glsl/ir_set_program_inouts.cpp index e877a2019..b968a1efd 100644 --- a/mesalib/src/glsl/ir_set_program_inouts.cpp +++ b/mesalib/src/glsl/ir_set_program_inouts.cpp @@ -105,13 +105,10 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len, int idx = var->data.location + var->data.index + offset + i; GLbitfield64 bitfield = BITFIELD64_BIT(idx); - /* dvec3 and dvec4 take up 2 slots */ - if (dual_slot) { - idx += i; - bitfield |= bitfield << 1; - } if (var->data.mode == ir_var_shader_in) { - prog->InputsRead |= bitfield; + prog->InputsRead |= bitfield; + if (dual_slot) + prog->DoubleInputsRead |= bitfield; if (is_fragment_shader) { gl_fragment_program *fprog = (gl_fragment_program *) prog; fprog->InterpQualifier[idx] = @@ -120,13 +117,6 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len, fprog->IsCentroid |= bitfield; if (var->data.sample) fprog->IsSample |= bitfield; - - /* Set the InterpQualifier of the next slot to the same as the - * current one, since dvec3 and dvec4 spans 2 slots. - */ - if (dual_slot) - fprog->InterpQualifier[idx + 1] = - (glsl_interp_qualifier) var->data.interpolation; } } else if (var->data.mode == ir_var_system_value) { prog->SystemValuesRead |= bitfield; diff --git a/mesalib/src/glsl/link_uniform_blocks.cpp b/mesalib/src/glsl/link_uniform_blocks.cpp index 6ca41107e..898544bea 100644 --- a/mesalib/src/glsl/link_uniform_blocks.cpp +++ b/mesalib/src/glsl/link_uniform_blocks.cpp @@ -67,14 +67,14 @@ private: assert(!"Should not get here."); } - virtual void enter_record(const glsl_type *type, const char *name, + virtual void enter_record(const glsl_type *type, const char *, bool row_major) { assert(type->is_record()); this->offset = glsl_align( this->offset, type->std140_base_alignment(row_major)); } - virtual void leave_record(const glsl_type *type, const char *name, + virtual void leave_record(const glsl_type *type, const char *, bool row_major) { assert(type->is_record()); @@ -90,8 +90,8 @@ private: } virtual void visit_field(const glsl_type *type, const char *name, - bool row_major, const glsl_type *record_type, - bool last_field) + bool row_major, const glsl_type *, + bool /* last_field */) { assert(this->index < this->num_variables); diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp index d5ca23a38..2c928e144 100644 --- a/mesalib/src/glsl/link_uniforms.cpp +++ b/mesalib/src/glsl/link_uniforms.cpp @@ -544,7 +544,7 @@ private: assert(!"Should not get here."); } - virtual void enter_record(const glsl_type *type, const char *name, + virtual void enter_record(const glsl_type *type, const char *, bool row_major) { assert(type->is_record()); if (this->ubo_block_index == -1) @@ -553,7 +553,7 @@ private: this->ubo_byte_offset, type->std140_base_alignment(row_major)); } - virtual void leave_record(const glsl_type *type, const char *name, + virtual void leave_record(const glsl_type *type, const char *, bool row_major) { assert(type->is_record()); if (this->ubo_block_index == -1) @@ -564,7 +564,7 @@ private: virtual void visit_field(const glsl_type *type, const char *name, bool row_major, const glsl_type *record_type, - bool last_field) + bool /* last_field */) { assert(!type->without_array()->is_record()); assert(!type->without_array()->is_interface()); diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp index 21fde9444..ea73c6f9d 100644 --- a/mesalib/src/glsl/linker.cpp +++ b/mesalib/src/glsl/linker.cpp @@ -1413,6 +1413,8 @@ link_fs_input_layout_qualifiers(struct gl_shader_program *prog, linked_shader->origin_upper_left = shader->origin_upper_left; linked_shader->pixel_center_integer = shader->pixel_center_integer; } + + linked_shader->EarlyFragmentTests |= shader->EarlyFragmentTests; } } @@ -1975,6 +1977,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog, } to_assign[16]; unsigned num_attr = 0; + unsigned total_attribs_size = 0; foreach_in_list(ir_instruction, node, sh->ir) { ir_variable *const var = node->as_variable(); @@ -2016,12 +2019,41 @@ assign_attribute_or_color_locations(gl_shader_program *prog, } } + const unsigned slots = var->type->count_attribute_slots(); + + /* From GL4.5 core spec, section 11.1.1 (Vertex Attributes): + * + * "A program with more than the value of MAX_VERTEX_ATTRIBS active + * attribute variables may fail to link, unless device-dependent + * optimizations are able to make the program fit within available + * hardware resources. For the purposes of this test, attribute variables + * of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, + * and dmat4 may count as consuming twice as many attributes as equivalent + * single-precision types. While these types use the same number of + * generic attributes as their single-precision equivalents, + * implementations are permitted to consume two single-precision vectors + * of internal storage for each three- or four-component double-precision + * vector." + * Until someone has a good reason in Mesa, enforce that now. + */ + if (target_index == MESA_SHADER_VERTEX) { + total_attribs_size += slots; + if (var->type->without_array() == glsl_type::dvec3_type || + var->type->without_array() == glsl_type::dvec4_type || + var->type->without_array() == glsl_type::dmat2x3_type || + var->type->without_array() == glsl_type::dmat2x4_type || + var->type->without_array() == glsl_type::dmat3_type || + var->type->without_array() == glsl_type::dmat3x4_type || + var->type->without_array() == glsl_type::dmat4x3_type || + var->type->without_array() == glsl_type::dmat4_type) + total_attribs_size += slots; + } + /* If the variable is not a built-in and has a location statically * assigned in the shader (presumably via a layout qualifier), make sure * that it doesn't collide with other assigned locations. Otherwise, * add it to the list of variables that need linker-assigned locations. */ - const unsigned slots = var->type->count_attribute_slots(); if (var->data.location != -1) { if (var->data.location >= generic_base && var->data.index < 1) { /* From page 61 of the OpenGL 4.0 spec: @@ -2141,6 +2173,15 @@ assign_attribute_or_color_locations(gl_shader_program *prog, num_attr++; } + if (target_index == MESA_SHADER_VERTEX) { + if (total_attribs_size > max_index) { + linker_error(prog, + "attempt to use %d vertex attribute slots only %d available ", + total_attribs_size, max_index); + return false; + } + } + /* If all of the attributes were assigned locations by the application (or * are built-in attributes with fixed locations), return early. This should * be the common case. @@ -2556,6 +2597,7 @@ add_interface_variables(struct gl_shader_program *shProg, { foreach_in_list(ir_instruction, node, sh->ir) { ir_variable *var = node->as_variable(); + uint8_t mask = 0; if (!var) continue; @@ -2571,6 +2613,10 @@ add_interface_variables(struct gl_shader_program *shProg, var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE && var->data.location != SYSTEM_VALUE_INSTANCE_ID) continue; + /* Mark special built-in inputs referenced by the vertex stage so + * that they are considered active by the shader queries. + */ + mask = (1 << (MESA_SHADER_VERTEX)); /* FALLTHROUGH */ case ir_var_shader_in: if (programInterface != GL_PROGRAM_INPUT) @@ -2585,7 +2631,7 @@ add_interface_variables(struct gl_shader_program *shProg, }; if (!add_program_resource(shProg, programInterface, var, - build_stageref(shProg, var->name))) + build_stageref(shProg, var->name) | mask)) return false; } return true; diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp index f6b8331d4..af758ceb0 100644 --- a/mesalib/src/glsl/nir/glsl_to_nir.cpp +++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp @@ -614,27 +614,135 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_atomic_counter_inc_var; } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) { op = nir_intrinsic_atomic_counter_dec_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) { + op = nir_intrinsic_image_load; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) { + op = nir_intrinsic_image_store; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) { + op = nir_intrinsic_image_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) { + op = nir_intrinsic_image_atomic_min; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) { + op = nir_intrinsic_image_atomic_max; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) { + op = nir_intrinsic_image_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) { + op = nir_intrinsic_image_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) { + op = nir_intrinsic_image_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) { + op = nir_intrinsic_image_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) { + op = nir_intrinsic_image_atomic_comp_swap; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) { + op = nir_intrinsic_memory_barrier; } else { unreachable("not reached"); } nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); - ir_dereference *param = - (ir_dereference *) ir->actual_parameters.get_head(); - instr->variables[0] = evaluate_deref(&instr->instr, param); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + + switch (op) { + case nir_intrinsic_atomic_counter_read_var: + case nir_intrinsic_atomic_counter_inc_var: + case nir_intrinsic_atomic_counter_dec_var: { + ir_dereference *param = + (ir_dereference *) ir->actual_parameters.get_head(); + instr->variables[0] = evaluate_deref(&instr->instr, param); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + break; + } + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: { + nir_ssa_undef_instr *instr_undef = + nir_ssa_undef_instr_create(shader, 1); + nir_instr_insert_after_cf_list(this->cf_node_list, + &instr_undef->instr); + + /* Set the image variable dereference. */ + exec_node *param = ir->actual_parameters.get_head(); + ir_dereference *image = (ir_dereference *)param; + const glsl_type *type = + image->variable_referenced()->type->without_array(); + + instr->variables[0] = evaluate_deref(&instr->instr, image); + param = param->get_next(); + + /* Set the address argument, extending the coordinate vector to four + * components. + */ + const nir_src src_addr = evaluate_rvalue((ir_dereference *)param); + nir_alu_instr *instr_addr = nir_alu_instr_create(shader, nir_op_vec4); + nir_ssa_dest_init(&instr_addr->instr, &instr_addr->dest.dest, 4, NULL); + + for (int i = 0; i < 4; i++) { + if (i < type->coordinate_components()) { + instr_addr->src[i].src = src_addr; + instr_addr->src[i].swizzle[0] = i; + } else { + instr_addr->src[i].src = nir_src_for_ssa(&instr_undef->def); + } + } + + nir_instr_insert_after_cf_list(cf_node_list, &instr_addr->instr); + instr->src[0] = nir_src_for_ssa(&instr_addr->dest.dest.ssa); + param = param->get_next(); + + /* Set the sample argument, which is undefined for single-sample + * images. + */ + if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { + instr->src[1] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } else { + instr->src[1] = nir_src_for_ssa(&instr_undef->def); + } + + /* Set the intrinsic parameters. */ + if (!param->is_tail_sentinel()) { + instr->src[2] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } + + if (!param->is_tail_sentinel()) { + instr->src[3] = evaluate_rvalue((ir_dereference *)param); + param = param->get_next(); + } + + /* Set the intrinsic destination. */ + if (ir->return_deref) + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + break; + } + case nir_intrinsic_memory_barrier: + break; + default: + unreachable("not reached"); + } nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); - nir_intrinsic_instr *store_instr = - nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); - store_instr->num_components = 1; + if (ir->return_deref) { + nir_intrinsic_instr *store_instr = + nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); + store_instr->num_components = ir->return_deref->type->vector_elements; - store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref); - store_instr->src[0].is_ssa = true; - store_instr->src[0].ssa = &instr->dest.ssa; + store_instr->variables[0] = + evaluate_deref(&store_instr->instr, ir->return_deref); + store_instr->src[0] = nir_src_for_ssa(&instr->dest.ssa); - nir_instr_insert_after_cf_list(this->cf_node_list, &store_instr->instr); + nir_instr_insert_after_cf_list(this->cf_node_list, + &store_instr->instr); + } return; } @@ -824,7 +932,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir) nir_dest *dest = get_instr_dest(this->result); assert(dest->is_ssa); - nir_src src; + nir_src src = NIR_SRC_INIT; src.is_ssa = true; src.ssa = &dest->ssa; @@ -1038,8 +1146,8 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_rcp: emit(nir_op_frcp, dest_size, srcs); break; case ir_unop_rsq: emit(nir_op_frsq, dest_size, srcs); break; case ir_unop_sqrt: emit(nir_op_fsqrt, dest_size, srcs); break; - case ir_unop_exp: emit(nir_op_fexp, dest_size, srcs); break; - case ir_unop_log: emit(nir_op_flog, dest_size, srcs); break; + case ir_unop_exp: unreachable("ir_unop_exp should have been lowered"); + case ir_unop_log: unreachable("ir_unop_log should have been lowered"); case ir_unop_exp2: emit(nir_op_fexp2, dest_size, srcs); break; case ir_unop_log2: emit(nir_op_flog2, dest_size, srcs); break; case ir_unop_i2f: diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c index 4cc074b80..f03e80a4e 100644 --- a/mesalib/src/glsl/nir/nir.c +++ b/mesalib/src/glsl/nir/nir.c @@ -58,12 +58,9 @@ reg_create(void *mem_ctx, struct exec_list *list) nir_register *reg = ralloc(mem_ctx, nir_register); reg->parent_instr = NULL; - reg->uses = _mesa_set_create(reg, _mesa_hash_pointer, - _mesa_key_pointer_equal); - reg->defs = _mesa_set_create(reg, _mesa_hash_pointer, - _mesa_key_pointer_equal); - reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer, - _mesa_key_pointer_equal); + list_inithead(®->uses); + list_inithead(®->defs); + list_inithead(®->if_uses); reg->num_components = 0; reg->num_array_elems = 0; @@ -1070,11 +1067,14 @@ update_if_uses(nir_cf_node *node) nir_if *if_stmt = nir_cf_node_as_if(node); - struct set *if_uses_set = if_stmt->condition.is_ssa ? - if_stmt->condition.ssa->if_uses : - if_stmt->condition.reg.reg->uses; - - _mesa_set_add(if_uses_set, if_stmt); + if_stmt->condition.parent_if = if_stmt; + if (if_stmt->condition.is_ssa) { + list_addtail(&if_stmt->condition.use_link, + &if_stmt->condition.ssa->if_uses); + } else { + list_addtail(&if_stmt->condition.use_link, + &if_stmt->condition.reg.reg->if_uses); + } } void @@ -1227,16 +1227,7 @@ cleanup_cf_node(nir_cf_node *node) foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) cleanup_cf_node(child); - struct set *if_uses; - if (if_stmt->condition.is_ssa) { - if_uses = if_stmt->condition.ssa->if_uses; - } else { - if_uses = if_stmt->condition.reg.reg->if_uses; - } - - struct set_entry *entry = _mesa_set_search(if_uses, if_stmt); - assert(entry); - _mesa_set_remove(if_uses, entry); + list_del(&if_stmt->condition.use_link); break; } @@ -1293,9 +1284,9 @@ add_use_cb(nir_src *src, void *state) { nir_instr *instr = state; - struct set *uses_set = src->is_ssa ? src->ssa->uses : src->reg.reg->uses; - - _mesa_set_add(uses_set, instr); + src->parent_instr = instr; + list_addtail(&src->use_link, + src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses); return true; } @@ -1320,8 +1311,10 @@ add_reg_def_cb(nir_dest *dest, void *state) { nir_instr *instr = state; - if (!dest->is_ssa) - _mesa_set_add(dest->reg.reg->defs, instr); + if (!dest->is_ssa) { + dest->reg.parent_instr = instr; + list_addtail(&dest->reg.def_link, &dest->reg.reg->defs); + } return true; } @@ -1436,13 +1429,7 @@ nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) static bool remove_use_cb(nir_src *src, void *state) { - nir_instr *instr = state; - - struct set *uses_set = src->is_ssa ? src->ssa->uses : src->reg.reg->uses; - - struct set_entry *entry = _mesa_set_search(uses_set, instr); - if (entry) - _mesa_set_remove(uses_set, entry); + list_del(&src->use_link); return true; } @@ -1450,16 +1437,8 @@ remove_use_cb(nir_src *src, void *state) static bool remove_def_cb(nir_dest *dest, void *state) { - nir_instr *instr = state; - - if (dest->is_ssa) - return true; - - nir_register *reg = dest->reg.reg; - - struct set_entry *entry = _mesa_set_search(reg->defs, instr); - if (entry) - _mesa_set_remove(reg->defs, entry); + if (!dest->is_ssa) + list_del(&dest->reg.def_link); return true; } @@ -1834,64 +1813,77 @@ nir_srcs_equal(nir_src src1, nir_src src2) } static bool -src_does_not_use_def(nir_src *src, void *void_def) +src_is_valid(const nir_src *src) { - nir_ssa_def *def = void_def; - - if (src->is_ssa) { - return src->ssa != def; - } else { - return true; - } + return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL); } -static bool -src_does_not_use_reg(nir_src *src, void *void_reg) +static void +src_remove_all_uses(nir_src *src) { - nir_register *reg = void_reg; + for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { + if (!src_is_valid(src)) + continue; - if (src->is_ssa) { - return true; - } else { - return src->reg.reg != reg; + list_del(&src->use_link); + } +} + +static void +src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) +{ + for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { + if (!src_is_valid(src)) + continue; + + if (parent_instr) { + src->parent_instr = parent_instr; + if (src->is_ssa) + list_addtail(&src->use_link, &src->ssa->uses); + else + list_addtail(&src->use_link, &src->reg.reg->uses); + } else { + assert(parent_if); + src->parent_if = parent_if; + if (src->is_ssa) + list_addtail(&src->use_link, &src->ssa->if_uses); + else + list_addtail(&src->use_link, &src->reg.reg->if_uses); + } } } void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src) { - nir_src old_src = *src; + assert(!src_is_valid(src) || src->parent_instr == instr); + + src_remove_all_uses(src); *src = new_src; + src_add_all_uses(src, instr, NULL); +} - for (nir_src *iter_src = &old_src; iter_src; - iter_src = iter_src->is_ssa ? NULL : iter_src->reg.indirect) { - if (iter_src->is_ssa) { - nir_ssa_def *ssa = iter_src->ssa; - if (ssa && nir_foreach_src(instr, src_does_not_use_def, ssa)) { - struct set_entry *entry = _mesa_set_search(ssa->uses, instr); - assert(entry); - _mesa_set_remove(ssa->uses, entry); - } - } else { - nir_register *reg = iter_src->reg.reg; - if (reg && nir_foreach_src(instr, src_does_not_use_reg, reg)) { - struct set_entry *entry = _mesa_set_search(reg->uses, instr); - assert(entry); - _mesa_set_remove(reg->uses, entry); - } - } - } +void +nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src) +{ + assert(!src_is_valid(dest) || dest->parent_instr == dest_instr); - for (nir_src *iter_src = &new_src; iter_src; - iter_src = iter_src->is_ssa ? NULL : iter_src->reg.indirect) { - if (iter_src->is_ssa) { - if (iter_src->ssa) - _mesa_set_add(iter_src->ssa->uses, instr); - } else { - if (iter_src->reg.reg) - _mesa_set_add(iter_src->reg.reg->uses, instr); - } - } + src_remove_all_uses(dest); + src_remove_all_uses(src); + *dest = *src; + *src = NIR_SRC_INIT; + src_add_all_uses(dest, dest_instr, NULL); +} + +void +nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src) +{ + nir_src *src = &if_stmt->condition; + assert(!src_is_valid(src) || src->parent_if == if_stmt); + + src_remove_all_uses(src); + *src = new_src; + src_add_all_uses(src, NULL, if_stmt); } void @@ -1900,10 +1892,8 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, { def->name = name; def->parent_instr = instr; - def->uses = _mesa_set_create(instr, _mesa_hash_pointer, - _mesa_key_pointer_equal); - def->if_uses = _mesa_set_create(instr, _mesa_hash_pointer, - _mesa_key_pointer_equal); + list_inithead(&def->uses); + list_inithead(&def->if_uses); def->num_components = num_components; if (instr->block) { @@ -1924,57 +1914,23 @@ nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, nir_ssa_def_init(instr, &dest->ssa, num_components, name); } -struct ssa_def_rewrite_state { - void *mem_ctx; - nir_ssa_def *old; - nir_src new_src; -}; - -static bool -ssa_def_rewrite_uses_src(nir_src *src, void *void_state) -{ - struct ssa_def_rewrite_state *state = void_state; - - if (src->is_ssa && src->ssa == state->old) - nir_src_copy(src, &state->new_src, state->mem_ctx); - - return true; -} - void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx) { - struct ssa_def_rewrite_state state; - state.mem_ctx = mem_ctx; - state.old = def; - state.new_src = new_src; - assert(!new_src.is_ssa || def != new_src.ssa); - struct set *new_uses, *new_if_uses; - if (new_src.is_ssa) { - new_uses = new_src.ssa->uses; - new_if_uses = new_src.ssa->if_uses; - } else { - new_uses = new_src.reg.reg->uses; - new_if_uses = new_src.reg.reg->if_uses; - } - - struct set_entry *entry; - set_foreach(def->uses, entry) { - nir_instr *instr = (nir_instr *)entry->key; - - _mesa_set_remove(def->uses, entry); - nir_foreach_src(instr, ssa_def_rewrite_uses_src, &state); - _mesa_set_add(new_uses, instr); + nir_foreach_use_safe(def, use_src) { + nir_instr *src_parent_instr = use_src->parent_instr; + list_del(&use_src->use_link); + nir_src_copy(use_src, &new_src, mem_ctx); + src_add_all_uses(use_src, src_parent_instr, NULL); } - set_foreach(def->if_uses, entry) { - nir_if *if_use = (nir_if *)entry->key; - - _mesa_set_remove(def->if_uses, entry); - nir_src_copy(&if_use->condition, &new_src, mem_ctx); - _mesa_set_add(new_if_uses, if_use); + nir_foreach_if_use_safe(def, use_src) { + nir_if *src_parent_if = use_src->parent_if; + list_del(&use_src->use_link); + nir_src_copy(use_src, &new_src, mem_ctx); + src_add_all_uses(use_src, NULL, src_parent_if); } } diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h index 98b0ec328..697d37e95 100644 --- a/mesalib/src/glsl/nir/nir.h +++ b/mesalib/src/glsl/nir/nir.h @@ -30,6 +30,7 @@ #include "util/hash_table.h" #include "../list.h" #include "GL/gl.h" /* GLenum */ +#include "util/list.h" #include "util/ralloc.h" #include "util/set.h" #include "util/bitset.h" @@ -397,13 +398,13 @@ typedef struct { struct nir_instr *parent_instr; /** set of nir_instr's where this register is used (read from) */ - struct set *uses; + struct list_head uses; /** set of nir_instr's where this register is defined (written to) */ - struct set *defs; + struct list_head defs; /** set of nir_if's where this register is used as a condition */ - struct set *if_uses; + struct list_head if_uses; } nir_register; typedef enum { @@ -462,10 +463,10 @@ typedef struct { nir_instr *parent_instr; /** set of nir_instr's where this register is used (read from) */ - struct set *uses; + struct list_head uses; /** set of nir_if's where this register is used as a condition */ - struct set *if_uses; + struct list_head if_uses; uint8_t num_components; } nir_ssa_def; @@ -481,6 +482,9 @@ typedef struct { } nir_reg_src; typedef struct { + nir_instr *parent_instr; + struct list_head def_link; + nir_register *reg; struct nir_src *indirect; /** < NULL for no indirect offset */ unsigned base_offset; @@ -488,8 +492,17 @@ typedef struct { /* TODO def-use chain goes here */ } nir_reg_dest; +struct nir_if; + typedef struct nir_src { union { + nir_instr *parent_instr; + struct nir_if *parent_if; + }; + + struct list_head use_link; + + union { nir_reg_src reg; nir_ssa_def *ssa; }; @@ -497,6 +510,20 @@ typedef struct nir_src { bool is_ssa; } nir_src; +#define NIR_SRC_INIT (nir_src) { { NULL } } + +#define nir_foreach_use(reg_or_ssa_def, src) \ + list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) + +#define nir_foreach_use_safe(reg_or_ssa_def, src) \ + list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) + +#define nir_foreach_if_use(reg_or_ssa_def, src) \ + list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) + +#define nir_foreach_if_use_safe(reg_or_ssa_def, src) \ + list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) + typedef struct { union { nir_reg_dest reg; @@ -506,10 +533,18 @@ typedef struct { bool is_ssa; } nir_dest; +#define NIR_DEST_INIT (nir_dest) { { { NULL } } } + +#define nir_foreach_def(reg, dest) \ + list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) + +#define nir_foreach_def_safe(reg, dest) \ + list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) + static inline nir_src nir_src_for_ssa(nir_ssa_def *def) { - nir_src src; + nir_src src = NIR_SRC_INIT; src.is_ssa = true; src.ssa = def; @@ -520,7 +555,7 @@ nir_src_for_ssa(nir_ssa_def *def) static inline nir_src nir_src_for_reg(nir_register *reg) { - nir_src src; + nir_src src = NIR_SRC_INIT; src.is_ssa = false; src.reg.reg = reg; @@ -543,12 +578,9 @@ nir_src_get_parent_instr(const nir_src *src) static inline nir_dest nir_dest_for_reg(nir_register *reg) { - nir_dest dest; + nir_dest dest = NIR_DEST_INIT; - dest.is_ssa = false; dest.reg.reg = reg; - dest.reg.indirect = NULL; - dest.reg.base_offset = 0; return dest; } @@ -1207,7 +1239,7 @@ nir_block_last_instr(nir_block *block) #define nir_foreach_instr_safe(block, instr) \ foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) -typedef struct { +typedef struct nir_if { nir_cf_node cf_node; nir_src condition; @@ -1548,6 +1580,8 @@ bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); nir_const_value *nir_src_as_const_value(nir_src src); bool nir_srcs_equal(nir_src src1, nir_src src2); void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); +void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); +void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, unsigned num_components, const char *name); diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h index d1419ee21..9223e8380 100644 --- a/mesalib/src/glsl/nir/nir_builder.h +++ b/mesalib/src/glsl/nir/nir_builder.h @@ -231,8 +231,7 @@ static inline nir_ssa_def * nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], unsigned num_components, bool use_fmov) { - nir_alu_src alu_src; - memset(&alu_src, 0, sizeof(alu_src)); + nir_alu_src alu_src = { NIR_SRC_INIT }; alu_src.src = nir_src_for_ssa(src); for (int i = 0; i < 4; i++) alu_src.swizzle[i] = swiz[i]; @@ -251,8 +250,7 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) if (src.is_ssa && src.ssa->num_components == num_components) return src.ssa; - nir_alu_src alu; - memset(&alu, 0, sizeof(alu)); + nir_alu_src alu = { NIR_SRC_INIT }; alu.src = src; for (int j = 0; j < 4; j++) alu.swizzle[j] = j; diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c index 6a3b141bd..67733e6da 100644 --- a/mesalib/src/glsl/nir/nir_from_ssa.c +++ b/mesalib/src/glsl/nir/nir_from_ssa.c @@ -37,7 +37,6 @@ struct from_ssa_state { void *mem_ctx; void *dead_ctx; - struct hash_table *ssa_table; struct hash_table *merge_node_table; nir_instr *instr; nir_function_impl *impl; @@ -344,45 +343,31 @@ isolate_phi_nodes_block(nir_block *block, void *void_state) get_parallel_copy_at_end_of_block(src->pred); assert(pcopy); - nir_parallel_copy_entry *entry = ralloc(state->dead_ctx, - nir_parallel_copy_entry); - exec_list_push_tail(&pcopy->entries, &entry->node); - - nir_src_copy(&entry->src, &src->src, state->dead_ctx); - _mesa_set_add(src->src.ssa->uses, &pcopy->instr); - + nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx, + nir_parallel_copy_entry); nir_ssa_dest_init(&pcopy->instr, &entry->dest, phi->dest.ssa.num_components, src->src.ssa->name); + exec_list_push_tail(&pcopy->entries, &entry->node); - struct set_entry *use_entry = - _mesa_set_search(src->src.ssa->uses, instr); - if (use_entry) - /* It is possible that a phi node can use the same source twice - * but for different basic blocks. If that happens, entry will - * be NULL because we already deleted it. This is safe - * because, by the time the loop is done, we will have deleted - * all of the sources of the phi from their respective use sets - * and moved them to the parallel copy definitions. - */ - _mesa_set_remove(src->src.ssa->uses, use_entry); + assert(src->src.is_ssa); + nir_instr_rewrite_src(&pcopy->instr, &entry->src, src->src); - src->src.ssa = &entry->dest.ssa; - _mesa_set_add(entry->dest.ssa.uses, instr); + nir_instr_rewrite_src(&phi->instr, &src->src, + nir_src_for_ssa(&entry->dest.ssa)); } - nir_parallel_copy_entry *entry = ralloc(state->dead_ctx, - nir_parallel_copy_entry); - exec_list_push_tail(&block_pcopy->entries, &entry->node); - + nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx, + nir_parallel_copy_entry); nir_ssa_dest_init(&block_pcopy->instr, &entry->dest, phi->dest.ssa.num_components, phi->dest.ssa.name); + exec_list_push_tail(&block_pcopy->entries, &entry->node); + nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(&entry->dest.ssa), state->mem_ctx); - entry->src.is_ssa = true; - entry->src.ssa = &phi->dest.ssa; - _mesa_set_add(phi->dest.ssa.uses, &block_pcopy->instr); + nir_instr_rewrite_src(&block_pcopy->instr, &entry->src, + nir_src_for_ssa(&phi->dest.ssa)); } return true; @@ -415,7 +400,7 @@ coalesce_phi_nodes_block(nir_block *block, void *void_state) } static void -agressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy, +aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy, struct from_ssa_state *state) { nir_foreach_parallel_copy_entry(pcopy, entry) { @@ -444,7 +429,7 @@ agressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy, } static bool -agressive_coalesce_block(nir_block *block, void *void_state) +aggressive_coalesce_block(nir_block *block, void *void_state) { struct from_ssa_state *state = void_state; @@ -457,7 +442,7 @@ agressive_coalesce_block(nir_block *block, void *void_state) start_pcopy = nir_instr_as_parallel_copy(instr); - agressive_coalesce_parallel_copy(start_pcopy, state); + aggressive_coalesce_parallel_copy(start_pcopy, state); break; } @@ -467,17 +452,21 @@ agressive_coalesce_block(nir_block *block, void *void_state) get_parallel_copy_at_end_of_block(block); if (end_pcopy && end_pcopy != start_pcopy) - agressive_coalesce_parallel_copy(end_pcopy, state); + aggressive_coalesce_parallel_copy(end_pcopy, state); return true; } -static nir_register * -get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state) +static bool +rewrite_ssa_def(nir_ssa_def *def, void *void_state) { + struct from_ssa_state *state = void_state; + nir_register *reg; + struct hash_entry *entry = _mesa_hash_table_search(state->merge_node_table, def); if (entry) { + /* In this case, we're part of a phi web. Use the web's register. */ merge_node *node = (merge_node *)entry->data; /* If it doesn't have a register yet, create one. Note that all of @@ -491,20 +480,15 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state) node->set->reg->num_array_elems = 0; } - return node->set->reg; - } - - entry = _mesa_hash_table_search(state->ssa_table, def); - if (entry) { - return (nir_register *)entry->data; + reg = node->set->reg; } else { /* We leave load_const SSA values alone. They act as immediates to * the backend. If it got coalesced into a phi, that's ok. */ if (def->parent_instr->type == nir_instr_type_load_const) - return NULL; + return true; - nir_register *reg = nir_local_reg_create(state->impl); + reg = nir_local_reg_create(state->impl); reg->name = def->name; reg->num_components = def->num_components; reg->num_array_elems = 0; @@ -516,57 +500,24 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state) */ if (def->parent_instr->type != nir_instr_type_ssa_undef) reg->parent_instr = def->parent_instr; - - _mesa_hash_table_insert(state->ssa_table, def, reg); - return reg; } -} - -static bool -rewrite_ssa_src(nir_src *src, void *void_state) -{ - struct from_ssa_state *state = void_state; - if (src->is_ssa) { - nir_register *reg = get_register_for_ssa_def(src->ssa, state); + nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg), state->mem_ctx); + assert(list_empty(&def->uses) && list_empty(&def->if_uses)); - if (reg == NULL) { - assert(src->ssa->parent_instr->type == nir_instr_type_load_const); - return true; - } - - memset(src, 0, sizeof *src); - src->reg.reg = reg; - - /* We don't need to remove it from the uses set because that is going - * away. We just need to add it to the one for the register. */ - _mesa_set_add(reg->uses, state->instr); - } - - return true; -} - -static bool -rewrite_ssa_dest(nir_dest *dest, void *void_state) -{ - struct from_ssa_state *state = void_state; - - if (dest->is_ssa) { - nir_register *reg = get_register_for_ssa_def(&dest->ssa, state); - - if (reg == NULL) { - assert(dest->ssa.parent_instr->type == nir_instr_type_load_const); - return true; - } + if (def->parent_instr->type == nir_instr_type_ssa_undef) + return true; - _mesa_set_destroy(dest->ssa.uses, NULL); - _mesa_set_destroy(dest->ssa.if_uses, NULL); + assert(def->parent_instr->type != nir_instr_type_load_const); - memset(dest, 0, sizeof *dest); - dest->reg.reg = reg; + /* At this point we know a priori that this SSA def is part of a + * nir_dest. We can use exec_node_data to get the dest pointer. + */ + nir_dest *dest = exec_node_data(nir_dest, def, ssa); - _mesa_set_add(reg->defs, state->instr); - } + *dest = nir_dest_for_reg(reg); + dest->reg.parent_instr = state->instr; + list_addtail(&dest->reg.def_link, ®->defs); return true; } @@ -581,8 +532,7 @@ resolve_registers_block(nir_block *block, void *void_state) nir_foreach_instr_safe(block, instr) { state->instr = instr; - nir_foreach_src(instr, rewrite_ssa_src, state); - nir_foreach_dest(instr, rewrite_ssa_dest, state); + nir_foreach_ssa_def(instr, rewrite_ssa_def, state); if (instr->type == nir_instr_type_ssa_undef || instr->type == nir_instr_type_phi) { @@ -592,23 +542,6 @@ resolve_registers_block(nir_block *block, void *void_state) } state->instr = NULL; - nir_if *following_if = nir_block_get_following_if(block); - if (following_if && following_if->condition.is_ssa) { - nir_register *reg = get_register_for_ssa_def(following_if->condition.ssa, - state); - if (reg) { - memset(&following_if->condition, 0, sizeof following_if->condition); - following_if->condition.reg.reg = reg; - - _mesa_set_add(reg->if_uses, following_if); - } else { - /* FIXME: We really shouldn't hit this. We should be doing - * constant control flow propagation. - */ - assert(following_if->condition.ssa->parent_instr->type == nir_instr_type_load_const); - } - } - return true; } @@ -853,10 +786,8 @@ nir_convert_from_ssa_impl(nir_function_impl *impl) nir_metadata_dominance); nir_foreach_block(impl, coalesce_phi_nodes_block, &state); - nir_foreach_block(impl, agressive_coalesce_block, &state); + nir_foreach_block(impl, aggressive_coalesce_block, &state); - state.ssa_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); nir_foreach_block(impl, resolve_registers_block, &state); nir_foreach_block(impl, resolve_parallel_copies_block, &state); @@ -865,7 +796,6 @@ nir_convert_from_ssa_impl(nir_function_impl *impl) nir_metadata_dominance); /* Clean up dead instructions and the hash tables */ - _mesa_hash_table_destroy(state.ssa_table, NULL); _mesa_hash_table_destroy(state.merge_node_table, NULL); ralloc_free(state.dead_ctx); } diff --git a/mesalib/src/glsl/nir/nir_intrinsics.h b/mesalib/src/glsl/nir/nir_intrinsics.h index 8e28765c1..10192c531 100644 --- a/mesalib/src/glsl/nir/nir_intrinsics.h +++ b/mesalib/src/glsl/nir/nir_intrinsics.h @@ -68,6 +68,13 @@ INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, #define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0) BARRIER(discard) + +/* + * Memory barrier with semantics analogous to the memoryBarrier() GLSL + * intrinsic. + */ +BARRIER(memory_barrier) + /** A conditional discard, with a single boolean source. */ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) @@ -89,6 +96,33 @@ ATOMIC(inc, 0) ATOMIC(dec, 0) ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE) +/* + * Image load, store and atomic intrinsics. + * + * All image intrinsics take an image target passed as a nir_variable. Image + * variables contain a number of memory and layout qualifiers that influence + * the semantics of the intrinsic. + * + * All image intrinsics take a four-coordinate vector and a sample index as + * first two sources, determining the location within the image that will be + * accessed by the intrinsic. Components not applicable to the image target + * in use are undefined. Image store takes an additional four-component + * argument with the value to be written, and image atomic operations take + * either one or two additional scalar arguments with the same meaning as in + * the ARB_shader_image_load_store specification. + */ +INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0) +INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0) + #define SYSTEM_VALUE(name, components) \ INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) diff --git a/mesalib/src/glsl/nir/nir_lower_atomics.c b/mesalib/src/glsl/nir/nir_lower_atomics.c index e82df0169..f6f89020f 100644 --- a/mesalib/src/glsl/nir/nir_lower_atomics.c +++ b/mesalib/src/glsl/nir/nir_lower_atomics.c @@ -78,7 +78,8 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) nir_deref_as_array(instr->variables[0]->deref.child); assert(deref_array->deref.child == NULL); - offset_const->value.u[0] += deref_array->base_offset; + offset_const->value.u[0] += + deref_array->base_offset * ATOMIC_COUNTER_SIZE; if (deref_array->deref_array_type == nir_deref_array_type_indirect) { nir_load_const_instr *atomic_counter_size = diff --git a/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c index bc6a3d320..28fdec50e 100644 --- a/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c +++ b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c @@ -269,18 +269,16 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) static nir_block * compute_reg_usedef_lca(nir_register *reg) { - struct set_entry *entry; nir_block *lca = NULL; - set_foreach(reg->defs, entry) - lca = nir_dominance_lca(lca, ((nir_instr *)entry->key)->block); + list_for_each_entry(nir_dest, def_dest, ®->defs, reg.def_link) + lca = nir_dominance_lca(lca, def_dest->reg.parent_instr->block); - set_foreach(reg->uses, entry) - lca = nir_dominance_lca(lca, ((nir_instr *)entry->key)->block); + list_for_each_entry(nir_src, use_src, ®->uses, use_link) + lca = nir_dominance_lca(lca, use_src->parent_instr->block); - set_foreach(reg->if_uses, entry) { - nir_if *if_stmt = (nir_if *)entry->key; - nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node); + list_for_each_entry(nir_src, use_src, ®->if_uses, use_link) { + nir_cf_node *prev_node = nir_cf_node_prev(&use_src->parent_if->cf_node); assert(prev_node->type == nir_cf_node_block); lca = nir_dominance_lca(lca, nir_cf_node_as_block(prev_node)); } diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp index cf8ab8325..7a0b0a09f 100644 --- a/mesalib/src/glsl/nir/nir_lower_samplers.cpp +++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp @@ -70,44 +70,45 @@ lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_progr case nir_deref_type_array: { nir_deref_array *deref_array = nir_deref_as_array(deref->child); + assert(deref_array->deref_array_type != nir_deref_array_type_wildcard); + + if (deref_array->deref.child) { + ralloc_asprintf_append(&name, "[%u]", + deref_array->deref_array_type == nir_deref_array_type_direct ? + deref_array->base_offset : 0); + } else { + assert(deref->child->type->base_type == GLSL_TYPE_SAMPLER); + instr->sampler_index = deref_array->base_offset; + } + /* XXX: We're assuming here that the indirect is the last array * thing we have. This should be ok for now as we don't support * arrays_of_arrays yet. */ - - instr->sampler_index *= glsl_get_length(deref->type); - switch (deref_array->deref_array_type) { - case nir_deref_array_type_direct: - instr->sampler_index += deref_array->base_offset; - if (deref_array->deref.child) - ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset); - break; - case nir_deref_array_type_indirect: { - instr->src = reralloc(instr, instr->src, nir_tex_src, - instr->num_srcs + 1); - memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src); + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + /* First, we have to resize the array of texture sources */ + nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, + instr->num_srcs + 1); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + new_srcs[i].src_type = instr->src[i].src_type; + nir_instr_move_src(&instr->instr, &new_srcs[i].src, + &instr->src[i].src); + } + + ralloc_free(instr->src); + instr->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; instr->num_srcs++; - - nir_instr_rewrite_src(&instr->instr, - &instr->src[instr->num_srcs - 1].src, - deref_array->indirect); + nir_instr_move_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + &deref_array->indirect); instr->sampler_array_size = glsl_get_length(deref->type); - - nir_src empty; - memset(&empty, 0, sizeof empty); - nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, empty); - - if (deref_array->deref.child) - ralloc_strcat(&name, "[0]"); - break; - } - - case nir_deref_array_type_wildcard: - unreachable("Cannot copy samplers"); - default: - unreachable("Invalid deref array type"); } break; } diff --git a/mesalib/src/glsl/nir/nir_lower_tex_projector.c b/mesalib/src/glsl/nir/nir_lower_tex_projector.c index 6b0e9c340..357131cd7 100644 --- a/mesalib/src/glsl/nir/nir_lower_tex_projector.c +++ b/mesalib/src/glsl/nir/nir_lower_tex_projector.c @@ -109,12 +109,12 @@ nir_lower_tex_projector_block(nir_block *block, void *void_state) /* Now move the later tex sources down the array so that the projector * disappears. */ - nir_src dead; - memset(&dead, 0, sizeof dead); - nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead); - memmove(&tex->src[proj_index], - &tex->src[proj_index + 1], - (tex->num_srcs - proj_index) * sizeof(*tex->src)); + nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, + NIR_SRC_INIT); + for (int i = proj_index + 1; i < tex->num_srcs; i++) { + tex->src[i-1].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src); + } tex->num_srcs--; } diff --git a/mesalib/src/glsl/nir/nir_lower_to_source_mods.c b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c index 7b4a0f657..94c7e36d4 100644 --- a/mesalib/src/glsl/nir/nir_lower_to_source_mods.c +++ b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c @@ -88,8 +88,8 @@ nir_lower_to_source_mods_block(nir_block *block, void *state) alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]]; } - if (parent->dest.dest.ssa.uses->entries == 0 && - parent->dest.dest.ssa.if_uses->entries == 0) + if (list_empty(&parent->dest.dest.ssa.uses) && + list_empty(&parent->dest.dest.ssa.if_uses)) nir_instr_remove(&parent->instr); } @@ -131,13 +131,13 @@ nir_lower_to_source_mods_block(nir_block *block, void *state) if (nir_op_infos[alu->op].output_type != nir_type_float) continue; - if (alu->dest.dest.ssa.if_uses->entries != 0) + if (!list_empty(&alu->dest.dest.ssa.if_uses)) continue; bool all_children_are_sat = true; - struct set_entry *entry; - set_foreach(alu->dest.dest.ssa.uses, entry) { - const nir_instr *child = entry->key; + nir_foreach_use(&alu->dest.dest.ssa, child_src) { + assert(child_src->is_ssa); + nir_instr *child = child_src->parent_instr; if (child->type != nir_instr_type_alu) { all_children_are_sat = false; continue; @@ -161,8 +161,12 @@ nir_lower_to_source_mods_block(nir_block *block, void *state) alu->dest.saturate = true; - set_foreach(alu->dest.dest.ssa.uses, entry) { - nir_alu_instr *child_alu = nir_instr_as_alu((nir_instr *)entry->key); + nir_foreach_use(&alu->dest.dest.ssa, child_src) { + assert(child_src->is_ssa); + nir_instr *child = child_src->parent_instr; + assert(child->type == nir_instr_type_alu); + nir_alu_instr *child_alu = nir_instr_as_alu(child); + child_alu->op = nir_op_fmov; child_alu->dest.saturate = false; /* We could propagate the dest of our instruction to the diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c index bb60f4601..ccb8f99df 100644 --- a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -567,10 +567,11 @@ add_phi_sources(nir_block *block, nir_block *pred, nir_phi_src *src = ralloc(phi, nir_phi_src); src->pred = pred; + src->src.parent_instr = &phi->instr; src->src.is_ssa = true; src->src.ssa = get_ssa_def_for_block(node, pred, state); - _mesa_set_add(src->src.ssa->uses, instr); + list_addtail(&src->src.use_link, &src->src.ssa->uses); exec_list_push_tail(&phi->srcs, &src->node); } diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py index 264806f5d..56e96d912 100644 --- a/mesalib/src/glsl/nir/nir_opcodes.py +++ b/mesalib/src/glsl/nir/nir_opcodes.py @@ -153,8 +153,6 @@ unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)") unop("frcp", tfloat, "1.0f / src0") unop("frsq", tfloat, "1.0f / sqrtf(src0)") unop("fsqrt", tfloat, "sqrtf(src0)") -unop("fexp", tfloat, "expf(src0)") # < e^x -unop("flog", tfloat, "logf(src0)") # log base e unop("fexp2", tfloat, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py index 2a2b9561e..fa039222f 100644 --- a/mesalib/src/glsl/nir/nir_opt_algebraic.py +++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py @@ -83,24 +83,37 @@ optimizations = [ # Comparison simplifications (('inot', ('flt', a, b)), ('fge', a, b)), (('inot', ('fge', a, b)), ('flt', a, b)), + (('inot', ('feq', a, b)), ('fne', a, b)), + (('inot', ('fne', a, b)), ('feq', a, b)), (('inot', ('ilt', a, b)), ('ige', a, b)), (('inot', ('ige', a, b)), ('ilt', a, b)), + (('inot', ('ieq', a, b)), ('ine', a, b)), + (('inot', ('ine', a, b)), ('ieq', a, b)), (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)), (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)), (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)), (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)), (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)), + (('fmin', a, a), a), + (('fmax', a, a), a), + (('imin', a, a), a), + (('imax', a, a), a), + (('umin', a, a), a), + (('umax', a, a), a), (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'), (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), (('fsat', ('fsat', a)), ('fsat', a)), (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)), (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))), + (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)), (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))), + (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)), (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'), (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'), (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'), (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'), # Emulating booleans + (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))), (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))), (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), (('iand', 'a@bool', 1.0), ('b2f', a)), @@ -136,36 +149,23 @@ optimizations = [ (('ushr', a, 0), a), # Exponential/logarithmic identities (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a - (('fexp', ('flog', a)), a), # e^ln(a) = a (('flog2', ('fexp2', a)), a), # lg2(2^a) = a - (('flog', ('fexp', a)), a), # ln(e^a) = a (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b) (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b - (('fexp', ('fmul', ('flog', a), b)), ('fpow', a, b), '!options->lower_fpow'), # e^(ln(a)*b) = a^b (('fpow', a, 1.0), a), (('fpow', a, 2.0), ('fmul', a, a)), (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), (('fpow', 2.0, a), ('fexp2', a)), (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), - (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))), (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), - (('frcp', ('fexp', a)), ('fexp', ('fneg', a))), (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), - (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))), (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))), - (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))), (('flog2', ('frcp', a)), ('fneg', ('flog2', a))), - (('flog', ('frcp', a)), ('fneg', ('flog', a))), (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), - (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))), (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), - (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))), (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), - (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))), (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), - (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))), (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), - (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))), # Division and reciprocal (('fdiv', 1.0, a), ('frcp', a)), (('frcp', ('frcp', a)), a), @@ -187,6 +187,7 @@ optimizations = [ (('fcsel', a, b, b), b), # Conversions + (('i2b', ('b2i', a)), a), (('f2i', ('ftrunc', a)), ('f2i', a)), (('f2u', ('ftrunc', a)), ('f2u', a)), diff --git a/mesalib/src/glsl/nir/nir_opt_copy_propagate.c b/mesalib/src/glsl/nir/nir_opt_copy_propagate.c index ee78e5aa0..71367d001 100644 --- a/mesalib/src/glsl/nir/nir_opt_copy_propagate.c +++ b/mesalib/src/glsl/nir/nir_opt_copy_propagate.c @@ -93,62 +93,6 @@ is_swizzleless_move(nir_alu_instr *instr) } } -typedef struct { - nir_ssa_def *def; - bool found; -} search_def_state; - -static bool -search_def(nir_src *src, void *_state) -{ - search_def_state *state = (search_def_state *) _state; - - if (src->is_ssa && src->ssa == state->def) - state->found = true; - - return true; -} - -static void -rewrite_src_instr(nir_src *src, nir_ssa_def *new_def, nir_instr *parent_instr) -{ - nir_ssa_def *old_def = src->ssa; - - src->ssa = new_def; - - /* - * The instruction could still use the old definition in one of its other - * sources, so only remove the instruction from the uses if there are no - * more uses left. - */ - - search_def_state search_state; - search_state.def = old_def; - search_state.found = false; - nir_foreach_src(parent_instr, search_def, &search_state); - if (!search_state.found) { - struct set_entry *entry = _mesa_set_search(old_def->uses, parent_instr); - assert(entry); - _mesa_set_remove(old_def->uses, entry); - } - - _mesa_set_add(new_def->uses, parent_instr); -} - -static void -rewrite_src_if(nir_if *if_stmt, nir_ssa_def *new_def) -{ - nir_ssa_def *old_def = if_stmt->condition.ssa; - - if_stmt->condition.ssa = new_def; - - struct set_entry *entry = _mesa_set_search(old_def->if_uses, if_stmt); - assert(entry); - _mesa_set_remove(old_def->if_uses, entry); - - _mesa_set_add(new_def->if_uses, if_stmt); -} - static bool copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) { @@ -178,10 +122,14 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) return false; } - if (parent_instr) - rewrite_src_instr(src, alu_instr->src[0].src.ssa, parent_instr); - else - rewrite_src_if(parent_if, alu_instr->src[0].src.ssa); + if (parent_instr) { + nir_instr_rewrite_src(parent_instr, src, + nir_src_for_ssa(alu_instr->src[0].src.ssa)); + } else { + assert(src == &parent_if->condition); + nir_if_rewrite_condition(parent_if, + nir_src_for_ssa(alu_instr->src[0].src.ssa)); + } return true; } @@ -234,7 +182,8 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) for (unsigned i = 0; i < 4; i++) src->swizzle[i] = new_swizzle[i]; - rewrite_src_instr(&src->src, def, &parent_alu_instr->instr); + nir_instr_rewrite_src(&parent_alu_instr->instr, &src->src, + nir_src_for_ssa(def)); return true; } diff --git a/mesalib/src/glsl/nir/nir_opt_gcm.c b/mesalib/src/glsl/nir/nir_opt_gcm.c index b4f5fd3d5..44068bf37 100644 --- a/mesalib/src/glsl/nir/nir_opt_gcm.c +++ b/mesalib/src/glsl/nir/nir_opt_gcm.c @@ -279,9 +279,8 @@ gcm_schedule_late_def(nir_ssa_def *def, void *void_state) nir_block *lca = NULL; - struct set_entry *entry; - set_foreach(def->uses, entry) { - nir_instr *use_instr = (nir_instr *)entry->key; + nir_foreach_use(def, use_src) { + nir_instr *use_instr = use_src->parent_instr; gcm_schedule_late_instr(use_instr, state); @@ -304,8 +303,8 @@ gcm_schedule_late_def(nir_ssa_def *def, void *void_state) } } - set_foreach(def->if_uses, entry) { - nir_if *if_stmt = (nir_if *)entry->key; + nir_foreach_if_use(def, use_src) { + nir_if *if_stmt = use_src->parent_if; /* For if statements, we consider the block to be the one immediately * preceding the if CF node. @@ -377,9 +376,8 @@ gcm_place_instr(nir_instr *instr, struct gcm_state *state); static bool gcm_place_instr_def(nir_ssa_def *def, void *state) { - struct set_entry *entry; - set_foreach(def->uses, entry) - gcm_place_instr((nir_instr *)entry->key, state); + nir_foreach_use(def, use_src) + gcm_place_instr(use_src->parent_instr, state); return false; } diff --git a/mesalib/src/glsl/nir/nir_opt_global_to_local.c b/mesalib/src/glsl/nir/nir_opt_global_to_local.c index 00db37ba7..bccb45b62 100644 --- a/mesalib/src/glsl/nir/nir_opt_global_to_local.c +++ b/mesalib/src/glsl/nir/nir_opt_global_to_local.c @@ -34,9 +34,8 @@ global_to_local(nir_register *reg) assert(reg->is_global); - struct set_entry *entry; - set_foreach(reg->defs, entry) { - nir_instr *instr = (nir_instr *) entry->key; + nir_foreach_def(reg, def_dest) { + nir_instr *instr = def_dest->reg.parent_instr; nir_function_impl *instr_impl = nir_cf_node_get_function(&instr->block->cf_node); if (impl != NULL) { @@ -47,8 +46,8 @@ global_to_local(nir_register *reg) } } - set_foreach(reg->uses, entry) { - nir_instr *instr = (nir_instr *) entry->key; + nir_foreach_use(reg, use_src) { + nir_instr *instr = use_src->parent_instr; nir_function_impl *instr_impl = nir_cf_node_get_function(&instr->block->cf_node); if (impl != NULL) { @@ -59,8 +58,8 @@ global_to_local(nir_register *reg) } } - set_foreach(reg->if_uses, entry) { - nir_if *if_stmt = (nir_if *) entry->key; + nir_foreach_if_use(reg, use_src) { + nir_if *if_stmt = use_src->parent_if; nir_function_impl *if_impl = nir_cf_node_get_function(&if_stmt->cf_node); if (impl != NULL) { if (impl != if_impl) diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c index 9d5646fe6..b430eac8e 100644 --- a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c +++ b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c @@ -41,12 +41,11 @@ struct peephole_ffma_state { static inline bool are_all_uses_fadd(nir_ssa_def *def) { - if (def->if_uses->entries > 0) + if (!list_empty(&def->if_uses)) return false; - struct set_entry *use_iter; - set_foreach(def->uses, use_iter) { - nir_instr *use_instr = (nir_instr *)use_iter->key; + nir_foreach_use(def, use_src) { + nir_instr *use_instr = use_src->parent_instr; if (use_instr->type != nir_instr_type_alu) return false; @@ -220,7 +219,7 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state) state->mem_ctx); nir_instr_insert_before(&add->instr, &ffma->instr); - assert(add->dest.dest.ssa.uses->entries == 0); + assert(list_empty(&add->dest.dest.ssa.uses)); nir_instr_remove(&add->instr); state->progress = true; diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c index f400cfd66..82c65bb44 100644 --- a/mesalib/src/glsl/nir/nir_opt_peephole_select.c +++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c @@ -98,15 +98,13 @@ block_check_for_allowed_instrs(nir_block *block) return false; /* It cannot have any if-uses */ - if (mov->dest.dest.ssa.if_uses->entries != 0) + if (!list_empty(&mov->dest.dest.ssa.if_uses)) return false; /* The only uses of this definition must be phi's in the successor */ - struct set_entry *entry; - set_foreach(mov->dest.dest.ssa.uses, entry) { - const nir_instr *dest_instr = entry->key; - if (dest_instr->type != nir_instr_type_phi || - dest_instr->block != block->successors[0]) + nir_foreach_use(&mov->dest.dest.ssa, use) { + if (use->parent_instr->type != nir_instr_type_phi || + use->parent_instr->block != block->successors[0]) return false; } break; diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c index 5ba016085..0c4e48ce9 100644 --- a/mesalib/src/glsl/nir/nir_search.c +++ b/mesalib/src/glsl/nir/nir_search.c @@ -73,6 +73,14 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, { uint8_t new_swizzle[4]; + /* If the source is an explicitly sized source, then we need to reset + * both the number of components and the swizzle. + */ + if (nir_op_infos[instr->op].input_sizes[src] != 0) { + num_components = nir_op_infos[instr->op].input_sizes[src]; + swizzle = identity_swizzle; + } + for (int i = 0; i < num_components; ++i) new_swizzle[i] = instr->src[src].swizzle[swizzle[i]]; @@ -90,6 +98,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, case nir_search_value_variable: { nir_search_variable *var = nir_search_value_as_variable(value); + assert(var->variable < NIR_SEARCH_MAX_VARIABLES); if (state->variables_seen & (1 << var->variable)) { if (!nir_srcs_equal(state->variables[var->variable].src, @@ -198,16 +207,13 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, } } + /* Stash off the current variables_seen bitmask. This way we can + * restore it prior to matching in the commutative case below. + */ + unsigned variables_seen_stash = state->variables_seen; + bool matched = true; for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { - /* If the source is an explicitly sized source, then we need to reset - * both the number of components and the swizzle. - */ - if (nir_op_infos[instr->op].input_sizes[i] != 0) { - num_components = nir_op_infos[instr->op].input_sizes[i]; - swizzle = identity_swizzle; - } - if (!match_value(expr->srcs[i], instr, i, num_components, swizzle, state)) { matched = false; @@ -220,6 +226,13 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { assert(nir_op_infos[instr->op].num_inputs == 2); + + /* Restore the variables_seen bitmask. If we don't do this, then we + * could end up with an erroneous failure due to variables found in the + * first match attempt above not matching those in the second. + */ + state->variables_seen = variables_seen_stash; + if (!match_value(expr->srcs[0], instr, 1, num_components, swizzle, state)) return false; @@ -276,7 +289,7 @@ construct_value(const nir_search_value *value, nir_alu_type type, const nir_search_variable *var = nir_search_value_as_variable(value); assert(state->variables_seen & (1 << var->variable)); - nir_alu_src val; + nir_alu_src val = { NIR_SRC_INIT }; nir_alu_src_copy(&val, &state->variables[var->variable], mem_ctx); assert(!var->is_constant); diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c index 53ff54766..a3c35fa04 100644 --- a/mesalib/src/glsl/nir/nir_to_ssa.c +++ b/mesalib/src/glsl/nir/nir_to_ssa.c @@ -89,9 +89,8 @@ insert_phi_nodes(nir_function_impl *impl) w_start = w_end = 0; iter_count++; - struct set_entry *entry; - set_foreach(reg->defs, entry) { - nir_instr *def = (nir_instr *) entry->key; + nir_foreach_def(reg, dest) { + nir_instr *def = dest->reg.parent_instr; if (work[def->block->index] < iter_count) W[w_end++] = def->block; work[def->block->index] = iter_count; @@ -99,6 +98,7 @@ insert_phi_nodes(nir_function_impl *impl) while (w_start != w_end) { nir_block *cur = W[w_start++]; + struct set_entry *entry; set_foreach(cur->dom_frontier, entry) { nir_block *next = (nir_block *) entry->key; @@ -190,13 +190,12 @@ rewrite_use(nir_src *src, void *_state) if (state->states[index].stack == NULL) return true; - src->is_ssa = true; - src->ssa = get_ssa_src(src->reg.reg, state); - + nir_ssa_def *def = get_ssa_src(src->reg.reg, state); if (state->parent_instr) - _mesa_set_add(src->ssa->uses, state->parent_instr); + nir_instr_rewrite_src(state->parent_instr, src, nir_src_for_ssa(def)); else - _mesa_set_add(src->ssa->if_uses, state->parent_if); + nir_if_rewrite_condition(state->parent_if, nir_src_for_ssa(def)); + return true; } @@ -219,6 +218,7 @@ rewrite_def_forwards(nir_dest *dest, void *_state) name = ralloc_asprintf(state->mem_ctx, "%s_%u", dest->reg.reg->name, state->states[index].num_defs); + list_del(&dest->reg.def_link); nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name); /* push our SSA destination on the stack */ @@ -270,6 +270,7 @@ rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state) reg->name, state->states[index].num_defs); instr->dest.write_mask = (1 << num_components) - 1; + list_del(&instr->dest.dest.reg.def_link); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name); if (nir_op_infos[instr->op].output_size == 0) { @@ -484,7 +485,7 @@ init_rewrite_state(nir_function_impl *impl, rewrite_state *state) * called after phi nodes are inserted so we can count phi node * definitions too. */ - unsigned stack_size = reg->defs->entries; + unsigned stack_size = list_length(®->defs); state->states[reg->index].stack = ralloc_array(state->states, nir_ssa_def *, diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c index a7aa79837..da92ed904 100644 --- a/mesalib/src/glsl/nir/nir_validate.c +++ b/mesalib/src/glsl/nir/nir_validate.c @@ -97,50 +97,47 @@ typedef struct { static void validate_src(nir_src *src, validate_state *state); static void -validate_reg_src(nir_reg_src *src, validate_state *state) +validate_reg_src(nir_src *src, validate_state *state) { - assert(src->reg != NULL); + assert(src->reg.reg != NULL); struct hash_entry *entry; - entry = _mesa_hash_table_search(state->regs, src->reg); + entry = _mesa_hash_table_search(state->regs, src->reg.reg); assert(entry); reg_validate_state *reg_state = (reg_validate_state *) entry->data; if (state->instr) { - _mesa_set_add(reg_state->uses, state->instr); - - assert(_mesa_set_search(src->reg->uses, state->instr)); + _mesa_set_add(reg_state->uses, src); } else { assert(state->if_stmt); - _mesa_set_add(reg_state->if_uses, state->if_stmt); - - assert(_mesa_set_search(src->reg->if_uses, state->if_stmt)); + _mesa_set_add(reg_state->if_uses, src); } - if (!src->reg->is_global) { + if (!src->reg.reg->is_global) { assert(reg_state->where_defined == state->impl && "using a register declared in a different function"); } - assert((src->reg->num_array_elems == 0 || - src->base_offset < src->reg->num_array_elems) && + assert((src->reg.reg->num_array_elems == 0 || + src->reg.base_offset < src->reg.reg->num_array_elems) && "definitely out-of-bounds array access"); - if (src->indirect) { - assert(src->reg->num_array_elems != 0); - assert((src->indirect->is_ssa || src->indirect->reg.indirect == NULL) && + if (src->reg.indirect) { + assert(src->reg.reg->num_array_elems != 0); + assert((src->reg.indirect->is_ssa || + src->reg.indirect->reg.indirect == NULL) && "only one level of indirection allowed"); - validate_src(src->indirect, state); + validate_src(src->reg.indirect, state); } } static void -validate_ssa_src(nir_ssa_def *def, validate_state *state) +validate_ssa_src(nir_src *src, validate_state *state) { - assert(def != NULL); + assert(src->ssa != NULL); - struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def); + struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, src->ssa); assert(entry); @@ -150,14 +147,10 @@ validate_ssa_src(nir_ssa_def *def, validate_state *state) "using an SSA value defined in a different function"); if (state->instr) { - _mesa_set_add(def_state->uses, state->instr); - - assert(_mesa_set_search(def->uses, state->instr)); + _mesa_set_add(def_state->uses, src); } else { assert(state->if_stmt); - _mesa_set_add(def_state->if_uses, state->if_stmt); - - assert(_mesa_set_search(def->if_uses, state->if_stmt)); + _mesa_set_add(def_state->if_uses, src); } /* TODO validate that the use is dominated by the definition */ @@ -166,10 +159,15 @@ validate_ssa_src(nir_ssa_def *def, validate_state *state) static void validate_src(nir_src *src, validate_state *state) { + if (state->instr) + assert(src->parent_instr == state->instr); + else + assert(src->parent_if == state->if_stmt); + if (src->is_ssa) - validate_ssa_src(src->ssa, state); + validate_ssa_src(src, state); else - validate_reg_src(&src->reg, state); + validate_reg_src(src, state); } static void @@ -201,8 +199,7 @@ validate_reg_dest(nir_reg_dest *dest, validate_state *state) { assert(dest->reg != NULL); - struct set_entry *entry = _mesa_set_search(dest->reg->defs, state->instr); - assert(entry && "definition not in nir_register.defs"); + assert(dest->parent_instr == state->instr); struct hash_entry *entry2; entry2 = _mesa_hash_table_search(state->regs, dest->reg); @@ -210,7 +207,7 @@ validate_reg_dest(nir_reg_dest *dest, validate_state *state) assert(entry2); reg_validate_state *reg_state = (reg_validate_state *) entry2->data; - _mesa_set_add(reg_state->defs, state->instr); + _mesa_set_add(reg_state->defs, dest); if (!dest->reg->is_global) { assert(reg_state->where_defined == state->impl && @@ -236,8 +233,13 @@ validate_ssa_def(nir_ssa_def *def, validate_state *state) assert(!BITSET_TEST(state->ssa_defs_found, def->index)); BITSET_SET(state->ssa_defs_found, def->index); + assert(def->parent_instr == state->instr); + assert(def->num_components <= 4); + list_validate(&def->uses); + list_validate(&def->if_uses); + ssa_def_validate_state *def_state = ralloc(state->ssa_defs, ssa_def_validate_state); def_state->where_defined = state->impl; @@ -699,6 +701,10 @@ prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state) assert(!BITSET_TEST(state->regs_found, reg->index)); BITSET_SET(state->regs_found, reg->index); + list_validate(®->uses); + list_validate(®->defs); + list_validate(®->if_uses); + reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state); reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -719,47 +725,47 @@ postvalidate_reg_decl(nir_register *reg, validate_state *state) reg_validate_state *reg_state = (reg_validate_state *) entry->data; - if (reg_state->uses->entries != reg->uses->entries) { + nir_foreach_use(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->uses, src); + assert(entry); + _mesa_set_remove(reg_state->uses, entry); + } + + if (reg_state->uses->entries != 0) { printf("extra entries in register uses:\n"); struct set_entry *entry; - set_foreach(reg->uses, entry) { - struct set_entry *entry2 = - _mesa_set_search(reg_state->uses, entry->key); - - if (entry2 == NULL) { - printf("%p\n", entry->key); - } - } + set_foreach(reg_state->uses, entry) + printf("%p\n", entry->key); abort(); } - if (reg_state->if_uses->entries != reg->if_uses->entries) { + nir_foreach_if_use(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->if_uses, src); + assert(entry); + _mesa_set_remove(reg_state->if_uses, entry); + } + + if (reg_state->if_uses->entries != 0) { printf("extra entries in register if_uses:\n"); struct set_entry *entry; - set_foreach(reg->if_uses, entry) { - struct set_entry *entry2 = - _mesa_set_search(reg_state->if_uses, entry->key); - - if (entry2 == NULL) { - printf("%p\n", entry->key); - } - } + set_foreach(reg_state->if_uses, entry) + printf("%p\n", entry->key); abort(); } - if (reg_state->defs->entries != reg->defs->entries) { + nir_foreach_def(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->defs, src); + assert(entry); + _mesa_set_remove(reg_state->defs, entry); + } + + if (reg_state->defs->entries != 0) { printf("extra entries in register defs:\n"); struct set_entry *entry; - set_foreach(reg->defs, entry) { - struct set_entry *entry2 = - _mesa_set_search(reg_state->defs, entry->key); - - if (entry2 == NULL) { - printf("%p\n", entry->key); - } - } + set_foreach(reg_state->defs, entry) + printf("%p\n", entry->key); abort(); } @@ -788,32 +794,32 @@ postvalidate_ssa_def(nir_ssa_def *def, void *void_state) struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def); ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; - if (def_state->uses->entries != def->uses->entries) { - printf("extra entries in SSA def uses:\n"); - struct set_entry *entry; - set_foreach(def->uses, entry) { - struct set_entry *entry2 = - _mesa_set_search(def_state->uses, entry->key); + nir_foreach_use(def, src) { + struct set_entry *entry = _mesa_set_search(def_state->uses, src); + assert(entry); + _mesa_set_remove(def_state->uses, entry); + } - if (entry2 == NULL) { - printf("%p\n", entry->key); - } - } + if (def_state->uses->entries != 0) { + printf("extra entries in register uses:\n"); + struct set_entry *entry; + set_foreach(def_state->uses, entry) + printf("%p\n", entry->key); abort(); } - if (def_state->if_uses->entries != def->if_uses->entries) { - printf("extra entries in SSA def uses:\n"); - struct set_entry *entry; - set_foreach(def->if_uses, entry) { - struct set_entry *entry2 = - _mesa_set_search(def_state->if_uses, entry->key); + nir_foreach_if_use(def, src) { + struct set_entry *entry = _mesa_set_search(def_state->if_uses, src); + assert(entry); + _mesa_set_remove(def_state->if_uses, entry); + } - if (entry2 == NULL) { - printf("%p\n", entry->key); - } - } + if (def_state->if_uses->entries != 0) { + printf("extra entries in register uses:\n"); + struct set_entry *entry; + set_foreach(def_state->if_uses, entry) + printf("%p\n", entry->key); abort(); } diff --git a/mesalib/src/glsl/opt_dead_builtin_varyings.cpp b/mesalib/src/glsl/opt_dead_builtin_varyings.cpp index 92f20c71d..31719d20c 100644 --- a/mesalib/src/glsl/opt_dead_builtin_varyings.cpp +++ b/mesalib/src/glsl/opt_dead_builtin_varyings.cpp @@ -99,6 +99,16 @@ public: } else { this->fragdata_usage |= 1 << index->get_uint_component(0); + /* Don't lower fragdata array if the output variable + * is not a float variable (or float vector) because it will + * generate wrong register assignments because of different + * data types. + */ + if (var->type->gl_type != GL_FLOAT && + var->type->gl_type != GL_FLOAT_VEC2 && + var->type->gl_type != GL_FLOAT_VEC3 && + var->type->gl_type != GL_FLOAT_VEC4) + this->lower_fragdata_array = false; } /* Don't visit the leaves of ir_dereference_array. */ |