diff options
Diffstat (limited to 'mesalib/src/glsl')
100 files changed, 21448 insertions, 669 deletions
diff --git a/mesalib/src/glsl/Android.mk b/mesalib/src/glsl/Android.mk index 1cbc5c6d2..38c2087a4 100644 --- a/mesalib/src/glsl/Android.mk +++ b/mesalib/src/glsl/Android.mk @@ -27,7 +27,6 @@ LOCAL_PATH := $(call my-dir) include $(LOCAL_PATH)/Makefile.sources -GLSL_SRCDIR = . # --------------------------------------- # Build libmesa_glsl # --------------------------------------- diff --git a/mesalib/src/glsl/Makefile.am b/mesalib/src/glsl/Makefile.am index 9a3131738..5a0a643da 100644 --- a/mesalib/src/glsl/Makefile.am +++ b/mesalib/src/glsl/Makefile.am @@ -19,13 +19,19 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. +AUTOMAKE_OPTIONS = subdir-objects + AM_CPPFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa/ \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/glsl/glcpp \ + -I$(top_srcdir)/src/glsl/nir \ -I$(top_srcdir)/src/gtest/include \ + -I$(top_builddir)/src/glsl/nir \ $(DEFINES) AM_CFLAGS = $(VISIBILITY_CFLAGS) AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) @@ -35,12 +41,19 @@ EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \ glsl_parser.yy \ glcpp/glcpp-lex.l \ glcpp/glcpp-parse.y \ + nir/nir_algebraic.py \ + nir/nir_constant_expressions.py \ + nir/nir_opcodes.py \ + nir/nir_opcodes_c.py \ + nir/nir_opcodes_h.py \ + nir/nir_opt_algebraic.py \ SConscript include Makefile.sources TESTS = glcpp/tests/glcpp-test \ glcpp/tests/glcpp-test-cr-lf \ + tests/blob-test \ tests/general-ir-test \ tests/optimization-test \ tests/sampler-types-test \ @@ -54,17 +67,20 @@ noinst_LTLIBRARIES = libglsl.la libglcpp.la check_PROGRAMS = \ glcpp/glcpp \ glsl_test \ + tests/blob-test \ tests/general-ir-test \ tests/sampler-types-test \ tests/uniform-initializer-test noinst_PROGRAMS = glsl_compiler +tests_blob_test_SOURCES = \ + tests/blob_test.c +tests_blob_test_LDADD = \ + $(top_builddir)/src/glsl/libglsl.la + 
tests_general_ir_test_SOURCES = \ - $(top_srcdir)/src/mesa/main/imports.c \ - $(top_srcdir)/src/mesa/program/prog_hash_table.c\ - $(top_srcdir)/src/mesa/program/symbol_table.c \ - $(GLSL_SRCDIR)/standalone_scaffolding.cpp \ + standalone_scaffolding.cpp \ tests/builtin_variable_test.cpp \ tests/invalidate_locations_test.cpp \ tests/general_ir_test.cpp \ @@ -75,12 +91,10 @@ tests_general_ir_test_CFLAGS = \ tests_general_ir_test_LDADD = \ $(top_builddir)/src/gtest/libgtest.la \ $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ $(PTHREAD_LIBS) tests_uniform_initializer_test_SOURCES = \ - $(top_srcdir)/src/mesa/main/imports.c \ - $(top_srcdir)/src/mesa/program/prog_hash_table.c\ - $(top_srcdir)/src/mesa/program/symbol_table.c \ tests/copy_constant_to_storage_tests.cpp \ tests/set_uniform_initializer_tests.cpp \ tests/uniform_initializer_utils.cpp \ @@ -91,11 +105,10 @@ tests_uniform_initializer_test_CFLAGS = \ tests_uniform_initializer_test_LDADD = \ $(top_builddir)/src/gtest/libgtest.la \ $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ $(PTHREAD_LIBS) tests_sampler_types_test_SOURCES = \ - $(top_srcdir)/src/mesa/program/prog_hash_table.c\ - $(top_srcdir)/src/mesa/program/symbol_table.c \ tests/sampler_types_test.cpp \ tests/common.c tests_sampler_types_test_CFLAGS = \ @@ -103,6 +116,7 @@ tests_sampler_types_test_CFLAGS = \ tests_sampler_types_test_LDADD = \ $(top_builddir)/src/gtest/libgtest.la \ $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ $(PTHREAD_LIBS) libglcpp_la_LIBADD = \ @@ -115,10 +129,10 @@ libglcpp_la_SOURCES = \ glcpp_glcpp_SOURCES = \ glcpp/glcpp.c \ - tests/common.c \ - $(top_srcdir)/src/mesa/program/prog_hash_table.c + tests/common.c glcpp_glcpp_LDADD = \ libglcpp.la \ + $(top_builddir)/src/libglsl_util.la \ -lm libglsl_la_LIBADD = libglcpp.la @@ -126,29 +140,28 @@ libglsl_la_SOURCES = \ glsl_lexer.cpp \ glsl_parser.cpp \ glsl_parser.h \ - $(LIBGLSL_FILES) + 
$(LIBGLSL_FILES) \ + $(NIR_FILES) glsl_compiler_SOURCES = \ - $(top_srcdir)/src/mesa/main/imports.c \ - $(top_srcdir)/src/mesa/program/prog_hash_table.c \ - $(top_srcdir)/src/mesa/program/symbol_table.c \ $(GLSL_COMPILER_CXX_FILES) glsl_compiler_LDADD = \ libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ $(PTHREAD_LIBS) glsl_test_SOURCES = \ - $(top_srcdir)/src/mesa/main/imports.c \ - $(top_srcdir)/src/mesa/program/prog_hash_table.c \ - $(top_srcdir)/src/mesa/program/symbol_table.c \ - $(GLSL_SRCDIR)/standalone_scaffolding.cpp \ + standalone_scaffolding.cpp \ tests/common.c \ test.cpp \ test_optpass.cpp \ test_optpass.h -glsl_test_LDADD = libglsl.la +glsl_test_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) # We write our own rules for yacc and lex below. We'd rather use automake, # but automake makes it especially difficult for a number of reasons: @@ -181,14 +194,14 @@ am__v_YACC_0 = @echo " YACC " $@; am__v_YACC_1 = glsl_parser.cpp glsl_parser.h: glsl_parser.yy - $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(GLSL_BUILDDIR)/glsl_parser.h $< + $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $< glsl_lexer.cpp: glsl_lexer.ll $(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $< glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y $(AM_V_at)$(MKDIR_P) glcpp - $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(GLSL_BUILDDIR)/glcpp/glcpp-parse.h $< + $(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $< glcpp/glcpp-lex.c: glcpp/glcpp-lex.l $(AM_V_at)$(MKDIR_P) glcpp @@ -204,7 +217,12 @@ BUILT_SOURCES = \ glsl_parser.cpp \ glsl_lexer.cpp \ glcpp/glcpp-parse.c \ - glcpp/glcpp-lex.c + glcpp/glcpp-lex.c \ + nir/nir_builder_opcodes.h \ + nir/nir_constant_expressions.c \ + nir/nir_opcodes.c \ + nir/nir_opcodes.h \ + nir/nir_opt_algebraic.c CLEANFILES = \ glcpp/glcpp-parse.h \ glsl_parser.h \ @@ -216,3 +234,25 @@ 
clean-local: dist-hook: $(RM) glcpp/tests/*.out $(RM) glcpp/tests/subtest*/*.out + +nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py + $(MKDIR_P) nir; \ + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ + +nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h + $(MKDIR_P) nir; \ + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@ + +nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py + $(MKDIR_P) nir; \ + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@ + +nir/nir.h: nir/nir_opcodes.h + +nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py + $(MKDIR_P) nir; \ + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@ + +nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py + $(MKDIR_P) nir; \ + $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@ diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources index e2acbac25..d0210d170 100644 --- a/mesalib/src/glsl/Makefile.sources +++ b/mesalib/src/glsl/Makefile.sources @@ -1,146 +1,197 @@ # shared source lists for Makefile, SConscript, and Android.mk -GLSL_SRCDIR = $(top_srcdir)/src/glsl -GLSL_BUILDDIR = $(top_builddir)/src/glsl - # libglcpp LIBGLCPP_FILES = \ - $(GLSL_SRCDIR)/glcpp/glcpp.h \ - $(GLSL_SRCDIR)/glcpp/pp.c + glcpp/glcpp.h \ + glcpp/pp.c LIBGLCPP_GENERATED_FILES = \ - $(GLSL_BUILDDIR)/glcpp/glcpp-lex.c \ - $(GLSL_BUILDDIR)/glcpp/glcpp-parse.c + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c + +NIR_GENERATED_FILES = \ + nir/nir_builder_opcodes.h \ + nir/nir_constant_expressions.c \ + nir/nir_opcodes.c \ + nir/nir_opcodes.h \ + nir/nir_opt_algebraic.c + +NIR_FILES = \ + nir/glsl_to_nir.cpp \ + nir/glsl_to_nir.h \ + nir/nir.c \ + nir/nir.h \ + nir/nir_constant_expressions.h \ + nir/nir_dominance.c \ + nir/nir_from_ssa.c \ + nir/nir_intrinsics.c \ + nir/nir_intrinsics.h \ + 
nir/nir_live_variables.c \ + nir/nir_lower_alu_to_scalar.c \ + nir/nir_lower_atomics.c \ + nir/nir_lower_global_vars_to_local.c \ + nir/nir_lower_locals_to_regs.c \ + nir/nir_lower_io.c \ + nir/nir_lower_phis_to_scalar.c \ + nir/nir_lower_samplers.cpp \ + nir/nir_lower_system_values.c \ + nir/nir_lower_to_source_mods.c \ + nir/nir_lower_vars_to_ssa.c \ + nir/nir_lower_var_copies.c \ + nir/nir_lower_vec_to_movs.c \ + nir/nir_metadata.c \ + nir/nir_opt_constant_folding.c \ + nir/nir_opt_copy_propagate.c \ + nir/nir_opt_cse.c \ + nir/nir_opt_dce.c \ + nir/nir_opt_gcm.c \ + nir/nir_opt_global_to_local.c \ + nir/nir_opt_peephole_select.c \ + nir/nir_opt_remove_phis.c \ + nir/nir_print.c \ + nir/nir_remove_dead_variables.c \ + nir/nir_search.c \ + nir/nir_search.h \ + nir/nir_split_var_copies.c \ + nir/nir_to_ssa.c \ + nir/nir_types.h \ + nir/nir_validate.c \ + nir/nir_worklist.c \ + nir/nir_worklist.h \ + nir/nir_types.cpp \ + $(NIR_GENERATED_FILES) # libglsl LIBGLSL_FILES = \ - $(GLSL_SRCDIR)/ast.h \ - $(GLSL_SRCDIR)/ast_array_index.cpp \ - $(GLSL_SRCDIR)/ast_expr.cpp \ - $(GLSL_SRCDIR)/ast_function.cpp \ - $(GLSL_SRCDIR)/ast_to_hir.cpp \ - $(GLSL_SRCDIR)/ast_type.cpp \ - $(GLSL_SRCDIR)/builtin_functions.cpp \ - $(GLSL_SRCDIR)/builtin_type_macros.h \ - $(GLSL_SRCDIR)/builtin_types.cpp \ - $(GLSL_SRCDIR)/builtin_variables.cpp \ - $(GLSL_SRCDIR)/glsl_parser_extras.cpp \ - $(GLSL_SRCDIR)/glsl_parser_extras.h \ - $(GLSL_SRCDIR)/glsl_symbol_table.cpp \ - $(GLSL_SRCDIR)/glsl_symbol_table.h \ - $(GLSL_SRCDIR)/glsl_types.cpp \ - $(GLSL_SRCDIR)/glsl_types.h \ - $(GLSL_SRCDIR)/hir_field_selection.cpp \ - $(GLSL_SRCDIR)/ir_basic_block.cpp \ - $(GLSL_SRCDIR)/ir_basic_block.h \ - $(GLSL_SRCDIR)/ir_builder.cpp \ - $(GLSL_SRCDIR)/ir_builder.h \ - $(GLSL_SRCDIR)/ir_clone.cpp \ - $(GLSL_SRCDIR)/ir_constant_expression.cpp \ - $(GLSL_SRCDIR)/ir.cpp \ - $(GLSL_SRCDIR)/ir.h \ - $(GLSL_SRCDIR)/ir_equals.cpp \ - $(GLSL_SRCDIR)/ir_expression_flattening.cpp \ - 
$(GLSL_SRCDIR)/ir_expression_flattening.h \ - $(GLSL_SRCDIR)/ir_function_can_inline.cpp \ - $(GLSL_SRCDIR)/ir_function_detect_recursion.cpp \ - $(GLSL_SRCDIR)/ir_function_inlining.h \ - $(GLSL_SRCDIR)/ir_function.cpp \ - $(GLSL_SRCDIR)/ir_hierarchical_visitor.cpp \ - $(GLSL_SRCDIR)/ir_hierarchical_visitor.h \ - $(GLSL_SRCDIR)/ir_hv_accept.cpp \ - $(GLSL_SRCDIR)/ir_import_prototypes.cpp \ - $(GLSL_SRCDIR)/ir_optimization.h \ - $(GLSL_SRCDIR)/ir_print_visitor.cpp \ - $(GLSL_SRCDIR)/ir_print_visitor.h \ - $(GLSL_SRCDIR)/ir_reader.cpp \ - $(GLSL_SRCDIR)/ir_reader.h \ - $(GLSL_SRCDIR)/ir_rvalue_visitor.cpp \ - $(GLSL_SRCDIR)/ir_rvalue_visitor.h \ - $(GLSL_SRCDIR)/ir_set_program_inouts.cpp \ - $(GLSL_SRCDIR)/ir_uniform.h \ - $(GLSL_SRCDIR)/ir_validate.cpp \ - $(GLSL_SRCDIR)/ir_variable_refcount.cpp \ - $(GLSL_SRCDIR)/ir_variable_refcount.h \ - $(GLSL_SRCDIR)/ir_visitor.h \ - $(GLSL_SRCDIR)/linker.cpp \ - $(GLSL_SRCDIR)/linker.h \ - $(GLSL_SRCDIR)/link_atomics.cpp \ - $(GLSL_SRCDIR)/link_functions.cpp \ - $(GLSL_SRCDIR)/link_interface_blocks.cpp \ - $(GLSL_SRCDIR)/link_uniforms.cpp \ - $(GLSL_SRCDIR)/link_uniform_initializers.cpp \ - $(GLSL_SRCDIR)/link_uniform_block_active_visitor.cpp \ - $(GLSL_SRCDIR)/link_uniform_block_active_visitor.h \ - $(GLSL_SRCDIR)/link_uniform_blocks.cpp \ - $(GLSL_SRCDIR)/link_varyings.cpp \ - $(GLSL_SRCDIR)/link_varyings.h \ - $(GLSL_SRCDIR)/list.h \ - $(GLSL_SRCDIR)/loop_analysis.cpp \ - $(GLSL_SRCDIR)/loop_analysis.h \ - $(GLSL_SRCDIR)/loop_controls.cpp \ - $(GLSL_SRCDIR)/loop_unroll.cpp \ - $(GLSL_SRCDIR)/lower_clip_distance.cpp \ - $(GLSL_SRCDIR)/lower_const_arrays_to_uniforms.cpp \ - $(GLSL_SRCDIR)/lower_discard.cpp \ - $(GLSL_SRCDIR)/lower_discard_flow.cpp \ - $(GLSL_SRCDIR)/lower_if_to_cond_assign.cpp \ - $(GLSL_SRCDIR)/lower_instructions.cpp \ - $(GLSL_SRCDIR)/lower_jumps.cpp \ - $(GLSL_SRCDIR)/lower_mat_op_to_vec.cpp \ - $(GLSL_SRCDIR)/lower_noise.cpp \ - $(GLSL_SRCDIR)/lower_offset_array.cpp \ - 
$(GLSL_SRCDIR)/lower_packed_varyings.cpp \ - $(GLSL_SRCDIR)/lower_named_interface_blocks.cpp \ - $(GLSL_SRCDIR)/lower_packing_builtins.cpp \ - $(GLSL_SRCDIR)/lower_texture_projection.cpp \ - $(GLSL_SRCDIR)/lower_variable_index_to_cond_assign.cpp \ - $(GLSL_SRCDIR)/lower_vec_index_to_cond_assign.cpp \ - $(GLSL_SRCDIR)/lower_vec_index_to_swizzle.cpp \ - $(GLSL_SRCDIR)/lower_vector.cpp \ - $(GLSL_SRCDIR)/lower_vector_insert.cpp \ - $(GLSL_SRCDIR)/lower_vertex_id.cpp \ - $(GLSL_SRCDIR)/lower_output_reads.cpp \ - $(GLSL_SRCDIR)/lower_ubo_reference.cpp \ - $(GLSL_SRCDIR)/opt_algebraic.cpp \ - $(GLSL_SRCDIR)/opt_array_splitting.cpp \ - $(GLSL_SRCDIR)/opt_constant_folding.cpp \ - $(GLSL_SRCDIR)/opt_constant_propagation.cpp \ - $(GLSL_SRCDIR)/opt_constant_variable.cpp \ - $(GLSL_SRCDIR)/opt_copy_propagation.cpp \ - $(GLSL_SRCDIR)/opt_copy_propagation_elements.cpp \ - $(GLSL_SRCDIR)/opt_cse.cpp \ - $(GLSL_SRCDIR)/opt_dead_builtin_variables.cpp \ - $(GLSL_SRCDIR)/opt_dead_builtin_varyings.cpp \ - $(GLSL_SRCDIR)/opt_dead_code.cpp \ - $(GLSL_SRCDIR)/opt_dead_code_local.cpp \ - $(GLSL_SRCDIR)/opt_dead_functions.cpp \ - $(GLSL_SRCDIR)/opt_flatten_nested_if_blocks.cpp \ - $(GLSL_SRCDIR)/opt_flip_matrices.cpp \ - $(GLSL_SRCDIR)/opt_function_inlining.cpp \ - $(GLSL_SRCDIR)/opt_if_simplification.cpp \ - $(GLSL_SRCDIR)/opt_minmax.cpp \ - $(GLSL_SRCDIR)/opt_noop_swizzle.cpp \ - $(GLSL_SRCDIR)/opt_rebalance_tree.cpp \ - $(GLSL_SRCDIR)/opt_redundant_jumps.cpp \ - $(GLSL_SRCDIR)/opt_structure_splitting.cpp \ - $(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \ - $(GLSL_SRCDIR)/opt_tree_grafting.cpp \ - $(GLSL_SRCDIR)/opt_vectorize.cpp \ - $(GLSL_SRCDIR)/program.h \ - $(GLSL_SRCDIR)/s_expression.cpp \ - $(GLSL_SRCDIR)/s_expression.h + ast.h \ + ast_array_index.cpp \ + ast_expr.cpp \ + ast_function.cpp \ + ast_to_hir.cpp \ + ast_type.cpp \ + blob.c \ + blob.h \ + builtin_functions.cpp \ + builtin_type_macros.h \ + builtin_types.cpp \ + builtin_variables.cpp \ + glsl_parser_extras.cpp \ + 
glsl_parser_extras.h \ + glsl_symbol_table.cpp \ + glsl_symbol_table.h \ + glsl_types.cpp \ + glsl_types.h \ + hir_field_selection.cpp \ + ir_basic_block.cpp \ + ir_basic_block.h \ + ir_builder.cpp \ + ir_builder.h \ + ir_clone.cpp \ + ir_constant_expression.cpp \ + ir.cpp \ + ir.h \ + ir_equals.cpp \ + ir_expression_flattening.cpp \ + ir_expression_flattening.h \ + ir_function_can_inline.cpp \ + ir_function_detect_recursion.cpp \ + ir_function_inlining.h \ + ir_function.cpp \ + ir_hierarchical_visitor.cpp \ + ir_hierarchical_visitor.h \ + ir_hv_accept.cpp \ + ir_import_prototypes.cpp \ + ir_optimization.h \ + ir_print_visitor.cpp \ + ir_print_visitor.h \ + ir_reader.cpp \ + ir_reader.h \ + ir_rvalue_visitor.cpp \ + ir_rvalue_visitor.h \ + ir_set_program_inouts.cpp \ + ir_uniform.h \ + ir_validate.cpp \ + ir_variable_refcount.cpp \ + ir_variable_refcount.h \ + ir_visitor.h \ + linker.cpp \ + linker.h \ + link_atomics.cpp \ + link_functions.cpp \ + link_interface_blocks.cpp \ + link_uniforms.cpp \ + link_uniform_initializers.cpp \ + link_uniform_block_active_visitor.cpp \ + link_uniform_block_active_visitor.h \ + link_uniform_blocks.cpp \ + link_varyings.cpp \ + link_varyings.h \ + list.h \ + loop_analysis.cpp \ + loop_analysis.h \ + loop_controls.cpp \ + loop_unroll.cpp \ + lower_clip_distance.cpp \ + lower_const_arrays_to_uniforms.cpp \ + lower_discard.cpp \ + lower_discard_flow.cpp \ + lower_if_to_cond_assign.cpp \ + lower_instructions.cpp \ + lower_jumps.cpp \ + lower_mat_op_to_vec.cpp \ + lower_noise.cpp \ + lower_offset_array.cpp \ + lower_packed_varyings.cpp \ + lower_named_interface_blocks.cpp \ + lower_packing_builtins.cpp \ + lower_texture_projection.cpp \ + lower_variable_index_to_cond_assign.cpp \ + lower_vec_index_to_cond_assign.cpp \ + lower_vec_index_to_swizzle.cpp \ + lower_vector.cpp \ + lower_vector_insert.cpp \ + lower_vertex_id.cpp \ + lower_output_reads.cpp \ + lower_ubo_reference.cpp \ + opt_algebraic.cpp \ + opt_array_splitting.cpp \ + 
opt_constant_folding.cpp \ + opt_constant_propagation.cpp \ + opt_constant_variable.cpp \ + opt_copy_propagation.cpp \ + opt_copy_propagation_elements.cpp \ + opt_cse.cpp \ + opt_dead_builtin_variables.cpp \ + opt_dead_builtin_varyings.cpp \ + opt_dead_code.cpp \ + opt_dead_code_local.cpp \ + opt_dead_functions.cpp \ + opt_flatten_nested_if_blocks.cpp \ + opt_flip_matrices.cpp \ + opt_function_inlining.cpp \ + opt_if_simplification.cpp \ + opt_minmax.cpp \ + opt_noop_swizzle.cpp \ + opt_rebalance_tree.cpp \ + opt_redundant_jumps.cpp \ + opt_structure_splitting.cpp \ + opt_swizzle_swizzle.cpp \ + opt_tree_grafting.cpp \ + opt_vectorize.cpp \ + program.h \ + s_expression.cpp \ + s_expression.h # glsl_compiler GLSL_COMPILER_CXX_FILES = \ - $(GLSL_SRCDIR)/standalone_scaffolding.cpp \ - $(GLSL_SRCDIR)/standalone_scaffolding.h \ - $(GLSL_SRCDIR)/main.cpp + standalone_scaffolding.cpp \ + standalone_scaffolding.h \ + main.cpp # libglsl generated sources LIBGLSL_GENERATED_CXX_FILES = \ - $(GLSL_BUILDDIR)/glsl_lexer.cpp \ - $(GLSL_BUILDDIR)/glsl_parser.cpp + glsl_lexer.cpp \ + glsl_parser.cpp diff --git a/mesalib/src/glsl/README b/mesalib/src/glsl/README index 2f93f12ff..bfcf69f90 100644 --- a/mesalib/src/glsl/README +++ b/mesalib/src/glsl/README @@ -187,7 +187,7 @@ You may also need to update the backends if they will see the new expr type: You can then use the new expression from builtins (if all backends would rather see it), or scan the IR and convert to use your new -expression type (see ir_mod_to_fract, for example). +expression type (see ir_mod_to_floor, for example). Q: How is memory management handled in the compiler? 
diff --git a/mesalib/src/glsl/SConscript b/mesalib/src/glsl/SConscript index 847e96246..21c8266a6 100644 --- a/mesalib/src/glsl/SConscript +++ b/mesalib/src/glsl/SConscript @@ -11,6 +11,8 @@ env.Prepend(CPPPATH = [ '#src', '#src/mapi', '#src/mesa', + '#src/gallium/include', + '#src/gallium/auxiliary', '#src/glsl', '#src/glsl/glcpp', ]) diff --git a/mesalib/src/glsl/ast.h b/mesalib/src/glsl/ast.h index 6995ae83b..ef74e5137 100644 --- a/mesalib/src/glsl/ast.h +++ b/mesalib/src/glsl/ast.h @@ -189,6 +189,7 @@ enum ast_operators { ast_uint_constant, ast_float_constant, ast_bool_constant, + ast_double_constant, ast_sequence, ast_aggregate @@ -236,6 +237,7 @@ public: float float_constant; unsigned uint_constant; int bool_constant; + double double_constant; } primary_expression; diff --git a/mesalib/src/glsl/ast_function.cpp b/mesalib/src/glsl/ast_function.cpp index cbff9d8b4..918be6966 100644 --- a/mesalib/src/glsl/ast_function.cpp +++ b/mesalib/src/glsl/ast_function.cpp @@ -573,6 +573,9 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type) result = new(ctx) ir_expression(ir_unop_i2u, new(ctx) ir_expression(ir_unop_b2i, src)); break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2u, src); + break; } break; case GLSL_TYPE_INT: @@ -586,6 +589,9 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type) case GLSL_TYPE_BOOL: result = new(ctx) ir_expression(ir_unop_b2i, src); break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2i, src); + break; } break; case GLSL_TYPE_FLOAT: @@ -599,6 +605,9 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type) case GLSL_TYPE_BOOL: result = new(ctx) ir_expression(ir_unop_b2f, desired_type, src, NULL); break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2f, desired_type, src, NULL); + break; } break; case GLSL_TYPE_BOOL: @@ -613,8 +622,27 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type) case GLSL_TYPE_FLOAT: result = 
new(ctx) ir_expression(ir_unop_f2b, desired_type, src, NULL); break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2b, desired_type, src, NULL); + break; } break; + case GLSL_TYPE_DOUBLE: + switch (b) { + case GLSL_TYPE_INT: + result = new(ctx) ir_expression(ir_unop_i2d, src); + break; + case GLSL_TYPE_UINT: + result = new(ctx) ir_expression(ir_unop_u2d, src); + break; + case GLSL_TYPE_BOOL: + result = new(ctx) ir_expression(ir_unop_f2d, + new(ctx) ir_expression(ir_unop_b2f, src)); + break; + case GLSL_TYPE_FLOAT: + result = new(ctx) ir_expression(ir_unop_f2d, desired_type, src, NULL); + break; + } } assert(result != NULL); @@ -711,9 +739,9 @@ process_vec_mat_constructor(exec_list *instructions, /* Apply implicit conversions (not the scalar constructor rules!). See * the spec quote above. */ - if (constructor_type->is_float()) { + if (constructor_type->base_type != result->type->base_type) { const glsl_type *desired_type = - glsl_type::get_instance(GLSL_TYPE_FLOAT, + glsl_type::get_instance(constructor_type->base_type, ir->type->vector_elements, ir->type->matrix_columns); if (result->type->can_implicitly_convert_to(desired_type, state)) { @@ -847,13 +875,17 @@ process_array_constructor(exec_list *instructions, foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) { ir_rvalue *result = ir; + const glsl_base_type element_base_type = + constructor_type->element_type()->base_type; + /* Apply implicit conversions (not the scalar constructor rules!). See * the spec quote above. 
*/ - if (constructor_type->element_type()->is_float()) { - const glsl_type *desired_type = - glsl_type::get_instance(GLSL_TYPE_FLOAT, - ir->type->vector_elements, - ir->type->matrix_columns); + if (element_base_type != result->type->base_type) { + const glsl_type *desired_type = + glsl_type::get_instance(element_base_type, + ir->type->vector_elements, + ir->type->matrix_columns); + if (result->type->can_implicitly_convert_to(desired_type, state)) { /* Even though convert_component() implements the constructor * conversion rules (not the implicit conversion rules), its safe @@ -1012,6 +1044,9 @@ emit_inline_vector_constructor(const glsl_type *type, case GLSL_TYPE_FLOAT: data.f[i + base_component] = c->get_float_component(i); break; + case GLSL_TYPE_DOUBLE: + data.d[i + base_component] = c->get_double_component(i); + break; case GLSL_TYPE_BOOL: data.b[i + base_component] = c->get_bool_component(i); break; @@ -1167,16 +1202,21 @@ emit_inline_matrix_constructor(const glsl_type *type, /* Assign the scalar to the X component of a vec4, and fill the remaining * components with zero. */ + glsl_base_type param_base_type = first_param->type->base_type; + assert(param_base_type == GLSL_TYPE_FLOAT || + param_base_type == GLSL_TYPE_DOUBLE); ir_variable *rhs_var = - new(ctx) ir_variable(glsl_type::vec4_type, "mat_ctor_vec", - ir_var_temporary); + new(ctx) ir_variable(glsl_type::get_instance(param_base_type, 4, 1), + "mat_ctor_vec", + ir_var_temporary); instructions->push_tail(rhs_var); ir_constant_data zero; - zero.f[0] = 0.0; - zero.f[1] = 0.0; - zero.f[2] = 0.0; - zero.f[3] = 0.0; + for (unsigned i = 0; i < 4; i++) + if (param_base_type == GLSL_TYPE_FLOAT) + zero.f[i] = 0.0; + else + zero.d[i] = 0.0; ir_instruction *inst = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var), @@ -1524,10 +1564,10 @@ ast_function_expression::hir(exec_list *instructions, } - /* Constructors for samplers are illegal. + /* Constructors for opaque types are illegal. 
*/ - if (constructor_type->is_sampler()) { - _mesa_glsl_error(& loc, state, "cannot construct sampler type `%s'", + if (constructor_type->contains_opaque()) { + _mesa_glsl_error(& loc, state, "cannot construct opaque type `%s'", constructor_type->name); return ir_rvalue::error_value(ctx); } diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp index 811a9557d..acb5c763c 100644 --- a/mesalib/src/glsl/ast_to_hir.cpp +++ b/mesalib/src/glsl/ast_to_hir.cpp @@ -172,6 +172,7 @@ get_conversion_operation(const glsl_type *to, const glsl_type *from, switch (from->base_type) { case GLSL_TYPE_INT: return ir_unop_i2f; case GLSL_TYPE_UINT: return ir_unop_u2f; + case GLSL_TYPE_DOUBLE: return ir_unop_d2f; default: return (ir_expression_operation)0; } @@ -183,6 +184,16 @@ get_conversion_operation(const glsl_type *to, const glsl_type *from, default: return (ir_expression_operation)0; } + case GLSL_TYPE_DOUBLE: + if (!state->has_double()) + return (ir_expression_operation)0; + switch (from->base_type) { + case GLSL_TYPE_INT: return ir_unop_i2d; + case GLSL_TYPE_UINT: return ir_unop_u2d; + case GLSL_TYPE_FLOAT: return ir_unop_f2d; + default: return (ir_expression_operation)0; + } + default: return (ir_expression_operation)0; } } @@ -340,8 +351,10 @@ arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, * type of both operands must be float. 
*/ assert(type_a->is_matrix() || type_b->is_matrix()); - assert(type_a->base_type == GLSL_TYPE_FLOAT); - assert(type_b->base_type == GLSL_TYPE_FLOAT); + assert(type_a->base_type == GLSL_TYPE_FLOAT || + type_a->base_type == GLSL_TYPE_DOUBLE); + assert(type_b->base_type == GLSL_TYPE_FLOAT || + type_b->base_type == GLSL_TYPE_DOUBLE); /* "* The operator is add (+), subtract (-), or divide (/), and the * operands are matrices with the same number of rows and the same @@ -959,6 +972,7 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: return new(mem_ctx) ir_expression(operation, op0, op1); case GLSL_TYPE_ARRAY: { @@ -1597,13 +1611,11 @@ ast_expression::do_hir(exec_list *instructions, } ir_constant *cond_val = op[0]->constant_expression_value(); - ir_constant *then_val = op[1]->constant_expression_value(); - ir_constant *else_val = op[2]->constant_expression_value(); if (then_instructions.is_empty() && else_instructions.is_empty() - && (cond_val != NULL) && (then_val != NULL) && (else_val != NULL)) { - result = (cond_val->value.b[0]) ? then_val : else_val; + && cond_val != NULL) { + result = cond_val->value.b[0] ? op[1] : op[2]; } else { ir_variable *const tmp = new(ctx) ir_variable(type, "conditional_tmp", ir_var_temporary); @@ -1748,6 +1760,10 @@ ast_expression::do_hir(exec_list *instructions, result = new(ctx) ir_constant(bool(this->primary_expression.bool_constant)); break; + case ast_double_constant: + result = new(ctx) ir_constant(this->primary_expression.double_constant); + break; + case ast_sequence: { /* It should not be possible to generate a sequence in the AST without * any expressions in it. 
@@ -2562,6 +2578,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, _mesa_glsl_error(loc, state, "varying variables may not be of type struct"); break; + case GLSL_TYPE_DOUBLE: + break; default: _mesa_glsl_error(loc, state, "illegal type for a varying variable"); break; @@ -3603,6 +3621,51 @@ ast_declarator_list::hir(exec_list *instructions, handle_geometry_shader_input_decl(state, loc, var); } + } else if (var->data.mode == ir_var_shader_out) { + const glsl_type *check_type = var->type->without_array(); + + /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec: + * + * It is a compile-time error to declare a vertex, tessellation + * evaluation, tessellation control, or geometry shader output + * that contains any of the following: + * + * * A Boolean type (bool, bvec2 ...) + * * An opaque type + */ + if (check_type->is_boolean() || check_type->contains_opaque()) + _mesa_glsl_error(&loc, state, + "%s shader output cannot have type %s", + _mesa_shader_stage_to_string(state->stage), + check_type->name); + + /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec: + * + * It is a compile-time error to declare a fragment shader output + * that contains any of the following: + * + * * A Boolean type (bool, bvec2 ...) + * * A double-precision scalar or vector (double, dvec2 ...) + * * An opaque type + * * Any matrix type + * * A structure + */ + if (state->stage == MESA_SHADER_FRAGMENT) { + if (check_type->is_record() || check_type->is_matrix()) + _mesa_glsl_error(&loc, state, + "fragment shader output " + "cannot have struct or matrix type"); + switch (check_type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + break; + default: + _mesa_glsl_error(&loc, state, + "fragment shader output cannot have " + "type %s", check_type->name); + } + } } /* Integer fragment inputs must be qualified with 'flat'. 
In GLSL ES, @@ -3647,6 +3710,15 @@ ast_declarator_list::hir(exec_list *instructions, var_type); } + /* Double fragment inputs must be qualified with 'flat'. */ + if (var->type->contains_double() && + var->data.interpolation != INTERP_QUALIFIER_FLAT && + state->stage == MESA_SHADER_FRAGMENT && + var->data.mode == ir_var_shader_in) { + _mesa_glsl_error(&loc, state, "if a fragment input is (or contains) " + "a double, then it must be qualified with 'flat'", + var_type); + } /* Interpolation qualifiers cannot be applied to 'centroid' and * 'centroid varying'. @@ -4133,6 +4205,27 @@ ast_function::hir(exec_list *instructions, emit_function(state, f); } + /* From GLSL ES 3.0 spec, chapter 6.1 "Function Definitions", page 71: + * + * "A shader cannot redefine or overload built-in functions." + * + * While in GLSL ES 1.0 specification, chapter 8 "Built-in Functions": + * + * "User code can overload the built-in functions but cannot redefine + * them." + */ + if (state->es_shader && state->language_version >= 300) { + /* Local shader has no exact candidates; check the built-ins. */ + _mesa_glsl_initialize_builtin_functions(); + if (_mesa_glsl_find_builtin_function_by_name(state, name)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "A shader cannot redefine or overload built-in " + "function `%s' in GLSL ES 3.00", name); + return NULL; + } + } + /* Verify that this function's signature either doesn't match a previously * seen signature for a function with the same name, or, if a match is found, * that the previously seen signature does not have an associated definition. 
@@ -5203,6 +5296,13 @@ ast_process_structure_or_interface_block(exec_list *instructions, "members"); } + if (qual->flags.q.constant) { + YYLTYPE loc = decl_list->get_location(); + _mesa_glsl_error(&loc, state, + "const storage qualifier cannot be applied " + "to struct or interface block members"); + } + field_type = process_array_type(&loc, decl_type, decl->array_specifier, state); fields[i].type = field_type; @@ -5383,6 +5483,14 @@ ast_interface_block::hir(exec_list *instructions, { YYLTYPE loc = this->get_location(); + /* Interface blocks must be declared at global scope */ + if (state->current_function != NULL) { + _mesa_glsl_error(&loc, state, + "Interface block `%s' must be declared " + "at global scope", + this->block_name); + } + /* The ast_interface_block has a list of ast_declarator_lists. We * need to turn those into ir_variables with an association * with this uniform block. @@ -5443,9 +5551,23 @@ ast_interface_block::hir(exec_list *instructions, state->struct_specifier_depth--; - if (!redeclaring_per_vertex) + if (!redeclaring_per_vertex) { validate_identifier(this->block_name, loc, state); + /* From section 4.3.9 ("Interface Blocks") of the GLSL 4.50 spec: + * + * "Block names have no other use within a shader beyond interface + * matching; it is a compile-time error to use a block name at global + * scope for anything other than as a block name." + */ + ir_variable *var = state->symbols->get_variable(this->block_name); + if (var && !var->type->is_interface()) { + _mesa_glsl_error(&loc, state, "Block name `%s' is " + "already used in the scope.", + this->block_name); + } + } + const glsl_type *earlier_per_vertex = NULL; if (redeclaring_per_vertex) { /* Find the previous declaration of gl_PerVertex. 
If we're redeclaring diff --git a/mesalib/src/glsl/blob.c b/mesalib/src/glsl/blob.c new file mode 100644 index 000000000..dd4341be9 --- /dev/null +++ b/mesalib/src/glsl/blob.c @@ -0,0 +1,323 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <string.h> + +#include "main/macros.h" +#include "util/ralloc.h" +#include "blob.h" + +#define BLOB_INITIAL_SIZE 4096 + +/* Ensure that \blob will be able to fit an additional object of size + * \additional. The growing (if any) will occur by doubling the existing + * allocation. 
+ */ +static bool +grow_to_fit(struct blob *blob, size_t additional) +{ + size_t to_allocate; + uint8_t *new_data; + + if (blob->size + additional <= blob->allocated) + return true; + + if (blob->allocated == 0) + to_allocate = BLOB_INITIAL_SIZE; + else + to_allocate = blob->allocated * 2; + + to_allocate = MAX2(to_allocate, blob->allocated + additional); + + new_data = reralloc_size(blob, blob->data, to_allocate); + if (new_data == NULL) + return false; + + blob->data = new_data; + blob->allocated = to_allocate; + + return true; +} + +/* Align the blob->size so that reading or writing a value at (blob->data + + * blob->size) will result in an access aligned to a granularity of \alignment + * bytes. + * + * \return True unless allocation fails + */ +static bool +align_blob(struct blob *blob, size_t alignment) +{ + const size_t new_size = ALIGN(blob->size, alignment); + + if (! grow_to_fit (blob, new_size - blob->size)) + return false; + + blob->size = new_size; + + return true; +} + +static void +align_blob_reader(struct blob_reader *blob, size_t alignment) +{ + blob->current = blob->data + ALIGN(blob->current - blob->data, alignment); +} + +struct blob * +blob_create(void *mem_ctx) +{ + struct blob *blob; + + blob = ralloc(mem_ctx, struct blob); + if (blob == NULL) + return NULL; + + blob->data = NULL; + blob->allocated = 0; + blob->size = 0; + + return blob; +} + +bool +blob_overwrite_bytes(struct blob *blob, + size_t offset, + const void *bytes, + size_t to_write) +{ + /* Detect an attempt to overwrite data out of bounds. offset is unsigned, so it must be compared against blob->size before subtracting; otherwise the subtraction wraps around and the range check passes for an offset beyond the written data. */ + if (offset > blob->size || blob->size - offset < to_write) + return false; + + memcpy(blob->data + offset, bytes, to_write); + + return true; +} + +bool +blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write) +{ + if (! 
grow_to_fit(blob, to_write)) + return false; + + memcpy(blob->data + blob->size, bytes, to_write); + blob->size += to_write; + + return true; +} + +uint8_t * +blob_reserve_bytes(struct blob *blob, size_t to_write) +{ + uint8_t *ret; + + if (! grow_to_fit (blob, to_write)) + return NULL; + + ret = blob->data + blob->size; + blob->size += to_write; + + return ret; +} + +bool +blob_write_uint32(struct blob *blob, uint32_t value) +{ + if (! align_blob(blob, sizeof(value))) + return false; /* padding could not be allocated: never write at a misaligned offset */ + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_overwrite_uint32 (struct blob *blob, + size_t offset, + uint32_t value) +{ + return blob_overwrite_bytes(blob, offset, &value, sizeof(value)); +} + +bool +blob_write_uint64(struct blob *blob, uint64_t value) +{ + if (! align_blob(blob, sizeof(value))) + return false; /* padding could not be allocated: never write at a misaligned offset */ + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_write_intptr(struct blob *blob, intptr_t value) +{ + if (! align_blob(blob, sizeof(value))) + return false; /* padding could not be allocated: never write at a misaligned offset */ + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_write_string(struct blob *blob, const char *str) +{ + return blob_write_bytes(blob, str, strlen(str) + 1); +} + +void +blob_reader_init(struct blob_reader *blob, uint8_t *data, size_t size) +{ + blob->data = data; + blob->end = data + size; + blob->current = data; + blob->overrun = false; +} + +/* Check that an object of size \size can be read from this blob. + * + * If not, set blob->overrun to indicate that we attempted to read too far. A read of zero bytes at the very end of the blob is not an overrun. + */ +static bool +ensure_can_read(struct blob_reader *blob, size_t size) +{ + if (blob->current <= blob->end && blob->end - blob->current >= size) + return true; + + blob->overrun = true; + + return false; +} + +void * +blob_read_bytes(struct blob_reader *blob, size_t size) +{ + void *ret; + + if (! 
ensure_can_read (blob, size)) + return NULL; + + ret = blob->current; + + blob->current += size; + + return ret; +} + +void +blob_copy_bytes(struct blob_reader *blob, uint8_t *dest, size_t size) +{ + uint8_t *bytes; + + bytes = blob_read_bytes(blob, size); + if (bytes == NULL) + return; + + memcpy(dest, bytes, size); +} + +/* These next three read functions have identical form. If we add any beyond + * these first three we should probably switch to generating these with a + * preprocessor macro. +*/ +uint32_t +blob_read_uint32(struct blob_reader *blob) +{ + uint32_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((uint32_t*) blob->current); + + blob->current += size; + + return ret; +} + +uint64_t +blob_read_uint64(struct blob_reader *blob) +{ + uint64_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((uint64_t*) blob->current); + + blob->current += size; + + return ret; +} + +intptr_t +blob_read_intptr(struct blob_reader *blob) +{ + intptr_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((intptr_t *) blob->current); + + blob->current += size; + + return ret; +} + +char * +blob_read_string(struct blob_reader *blob) +{ + int size; + char *ret; + uint8_t *nul; + + /* If we're already at the end, then this is an overrun. */ + if (blob->current >= blob->end) { + blob->overrun = true; + return NULL; + } + + /* Similarly, if there is no zero byte in the data remaining in this blob, + * we also consider that an overrun. 
+ */ + nul = memchr(blob->current, 0, blob->end - blob->current); + + if (nul == NULL) { + blob->overrun = true; + return NULL; + } + + size = nul - blob->current + 1; + + assert(ensure_can_read(blob, size)); + + ret = (char *) blob->current; + + blob->current += size; + + return ret; +} diff --git a/mesalib/src/glsl/blob.h b/mesalib/src/glsl/blob.h new file mode 100644 index 000000000..ec903ec14 --- /dev/null +++ b/mesalib/src/glsl/blob.h @@ -0,0 +1,289 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once +#ifndef BLOB_H +#define BLOB_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +/* The blob functions implement a simple, low-level API for serializing and + * deserializing. + * + * All objects written to a blob will be serialized directly, (without any + * additional meta-data to describe the data written). 
Therefore, it is the + * caller's responsibility to ensure that any data can be read later, (either + * by knowing exactly what data is expected, or by writing to the blob + * sufficient meta-data to describe what has been written). + * + * A blob is efficient in that it dynamically grows by doubling in size, so + * allocation costs are logarithmic. + */ + +struct blob { + /* The data actually written to the blob. */ + uint8_t *data; + + /** Number of bytes that have been allocated for \c data. */ + size_t allocated; + + /** The number of bytes that have actual data written to them. */ + size_t size; +}; + +/* When done reading, the caller can ensure that everything was consumed by + * checking the following: + * + * 1. blob->current should be equal to blob->end, (if not, too little was + * read). + * + * 2. blob->overrun should be false, (otherwise, too much was read). + */ +struct blob_reader { + uint8_t *data; /* start of the data being read */ + uint8_t *end; /* one past the last readable byte (data + size) */ + uint8_t *current; /* position of the next read */ + bool overrun; /* set once a read past end has been attempted */ +}; + +/** + * Create a new, empty blob, belonging to \mem_ctx. + * + * \return The new blob, (or NULL in case of allocation failure). + */ +struct blob * +blob_create (void *mem_ctx); + +/** + * Add some unstructured, fixed-size data to a blob. + * + * \return True unless allocation failed. + */ +bool +blob_write_bytes (struct blob *blob, const void *bytes, size_t to_write); + +/** + * Reserve space in \blob for a number of bytes. + * + * Space will be allocated within the blob for these bytes, but the bytes will + * be left uninitialized. The caller is expected to use the return value to + * write directly (and immediately) to these bytes. + * + * \note The return value is valid immediately upon return, but can be + * invalidated by any other call to a blob function. So the caller should call + * blob_reserve_bytes immediately before writing through the returned pointer. 
+ * + * This function is intended to be used when interfacing with an existing API + * that is not aware of the blob API, (so that blob_write_bytes cannot be + * called). + * + * \return A pointer to space allocated within \blob to which \to_write bytes + * can be written, (or NULL in case of any allocation error). + */ +uint8_t * +blob_reserve_bytes (struct blob *blob, size_t to_write); + +/** + * Overwrite some data previously written to the blob. + * + * Writes data to an existing portion of the blob at an offset of \offset. + * This data range must have previously been written to the blob by one of the + * blob_write_* calls. + * + * For example usage, see blob_overwrite_uint32 + * + * \return True unless the requested offset or offset+to_write lie outside + * the current blob's size. + */ +bool +blob_overwrite_bytes (struct blob *blob, + size_t offset, + const void *bytes, + size_t to_write); + +/** + * Add a uint32_t to a blob. + * + * \note This function will only write to a uint32_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint32 (struct blob *blob, uint32_t value); + +/** + * Overwrite a uint32_t previously written to the blob. + * + * Writes a uint32_t value to an existing portion of the blob at an offset of + * \offset. This data range must have previously been written to the blob by + * one of the blob_write_* calls. + * + * + * The expected usage is something like the following pattern: + * + * size_t offset; + * + * offset = blob->size; + * blob_write_uint32 (blob, 0); // placeholder + * ... various blob write calls, writing N items ... + * blob_overwrite_uint32 (blob, offset, N); + * + * \return True unless the requested position or position+to_write lie outside + * the current blob's size. 
+ */ +bool +blob_overwrite_uint32 (struct blob *blob, + size_t offset, + uint32_t value); + +/** + * Add a uint64_t to a blob. + * + * \note This function will only write to a uint64_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint64 (struct blob *blob, uint64_t value); + +/** + * Add an intptr_t to a blob. + * + * \note This function will only write to an intptr_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_intptr (struct blob *blob, intptr_t value); + +/** + * Add a NULL-terminated string to a blob, (including the NULL terminator). + * + * \return True unless allocation failed. + */ +bool +blob_write_string (struct blob *blob, const char *str); + +/** + * Start reading a blob, (initializing the contents of \blob for reading). + * + * After this call, the caller can use the various blob_read_* functions to + * read elements from the data array. + * + * For all of the blob_read_* functions, if there is insufficient data + * remaining, the functions will do nothing, (perhaps returning default values + * such as 0). The caller can detect this by noting that the blob_reader's + * current value is unchanged before and after the call. + */ +void +blob_reader_init (struct blob_reader *blob, uint8_t *data, size_t size); + +/** + * Read some unstructured, fixed-size data from the current location, (and + * update the current location to just past this data). + * + * \note The memory returned belongs to the data underlying the blob reader. The + * caller must copy the data in order to use it after the lifetime of the data + * underlying the blob reader. 
+ * + * \return The bytes read (see note above about memory lifetime). + */ +void * +blob_read_bytes (struct blob_reader *blob, size_t size); + +/** + * Read some unstructured, fixed-size data from the current location, copying + * it to \dest (and update the current location to just past this data) + */ +void +blob_copy_bytes (struct blob_reader *blob, uint8_t *dest, size_t size); + +/** + * Read a uint32_t from the current location, (and update the current location + * to just past this uint32_t). + * + * \note This function will only read from a uint32_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint32_t read + */ +uint32_t +blob_read_uint32 (struct blob_reader *blob); + +/** + * Read a uint64_t from the current location, (and update the current location + * to just past this uint64_t). + * + * \note This function will only read from a uint64_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint64_t read + */ +uint64_t +blob_read_uint64 (struct blob_reader *blob); + +/** + * Read an intptr_t value from the current location, (and update the + * current location to just past this intptr_t). + * + * \note This function will only read from an intptr_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The intptr_t read + */ +intptr_t +blob_read_intptr (struct blob_reader *blob); + +/** + * Read a NULL-terminated string from the current location, (and update the + * current location to just past this string). + * + * \note The memory returned belongs to the data underlying the blob reader. The + * caller must copy the string in order to use the string after the lifetime + * of the data underlying the blob reader. + * + * \return The string read (see note above about memory lifetime). 
However, if + * there is no NULL byte remaining within the blob, this function returns + * NULL. + */ +char * +blob_read_string (struct blob_reader *blob); + +#ifdef __cplusplus +} +#endif + +#endif /* BLOB_H */ diff --git a/mesalib/src/glsl/builtin_functions.cpp b/mesalib/src/glsl/builtin_functions.cpp index bb7fbcdc1..b643927d0 100644 --- a/mesalib/src/glsl/builtin_functions.cpp +++ b/mesalib/src/glsl/builtin_functions.cpp @@ -381,6 +381,12 @@ gs_streams(const _mesa_glsl_parse_state *state) return gpu_shader5(state) && gs_only(state); } +static bool +fp64(const _mesa_glsl_parse_state *state) +{ + return state->has_double(); +} + /** @} */ /******************************************************************************/ @@ -436,6 +442,7 @@ private: ir_constant *imm(float f, unsigned vector_elements=1); ir_constant *imm(int i, unsigned vector_elements=1); ir_constant *imm(unsigned u, unsigned vector_elements=1); + ir_constant *imm(double d, unsigned vector_elements=1); ir_constant *imm(const glsl_type *type, const ir_constant_data &); ir_dereference_variable *var_ref(ir_variable *var); ir_dereference_array *array_ref(ir_variable *var, int i); @@ -526,29 +533,29 @@ private: B1(log) B1(exp2) B1(log2) - B1(sqrt) - B1(inversesqrt) - B1(abs) - B1(sign) - B1(floor) - B1(trunc) - B1(round) - B1(roundEven) - B1(ceil) - B1(fract) + BA1(sqrt) + BA1(inversesqrt) + BA1(abs) + BA1(sign) + BA1(floor) + BA1(trunc) + BA1(round) + BA1(roundEven) + BA1(ceil) + BA1(fract) B2(mod) - B1(modf) + BA1(modf) BA2(min) BA2(max) BA2(clamp) - B2(mix_lrp) + BA2(mix_lrp) ir_function_signature *_mix_sel(builtin_available_predicate avail, const glsl_type *val_type, const glsl_type *blend_type); - B2(step) - B2(smoothstep) - B1(isnan) - B1(isinf) + BA2(step) + BA2(smoothstep) + BA1(isnan) + BA1(isinf) B1(floatBitsToInt) B1(floatBitsToUint) B1(intBitsToFloat) @@ -563,24 +570,27 @@ private: ir_function_signature *_unpackSnorm4x8(builtin_available_predicate avail); ir_function_signature 
*_packHalf2x16(builtin_available_predicate avail); ir_function_signature *_unpackHalf2x16(builtin_available_predicate avail); - B1(length) - B1(distance); - B1(dot); - B1(cross); - B1(normalize); + ir_function_signature *_packDouble2x32(builtin_available_predicate avail); + ir_function_signature *_unpackDouble2x32(builtin_available_predicate avail); + + BA1(length) + BA1(distance); + BA1(dot); + BA1(cross); + BA1(normalize); B0(ftransform); - B1(faceforward); - B1(reflect); - B1(refract); - B1(matrixCompMult); - B1(outerProduct); - B0(determinant_mat2); - B0(determinant_mat3); - B0(determinant_mat4); - B0(inverse_mat2); - B0(inverse_mat3); - B0(inverse_mat4); - B1(transpose); + BA1(faceforward); + BA1(reflect); + BA1(refract); + BA1(matrixCompMult); + BA1(outerProduct); + BA1(determinant_mat2); + BA1(determinant_mat3); + BA1(determinant_mat4); + BA1(inverse_mat2); + BA1(inverse_mat3); + BA1(inverse_mat4); + BA1(transpose); BA1(lessThan); BA1(lessThanEqual); BA1(greaterThan); @@ -644,9 +654,10 @@ private: B1(bitCount) B1(findLSB) B1(findMSB) - B1(fma) + BA1(fma) B2(ldexp) B2(frexp) + B2(dfrexp) B1(uaddCarry) B1(usubBorrow) B1(mulExtended) @@ -815,6 +826,42 @@ builtin_builder::create_builtins() _##NAME(glsl_type::vec4_type), \ NULL); +#define FD(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FD130(NAME) \ + add_function(#NAME, \ + _##NAME(v130, glsl_type::float_type), \ + _##NAME(v130, glsl_type::vec2_type), \ + _##NAME(v130, glsl_type::vec3_type), \ + _##NAME(v130, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + 
_##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FDGS5(NAME) \ + add_function(#NAME, \ + _##NAME(gpu_shader5, glsl_type::float_type), \ + _##NAME(gpu_shader5, glsl_type::vec2_type), \ + _##NAME(gpu_shader5, glsl_type::vec3_type), \ + _##NAME(gpu_shader5, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + #define FI(NAME) \ add_function(#NAME, \ _##NAME(glsl_type::float_type), \ @@ -827,7 +874,23 @@ builtin_builder::create_builtins() _##NAME(glsl_type::ivec4_type), \ NULL); -#define FIU(NAME) \ +#define FID(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + _##NAME(always_available, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec2_type), \ + _##NAME(always_available, glsl_type::ivec3_type), \ + _##NAME(always_available, glsl_type::ivec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FIUD(NAME) \ add_function(#NAME, \ _##NAME(always_available, glsl_type::float_type), \ _##NAME(always_available, glsl_type::vec2_type), \ @@ -843,6 +906,10 @@ builtin_builder::create_builtins() _##NAME(v130, glsl_type::uvec2_type), \ _##NAME(v130, glsl_type::uvec3_type), \ _##NAME(v130, glsl_type::uvec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ NULL); #define IU(NAME) \ @@ -858,7 +925,7 @@ builtin_builder::create_builtins() _##NAME(glsl_type::uvec4_type), \ NULL); -#define FIUB(NAME) \ 
+#define FIUBD(NAME) \ add_function(#NAME, \ _##NAME(always_available, glsl_type::float_type), \ _##NAME(always_available, glsl_type::vec2_type), \ @@ -879,9 +946,14 @@ builtin_builder::create_builtins() _##NAME(always_available, glsl_type::bvec2_type), \ _##NAME(always_available, glsl_type::bvec3_type), \ _##NAME(always_available, glsl_type::bvec4_type), \ + \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ NULL); -#define FIU2_MIXED(NAME) \ +#define FIUD2_MIXED(NAME) \ add_function(#NAME, \ _##NAME(always_available, glsl_type::float_type, glsl_type::float_type), \ _##NAME(always_available, glsl_type::vec2_type, glsl_type::float_type), \ @@ -909,6 +981,14 @@ builtin_builder::create_builtins() _##NAME(v130, glsl_type::uvec2_type, glsl_type::uvec2_type), \ _##NAME(v130, glsl_type::uvec3_type, glsl_type::uvec3_type), \ _##NAME(v130, glsl_type::uvec4_type, glsl_type::uvec4_type), \ + \ + _##NAME(fp64, glsl_type::double_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec3_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec4_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), \ NULL); F(radians) @@ -941,16 +1021,16 @@ builtin_builder::create_builtins() F(log) F(exp2) F(log2) - F(sqrt) - F(inversesqrt) - FI(abs) - FI(sign) - F(floor) - F(trunc) - F(round) - F(roundEven) - F(ceil) - F(fract) + FD(sqrt) + FD(inversesqrt) + FID(abs) + FID(sign) + FD(floor) + FD(trunc) + FD(round) + FD(roundEven) + FD(ceil) + FD(fract) add_function("mod", _mod(glsl_type::float_type, glsl_type::float_type), @@ -961,29 +1041,52 @@ builtin_builder::create_builtins() _mod(glsl_type::vec2_type, glsl_type::vec2_type), 
_mod(glsl_type::vec3_type, glsl_type::vec3_type), _mod(glsl_type::vec4_type, glsl_type::vec4_type), + + _mod(glsl_type::double_type, glsl_type::double_type), + _mod(glsl_type::dvec2_type, glsl_type::double_type), + _mod(glsl_type::dvec3_type, glsl_type::double_type), + _mod(glsl_type::dvec4_type, glsl_type::double_type), + + _mod(glsl_type::dvec2_type, glsl_type::dvec2_type), + _mod(glsl_type::dvec3_type, glsl_type::dvec3_type), + _mod(glsl_type::dvec4_type, glsl_type::dvec4_type), NULL); - F(modf) + FD(modf) - FIU2_MIXED(min) - FIU2_MIXED(max) - FIU2_MIXED(clamp) + FIUD2_MIXED(min) + FIUD2_MIXED(max) + FIUD2_MIXED(clamp) add_function("mix", - _mix_lrp(glsl_type::float_type, glsl_type::float_type), - _mix_lrp(glsl_type::vec2_type, glsl_type::float_type), - _mix_lrp(glsl_type::vec3_type, glsl_type::float_type), - _mix_lrp(glsl_type::vec4_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::float_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec2_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec3_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec4_type, glsl_type::float_type), + + _mix_lrp(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _mix_lrp(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _mix_lrp(always_available, glsl_type::vec4_type, glsl_type::vec4_type), - _mix_lrp(glsl_type::vec2_type, glsl_type::vec2_type), - _mix_lrp(glsl_type::vec3_type, glsl_type::vec3_type), - _mix_lrp(glsl_type::vec4_type, glsl_type::vec4_type), + _mix_lrp(fp64, glsl_type::double_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec2_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec3_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec4_type, glsl_type::double_type), + + _mix_lrp(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _mix_lrp(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _mix_lrp(fp64, 
glsl_type::dvec4_type, glsl_type::dvec4_type), _mix_sel(v130, glsl_type::float_type, glsl_type::bool_type), _mix_sel(v130, glsl_type::vec2_type, glsl_type::bvec2_type), _mix_sel(v130, glsl_type::vec3_type, glsl_type::bvec3_type), _mix_sel(v130, glsl_type::vec4_type, glsl_type::bvec4_type), + _mix_sel(fp64, glsl_type::double_type, glsl_type::bool_type), + _mix_sel(fp64, glsl_type::dvec2_type, glsl_type::bvec2_type), + _mix_sel(fp64, glsl_type::dvec3_type, glsl_type::bvec3_type), + _mix_sel(fp64, glsl_type::dvec4_type, glsl_type::bvec4_type), + _mix_sel(shader_integer_mix, glsl_type::int_type, glsl_type::bool_type), _mix_sel(shader_integer_mix, glsl_type::ivec2_type, glsl_type::bvec2_type), _mix_sel(shader_integer_mix, glsl_type::ivec3_type, glsl_type::bvec3_type), @@ -1001,29 +1104,45 @@ builtin_builder::create_builtins() NULL); add_function("step", - _step(glsl_type::float_type, glsl_type::float_type), - _step(glsl_type::float_type, glsl_type::vec2_type), - _step(glsl_type::float_type, glsl_type::vec3_type), - _step(glsl_type::float_type, glsl_type::vec4_type), - - _step(glsl_type::vec2_type, glsl_type::vec2_type), - _step(glsl_type::vec3_type, glsl_type::vec3_type), - _step(glsl_type::vec4_type, glsl_type::vec4_type), + _step(always_available, glsl_type::float_type, glsl_type::float_type), + _step(always_available, glsl_type::float_type, glsl_type::vec2_type), + _step(always_available, glsl_type::float_type, glsl_type::vec3_type), + _step(always_available, glsl_type::float_type, glsl_type::vec4_type), + + _step(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _step(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _step(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + _step(fp64, glsl_type::double_type, glsl_type::double_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec2_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec3_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec4_type), + + 
_step(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _step(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _step(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), NULL); add_function("smoothstep", - _smoothstep(glsl_type::float_type, glsl_type::float_type), - _smoothstep(glsl_type::float_type, glsl_type::vec2_type), - _smoothstep(glsl_type::float_type, glsl_type::vec3_type), - _smoothstep(glsl_type::float_type, glsl_type::vec4_type), - - _smoothstep(glsl_type::vec2_type, glsl_type::vec2_type), - _smoothstep(glsl_type::vec3_type, glsl_type::vec3_type), - _smoothstep(glsl_type::vec4_type, glsl_type::vec4_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::float_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec2_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec3_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec4_type), + + _smoothstep(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _smoothstep(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _smoothstep(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::double_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec2_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec3_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec4_type), + + _smoothstep(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _smoothstep(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _smoothstep(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), NULL); - F(isnan) - F(isinf) + FD130(isnan) + FD130(isinf) F(floatBitsToInt) F(floatBitsToUint) @@ -1050,68 +1169,106 @@ builtin_builder::create_builtins() add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_gpu_shader5), NULL); add_function("packHalf2x16", _packHalf2x16(shader_packing_or_es3), NULL); 
add_function("unpackHalf2x16", _unpackHalf2x16(shader_packing_or_es3), NULL); + add_function("packDouble2x32", _packDouble2x32(fp64), NULL); + add_function("unpackDouble2x32", _unpackDouble2x32(fp64), NULL); - F(length) - F(distance) - F(dot) - add_function("cross", _cross(glsl_type::vec3_type), NULL); + FD(length) + FD(distance) + FD(dot) - F(normalize) + add_function("cross", _cross(always_available, glsl_type::vec3_type), + _cross(fp64, glsl_type::dvec3_type), NULL); + + FD(normalize) add_function("ftransform", _ftransform(), NULL); - F(faceforward) - F(reflect) - F(refract) + FD(faceforward) + FD(reflect) + FD(refract) // ... add_function("matrixCompMult", - _matrixCompMult(glsl_type::mat2_type), - _matrixCompMult(glsl_type::mat3_type), - _matrixCompMult(glsl_type::mat4_type), - _matrixCompMult(glsl_type::mat2x3_type), - _matrixCompMult(glsl_type::mat2x4_type), - _matrixCompMult(glsl_type::mat3x2_type), - _matrixCompMult(glsl_type::mat3x4_type), - _matrixCompMult(glsl_type::mat4x2_type), - _matrixCompMult(glsl_type::mat4x3_type), + _matrixCompMult(always_available, glsl_type::mat2_type), + _matrixCompMult(always_available, glsl_type::mat3_type), + _matrixCompMult(always_available, glsl_type::mat4_type), + _matrixCompMult(always_available, glsl_type::mat2x3_type), + _matrixCompMult(always_available, glsl_type::mat2x4_type), + _matrixCompMult(always_available, glsl_type::mat3x2_type), + _matrixCompMult(always_available, glsl_type::mat3x4_type), + _matrixCompMult(always_available, glsl_type::mat4x2_type), + _matrixCompMult(always_available, glsl_type::mat4x3_type), + _matrixCompMult(fp64, glsl_type::dmat2_type), + _matrixCompMult(fp64, glsl_type::dmat3_type), + _matrixCompMult(fp64, glsl_type::dmat4_type), + _matrixCompMult(fp64, glsl_type::dmat2x3_type), + _matrixCompMult(fp64, glsl_type::dmat2x4_type), + _matrixCompMult(fp64, glsl_type::dmat3x2_type), + _matrixCompMult(fp64, glsl_type::dmat3x4_type), + _matrixCompMult(fp64, glsl_type::dmat4x2_type), + 
_matrixCompMult(fp64, glsl_type::dmat4x3_type), NULL); add_function("outerProduct", - _outerProduct(glsl_type::mat2_type), - _outerProduct(glsl_type::mat3_type), - _outerProduct(glsl_type::mat4_type), - _outerProduct(glsl_type::mat2x3_type), - _outerProduct(glsl_type::mat2x4_type), - _outerProduct(glsl_type::mat3x2_type), - _outerProduct(glsl_type::mat3x4_type), - _outerProduct(glsl_type::mat4x2_type), - _outerProduct(glsl_type::mat4x3_type), + _outerProduct(v120, glsl_type::mat2_type), + _outerProduct(v120, glsl_type::mat3_type), + _outerProduct(v120, glsl_type::mat4_type), + _outerProduct(v120, glsl_type::mat2x3_type), + _outerProduct(v120, glsl_type::mat2x4_type), + _outerProduct(v120, glsl_type::mat3x2_type), + _outerProduct(v120, glsl_type::mat3x4_type), + _outerProduct(v120, glsl_type::mat4x2_type), + _outerProduct(v120, glsl_type::mat4x3_type), + _outerProduct(fp64, glsl_type::dmat2_type), + _outerProduct(fp64, glsl_type::dmat3_type), + _outerProduct(fp64, glsl_type::dmat4_type), + _outerProduct(fp64, glsl_type::dmat2x3_type), + _outerProduct(fp64, glsl_type::dmat2x4_type), + _outerProduct(fp64, glsl_type::dmat3x2_type), + _outerProduct(fp64, glsl_type::dmat3x4_type), + _outerProduct(fp64, glsl_type::dmat4x2_type), + _outerProduct(fp64, glsl_type::dmat4x3_type), NULL); add_function("determinant", - _determinant_mat2(), - _determinant_mat3(), - _determinant_mat4(), + _determinant_mat2(v120, glsl_type::mat2_type), + _determinant_mat3(v120, glsl_type::mat3_type), + _determinant_mat4(v120, glsl_type::mat4_type), + _determinant_mat2(fp64, glsl_type::dmat2_type), + _determinant_mat3(fp64, glsl_type::dmat3_type), + _determinant_mat4(fp64, glsl_type::dmat4_type), + NULL); add_function("inverse", - _inverse_mat2(), - _inverse_mat3(), - _inverse_mat4(), + _inverse_mat2(v120, glsl_type::mat2_type), + _inverse_mat3(v120, glsl_type::mat3_type), + _inverse_mat4(v120, glsl_type::mat4_type), + _inverse_mat2(fp64, glsl_type::dmat2_type), + _inverse_mat3(fp64, 
glsl_type::dmat3_type), + _inverse_mat4(fp64, glsl_type::dmat4_type), NULL); add_function("transpose", - _transpose(glsl_type::mat2_type), - _transpose(glsl_type::mat3_type), - _transpose(glsl_type::mat4_type), - _transpose(glsl_type::mat2x3_type), - _transpose(glsl_type::mat2x4_type), - _transpose(glsl_type::mat3x2_type), - _transpose(glsl_type::mat3x4_type), - _transpose(glsl_type::mat4x2_type), - _transpose(glsl_type::mat4x3_type), + _transpose(v120, glsl_type::mat2_type), + _transpose(v120, glsl_type::mat3_type), + _transpose(v120, glsl_type::mat4_type), + _transpose(v120, glsl_type::mat2x3_type), + _transpose(v120, glsl_type::mat2x4_type), + _transpose(v120, glsl_type::mat3x2_type), + _transpose(v120, glsl_type::mat3x4_type), + _transpose(v120, glsl_type::mat4x2_type), + _transpose(v120, glsl_type::mat4x3_type), + _transpose(fp64, glsl_type::dmat2_type), + _transpose(fp64, glsl_type::dmat3_type), + _transpose(fp64, glsl_type::dmat4_type), + _transpose(fp64, glsl_type::dmat2x3_type), + _transpose(fp64, glsl_type::dmat2x4_type), + _transpose(fp64, glsl_type::dmat3x2_type), + _transpose(fp64, glsl_type::dmat3x4_type), + _transpose(fp64, glsl_type::dmat4x2_type), + _transpose(fp64, glsl_type::dmat4x3_type), NULL); - FIU(lessThan) - FIU(lessThanEqual) - FIU(greaterThan) - FIU(greaterThanEqual) - FIUB(notEqual) - FIUB(equal) + FIUD(lessThan) + FIUD(lessThanEqual) + FIUD(greaterThan) + FIUD(greaterThanEqual) + FIUBD(notEqual) + FIUBD(equal) add_function("any", _any(glsl_type::bvec2_type), @@ -2180,13 +2337,17 @@ builtin_builder::create_builtins() IU(bitCount) IU(findLSB) IU(findMSB) - F(fma) + FDGS5(fma) add_function("ldexp", _ldexp(glsl_type::float_type, glsl_type::int_type), _ldexp(glsl_type::vec2_type, glsl_type::ivec2_type), _ldexp(glsl_type::vec3_type, glsl_type::ivec3_type), _ldexp(glsl_type::vec4_type, glsl_type::ivec4_type), + _ldexp(glsl_type::double_type, glsl_type::int_type), + _ldexp(glsl_type::dvec2_type, glsl_type::ivec2_type), + 
_ldexp(glsl_type::dvec3_type, glsl_type::ivec3_type), + _ldexp(glsl_type::dvec4_type, glsl_type::ivec4_type), NULL); add_function("frexp", @@ -2194,6 +2355,10 @@ builtin_builder::create_builtins() _frexp(glsl_type::vec2_type, glsl_type::ivec2_type), _frexp(glsl_type::vec3_type, glsl_type::ivec3_type), _frexp(glsl_type::vec4_type, glsl_type::ivec4_type), + _dfrexp(glsl_type::double_type, glsl_type::int_type), + _dfrexp(glsl_type::dvec2_type, glsl_type::ivec2_type), + _dfrexp(glsl_type::dvec3_type, glsl_type::ivec3_type), + _dfrexp(glsl_type::dvec4_type, glsl_type::ivec4_type), NULL); add_function("uaddCarry", _uaddCarry(glsl_type::uint_type), @@ -2310,8 +2475,8 @@ builtin_builder::create_builtins() #undef F #undef FI -#undef FIU -#undef FIUB +#undef FIUD +#undef FIUBD #undef FIU2_MIXED } @@ -2470,11 +2635,19 @@ builtin_builder::imm(unsigned u, unsigned vector_elements) } ir_constant * +builtin_builder::imm(double d, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(d, vector_elements); +} + +ir_constant * builtin_builder::imm(const glsl_type *type, const ir_constant_data &data) { return new(mem_ctx) ir_constant(type, &data); } +#define IMM_FP(type, val) (type->base_type == GLSL_TYPE_DOUBLE) ? 
imm(val) : imm((float)val) + ir_dereference_variable * builtin_builder::var_ref(ir_variable *var) { @@ -2550,6 +2723,13 @@ builtin_builder::_##NAME(const glsl_type *type) \ return unop(&AVAIL, OPCODE, type, type); \ } +#define UNOPA(NAME, OPCODE) \ +ir_function_signature * \ +builtin_builder::_##NAME(builtin_available_predicate avail, const glsl_type *type) \ +{ \ + return unop(avail, OPCODE, type, type); \ +} + ir_function_signature * builtin_builder::binop(ir_expression_operation opcode, builtin_available_predicate avail, @@ -2855,19 +3035,19 @@ UNOP(exp, ir_unop_exp, always_available) UNOP(log, ir_unop_log, always_available) UNOP(exp2, ir_unop_exp2, always_available) UNOP(log2, ir_unop_log2, always_available) -UNOP(sqrt, ir_unop_sqrt, always_available) -UNOP(inversesqrt, ir_unop_rsq, always_available) +UNOPA(sqrt, ir_unop_sqrt) +UNOPA(inversesqrt, ir_unop_rsq) /** @} */ -UNOP(abs, ir_unop_abs, always_available) -UNOP(sign, ir_unop_sign, always_available) -UNOP(floor, ir_unop_floor, always_available) -UNOP(trunc, ir_unop_trunc, v130) -UNOP(round, ir_unop_round_even, always_available) -UNOP(roundEven, ir_unop_round_even, always_available) -UNOP(ceil, ir_unop_ceil, always_available) -UNOP(fract, ir_unop_fract, always_available) +UNOPA(abs, ir_unop_abs) +UNOPA(sign, ir_unop_sign) +UNOPA(floor, ir_unop_floor) +UNOPA(trunc, ir_unop_trunc) +UNOPA(round, ir_unop_round_even) +UNOPA(roundEven, ir_unop_round_even) +UNOPA(ceil, ir_unop_ceil) +UNOPA(fract, ir_unop_fract) ir_function_signature * builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type) @@ -2876,11 +3056,11 @@ builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type) } ir_function_signature * -builtin_builder::_modf(const glsl_type *type) +builtin_builder::_modf(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); ir_variable *i = out_var(type, "i"); - MAKE_SIG(type, v130, 2, x, i); + MAKE_SIG(type, avail, 2, x, i); ir_variable *t = 
body.make_temp(type, "t"); body.emit(assign(t, expr(ir_unop_trunc, x))); @@ -2919,12 +3099,12 @@ builtin_builder::_clamp(builtin_available_predicate avail, } ir_function_signature * -builtin_builder::_mix_lrp(const glsl_type *val_type, const glsl_type *blend_type) +builtin_builder::_mix_lrp(builtin_available_predicate avail, const glsl_type *val_type, const glsl_type *blend_type) { ir_variable *x = in_var(val_type, "x"); ir_variable *y = in_var(val_type, "y"); ir_variable *a = in_var(blend_type, "a"); - MAKE_SIG(val_type, always_available, 3, x, y, a); + MAKE_SIG(val_type, avail, 3, x, y, a); body.emit(ret(lrp(x, y, a))); @@ -2954,26 +3134,37 @@ builtin_builder::_mix_sel(builtin_available_predicate avail, } ir_function_signature * -builtin_builder::_step(const glsl_type *edge_type, const glsl_type *x_type) +builtin_builder::_step(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type) { ir_variable *edge = in_var(edge_type, "edge"); ir_variable *x = in_var(x_type, "x"); - MAKE_SIG(x_type, always_available, 2, edge, x); + MAKE_SIG(x_type, avail, 2, edge, x); ir_variable *t = body.make_temp(x_type, "t"); if (x_type->vector_elements == 1) { /* Both are floats */ - body.emit(assign(t, b2f(gequal(x, edge)))); + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(x, edge))))); + else + body.emit(assign(t, b2f(gequal(x, edge)))); } else if (edge_type->vector_elements == 1) { /* x is a vector but edge is a float */ for (int i = 0; i < x_type->vector_elements; i++) { - body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i)); + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), edge))), 1 << i)); + else + body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i)); } } else { /* Both are vectors */ for (int i = 0; i < x_type->vector_elements; i++) { - body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))), - 1 << i)); + if 
(edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1)))), + 1 << i)); + else + body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))), + 1 << i)); + } } body.emit(ret(t)); @@ -2982,12 +3173,12 @@ builtin_builder::_step(const glsl_type *edge_type, const glsl_type *x_type) } ir_function_signature * -builtin_builder::_smoothstep(const glsl_type *edge_type, const glsl_type *x_type) +builtin_builder::_smoothstep(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type) { ir_variable *edge0 = in_var(edge_type, "edge0"); ir_variable *edge1 = in_var(edge_type, "edge1"); ir_variable *x = in_var(x_type, "x"); - MAKE_SIG(x_type, always_available, 3, edge0, edge1, x); + MAKE_SIG(x_type, avail, 3, edge0, edge1, x); /* From the GLSL 1.10 specification: * @@ -2998,18 +3189,18 @@ builtin_builder::_smoothstep(const glsl_type *edge_type, const glsl_type *x_type ir_variable *t = body.make_temp(x_type, "t"); body.emit(assign(t, clamp(div(sub(x, edge0), sub(edge1, edge0)), - imm(0.0f), imm(1.0f)))); + IMM_FP(x_type, 0.0), IMM_FP(x_type, 1.0)))); - body.emit(ret(mul(t, mul(t, sub(imm(3.0f), mul(imm(2.0f), t)))))); + body.emit(ret(mul(t, mul(t, sub(IMM_FP(x_type, 3.0), mul(IMM_FP(x_type, 2.0), t)))))); return sig; } ir_function_signature * -builtin_builder::_isnan(const glsl_type *type) +builtin_builder::_isnan(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::bvec(type->vector_elements), v130, 1, x); + MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); body.emit(ret(nequal(x, x))); @@ -3017,10 +3208,10 @@ builtin_builder::_isnan(const glsl_type *type) } ir_function_signature * -builtin_builder::_isinf(const glsl_type *type) +builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::bvec(type->vector_elements), v130, 
1, x); + MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); ir_constant_data infinities; for (int i = 0; i < type->vector_elements; i++) { @@ -3160,10 +3351,28 @@ builtin_builder::_unpackHalf2x16(builtin_available_predicate avail) } ir_function_signature * -builtin_builder::_length(const glsl_type *type) +builtin_builder::_packDouble2x32(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::uvec2_type, "v"); + MAKE_SIG(glsl_type::double_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_double_2x32, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackDouble2x32(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::double_type, "p"); + MAKE_SIG(glsl_type::uvec2_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_double_2x32, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_length(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::float_type, always_available, 1, x); + MAKE_SIG(type->get_base_type(), avail, 1, x); body.emit(ret(sqrt(dot(x, x)))); @@ -3171,11 +3380,11 @@ builtin_builder::_length(const glsl_type *type) } ir_function_signature * -builtin_builder::_distance(const glsl_type *type) +builtin_builder::_distance(builtin_available_predicate avail, const glsl_type *type) { ir_variable *p0 = in_var(type, "p0"); ir_variable *p1 = in_var(type, "p1"); - MAKE_SIG(glsl_type::float_type, always_available, 2, p0, p1); + MAKE_SIG(type->get_base_type(), avail, 2, p0, p1); if (type->vector_elements == 1) { body.emit(ret(abs(sub(p0, p1)))); @@ -3189,21 +3398,21 @@ builtin_builder::_distance(const glsl_type *type) } ir_function_signature * -builtin_builder::_dot(const glsl_type *type) +builtin_builder::_dot(builtin_available_predicate avail, const glsl_type *type) { if (type->vector_elements == 1) - return binop(ir_binop_mul, always_available, type, type, type); + return binop(ir_binop_mul, avail, type, 
type, type); - return binop(ir_binop_dot, always_available, - glsl_type::float_type, type, type); + return binop(ir_binop_dot, avail, + type->get_base_type(), type, type); } ir_function_signature * -builtin_builder::_cross(const glsl_type *type) +builtin_builder::_cross(builtin_available_predicate avail, const glsl_type *type) { ir_variable *a = in_var(type, "a"); ir_variable *b = in_var(type, "b"); - MAKE_SIG(type, always_available, 2, a, b); + MAKE_SIG(type, avail, 2, a, b); int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0); int zxy = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, 0); @@ -3215,10 +3424,10 @@ builtin_builder::_cross(const glsl_type *type) } ir_function_signature * -builtin_builder::_normalize(const glsl_type *type) +builtin_builder::_normalize(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, always_available, 1, x); + MAKE_SIG(type, avail, 1, x); if (type->vector_elements == 1) { body.emit(ret(sign(x))); @@ -3248,41 +3457,41 @@ builtin_builder::_ftransform() } ir_function_signature * -builtin_builder::_faceforward(const glsl_type *type) +builtin_builder::_faceforward(builtin_available_predicate avail, const glsl_type *type) { ir_variable *N = in_var(type, "N"); ir_variable *I = in_var(type, "I"); ir_variable *Nref = in_var(type, "Nref"); - MAKE_SIG(type, always_available, 3, N, I, Nref); + MAKE_SIG(type, avail, 3, N, I, Nref); - body.emit(if_tree(less(dot(Nref, I), imm(0.0f)), + body.emit(if_tree(less(dot(Nref, I), IMM_FP(type, 0.0)), ret(N), ret(neg(N)))); return sig; } ir_function_signature * -builtin_builder::_reflect(const glsl_type *type) +builtin_builder::_reflect(builtin_available_predicate avail, const glsl_type *type) { ir_variable *I = in_var(type, "I"); ir_variable *N = in_var(type, "N"); - MAKE_SIG(type, always_available, 2, I, N); + MAKE_SIG(type, avail, 2, I, N); /* I - 2 * dot(N, I) * N */ - body.emit(ret(sub(I, mul(imm(2.0f), mul(dot(N, I), N))))); + 
body.emit(ret(sub(I, mul(IMM_FP(type, 2.0), mul(dot(N, I), N))))); return sig; } ir_function_signature * -builtin_builder::_refract(const glsl_type *type) +builtin_builder::_refract(builtin_available_predicate avail, const glsl_type *type) { ir_variable *I = in_var(type, "I"); ir_variable *N = in_var(type, "N"); - ir_variable *eta = in_var(glsl_type::float_type, "eta"); - MAKE_SIG(type, always_available, 3, I, N, eta); + ir_variable *eta = in_var(type->get_base_type(), "eta"); + MAKE_SIG(type, avail, 3, I, N, eta); - ir_variable *n_dot_i = body.make_temp(glsl_type::float_type, "n_dot_i"); + ir_variable *n_dot_i = body.make_temp(type->get_base_type(), "n_dot_i"); body.emit(assign(n_dot_i, dot(N, I))); /* From the GLSL 1.10 specification: @@ -3292,11 +3501,11 @@ builtin_builder::_refract(const glsl_type *type) * else * return eta * I - (eta * dot(N, I) + sqrt(k)) * N */ - ir_variable *k = body.make_temp(glsl_type::float_type, "k"); - body.emit(assign(k, sub(imm(1.0f), - mul(eta, mul(eta, sub(imm(1.0f), + ir_variable *k = body.make_temp(type->get_base_type(), "k"); + body.emit(assign(k, sub(IMM_FP(type, 1.0), + mul(eta, mul(eta, sub(IMM_FP(type, 1.0), mul(n_dot_i, n_dot_i))))))); - body.emit(if_tree(less(k, imm(0.0f)), + body.emit(if_tree(less(k, IMM_FP(type, 0.0)), ret(ir_constant::zero(mem_ctx, type)), ret(sub(mul(eta, I), mul(add(mul(eta, n_dot_i), sqrt(k)), N))))); @@ -3305,11 +3514,11 @@ builtin_builder::_refract(const glsl_type *type) } ir_function_signature * -builtin_builder::_matrixCompMult(const glsl_type *type) +builtin_builder::_matrixCompMult(builtin_available_predicate avail, const glsl_type *type) { ir_variable *x = in_var(type, "x"); ir_variable *y = in_var(type, "y"); - MAKE_SIG(type, always_available, 2, x, y); + MAKE_SIG(type, avail, 2, x, y); ir_variable *z = body.make_temp(type, "z"); for (int i = 0; i < type->matrix_columns; i++) { @@ -3321,11 +3530,19 @@ builtin_builder::_matrixCompMult(const glsl_type *type) } ir_function_signature * 
-builtin_builder::_outerProduct(const glsl_type *type) +builtin_builder::_outerProduct(builtin_available_predicate avail, const glsl_type *type) { - ir_variable *c = in_var(glsl_type::vec(type->vector_elements), "c"); - ir_variable *r = in_var(glsl_type::vec(type->matrix_columns), "r"); - MAKE_SIG(type, v120, 2, c, r); + ir_variable *c; + ir_variable *r; + + if (type->base_type == GLSL_TYPE_DOUBLE) { + r = in_var(glsl_type::dvec(type->matrix_columns), "r"); + c = in_var(glsl_type::dvec(type->vector_elements), "c"); + } else { + r = in_var(glsl_type::vec(type->matrix_columns), "r"); + c = in_var(glsl_type::vec(type->vector_elements), "c"); + } + MAKE_SIG(type, avail, 2, c, r); ir_variable *m = body.make_temp(type, "m"); for (int i = 0; i < type->matrix_columns; i++) { @@ -3337,15 +3554,15 @@ builtin_builder::_outerProduct(const glsl_type *type) } ir_function_signature * -builtin_builder::_transpose(const glsl_type *orig_type) +builtin_builder::_transpose(builtin_available_predicate avail, const glsl_type *orig_type) { const glsl_type *transpose_type = - glsl_type::get_instance(GLSL_TYPE_FLOAT, + glsl_type::get_instance(orig_type->base_type, orig_type->matrix_columns, orig_type->vector_elements); ir_variable *m = in_var(orig_type, "m"); - MAKE_SIG(transpose_type, v120, 1, m); + MAKE_SIG(transpose_type, avail, 1, m); ir_variable *t = body.make_temp(transpose_type, "t"); for (int i = 0; i < orig_type->matrix_columns; i++) { @@ -3361,10 +3578,10 @@ builtin_builder::_transpose(const glsl_type *orig_type) } ir_function_signature * -builtin_builder::_determinant_mat2() +builtin_builder::_determinant_mat2(builtin_available_predicate avail, const glsl_type *type) { - ir_variable *m = in_var(glsl_type::mat2_type, "m"); - MAKE_SIG(glsl_type::float_type, v120, 1, m); + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type->get_base_type(), avail, 1, m); body.emit(ret(sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)), mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))))); @@ -3373,10 
+3590,10 @@ builtin_builder::_determinant_mat2() } ir_function_signature * -builtin_builder::_determinant_mat3() +builtin_builder::_determinant_mat3(builtin_available_predicate avail, const glsl_type *type) { - ir_variable *m = in_var(glsl_type::mat3_type, "m"); - MAKE_SIG(glsl_type::float_type, v120, 1, m); + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type->get_base_type(), avail, 1, m); ir_expression *f1 = sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), @@ -3398,30 +3615,31 @@ builtin_builder::_determinant_mat3() } ir_function_signature * -builtin_builder::_determinant_mat4() -{ - ir_variable *m = in_var(glsl_type::mat4_type, "m"); - MAKE_SIG(glsl_type::float_type, v120, 1, m); - - ir_variable *SubFactor00 = body.make_temp(glsl_type::float_type, "SubFactor00"); - ir_variable *SubFactor01 = body.make_temp(glsl_type::float_type, "SubFactor01"); - ir_variable *SubFactor02 = body.make_temp(glsl_type::float_type, "SubFactor02"); - ir_variable *SubFactor03 = body.make_temp(glsl_type::float_type, "SubFactor03"); - ir_variable *SubFactor04 = body.make_temp(glsl_type::float_type, "SubFactor04"); - ir_variable *SubFactor05 = body.make_temp(glsl_type::float_type, "SubFactor05"); - ir_variable *SubFactor06 = body.make_temp(glsl_type::float_type, "SubFactor06"); - ir_variable *SubFactor07 = body.make_temp(glsl_type::float_type, "SubFactor07"); - ir_variable *SubFactor08 = body.make_temp(glsl_type::float_type, "SubFactor08"); - ir_variable *SubFactor09 = body.make_temp(glsl_type::float_type, "SubFactor09"); - ir_variable *SubFactor10 = body.make_temp(glsl_type::float_type, "SubFactor10"); - ir_variable *SubFactor11 = body.make_temp(glsl_type::float_type, "SubFactor11"); - ir_variable *SubFactor12 = body.make_temp(glsl_type::float_type, "SubFactor12"); - ir_variable *SubFactor13 = body.make_temp(glsl_type::float_type, "SubFactor13"); - ir_variable *SubFactor14 = body.make_temp(glsl_type::float_type, "SubFactor14"); - ir_variable *SubFactor15 = 
body.make_temp(glsl_type::float_type, "SubFactor15"); - ir_variable *SubFactor16 = body.make_temp(glsl_type::float_type, "SubFactor16"); - ir_variable *SubFactor17 = body.make_temp(glsl_type::float_type, "SubFactor17"); - ir_variable *SubFactor18 = body.make_temp(glsl_type::float_type, "SubFactor18"); +builtin_builder::_determinant_mat4(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(btype, avail, 1, m); + + ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00"); + ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01"); + ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02"); + ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03"); + ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04"); + ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05"); + ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06"); + ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07"); + ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08"); + ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09"); + ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10"); + ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11"); + ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12"); + ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13"); + ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14"); + ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15"); + ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16"); + ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17"); + ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18"); body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3))))); body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 
3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3))))); @@ -3443,7 +3661,7 @@ builtin_builder::_determinant_mat4() body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); - ir_variable *adj_0 = body.make_temp(glsl_type::vec4_type, "adj_0"); + ir_variable *adj_0 = body.make_temp(btype == glsl_type::float_type ? glsl_type::vec4_type : glsl_type::dvec4_type, "adj_0"); body.emit(assign(adj_0, add(sub(mul(matrix_elt(m, 1, 1), SubFactor00), @@ -3472,12 +3690,12 @@ builtin_builder::_determinant_mat4() } ir_function_signature * -builtin_builder::_inverse_mat2() +builtin_builder::_inverse_mat2(builtin_available_predicate avail, const glsl_type *type) { - ir_variable *m = in_var(glsl_type::mat2_type, "m"); - MAKE_SIG(glsl_type::mat2_type, v120, 1, m); + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type, avail, 1, m); - ir_variable *adj = body.make_temp(glsl_type::mat2_type, "adj"); + ir_variable *adj = body.make_temp(type, "adj"); body.emit(assign(array_ref(adj, 0), matrix_elt(m, 1, 1), 1 << 0)); body.emit(assign(array_ref(adj, 0), neg(matrix_elt(m, 0, 1)), 1 << 1)); body.emit(assign(array_ref(adj, 1), neg(matrix_elt(m, 1, 0)), 1 << 0)); @@ -3492,14 +3710,15 @@ builtin_builder::_inverse_mat2() } ir_function_signature * -builtin_builder::_inverse_mat3() +builtin_builder::_inverse_mat3(builtin_available_predicate avail, const glsl_type *type) { - ir_variable *m = in_var(glsl_type::mat3_type, "m"); - MAKE_SIG(glsl_type::mat3_type, v120, 1, m); + ir_variable *m = in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(type, avail, 1, m); - ir_variable *f11_22_21_12 = body.make_temp(glsl_type::float_type, "f11_22_21_12"); - ir_variable *f10_22_20_12 = body.make_temp(glsl_type::float_type, "f10_22_20_12"); - ir_variable *f10_21_20_11 = 
body.make_temp(glsl_type::float_type, "f10_21_20_11"); + ir_variable *f11_22_21_12 = body.make_temp(btype, "f11_22_21_12"); + ir_variable *f10_22_20_12 = body.make_temp(btype, "f10_22_20_12"); + ir_variable *f10_21_20_11 = body.make_temp(btype, "f10_21_20_11"); body.emit(assign(f11_22_21_12, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), @@ -3511,7 +3730,7 @@ builtin_builder::_inverse_mat3() sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); - ir_variable *adj = body.make_temp(glsl_type::mat3_type, "adj"); + ir_variable *adj = body.make_temp(type, "adj"); body.emit(assign(array_ref(adj, 0), f11_22_21_12, WRITEMASK_X)); body.emit(assign(array_ref(adj, 1), neg(f10_22_20_12), WRITEMASK_X)); body.emit(assign(array_ref(adj, 2), f10_21_20_11, WRITEMASK_X)); @@ -3553,30 +3772,31 @@ builtin_builder::_inverse_mat3() } ir_function_signature * -builtin_builder::_inverse_mat4() -{ - ir_variable *m = in_var(glsl_type::mat4_type, "m"); - MAKE_SIG(glsl_type::mat4_type, v120, 1, m); - - ir_variable *SubFactor00 = body.make_temp(glsl_type::float_type, "SubFactor00"); - ir_variable *SubFactor01 = body.make_temp(glsl_type::float_type, "SubFactor01"); - ir_variable *SubFactor02 = body.make_temp(glsl_type::float_type, "SubFactor02"); - ir_variable *SubFactor03 = body.make_temp(glsl_type::float_type, "SubFactor03"); - ir_variable *SubFactor04 = body.make_temp(glsl_type::float_type, "SubFactor04"); - ir_variable *SubFactor05 = body.make_temp(glsl_type::float_type, "SubFactor05"); - ir_variable *SubFactor06 = body.make_temp(glsl_type::float_type, "SubFactor06"); - ir_variable *SubFactor07 = body.make_temp(glsl_type::float_type, "SubFactor07"); - ir_variable *SubFactor08 = body.make_temp(glsl_type::float_type, "SubFactor08"); - ir_variable *SubFactor09 = body.make_temp(glsl_type::float_type, "SubFactor09"); - ir_variable *SubFactor10 = body.make_temp(glsl_type::float_type, "SubFactor10"); - ir_variable *SubFactor11 = 
body.make_temp(glsl_type::float_type, "SubFactor11"); - ir_variable *SubFactor12 = body.make_temp(glsl_type::float_type, "SubFactor12"); - ir_variable *SubFactor13 = body.make_temp(glsl_type::float_type, "SubFactor13"); - ir_variable *SubFactor14 = body.make_temp(glsl_type::float_type, "SubFactor14"); - ir_variable *SubFactor15 = body.make_temp(glsl_type::float_type, "SubFactor15"); - ir_variable *SubFactor16 = body.make_temp(glsl_type::float_type, "SubFactor16"); - ir_variable *SubFactor17 = body.make_temp(glsl_type::float_type, "SubFactor17"); - ir_variable *SubFactor18 = body.make_temp(glsl_type::float_type, "SubFactor18"); +builtin_builder::_inverse_mat4(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(type, avail, 1, m); + + ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00"); + ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01"); + ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02"); + ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03"); + ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04"); + ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05"); + ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06"); + ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07"); + ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08"); + ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09"); + ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10"); + ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11"); + ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12"); + ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13"); + ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14"); + ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15"); + ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16"); + 
ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17"); + ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18"); body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3))))); body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3))))); @@ -3598,7 +3818,7 @@ builtin_builder::_inverse_mat4() body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); - ir_variable *adj = body.make_temp(glsl_type::mat4_type, "adj"); + ir_variable *adj = body.make_temp(btype == glsl_type::float_type ? glsl_type::mat4_type : glsl_type::dmat4_type, "adj"); body.emit(assign(array_ref(adj, 0), add(sub(mul(matrix_elt(m, 1, 1), SubFactor00), mul(matrix_elt(m, 1, 2), SubFactor01)), @@ -4270,12 +4490,12 @@ builtin_builder::_findMSB(const glsl_type *type) } ir_function_signature * -builtin_builder::_fma(const glsl_type *type) +builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type) { ir_variable *a = in_var(type, "a"); ir_variable *b = in_var(type, "b"); ir_variable *c = in_var(type, "c"); - MAKE_SIG(type, gpu_shader5, 3, a, b, c); + MAKE_SIG(type, avail, 3, a, b, c); body.emit(ret(ir_builder::fma(a, b, c))); @@ -4285,7 +4505,20 @@ builtin_builder::_fma(const glsl_type *type) ir_function_signature * builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type) { - return binop(ir_binop_ldexp, gpu_shader5, x_type, x_type, exp_type); + return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? 
fp64 : gpu_shader5, x_type, x_type, exp_type); +} + +ir_function_signature * +builtin_builder::_dfrexp(const glsl_type *x_type, const glsl_type *exp_type) +{ + ir_variable *x = in_var(x_type, "x"); + ir_variable *exponent = out_var(exp_type, "exp"); + MAKE_SIG(x_type, fp64, 2, x, exponent); + + body.emit(assign(exponent, expr(ir_unop_frexp_exp, x))); + + body.emit(ret(expr(ir_unop_frexp_sig, x))); + return sig; } ir_function_signature * @@ -4618,6 +4851,17 @@ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state, return s; } +ir_function * +_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state, + const char *name) +{ + ir_function *f; + mtx_lock(&builtins_lock); + f = builtins.shader->symbols->get_function(name); + mtx_unlock(&builtins_lock); + return f; +} + gl_shader * _mesa_glsl_get_builtin_function_shader() { diff --git a/mesalib/src/glsl/builtin_type_macros.h b/mesalib/src/glsl/builtin_type_macros.h index 236e1ce8c..8e16ae454 100644 --- a/mesalib/src/glsl/builtin_type_macros.h +++ b/mesalib/src/glsl/builtin_type_macros.h @@ -64,6 +64,22 @@ DECL_TYPE(mat3x4, GL_FLOAT_MAT3x4, GLSL_TYPE_FLOAT, 4, 3) DECL_TYPE(mat4x2, GL_FLOAT_MAT4x2, GLSL_TYPE_FLOAT, 2, 4) DECL_TYPE(mat4x3, GL_FLOAT_MAT4x3, GLSL_TYPE_FLOAT, 3, 4) +DECL_TYPE(double, GL_DOUBLE, GLSL_TYPE_DOUBLE, 1, 1) +DECL_TYPE(dvec2, GL_DOUBLE_VEC2, GLSL_TYPE_DOUBLE, 2, 1) +DECL_TYPE(dvec3, GL_DOUBLE_VEC3, GLSL_TYPE_DOUBLE, 3, 1) +DECL_TYPE(dvec4, GL_DOUBLE_VEC4, GLSL_TYPE_DOUBLE, 4, 1) + +DECL_TYPE(dmat2, GL_DOUBLE_MAT2, GLSL_TYPE_DOUBLE, 2, 2) +DECL_TYPE(dmat3, GL_DOUBLE_MAT3, GLSL_TYPE_DOUBLE, 3, 3) +DECL_TYPE(dmat4, GL_DOUBLE_MAT4, GLSL_TYPE_DOUBLE, 4, 4) + +DECL_TYPE(dmat2x3, GL_DOUBLE_MAT2x3, GLSL_TYPE_DOUBLE, 3, 2) +DECL_TYPE(dmat2x4, GL_DOUBLE_MAT2x4, GLSL_TYPE_DOUBLE, 4, 2) +DECL_TYPE(dmat3x2, GL_DOUBLE_MAT3x2, GLSL_TYPE_DOUBLE, 2, 3) +DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3) +DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4) +DECL_TYPE(dmat4x3, 
GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4) + DECL_TYPE(sampler1D, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_FLOAT) DECL_TYPE(sampler2D, GL_SAMPLER_2D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_FLOAT) DECL_TYPE(sampler3D, GL_SAMPLER_3D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT) @@ -110,39 +126,39 @@ DECL_TYPE(sampler2DRectShadow, GL_SAMPLER_2D_RECT_SHADOW, GLSL_TYPE_SA DECL_TYPE(samplerExternalOES, GL_SAMPLER_EXTERNAL_OES, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_EXTERNAL, 0, 0, GLSL_TYPE_FLOAT) -DECL_TYPE(image1D, GL_IMAGE_1D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_FLOAT); -DECL_TYPE(image2D, GL_IMAGE_2D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_FLOAT); -DECL_TYPE(image3D, GL_IMAGE_3D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT); -DECL_TYPE(image2DRect, GL_IMAGE_2D_RECT, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT, 0, 0, GLSL_TYPE_FLOAT); -DECL_TYPE(imageCube, GL_IMAGE_CUBE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 0, GLSL_TYPE_FLOAT); -DECL_TYPE(imageBuffer, GL_IMAGE_BUFFER, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF, 0, 0, GLSL_TYPE_FLOAT); -DECL_TYPE(image1DArray, GL_IMAGE_1D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 1, GLSL_TYPE_FLOAT); -DECL_TYPE(image2DArray, GL_IMAGE_2D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 1, GLSL_TYPE_FLOAT); -DECL_TYPE(imageCubeArray, GL_IMAGE_CUBE_MAP_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 1, GLSL_TYPE_FLOAT); -DECL_TYPE(image2DMS, GL_IMAGE_2D_MULTISAMPLE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 0, GLSL_TYPE_FLOAT); -DECL_TYPE(image2DMSArray, GL_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 1, GLSL_TYPE_FLOAT); -DECL_TYPE(iimage1D, GL_INT_IMAGE_1D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_INT); -DECL_TYPE(iimage2D, GL_INT_IMAGE_2D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_INT); -DECL_TYPE(iimage3D, GL_INT_IMAGE_3D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D, 0, 0, 
GLSL_TYPE_INT); -DECL_TYPE(iimage2DRect, GL_INT_IMAGE_2D_RECT, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT, 0, 0, GLSL_TYPE_INT); -DECL_TYPE(iimageCube, GL_INT_IMAGE_CUBE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 0, GLSL_TYPE_INT); -DECL_TYPE(iimageBuffer, GL_INT_IMAGE_BUFFER, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF, 0, 0, GLSL_TYPE_INT); -DECL_TYPE(iimage1DArray, GL_INT_IMAGE_1D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 1, GLSL_TYPE_INT); -DECL_TYPE(iimage2DArray, GL_INT_IMAGE_2D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 1, GLSL_TYPE_INT); -DECL_TYPE(iimageCubeArray, GL_INT_IMAGE_CUBE_MAP_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 1, GLSL_TYPE_INT); -DECL_TYPE(iimage2DMS, GL_INT_IMAGE_2D_MULTISAMPLE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 0, GLSL_TYPE_INT); -DECL_TYPE(iimage2DMSArray, GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 1, GLSL_TYPE_INT); -DECL_TYPE(uimage1D, GL_UNSIGNED_INT_IMAGE_1D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_UINT); -DECL_TYPE(uimage2D, GL_UNSIGNED_INT_IMAGE_2D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_UINT); -DECL_TYPE(uimage3D, GL_UNSIGNED_INT_IMAGE_3D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_UINT); -DECL_TYPE(uimage2DRect, GL_UNSIGNED_INT_IMAGE_2D_RECT, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT, 0, 0, GLSL_TYPE_UINT); -DECL_TYPE(uimageCube, GL_UNSIGNED_INT_IMAGE_CUBE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 0, GLSL_TYPE_UINT); -DECL_TYPE(uimageBuffer, GL_UNSIGNED_INT_IMAGE_BUFFER, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF, 0, 0, GLSL_TYPE_UINT); -DECL_TYPE(uimage1DArray, GL_UNSIGNED_INT_IMAGE_1D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 1, GLSL_TYPE_UINT); -DECL_TYPE(uimage2DArray, GL_UNSIGNED_INT_IMAGE_2D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 1, GLSL_TYPE_UINT); -DECL_TYPE(uimageCubeArray, GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 1, GLSL_TYPE_UINT); -DECL_TYPE(uimage2DMS, 
GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 0, GLSL_TYPE_UINT); -DECL_TYPE(uimage2DMSArray, GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 1, GLSL_TYPE_UINT); +DECL_TYPE(image1D, GL_IMAGE_1D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_FLOAT) +DECL_TYPE(image2D, GL_IMAGE_2D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_FLOAT) +DECL_TYPE(image3D, GL_IMAGE_3D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT) +DECL_TYPE(image2DRect, GL_IMAGE_2D_RECT, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT, 0, 0, GLSL_TYPE_FLOAT) +DECL_TYPE(imageCube, GL_IMAGE_CUBE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 0, GLSL_TYPE_FLOAT) +DECL_TYPE(imageBuffer, GL_IMAGE_BUFFER, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF, 0, 0, GLSL_TYPE_FLOAT) +DECL_TYPE(image1DArray, GL_IMAGE_1D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 1, GLSL_TYPE_FLOAT) +DECL_TYPE(image2DArray, GL_IMAGE_2D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 1, GLSL_TYPE_FLOAT) +DECL_TYPE(imageCubeArray, GL_IMAGE_CUBE_MAP_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 1, GLSL_TYPE_FLOAT) +DECL_TYPE(image2DMS, GL_IMAGE_2D_MULTISAMPLE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 0, GLSL_TYPE_FLOAT) +DECL_TYPE(image2DMSArray, GL_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 1, GLSL_TYPE_FLOAT) +DECL_TYPE(iimage1D, GL_INT_IMAGE_1D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_INT) +DECL_TYPE(iimage2D, GL_INT_IMAGE_2D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_INT) +DECL_TYPE(iimage3D, GL_INT_IMAGE_3D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_INT) +DECL_TYPE(iimage2DRect, GL_INT_IMAGE_2D_RECT, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT, 0, 0, GLSL_TYPE_INT) +DECL_TYPE(iimageCube, GL_INT_IMAGE_CUBE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 0, GLSL_TYPE_INT) +DECL_TYPE(iimageBuffer, GL_INT_IMAGE_BUFFER, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF, 0, 0, GLSL_TYPE_INT) 
+DECL_TYPE(iimage1DArray, GL_INT_IMAGE_1D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 1, GLSL_TYPE_INT) +DECL_TYPE(iimage2DArray, GL_INT_IMAGE_2D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 1, GLSL_TYPE_INT) +DECL_TYPE(iimageCubeArray, GL_INT_IMAGE_CUBE_MAP_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 1, GLSL_TYPE_INT) +DECL_TYPE(iimage2DMS, GL_INT_IMAGE_2D_MULTISAMPLE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 0, GLSL_TYPE_INT) +DECL_TYPE(iimage2DMSArray, GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 1, GLSL_TYPE_INT) +DECL_TYPE(uimage1D, GL_UNSIGNED_INT_IMAGE_1D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_UINT) +DECL_TYPE(uimage2D, GL_UNSIGNED_INT_IMAGE_2D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_UINT) +DECL_TYPE(uimage3D, GL_UNSIGNED_INT_IMAGE_3D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_UINT) +DECL_TYPE(uimage2DRect, GL_UNSIGNED_INT_IMAGE_2D_RECT, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT, 0, 0, GLSL_TYPE_UINT) +DECL_TYPE(uimageCube, GL_UNSIGNED_INT_IMAGE_CUBE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 0, GLSL_TYPE_UINT) +DECL_TYPE(uimageBuffer, GL_UNSIGNED_INT_IMAGE_BUFFER, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF, 0, 0, GLSL_TYPE_UINT) +DECL_TYPE(uimage1DArray, GL_UNSIGNED_INT_IMAGE_1D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 1, GLSL_TYPE_UINT) +DECL_TYPE(uimage2DArray, GL_UNSIGNED_INT_IMAGE_2D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 1, GLSL_TYPE_UINT) +DECL_TYPE(uimageCubeArray, GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE, 0, 1, GLSL_TYPE_UINT) +DECL_TYPE(uimage2DMS, GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 0, GLSL_TYPE_UINT) +DECL_TYPE(uimage2DMSArray, GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS, 0, 1, GLSL_TYPE_UINT) DECL_TYPE(atomic_uint, GL_UNSIGNED_INT_ATOMIC_COUNTER, GLSL_TYPE_ATOMIC_UINT, 1, 1) diff --git a/mesalib/src/glsl/builtin_types.cpp 
b/mesalib/src/glsl/builtin_types.cpp index 10fac0f81..fef86df28 100644 --- a/mesalib/src/glsl/builtin_types.cpp +++ b/mesalib/src/glsl/builtin_types.cpp @@ -159,6 +159,20 @@ const static struct builtin_type_versions { T(mat4x2, 120, 300) T(mat4x3, 120, 300) + T(double, 400, 999) + T(dvec2, 400, 999) + T(dvec3, 400, 999) + T(dvec4, 400, 999) + T(dmat2, 400, 999) + T(dmat3, 400, 999) + T(dmat4, 400, 999) + T(dmat2x3, 400, 999) + T(dmat2x4, 400, 999) + T(dmat3x2, 400, 999) + T(dmat3x4, 400, 999) + T(dmat4x2, 400, 999) + T(dmat4x3, 400, 999) + T(sampler1D, 110, 999) T(sampler2D, 110, 100) T(sampler3D, 110, 300) @@ -361,5 +375,21 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) if (state->ARB_shader_atomic_counters_enable) { add_type(symbols, glsl_type::atomic_uint_type); } + + if (state->ARB_gpu_shader_fp64_enable) { + add_type(symbols, glsl_type::double_type); + add_type(symbols, glsl_type::dvec2_type); + add_type(symbols, glsl_type::dvec3_type); + add_type(symbols, glsl_type::dvec4_type); + add_type(symbols, glsl_type::dmat2_type); + add_type(symbols, glsl_type::dmat3_type); + add_type(symbols, glsl_type::dmat4_type); + add_type(symbols, glsl_type::dmat2x3_type); + add_type(symbols, glsl_type::dmat2x4_type); + add_type(symbols, glsl_type::dmat3x2_type); + add_type(symbols, glsl_type::dmat3x4_type); + add_type(symbols, glsl_type::dmat4x2_type); + add_type(symbols, glsl_type::dmat4x3_type); + } } /** @} */ diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y index 9b1a4f401..c2f5223a9 100644 --- a/mesalib/src/glsl/glcpp/glcpp-parse.y +++ b/mesalib/src/glsl/glcpp/glcpp-parse.y @@ -2375,6 +2375,7 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (parser->is_gles) { add_builtin_define(parser, "GL_ES", 1); add_builtin_define(parser, "GL_EXT_separate_shader_objects", 1); + add_builtin_define(parser, "GL_EXT_draw_buffers", 1); if (extensions != NULL) { if 
(extensions->OES_EGL_image_external) @@ -2444,6 +2445,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions->ARB_gpu_shader5) add_builtin_define(parser, "GL_ARB_gpu_shader5", 1); + if (extensions->ARB_gpu_shader_fp64) + add_builtin_define(parser, "GL_ARB_gpu_shader_fp64", 1); + if (extensions->AMD_vertex_shader_layer) add_builtin_define(parser, "GL_AMD_vertex_shader_layer", 1); @@ -2473,6 +2477,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions->ARB_derivative_control) add_builtin_define(parser, "GL_ARB_derivative_control", 1); + + if (extensions->ARB_shader_precision) + add_builtin_define(parser, "GL_ARB_shader_precision", 1); } } diff --git a/mesalib/src/glsl/glcpp/glcpp.c b/mesalib/src/glsl/glcpp/glcpp.c index ca188015c..5144516a6 100644 --- a/mesalib/src/glsl/glcpp/glcpp.c +++ b/mesalib/src/glsl/glcpp/glcpp.c @@ -121,7 +121,7 @@ enum { DISABLE_LINE_CONTINUATIONS_OPT = CHAR_MAX + 1 }; -const static struct option +static const struct option long_options[] = { {"disable-line-continuations", no_argument, 0, DISABLE_LINE_CONTINUATIONS_OPT }, {"debug", no_argument, 0, 'd'}, diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll index 57c46be84..8dc3d106b 100644 --- a/mesalib/src/glsl/glsl_lexer.ll +++ b/mesalib/src/glsl/glsl_lexer.ll @@ -458,6 +458,17 @@ layout { return FLOATCONSTANT; } +[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF) | +\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF) | +[0-9]+\.([eE][+-]?[0-9]+)?(lf|LF) | +[0-9]+[eE][+-]?[0-9]+(lf|LF) { + if (!yyextra->is_version(400, 0) && + !yyextra->ARB_gpu_shader_fp64_enable) + return ERROR_TOK; + yylval->dreal = _mesa_strtod(yytext, NULL); + return DOUBLECONSTANT; + } + true { yylval->n = 1; return BOOLCONSTANT; @@ -489,7 +500,7 @@ external KEYWORD(110, 100, 0, 0, EXTERNAL); interface KEYWORD(110, 100, 0, 0, INTERFACE); long KEYWORD(110, 100, 0, 0, LONG_TOK); short KEYWORD(110, 100, 0, 0, SHORT_TOK); -double 
KEYWORD(110, 100, 400, 0, DOUBLE_TOK); +double KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DOUBLE_TOK); half KEYWORD(110, 100, 0, 0, HALF); fixed KEYWORD(110, 100, 0, 0, FIXED_TOK); unsigned KEYWORD(110, 100, 0, 0, UNSIGNED); @@ -498,9 +509,21 @@ output KEYWORD(110, 100, 0, 0, OUTPUT); hvec2 KEYWORD(110, 100, 0, 0, HVEC2); hvec3 KEYWORD(110, 100, 0, 0, HVEC3); hvec4 KEYWORD(110, 100, 0, 0, HVEC4); -dvec2 KEYWORD(110, 100, 400, 0, DVEC2); -dvec3 KEYWORD(110, 100, 400, 0, DVEC3); -dvec4 KEYWORD(110, 100, 400, 0, DVEC4); +dvec2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC2); +dvec3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC3); +dvec4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC4); +dmat2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2); +dmat3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3); +dmat4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4); +dmat2x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2); +dmat2x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X3); +dmat2x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X4); +dmat3x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X2); +dmat3x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3); +dmat3x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X4); +dmat4x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X2); +dmat4x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X3); +dmat4x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4); fvec2 KEYWORD(110, 100, 0, 0, FVEC2); fvec3 KEYWORD(110, 100, 0, 0, FVEC3); fvec4 KEYWORD(110, 100, 
0, 0, FVEC4); @@ -544,7 +567,13 @@ subroutine KEYWORD(0, 300, 0, 0, SUBROUTINE); [_a-zA-Z][_a-zA-Z0-9]* { struct _mesa_glsl_parse_state *state = yyextra; void *ctx = state; - yylval->identifier = ralloc_strdup(ctx, yytext); + if (state->es_shader && strlen(yytext) > 1024) { + _mesa_glsl_error(yylloc, state, + "Identifier `%s' exceeds 1024 characters", + yytext); + } else { + yylval->identifier = ralloc_strdup(ctx, yytext); + } return classify_identifier(state, yytext); } diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy index 7fb8c38ab..ea3bd8a24 100644 --- a/mesalib/src/glsl/glsl_parser.yy +++ b/mesalib/src/glsl/glsl_parser.yy @@ -94,6 +94,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2, %union { int n; float real; + double dreal; const char *identifier; struct ast_type_qualifier type_qualifier; @@ -128,14 +129,17 @@ static bool match_layout_qualifier(const char *s1, const char *s2, } selection_rest_statement; } -%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK +%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK DOUBLE_TOK %token BREAK CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT -%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 +%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 DVEC2 DVEC3 DVEC4 %token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING SAMPLE %token NOPERSPECTIVE FLAT SMOOTH %token MAT2X2 MAT2X3 MAT2X4 %token MAT3X2 MAT3X3 MAT3X4 %token MAT4X2 MAT4X3 MAT4X4 +%token DMAT2X2 DMAT2X3 DMAT2X4 +%token DMAT3X2 DMAT3X3 DMAT3X4 +%token DMAT4X2 DMAT4X3 DMAT4X4 %token SAMPLER1D SAMPLER2D SAMPLER3D SAMPLERCUBE SAMPLER1DSHADOW SAMPLER2DSHADOW %token SAMPLERCUBESHADOW SAMPLER1DARRAY SAMPLER2DARRAY SAMPLER1DARRAYSHADOW %token SAMPLER2DARRAYSHADOW SAMPLERCUBEARRAY SAMPLERCUBEARRAYSHADOW @@ -162,6 +166,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2, %type <identifier> any_identifier %type 
<interface_block> instance_name_opt %token <real> FLOATCONSTANT +%token <dreal> DOUBLECONSTANT %token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT %token <identifier> FIELD_SELECTION %token LEFT_OP RIGHT_OP @@ -182,8 +187,8 @@ static bool match_layout_qualifier(const char *s1, const char *s2, */ %token ASM CLASS UNION ENUM TYPEDEF TEMPLATE THIS PACKED_TOK GOTO %token INLINE_TOK NOINLINE PUBLIC_TOK STATIC EXTERN EXTERNAL -%token LONG_TOK SHORT_TOK DOUBLE_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK -%token HVEC2 HVEC3 HVEC4 DVEC2 DVEC3 DVEC4 FVEC2 FVEC3 FVEC4 +%token LONG_TOK SHORT_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK +%token HVEC2 HVEC3 HVEC4 FVEC2 FVEC3 FVEC4 %token SAMPLER3DRECT %token SIZEOF CAST NAMESPACE USING %token RESOURCE PATCH @@ -434,6 +439,13 @@ primary_expression: $$->set_location(@1); $$->primary_expression.float_constant = $1; } + | DOUBLECONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_double_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.double_constant = $1; + } | BOOLCONSTANT { void *ctx = state; @@ -1864,6 +1876,7 @@ type_specifier_nonarray: basic_type_specifier_nonarray: VOID_TOK { $$ = "void"; } | FLOAT_TOK { $$ = "float"; } + | DOUBLE_TOK { $$ = "double"; } | INT_TOK { $$ = "int"; } | UINT_TOK { $$ = "uint"; } | BOOL_TOK { $$ = "bool"; } @@ -1879,6 +1892,9 @@ basic_type_specifier_nonarray: | UVEC2 { $$ = "uvec2"; } | UVEC3 { $$ = "uvec3"; } | UVEC4 { $$ = "uvec4"; } + | DVEC2 { $$ = "dvec2"; } + | DVEC3 { $$ = "dvec3"; } + | DVEC4 { $$ = "dvec4"; } | MAT2X2 { $$ = "mat2"; } | MAT2X3 { $$ = "mat2x3"; } | MAT2X4 { $$ = "mat2x4"; } @@ -1888,6 +1904,15 @@ basic_type_specifier_nonarray: | MAT4X2 { $$ = "mat4x2"; } | MAT4X3 { $$ = "mat4x3"; } | MAT4X4 { $$ = "mat4"; } + | DMAT2X2 { $$ = "dmat2"; } + | DMAT2X3 { $$ = "dmat2x3"; } + | DMAT2X4 { $$ = "dmat2x4"; } + | DMAT3X2 { $$ = "dmat3x2"; } + | DMAT3X3 { $$ = "dmat3"; } + | DMAT3X4 { $$ = "dmat3x4"; } + | DMAT4X2 { $$ = "dmat4x2"; } + | DMAT4X3 { $$ = 
"dmat4x3"; } + | DMAT4X4 { $$ = "dmat4"; } | SAMPLER1D { $$ = "sampler1D"; } | SAMPLER2D { $$ = "sampler2D"; } | SAMPLER2DRECT { $$ = "sampler2DRect"; } @@ -2539,6 +2564,28 @@ basic_interface_block: "interface block member does not match " "the interface block"); } + + /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks": + * + * "GLSL ES 3.0 does not support interface blocks for shader inputs or + * outputs." + * + * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":. + * + * "Only variables output from a shader can be candidates for + * invariance." + * + * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks": + * + * "If optional qualifiers are used, they can include interpolation + * qualifiers, auxiliary storage qualifiers, and storage qualifiers + * and they must declare an input, output, or uniform member + * consistent with the interface qualifier of the block" + */ + if (qualifier.flags.q.invariant) + _mesa_glsl_error(&@1, state, + "invariant qualifiers cannot be used " + "with interface blocks members"); } $$ = block; diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp index 27e2eaf37..9f7931380 100644 --- a/mesalib/src/glsl/glsl_parser_extras.cpp +++ b/mesalib/src/glsl/glsl_parser_extras.cpp @@ -370,10 +370,27 @@ _mesa_shader_stage_to_string(unsigned stage) case MESA_SHADER_VERTEX: return "vertex"; case MESA_SHADER_FRAGMENT: return "fragment"; case MESA_SHADER_GEOMETRY: return "geometry"; + case MESA_SHADER_COMPUTE: return "compute"; } - assert(!"Should not get here."); - return "unknown"; + unreachable("Unknown shader stage."); +} + +/** + * Translate a gl_shader_stage to a shader stage abbreviation (VS, GS, FS) + * for debug printouts and error messages. 
+ */ +const char * +_mesa_shader_stage_to_abbrev(unsigned stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: return "VS"; + case MESA_SHADER_FRAGMENT: return "FS"; + case MESA_SHADER_GEOMETRY: return "GS"; + case MESA_SHADER_COMPUTE: return "CS"; + } + + unreachable("Unknown shader stage."); } /* This helper function will append the given message to the shader's @@ -527,11 +544,13 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_fragment_coord_conventions, true, false, ARB_fragment_coord_conventions), EXT(ARB_fragment_layer_viewport, true, false, ARB_fragment_layer_viewport), EXT(ARB_gpu_shader5, true, false, ARB_gpu_shader5), + EXT(ARB_gpu_shader_fp64, true, false, ARB_gpu_shader_fp64), EXT(ARB_sample_shading, true, false, ARB_sample_shading), EXT(ARB_separate_shader_objects, true, false, dummy_true), EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters), EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding), EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store), + EXT(ARB_shader_precision, true, false, ARB_shader_precision), EXT(ARB_shader_stencil_export, true, false, ARB_shader_stencil_export), EXT(ARB_shader_texture_lod, true, false, ARB_shader_texture_lod), EXT(ARB_shading_language_420pack, true, false, ARB_shading_language_420pack), @@ -561,6 +580,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(AMD_shader_trinary_minmax, true, false, dummy_true), EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer), EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index), + EXT(EXT_draw_buffers, false, true, dummy_true), EXT(EXT_separate_shader_objects, false, true, dummy_true), EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix), EXT(EXT_texture_array, true, false, EXT_texture_array), @@ -959,6 +979,10 @@ ast_expression::print(void) const printf("%f ", primary_expression.float_constant); break; + 
case ast_double_constant: + printf("%f ", primary_expression.double_constant); + break; + case ast_bool_constant: printf("%s ", primary_expression.bool_constant diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h index e04f7ced5..0975c86ed 100644 --- a/mesalib/src/glsl/glsl_parser_extras.h +++ b/mesalib/src/glsl/glsl_parser_extras.h @@ -205,6 +205,11 @@ struct _mesa_glsl_parse_state { || EXT_separate_shader_objects_enable; } + bool has_double() const + { + return ARB_gpu_shader_fp64_enable || is_version(400, 0); + } + void process_version_directive(YYLTYPE *locp, int version, const char *ident); @@ -414,6 +419,8 @@ struct _mesa_glsl_parse_state { bool ARB_fragment_layer_viewport_warn; bool ARB_gpu_shader5_enable; bool ARB_gpu_shader5_warn; + bool ARB_gpu_shader_fp64_enable; + bool ARB_gpu_shader_fp64_warn; bool ARB_sample_shading_enable; bool ARB_sample_shading_warn; bool ARB_separate_shader_objects_enable; @@ -424,6 +431,8 @@ struct _mesa_glsl_parse_state { bool ARB_shader_bit_encoding_warn; bool ARB_shader_image_load_store_enable; bool ARB_shader_image_load_store_warn; + bool ARB_shader_precision_enable; + bool ARB_shader_precision_warn; bool ARB_shader_stencil_export_enable; bool ARB_shader_stencil_export_warn; bool ARB_shader_texture_lod_enable; @@ -473,6 +482,8 @@ struct _mesa_glsl_parse_state { bool AMD_vertex_shader_layer_warn; bool AMD_vertex_shader_viewport_index_enable; bool AMD_vertex_shader_viewport_index_warn; + bool EXT_draw_buffers_enable; + bool EXT_draw_buffers_warn; bool EXT_separate_shader_objects_enable; bool EXT_separate_shader_objects_warn; bool EXT_shader_integer_mix_enable; @@ -572,6 +583,9 @@ extern "C" { extern const char * _mesa_shader_stage_to_string(unsigned stage); +extern const char * +_mesa_shader_stage_to_abbrev(unsigned stage); + extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log, const struct gl_extensions *extensions, struct gl_context *gl_ctx); diff --git 
a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp index 0d2eb7cec..38b37a6a9 100644 --- a/mesalib/src/glsl/glsl_types.cpp +++ b/mesalib/src/glsl/glsl_types.cpp @@ -194,6 +194,22 @@ glsl_type::contains_integer() const } bool +glsl_type::contains_double() const +{ + if (this->is_array()) { + return this->fields.array->contains_double(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_double()) + return true; + } + return false; + } else { + return this->is_double(); + } +} + +bool glsl_type::contains_opaque() const { switch (base_type) { case GLSL_TYPE_SAMPLER: @@ -268,6 +284,8 @@ const glsl_type *glsl_type::get_base_type() const return int_type; case GLSL_TYPE_FLOAT: return float_type; + case GLSL_TYPE_DOUBLE: + return double_type; case GLSL_TYPE_BOOL: return bool_type; default: @@ -292,6 +310,8 @@ const glsl_type *glsl_type::get_scalar_type() const return int_type; case GLSL_TYPE_FLOAT: return float_type; + case GLSL_TYPE_DOUBLE: + return double_type; case GLSL_TYPE_BOOL: return bool_type; default: @@ -377,6 +397,17 @@ glsl_type::vec(unsigned components) return ts[components - 1]; } +const glsl_type * +glsl_type::dvec(unsigned components) +{ + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + double_type, dvec2_type, dvec3_type, dvec4_type + }; + return ts[components - 1]; +} const glsl_type * glsl_type::ivec(unsigned components) @@ -436,13 +467,15 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns) return ivec(rows); case GLSL_TYPE_FLOAT: return vec(rows); + case GLSL_TYPE_DOUBLE: + return dvec(rows); case GLSL_TYPE_BOOL: return bvec(rows); default: return error_type; } } else { - if ((base_type != GLSL_TYPE_FLOAT) || (rows == 1)) + if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1)) return error_type; /* GLSL matrix types are named 
mat{COLUMNS}x{ROWS}. Only the following @@ -456,17 +489,32 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns) */ #define IDX(c,r) (((c-1)*3) + (r-1)) - switch (IDX(columns, rows)) { - case IDX(2,2): return mat2_type; - case IDX(2,3): return mat2x3_type; - case IDX(2,4): return mat2x4_type; - case IDX(3,2): return mat3x2_type; - case IDX(3,3): return mat3_type; - case IDX(3,4): return mat3x4_type; - case IDX(4,2): return mat4x2_type; - case IDX(4,3): return mat4x3_type; - case IDX(4,4): return mat4_type; - default: return error_type; + if (base_type == GLSL_TYPE_DOUBLE) { + switch (IDX(columns, rows)) { + case IDX(2,2): return dmat2_type; + case IDX(2,3): return dmat2x3_type; + case IDX(2,4): return dmat2x4_type; + case IDX(3,2): return dmat3x2_type; + case IDX(3,3): return dmat3_type; + case IDX(3,4): return dmat3x4_type; + case IDX(4,2): return dmat4x2_type; + case IDX(4,3): return dmat4x3_type; + case IDX(4,4): return dmat4_type; + default: return error_type; + } + } else { + switch (IDX(columns, rows)) { + case IDX(2,2): return mat2_type; + case IDX(2,3): return mat2x3_type; + case IDX(2,4): return mat2x4_type; + case IDX(3,2): return mat3x2_type; + case IDX(3,3): return mat3_type; + case IDX(3,4): return mat3x4_type; + case IDX(4,2): return mat4x2_type; + case IDX(4,3): return mat4x3_type; + case IDX(4,4): return mat4_type; + default: return error_type; + } } } @@ -474,6 +522,117 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns) return error_type; } +const glsl_type * +glsl_type::get_sampler_instance(enum glsl_sampler_dim dim, + bool shadow, + bool array, + glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_FLOAT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + if (shadow) + return (array ? sampler1DArrayShadow_type : sampler1DShadow_type); + else + return (array ? sampler1DArray_type : sampler1D_type); + case GLSL_SAMPLER_DIM_2D: + if (shadow) + return (array ? 
sampler2DArrayShadow_type : sampler2DShadow_type); + else + return (array ? sampler2DArray_type : sampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (shadow || array) + return error_type; + else + return sampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + if (shadow) + return (array ? samplerCubeArrayShadow_type : samplerCubeShadow_type); + else + return (array ? samplerCubeArray_type : samplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + if (shadow) + return sampler2DRectShadow_type; + else + return sampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (shadow || array) + return error_type; + else + return samplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + if (shadow) + return error_type; + return (array ? sampler2DMSArray_type : sampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + if (shadow || array) + return error_type; + else + return samplerExternalOES_type; + } + case GLSL_TYPE_INT: + if (shadow) + return error_type; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? isampler1DArray_type : isampler1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? isampler2DArray_type : isampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return isampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? isamplerCubeArray_type : isamplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return isampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return isamplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? isampler2DMSArray_type : isampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + case GLSL_TYPE_UINT: + if (shadow) + return error_type; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? usampler1DArray_type : usampler1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? 
usampler2DArray_type : usampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return usampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? usamplerCubeArray_type : usamplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return usampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return usamplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? usampler2DMSArray_type : usampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + default: + return error_type; + } + + unreachable("switch statement above should be complete"); +} const glsl_type * glsl_type::get_array_instance(const glsl_type *base, unsigned array_size) @@ -707,6 +866,9 @@ glsl_type::component_slots() const case GLSL_TYPE_BOOL: return this->components(); + case GLSL_TYPE_DOUBLE: + return 2 * this->components(); + case GLSL_TYPE_STRUCT: case GLSL_TYPE_INTERFACE: { unsigned size = 0; @@ -742,6 +904,7 @@ glsl_type::uniform_locations() const case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: case GLSL_TYPE_BOOL: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: @@ -786,12 +949,26 @@ glsl_type::can_implicitly_convert_to(const glsl_type *desired, desired->base_type == GLSL_TYPE_UINT && this->base_type == GLSL_TYPE_INT) return true; + /* No implicit conversions from double. */ + if ((!state || state->has_double()) && this->is_double()) + return false; + + /* Conversions from different types to double. */ + if ((!state || state->has_double()) && desired->is_double()) { + if (this->is_float()) + return true; + if (this->is_integer()) + return true; + } + return false; } unsigned glsl_type::std140_base_alignment(bool row_major) const { + unsigned N = is_double() ? 8 : 4; + /* (1) If the member is a scalar consuming <N> basic machine units, the * base alignment is <N>. 
* @@ -805,12 +982,12 @@ glsl_type::std140_base_alignment(bool row_major) const if (this->is_scalar() || this->is_vector()) { switch (this->vector_elements) { case 1: - return 4; + return N; case 2: - return 8; + return 2 * N; case 3: case 4: - return 16; + return 4 * N; } } @@ -859,10 +1036,10 @@ glsl_type::std140_base_alignment(bool row_major) const int r = this->vector_elements; if (row_major) { - vec_type = get_instance(GLSL_TYPE_FLOAT, c, 1); + vec_type = get_instance(base_type, c, 1); array_type = glsl_type::get_array_instance(vec_type, r); } else { - vec_type = get_instance(GLSL_TYPE_FLOAT, r, 1); + vec_type = get_instance(base_type, r, 1); array_type = glsl_type::get_array_instance(vec_type, c); } @@ -900,6 +1077,15 @@ glsl_type::std140_base_alignment(bool row_major) const return base_alignment; } + /* A sampler may never occur in a UBO (without bindless of some sort), + * however it is convenient to use this alignment function even with + * regular uniforms. This allows use of this function on uniform structs + * that contain samplers. + */ + if (this->is_sampler()) { + return 0; + } + assert(!"not reached"); return -1; } @@ -907,6 +1093,8 @@ glsl_type::std140_base_alignment(bool row_major) const unsigned glsl_type::std140_size(bool row_major) const { + unsigned N = is_double() ? 8 : 4; + /* (1) If the member is a scalar consuming <N> basic machine units, the * base alignment is <N>. * @@ -918,7 +1106,7 @@ glsl_type::std140_size(bool row_major) const * <N> basic machine units, the base alignment is 4<N>. 
*/ if (this->is_scalar() || this->is_vector()) { - return this->vector_elements * 4; + return this->vector_elements * N; } /* (5) If the member is a column-major matrix with <C> columns and @@ -953,11 +1141,12 @@ glsl_type::std140_size(bool row_major) const } if (row_major) { - vec_type = get_instance(GLSL_TYPE_FLOAT, - element_type->matrix_columns, 1); + vec_type = get_instance(element_type->base_type, + element_type->matrix_columns, 1); + array_len *= element_type->vector_elements; } else { - vec_type = get_instance(GLSL_TYPE_FLOAT, + vec_type = get_instance(element_type->base_type, element_type->vector_elements, 1); array_len *= element_type->matrix_columns; } @@ -1060,6 +1249,7 @@ glsl_type::count_attribute_slots() const case GLSL_TYPE_INT: case GLSL_TYPE_FLOAT: case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: return this->matrix_columns; case GLSL_TYPE_STRUCT: @@ -1114,8 +1304,13 @@ glsl_type::coordinate_components() const break; } - /* Array textures need an additional component for the array index. */ - if (sampler_array) + /* Array textures need an additional component for the array index, except + * for cubemap array images that behave like a 2D array of interleaved + * cubemap faces. 
+ */ + if (sampler_array && + !(base_type == GLSL_TYPE_IMAGE && + sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE)) size += 1; return size; diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h index 474b12914..7359e9476 100644 --- a/mesalib/src/glsl/glsl_types.h +++ b/mesalib/src/glsl/glsl_types.h @@ -28,7 +28,6 @@ #include <string.h> #include <assert.h> -#include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */ #ifdef __cplusplus extern "C" { @@ -51,6 +50,7 @@ enum glsl_base_type { GLSL_TYPE_UINT = 0, GLSL_TYPE_INT, GLSL_TYPE_FLOAT, + GLSL_TYPE_DOUBLE, GLSL_TYPE_BOOL, GLSL_TYPE_SAMPLER, GLSL_TYPE_IMAGE, @@ -103,6 +103,7 @@ enum glsl_matrix_layout { #ifdef __cplusplus #include "GL/gl.h" #include "util/ralloc.h" +#include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */ struct glsl_type { GLenum gl_type; @@ -199,6 +200,7 @@ struct glsl_type { * @{ */ static const glsl_type *vec(unsigned components); + static const glsl_type *dvec(unsigned components); static const glsl_type *ivec(unsigned components); static const glsl_type *uvec(unsigned components); static const glsl_type *bvec(unsigned components); @@ -244,6 +246,15 @@ struct glsl_type { unsigned columns); /** + * Get the instance of a sampler type + */ + static const glsl_type *get_sampler_instance(enum glsl_sampler_dim dim, + bool shadow, + bool array, + glsl_base_type type); + + + /** * Get the instance of an array type */ static const glsl_type *get_array_instance(const glsl_type *base, @@ -378,7 +389,7 @@ struct glsl_type { bool is_matrix() const { /* GLSL only has float matrices. 
*/ - return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT); + return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT || base_type == GLSL_TYPE_DOUBLE); } /** @@ -386,7 +397,7 @@ struct glsl_type { */ bool is_numeric() const { - return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_FLOAT); + return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_DOUBLE); } /** @@ -404,6 +415,12 @@ struct glsl_type { bool contains_integer() const; /** + * Query whether or not type is a double type, or for struct and array + * types, contains a double type. + */ + bool contains_double() const; + + /** * Query whether or not a type is a float type */ bool is_float() const @@ -412,6 +429,14 @@ struct glsl_type { } /** + * Query whether or not a type is a double type + */ + bool is_double() const + { + return base_type == GLSL_TYPE_DOUBLE; + } + + /** * Query whether or not a type is a non-array boolean type */ bool is_boolean() const diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp index fe5601a16..f4f92e9df 100644 --- a/mesalib/src/glsl/ir.cpp +++ b/mesalib/src/glsl/ir.cpp @@ -257,6 +257,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) case ir_unop_f2i: case ir_unop_b2i: case ir_unop_u2i: + case ir_unop_d2i: case ir_unop_bitcast_f2i: case ir_unop_bit_count: case ir_unop_find_msb: @@ -268,6 +269,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) case ir_unop_b2f: case ir_unop_i2f: case ir_unop_u2f: + case ir_unop_d2f: case ir_unop_bitcast_i2f: case ir_unop_bitcast_u2f: this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, @@ -276,12 +278,21 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) case ir_unop_f2b: case ir_unop_i2b: + case ir_unop_d2b: this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, op0->type->vector_elements, 1); break; + case ir_unop_f2d: + case ir_unop_i2d: + case ir_unop_u2d: + this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, + op0->type->vector_elements, 1); + break; + case ir_unop_i2u: case 
ir_unop_f2u: + case ir_unop_d2u: case ir_unop_bitcast_f2u: this->type = glsl_type::get_instance(GLSL_TYPE_UINT, op0->type->vector_elements, 1); @@ -293,6 +304,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) this->type = glsl_type::float_type; break; + case ir_unop_unpack_double_2x32: + this->type = glsl_type::uvec2_type; + break; + case ir_unop_any: this->type = glsl_type::bool_type; break; @@ -305,6 +320,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) this->type = glsl_type::uint_type; break; + case ir_unop_pack_double_2x32: + this->type = glsl_type::double_type; + break; + case ir_unop_unpack_snorm_2x16: case ir_unop_unpack_unorm_2x16: case ir_unop_unpack_half_2x16: @@ -316,6 +335,14 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) this->type = glsl_type::vec4_type; break; + case ir_unop_frexp_sig: + this->type = op0->type; + break; + case ir_unop_frexp_exp: + this->type = glsl_type::get_instance(GLSL_TYPE_INT, + op0->type->vector_elements, 1); + break; + default: assert(!"not reached: missing automatic type setup for ir_expression"); this->type = op0->type; @@ -390,7 +417,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) break; case ir_binop_dot: - this->type = glsl_type::float_type; + this->type = op0->type->get_base_type(); break; case ir_binop_pack_half_2x16_split: @@ -494,6 +521,13 @@ static const char *const operator_strs[] = { "u2f", "i2u", "u2i", + "d2f", + "f2d", + "d2i", + "i2d", + "d2u", + "u2d", + "d2b", "bitcast_i2f", "bitcast_f2i", "bitcast_u2f", @@ -531,6 +565,10 @@ static const char *const operator_strs[] = { "find_msb", "find_lsb", "sat", + "packDouble2x32", + "unpackDouble2x32", + "frexp_sig", + "frexp_exp", "noise", "interpolate_at_centroid", "+", @@ -646,6 +684,19 @@ ir_constant::ir_constant(float f, unsigned vector_elements) } } +ir_constant::ir_constant(double d, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + assert(vector_elements <= 4); + this->type = 
glsl_type::get_instance(GLSL_TYPE_DOUBLE, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.d[i] = d; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.d[i] = 0.0; + } +} + ir_constant::ir_constant(unsigned int u, unsigned vector_elements) : ir_rvalue(ir_type_constant) { @@ -695,6 +746,7 @@ ir_constant::ir_constant(const ir_constant *c, unsigned i) case GLSL_TYPE_INT: this->value.i[0] = c->value.i[i]; break; case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break; case GLSL_TYPE_BOOL: this->value.b[0] = c->value.b[i]; break; + case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break; default: assert(!"Should not get here."); break; } } @@ -746,9 +798,16 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list) if (value->type->is_scalar() && value->next->is_tail_sentinel()) { if (type->is_matrix()) { /* Matrix - fill diagonal (rest is already set to 0) */ - assert(type->base_type == GLSL_TYPE_FLOAT); - for (unsigned i = 0; i < type->matrix_columns; i++) - this->value.f[i * type->vector_elements + i] = value->value.f[0]; + assert(type->base_type == GLSL_TYPE_FLOAT || + type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned i = 0; i < type->matrix_columns; i++) { + if (type->base_type == GLSL_TYPE_FLOAT) + this->value.f[i * type->vector_elements + i] = + value->value.f[0]; + else + this->value.d[i * type->vector_elements + i] = + value->value.d[0]; + } } else { /* Vector or scalar - fill all components */ switch (type->base_type) { @@ -761,6 +820,10 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list) for (unsigned i = 0; i < type->components(); i++) this->value.f[i] = value->value.f[0]; break; + case GLSL_TYPE_DOUBLE: + for (unsigned i = 0; i < type->components(); i++) + this->value.d[i] = value->value.d[0]; + break; case GLSL_TYPE_BOOL: for (unsigned i = 0; i < type->components(); i++) this->value.b[i] = value->value.b[0]; @@ -819,6 +882,9 @@ 
ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list) case GLSL_TYPE_BOOL: this->value.b[i] = value->get_bool_component(j); break; + case GLSL_TYPE_DOUBLE: + this->value.d[i] = value->get_double_component(j); + break; default: /* FINISHME: What to do? Exceptions are not the answer. */ @@ -869,6 +935,7 @@ ir_constant::get_bool_component(unsigned i) const case GLSL_TYPE_INT: return this->value.i[i] != 0; case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0; case GLSL_TYPE_BOOL: return this->value.b[i]; + case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0; default: assert(!"Should not get here."); break; } @@ -886,6 +953,25 @@ ir_constant::get_float_component(unsigned i) const case GLSL_TYPE_INT: return (float) this->value.i[i]; case GLSL_TYPE_FLOAT: return this->value.f[i]; case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0f : 0.0f; + case GLSL_TYPE_DOUBLE: return (float) this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return 0.0; +} + +double +ir_constant::get_double_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return (double) this->value.u[i]; + case GLSL_TYPE_INT: return (double) this->value.i[i]; + case GLSL_TYPE_FLOAT: return (double) this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0 : 0.0; + case GLSL_TYPE_DOUBLE: return this->value.d[i]; default: assert(!"Should not get here."); break; } @@ -903,6 +989,7 @@ ir_constant::get_int_component(unsigned i) const case GLSL_TYPE_INT: return this->value.i[i]; case GLSL_TYPE_FLOAT: return (int) this->value.f[i]; case GLSL_TYPE_BOOL: return this->value.b[i] ? 
1 : 0; + case GLSL_TYPE_DOUBLE: return (int) this->value.d[i]; default: assert(!"Should not get here."); break; } @@ -920,6 +1007,7 @@ ir_constant::get_uint_component(unsigned i) const case GLSL_TYPE_INT: return this->value.i[i]; case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i]; case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0; + case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i]; default: assert(!"Should not get here."); break; } @@ -984,6 +1072,7 @@ ir_constant::copy_offset(ir_constant *src, int offset) case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: case GLSL_TYPE_BOOL: { unsigned int size = src->type->components(); assert (size <= this->type->components() - offset); @@ -1001,6 +1090,9 @@ ir_constant::copy_offset(ir_constant *src, int offset) case GLSL_TYPE_BOOL: value.b[i+offset] = src->get_bool_component(i); break; + case GLSL_TYPE_DOUBLE: + value.d[i+offset] = src->get_double_component(i); + break; default: // Shut up the compiler break; } @@ -1057,6 +1149,9 @@ ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask) case GLSL_TYPE_BOOL: value.b[i+offset] = src->get_bool_component(id++); break; + case GLSL_TYPE_DOUBLE: + value.d[i+offset] = src->get_double_component(id++); + break; default: assert(!"Should not get here."); return; @@ -1117,6 +1212,10 @@ ir_constant::has_value(const ir_constant *c) const if (this->value.b[i] != c->value.b[i]) return false; break; + case GLSL_TYPE_DOUBLE: + if (this->value.d[i] != c->value.d[i]) + return false; + break; default: assert(!"Should not get here."); return false; @@ -1154,6 +1253,10 @@ ir_constant::is_value(float f, int i) const if (this->value.b[c] != bool(i)) return false; break; + case GLSL_TYPE_DOUBLE: + if (this->value.d[c] != double(f)) + return false; + break; default: /* The only other base types are structures, arrays, and samplers. 
* Samplers cannot be constants, and the others should have been diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h index a0f48b2af..25f2ecada 100644 --- a/mesalib/src/glsl/ir.h +++ b/mesalib/src/glsl/ir.h @@ -450,11 +450,8 @@ public: */ inline bool is_interface_instance() const { - const glsl_type *const t = this->type; - - return (t == this->interface_type) - || (t->is_array() && t->fields.array == this->interface_type); - } + return this->type->without_array() == this->interface_type; + } /** * Set this->interface_type on a newly created variable. @@ -1269,6 +1266,13 @@ enum ir_expression_operation { ir_unop_u2f, /**< Unsigned-to-float conversion. */ ir_unop_i2u, /**< Integer-to-unsigned conversion. */ ir_unop_u2i, /**< Unsigned-to-integer conversion. */ + ir_unop_d2f, /**< Double-to-float conversion. */ + ir_unop_f2d, /**< Float-to-double conversion. */ + ir_unop_d2i, /**< Double-to-integer conversion. */ + ir_unop_i2d, /**< Integer-to-double conversion. */ + ir_unop_d2u, /**< Double-to-unsigned conversion. */ + ir_unop_u2d, /**< Unsigned-to-double conversion. */ + ir_unop_d2b, /**< Double-to-boolean conversion. */ ir_unop_bitcast_i2f, /**< Bit-identical int-to-float "conversion" */ ir_unop_bitcast_f2i, /**< Bit-identical float-to-int "conversion" */ ir_unop_bitcast_u2f, /**< Bit-identical uint-to-float "conversion" */ @@ -1345,6 +1349,18 @@ enum ir_expression_operation { /*@}*/ ir_unop_saturate, + + /** + * \name Double packing, part of ARB_gpu_shader_fp64. 
+ */ + /*@{*/ + ir_unop_pack_double_2x32, + ir_unop_unpack_double_2x32, + /*@}*/ + + ir_unop_frexp_sig, + ir_unop_frexp_exp, + ir_unop_noise, /** @@ -2153,6 +2169,7 @@ union ir_constant_data { int i[16]; float f[16]; bool b[16]; + double d[16]; }; @@ -2163,6 +2180,7 @@ public: ir_constant(unsigned int u, unsigned vector_elements=1); ir_constant(int i, unsigned vector_elements=1); ir_constant(float f, unsigned vector_elements=1); + ir_constant(double d, unsigned vector_elements=1); /** * Construct an ir_constant from a list of ir_constant values @@ -2209,6 +2227,7 @@ public: /*@{*/ bool get_bool_component(unsigned i) const; float get_float_component(unsigned i) const; + double get_double_component(unsigned i) const; int get_int_component(unsigned i) const; unsigned get_uint_component(unsigned i) const; /*@}*/ @@ -2417,6 +2436,10 @@ extern ir_function_signature * _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state, const char *name, exec_list *actual_parameters); +extern ir_function * +_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state, + const char *name); + extern gl_shader * _mesa_glsl_get_builtin_function_shader(void); diff --git a/mesalib/src/glsl/ir_builder.cpp b/mesalib/src/glsl/ir_builder.cpp index a2f6f2967..e44b05c99 100644 --- a/mesalib/src/glsl/ir_builder.cpp +++ b/mesalib/src/glsl/ir_builder.cpp @@ -246,11 +246,21 @@ ir_expression *borrow(operand a, operand b) return expr(ir_binop_borrow, a, b); } +ir_expression *trunc(operand a) +{ + return expr(ir_unop_trunc, a); +} + ir_expression *round_even(operand a) { return expr(ir_unop_round_even, a); } +ir_expression *fract(operand a) +{ + return expr(ir_unop_fract, a); +} + /* dot for vectors, mul for scalars */ ir_expression *dot(operand a, operand b) { @@ -515,6 +525,24 @@ interpolate_at_sample(operand a, operand b) } ir_expression * +f2d(operand a) +{ + return expr(ir_unop_f2d, a); +} + +ir_expression * +i2d(operand a) +{ + return expr(ir_unop_i2d, a); +} + +ir_expression * 
+u2d(operand a) +{ + return expr(ir_unop_u2d, a); +} + +ir_expression * fma(operand a, operand b, operand c) { return expr(ir_triop_fma, a, b, c); diff --git a/mesalib/src/glsl/ir_builder.h b/mesalib/src/glsl/ir_builder.h index 573596cf1..870265881 100644 --- a/mesalib/src/glsl/ir_builder.h +++ b/mesalib/src/glsl/ir_builder.h @@ -137,7 +137,9 @@ ir_expression *imul_high(operand a, operand b); ir_expression *div(operand a, operand b); ir_expression *carry(operand a, operand b); ir_expression *borrow(operand a, operand b); +ir_expression *trunc(operand a); ir_expression *round_even(operand a); +ir_expression *fract(operand a); ir_expression *dot(operand a, operand b); ir_expression *clamp(operand a, operand b, operand c); ir_expression *saturate(operand a); @@ -183,6 +185,10 @@ ir_expression *i2b(operand a); ir_expression *f2b(operand a); ir_expression *b2f(operand a); +ir_expression *f2d(operand a); +ir_expression *i2d(operand a); +ir_expression *u2d(operand a); + ir_expression *min2(operand a, operand b); ir_expression *max2(operand a, operand b); diff --git a/mesalib/src/glsl/ir_clone.cpp b/mesalib/src/glsl/ir_clone.cpp index dffa57844..5c7279ca3 100644 --- a/mesalib/src/glsl/ir_clone.cpp +++ b/mesalib/src/glsl/ir_clone.cpp @@ -327,6 +327,7 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: case GLSL_TYPE_BOOL: return new(mem_ctx) ir_constant(this->type, &this->value); diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp index 1e8b3a3cc..07dd439d5 100644 --- a/mesalib/src/glsl/ir_constant_expression.cpp +++ b/mesalib/src/glsl/ir_constant_expression.cpp @@ -44,7 +44,7 @@ static int isnormal(double x) { return _fpclass(x) == _FPCLASS_NN || _fpclass(x) == _FPCLASS_PN; } -#elif defined(__SUNPRO_CC) +#elif defined(__SUNPRO_CC) && !defined(isnormal) #include <ieeefp.h> static int isnormal(double x) { @@ -60,7 +60,7 @@ 
static double copysign(double x, double y) #endif static float -dot(ir_constant *op0, ir_constant *op1) +dot_f(ir_constant *op0, ir_constant *op1) { assert(op0->type->is_float() && op1->type->is_float()); @@ -71,6 +71,18 @@ dot(ir_constant *op0, ir_constant *op1) return result; } +static double +dot_d(ir_constant *op0, ir_constant *op1) +{ + assert(op0->type->is_double() && op1->type->is_double()); + + double result = 0; + for (unsigned c = 0; c < op0->type->components(); c++) + result += op0->value.d[c] * op1->value.d[c]; + + return result; +} + /* This method is the only one supported by gcc. Unions in particular * are iffy, and read-through-converted-pointer is killed by strict * aliasing. OTOH, the compiler sees through the memcpy, so the @@ -667,32 +679,81 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) data.b[0] = true; } break; - - case ir_unop_trunc: + case ir_unop_d2f: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = op[0]->value.d[c]; + } + break; + case ir_unop_f2d: assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = truncf(op[0]->value.f[c]); + data.d[c] = op[0]->value.f[c]; + } + break; + case ir_unop_d2i: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = op[0]->value.d[c]; + } + break; + case ir_unop_i2d: + assert(op[0]->type->base_type == GLSL_TYPE_INT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.d[c] = op[0]->value.i[c]; + } + break; + case ir_unop_d2u: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.u[c] = op[0]->value.d[c]; + } + break; + case ir_unop_u2d: + assert(op[0]->type->base_type == GLSL_TYPE_UINT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.d[c] = 
op[0]->value.u[c]; + } + break; + case ir_unop_d2b: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.b[c] = op[0]->value.d[c] != 0.0; + } + break; + case ir_unop_trunc: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = trunc(op[0]->value.d[c]); + else + data.f[c] = truncf(op[0]->value.f[c]); } break; case ir_unop_round_even: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = _mesa_round_to_even(op[0]->value.f[c]); + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = _mesa_round_to_even(op[0]->value.d[c]); + else + data.f[c] = _mesa_round_to_even(op[0]->value.f[c]); } break; case ir_unop_ceil: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = ceilf(op[0]->value.f[c]); + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = ceil(op[0]->value.d[c]); + else + data.f[c] = ceilf(op[0]->value.f[c]); } break; case ir_unop_floor: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = floorf(op[0]->value.f[c]); + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = floor(op[0]->value.d[c]); + else + data.f[c] = floorf(op[0]->value.f[c]); } break; @@ -708,6 +769,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = op[0]->value.f[c] - floor(op[0]->value.f[c]); break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c] - floor(op[0]->value.d[c]); + break; default: assert(0); } @@ -742,6 +806,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = -op[0]->value.f[c]; break; + case GLSL_TYPE_DOUBLE: + data.d[c] = -op[0]->value.d[c]; + break; default: assert(0); 
} @@ -762,6 +829,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = fabs(op[0]->value.f[c]); break; + case GLSL_TYPE_DOUBLE: + data.d[c] = fabs(op[0]->value.d[c]); + break; default: assert(0); } @@ -780,6 +850,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = float((op[0]->value.f[c] > 0)-(op[0]->value.f[c] < 0)); break; + case GLSL_TYPE_DOUBLE: + data.d[c] = double((op[0]->value.d[c] > 0)-(op[0]->value.d[c] < 0)); + break; default: assert(0); } @@ -787,7 +860,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) break; case ir_unop_rcp: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { switch (this->type->base_type) { case GLSL_TYPE_UINT: @@ -802,6 +874,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) if (op[0]->value.f[c] != 0.0) data.f[c] = 1.0F / op[0]->value.f[c]; break; + case GLSL_TYPE_DOUBLE: + if (op[0]->value.d[c] != 0.0) + data.d[c] = 1.0 / op[0]->value.d[c]; + break; default: assert(0); } @@ -809,16 +885,20 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) break; case ir_unop_rsq: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]); + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = 1.0 / sqrt(op[0]->value.d[c]); + else + data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]); } break; case ir_unop_sqrt: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = sqrtf(op[0]->value.f[c]); + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = sqrt(op[0]->value.d[c]); + else + data.f[c] = sqrtf(op[0]->value.f[c]); } break; @@ -934,7 +1014,10 @@ 
ir_expression::constant_expression_value(struct hash_table *variable_context) break; case ir_binop_dot: - data.f[0] = dot(op[0], op[1]); + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[0] = dot_d(op[0], op[1]); + else + data.f[0] = dot_f(op[0], op[1]); break; case ir_binop_min: @@ -953,6 +1036,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = MIN2(op[0]->value.f[c0], op[1]->value.f[c1]); break; + case GLSL_TYPE_DOUBLE: + data.d[c] = MIN2(op[0]->value.d[c0], op[1]->value.d[c1]); + break; default: assert(0); } @@ -975,6 +1061,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = MAX2(op[0]->value.f[c0], op[1]->value.f[c1]); break; + case GLSL_TYPE_DOUBLE: + data.d[c] = MAX2(op[0]->value.d[c0], op[1]->value.d[c1]); + break; default: assert(0); } @@ -997,6 +1086,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = op[0]->value.f[c0] + op[1]->value.f[c1]; break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c0] + op[1]->value.d[c1]; + break; default: assert(0); } @@ -1019,6 +1111,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1]; break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1]; + break; default: assert(0); } @@ -1043,6 +1138,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = op[0]->value.f[c0] * op[1]->value.f[c1]; break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c0] * op[1]->value.d[c1]; + break; default: assert(0); } @@ -1066,7 +1164,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) for (unsigned j = 0; j < p; j++) { for (unsigned i = 0; i < n; i++) { for (unsigned k = 0; k < m; k++) { - data.f[i+n*j] += 
op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j]; + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[i+n*j] += op[0]->value.d[i+n*k]*op[1]->value.d[k+m*j]; + else + data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j]; } } } @@ -1098,6 +1199,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = op[0]->value.f[c0] / op[1]->value.f[c1]; break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c0] / op[1]->value.d[c1]; + break; default: assert(0); } @@ -1133,6 +1237,13 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1] * floorf(op[0]->value.f[c0] / op[1]->value.f[c1]); break; + case GLSL_TYPE_DOUBLE: + /* We don't use fmod because it rounds toward zero; GLSL specifies + * the use of floor. + */ + data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1] + * floor(op[0]->value.d[c0] / op[1]->value.d[c1]); + break; default: assert(0); } @@ -1169,6 +1280,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.b[c] = op[0]->value.f[c] < op[1]->value.f[c]; break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] < op[1]->value.d[c]; + break; default: assert(0); } @@ -1187,6 +1301,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.b[c] = op[0]->value.f[c] > op[1]->value.f[c]; break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] > op[1]->value.d[c]; + break; default: assert(0); } @@ -1205,6 +1322,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.b[c] = op[0]->value.f[c] <= op[1]->value.f[c]; break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] <= op[1]->value.d[c]; + break; default: assert(0); } @@ -1223,6 +1343,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.b[c] 
= op[0]->value.f[c] >= op[1]->value.f[c]; break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] >= op[1]->value.d[c]; + break; default: assert(0); } @@ -1244,6 +1367,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_BOOL: data.b[c] = op[0]->value.b[c] == op[1]->value.b[c]; break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] == op[1]->value.d[c]; + break; default: assert(0); } @@ -1265,6 +1391,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_BOOL: data.b[c] = op[0]->value.b[c] != op[1]->value.b[c]; break; + case GLSL_TYPE_DOUBLE: + data.b[c] = op[0]->value.d[c] != op[1]->value.d[c]; + break; default: assert(0); } @@ -1375,6 +1504,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[0] = op[0]->value.f[c]; break; + case GLSL_TYPE_DOUBLE: + data.d[0] = op[0]->value.d[c]; + break; case GLSL_TYPE_BOOL: data.b[0] = op[0]->value.b[c]; break; @@ -1474,6 +1606,19 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) data.f[c] = CLAMP(op[0]->value.f[c], 0.0f, 1.0f); } break; + case ir_unop_pack_double_2x32: { + /* XXX needs to be checked on big-endian */ + uint64_t temp; + temp = (uint64_t)op[0]->value.u[0] | ((uint64_t)op[0]->value.u[1] << 32); + data.d[0] = *(double *)&temp; + + break; + } + case ir_unop_unpack_double_2x32: + /* XXX needs to be checked on big-endian */ + data.u[0] = *(uint32_t *)&op[0]->value.d[0]; + data.u[1] = *((uint32_t *)&op[0]->value.d[0] + 1); + break; case ir_triop_bitfield_extract: { int offset = op[1]->value.i[0]; @@ -1523,40 +1668,65 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case ir_binop_ldexp: for (unsigned c = 0; c < components; c++) { - data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]); - /* Flush subnormal values to zero. 
*/ - if (!isnormal(data.f[c])) - data.f[c] = copysign(0.0f, op[0]->value.f[c]); + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) { + data.d[c] = ldexp(op[0]->value.d[c], op[1]->value.i[c]); + /* Flush subnormal values to zero. */ + if (!isnormal(data.d[c])) + data.d[c] = copysign(0.0, op[0]->value.d[c]); + } else { + data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]); + /* Flush subnormal values to zero. */ + if (!isnormal(data.f[c])) + data.f[c] = copysign(0.0f, op[0]->value.f[c]); + } } break; case ir_triop_fma: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - assert(op[1]->type->base_type == GLSL_TYPE_FLOAT); - assert(op[2]->type->base_type == GLSL_TYPE_FLOAT); + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT || + op[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(op[1]->type->base_type == GLSL_TYPE_FLOAT || + op[1]->type->base_type == GLSL_TYPE_DOUBLE); + assert(op[2]->type->base_type == GLSL_TYPE_FLOAT || + op[2]->type->base_type == GLSL_TYPE_DOUBLE); for (unsigned c = 0; c < components; c++) { - data.f[c] = op[0]->value.f[c] * op[1]->value.f[c] - + op[2]->value.f[c]; + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = op[0]->value.d[c] * op[1]->value.d[c] + + op[2]->value.d[c]; + else + data.f[c] = op[0]->value.f[c] * op[1]->value.f[c] + + op[2]->value.f[c]; } break; case ir_triop_lrp: { - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - assert(op[1]->type->base_type == GLSL_TYPE_FLOAT); - assert(op[2]->type->base_type == GLSL_TYPE_FLOAT); + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT || + op[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(op[1]->type->base_type == GLSL_TYPE_FLOAT || + op[1]->type->base_type == GLSL_TYPE_DOUBLE); + assert(op[2]->type->base_type == GLSL_TYPE_FLOAT || + op[2]->type->base_type == GLSL_TYPE_DOUBLE); unsigned c2_inc = op[2]->type->is_scalar() ? 
0 : 1; for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) { - data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) + - (op[1]->value.f[c] * op[2]->value.f[c2]); + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = op[0]->value.d[c] * (1.0 - op[2]->value.d[c2]) + + (op[1]->value.d[c] * op[2]->value.d[c2]); + else + data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) + + (op[1]->value.f[c] * op[2]->value.f[c2]); } break; } case ir_triop_csel: for (unsigned c = 0; c < components; c++) { - data.u[c] = op[0]->value.b[c] ? op[1]->value.u[c] + if (op[1]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = op[0]->value.b[c] ? op[1]->value.d[c] + : op[2]->value.d[c]; + else + data.u[c] = op[0]->value.b[c] ? op[1]->value.u[c] : op[2]->value.u[c]; } break; @@ -1579,6 +1749,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_BOOL: data.b[idx] = op[1]->value.b[0]; break; + case GLSL_TYPE_DOUBLE: + data.d[idx] = op[1]->value.d[0]; + break; default: assert(!"Should not get here."); break; @@ -1625,6 +1798,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_FLOAT: data.f[c] = op[c]->value.f[0]; break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[c]->value.d[0]; + break; default: assert(0); } @@ -1666,6 +1842,7 @@ ir_swizzle::constant_expression_value(struct hash_table *variable_context) case GLSL_TYPE_INT: data.u[i] = v->value.u[swiz_idx[i]]; break; case GLSL_TYPE_FLOAT: data.f[i] = v->value.f[swiz_idx[i]]; break; case GLSL_TYPE_BOOL: data.b[i] = v->value.b[swiz_idx[i]]; break; + case GLSL_TYPE_DOUBLE:data.d[i] = v->value.d[swiz_idx[i]]; break; default: assert(!"Should not get here."); break; } } @@ -1740,6 +1917,12 @@ ir_dereference_array::constant_expression_value(struct hash_table *variable_cont break; + case GLSL_TYPE_DOUBLE: + for (unsigned i = 0; i < column_type->vector_elements; i++) + data.d[i] = array->value.d[mat_idx + i]; + + break; + default: 
assert(!"Should not get here."); break; diff --git a/mesalib/src/glsl/ir_function_can_inline.cpp b/mesalib/src/glsl/ir_function_can_inline.cpp index 7b15d5df1..3b1d15f80 100644 --- a/mesalib/src/glsl/ir_function_can_inline.cpp +++ b/mesalib/src/glsl/ir_function_can_inline.cpp @@ -26,11 +26,10 @@ * * Determines if we can inline a function call using ir_function_inlining.cpp. * - * The primary restriction is that we can't return from the function - * other than as the last instruction. We could potentially work - * around this for some constructs by flattening control flow and - * moving the return to the end, or by using breaks from a do {} while - * (0) loop surrounding the function body. + * The primary restriction is that we can't return from the function other + * than as the last instruction. In lower_jumps.cpp, we can lower return + * statements not at the end of the function to other control flow in order to + * deal with this restriction. */ #include "ir.h" diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h index 34e0b4b94..7eb861ae5 100644 --- a/mesalib/src/glsl/ir_optimization.h +++ b/mesalib/src/glsl/ir_optimization.h @@ -34,13 +34,15 @@ #define EXP_TO_EXP2 0x04 #define POW_TO_EXP2 0x08 #define LOG_TO_LOG2 0x10 -#define MOD_TO_FRACT 0x20 +#define MOD_TO_FLOOR 0x20 #define INT_DIV_TO_MUL_RCP 0x40 #define BITFIELD_INSERT_TO_BFM_BFI 0x80 #define LDEXP_TO_ARITH 0x100 #define CARRY_TO_ARITH 0x200 #define BORROW_TO_ARITH 0x400 #define SAT_TO_CLAMP 0x800 +#define DOPS_TO_DFRAC 0x1000 +#define DFREXP_DLDEXP_TO_ARITH 0x2000 /** * \see class lower_packing_builtins_visitor diff --git a/mesalib/src/glsl/ir_print_visitor.cpp b/mesalib/src/glsl/ir_print_visitor.cpp index bd398052c..01f52e85f 100644 --- a/mesalib/src/glsl/ir_print_visitor.cpp +++ b/mesalib/src/glsl/ir_print_visitor.cpp @@ -436,6 +436,17 @@ void ir_print_visitor::visit(ir_constant *ir) fprintf(f, "%f", ir->value.f[i]); break; case GLSL_TYPE_BOOL: fprintf(f, "%d", 
ir->value.b[i]); break; + case GLSL_TYPE_DOUBLE: + if (ir->value.d[i] == 0.0) + /* 0.0 == -0.0, so print with %f to get the proper sign. */ + fprintf(f, "%.1f", ir->value.d[i]); + else if (fabs(ir->value.d[i]) < 0.000001) + fprintf(f, "%a", ir->value.d[i]); + else if (fabs(ir->value.d[i]) > 1000000.0) + fprintf(f, "%e", ir->value.d[i]); + else + fprintf(f, "%f", ir->value.d[i]); + break; default: assert(0); } } diff --git a/mesalib/src/glsl/ir_set_program_inouts.cpp b/mesalib/src/glsl/ir_set_program_inouts.cpp index 97ead750a..e877a2019 100644 --- a/mesalib/src/glsl/ir_set_program_inouts.cpp +++ b/mesalib/src/glsl/ir_set_program_inouts.cpp @@ -81,6 +81,13 @@ is_shader_inout(ir_variable *var) var->data.mode == ir_var_system_value; } +static inline bool +is_dual_slot(ir_variable *var) +{ + const glsl_type *type = var->type->without_array(); + return type == glsl_type::dvec4_type || type == glsl_type::dvec3_type; +} + static void mark(struct gl_program *prog, ir_variable *var, int offset, int len, bool is_fragment_shader) @@ -94,19 +101,32 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len, */ for (int i = 0; i < len; i++) { - GLbitfield64 bitfield = - BITFIELD64_BIT(var->data.location + var->data.index + offset + i); + bool dual_slot = is_dual_slot(var); + int idx = var->data.location + var->data.index + offset + i; + GLbitfield64 bitfield = BITFIELD64_BIT(idx); + + /* dvec3 and dvec4 take up 2 slots */ + if (dual_slot) { + idx += i; + bitfield |= bitfield << 1; + } if (var->data.mode == ir_var_shader_in) { prog->InputsRead |= bitfield; if (is_fragment_shader) { gl_fragment_program *fprog = (gl_fragment_program *) prog; - fprog->InterpQualifier[var->data.location + - var->data.index + offset + i] = + fprog->InterpQualifier[idx] = (glsl_interp_qualifier) var->data.interpolation; if (var->data.centroid) fprog->IsCentroid |= bitfield; if (var->data.sample) fprog->IsSample |= bitfield; + + /* Set the InterpQualifier of the next slot to the same as the 
+ * current one, since dvec3 and dvec4 spans 2 slots. + */ + if (dual_slot) + fprog->InterpQualifier[idx + 1] = + (glsl_interp_qualifier) var->data.interpolation; } } else if (var->data.mode == ir_var_system_value) { prog->SystemValuesRead |= bitfield; diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp index 5a6f8bbf5..667889480 100644 --- a/mesalib/src/glsl/ir_validate.cpp +++ b/mesalib/src/glsl/ir_validate.cpp @@ -313,6 +313,10 @@ ir_validate::visit_leave(ir_expression *ir) case ir_unop_ceil: case ir_unop_floor: case ir_unop_fract: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->operands[0]->type == ir->type); + break; case ir_unop_sin: case ir_unop_cos: case ir_unop_sin_reduced: @@ -340,6 +344,11 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type == glsl_type::vec4_type); break; + case ir_unop_pack_double_2x32: + assert(ir->type == glsl_type::double_type); + assert(ir->operands[0]->type == glsl_type::uvec2_type); + break; + case ir_unop_unpack_snorm_2x16: case ir_unop_unpack_unorm_2x16: case ir_unop_unpack_half_2x16: @@ -359,6 +368,11 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type == glsl_type::uint_type); break; + case ir_unop_unpack_double_2x32: + assert(ir->type == glsl_type::uvec2_type); + assert(ir->operands[0]->type == glsl_type::double_type); + break; + case ir_unop_bitfield_reverse: assert(ir->operands[0]->type == ir->type); assert(ir->type->is_integer()); @@ -381,6 +395,45 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type->is_float()); break; + case ir_unop_d2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_f2d: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_d2i: + 
assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_i2d: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_d2u: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + case ir_unop_u2d: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_d2b: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_BOOL); + break; + + case ir_unop_frexp_sig: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_frexp_exp: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; case ir_binop_add: case ir_binop_sub: case ir_binop_mul: @@ -481,8 +534,10 @@ ir_validate::visit_leave(ir_expression *ir) break; case ir_binop_dot: - assert(ir->type == glsl_type::float_type); - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type == glsl_type::float_type || + ir->type == glsl_type::double_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); assert(ir->operands[0]->type->is_vector()); assert(ir->operands[0]->type == ir->operands[1]->type); break; @@ -507,7 +562,8 @@ ir_validate::visit_leave(ir_expression *ir) case ir_binop_ldexp: assert(ir->operands[0]->type == ir->type); - assert(ir->operands[0]->type->is_float()); + assert(ir->operands[0]->type->is_float() || + ir->operands[0]->type->is_double()); assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT); 
assert(ir->operands[0]->type->components() == ir->operands[1]->type->components()); @@ -533,16 +589,20 @@ ir_validate::visit_leave(ir_expression *ir) break; case ir_triop_fma: - assert(ir->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT || + ir->type->base_type == GLSL_TYPE_DOUBLE); assert(ir->type == ir->operands[0]->type); assert(ir->type == ir->operands[1]->type); assert(ir->type == ir->operands[2]->type); break; case ir_triop_lrp: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); assert(ir->operands[0]->type == ir->operands[1]->type); - assert(ir->operands[2]->type == ir->operands[0]->type || ir->operands[2]->type == glsl_type::float_type); + assert(ir->operands[2]->type == ir->operands[0]->type || + ir->operands[2]->type == glsl_type::float_type || + ir->operands[2]->type == glsl_type::double_type); break; case ir_triop_csel: @@ -706,7 +766,7 @@ ir_validate::visit(ir_variable *ir) } if (ir->data.mode == ir_var_uniform - && strncmp(ir->name, "gl_", 3) == 0 + && is_gl_identifier(ir->name) && ir->get_state_slots() == NULL) { printf("built-in uniform has no state\n"); ir->print(); diff --git a/mesalib/src/glsl/link_uniform_blocks.cpp b/mesalib/src/glsl/link_uniform_blocks.cpp index f5fc5022e..6ca41107e 100644 --- a/mesalib/src/glsl/link_uniform_blocks.cpp +++ b/mesalib/src/glsl/link_uniform_blocks.cpp @@ -67,6 +67,28 @@ private: assert(!"Should not get here."); } + virtual void enter_record(const glsl_type *type, const char *name, + bool row_major) { + assert(type->is_record()); + this->offset = glsl_align( + this->offset, type->std140_base_alignment(row_major)); + } + + virtual void leave_record(const glsl_type *type, const char *name, + bool row_major) { + assert(type->is_record()); + + /* If this is the last field of a structure, apply rule #9. 
The + * GL_ARB_uniform_buffer_object spec says: + * + * "The structure may have padding at the end; the base offset of + * the member following the sub-structure is rounded up to the next + * multiple of the base alignment of the structure." + */ + this->offset = glsl_align( + this->offset, type->std140_base_alignment(row_major)); + } + virtual void visit_field(const glsl_type *type, const char *name, bool row_major, const glsl_type *record_type, bool last_field) @@ -97,27 +119,13 @@ private: v->IndexName = v->Name; } - const unsigned alignment = record_type - ? record_type->std140_base_alignment(v->RowMajor) - : type->std140_base_alignment(v->RowMajor); + const unsigned alignment = type->std140_base_alignment(v->RowMajor); unsigned size = type->std140_size(v->RowMajor); this->offset = glsl_align(this->offset, alignment); v->Offset = this->offset; - /* If this is the last field of a structure, apply rule #9. The - * GL_ARB_uniform_buffer_object spec says: - * - * "The structure may have padding at the end; the base offset of - * the member following the sub-structure is rounded up to the next - * multiple of the base alignment of the structure." - * - * last_field won't be set if this is the last field of a UBO that is - * not a named instance. - */ this->offset += size; - if (last_field) - this->offset = glsl_align(this->offset, 16); /* From the GL_ARB_uniform_buffer_object spec: * @@ -131,16 +139,6 @@ private: */ this->buffer_size = glsl_align(this->offset, 16); } - - virtual void visit_field(const glsl_struct_field *field) - { - /* FINISHME: When support for doubles (dvec4, etc.) is added to the - * FINISHME: compiler, this may be incorrect for a structure in a UBO - * FINISHME: like struct s { struct { float f } s1; dvec4 v; };. 
- */ - this->offset = glsl_align(this->offset, - field->type->std140_base_alignment(false)); - } }; class count_block_size : public program_resource_visitor { diff --git a/mesalib/src/glsl/link_uniform_initializers.cpp b/mesalib/src/glsl/link_uniform_initializers.cpp index f6a60bce9..69073841e 100644 --- a/mesalib/src/glsl/link_uniform_initializers.cpp +++ b/mesalib/src/glsl/link_uniform_initializers.cpp @@ -75,6 +75,11 @@ copy_constant_to_storage(union gl_constant_value *storage, case GLSL_TYPE_FLOAT: storage[i].f = val->value.f[i]; break; + case GLSL_TYPE_DOUBLE: + /* XXX need to check on big-endian */ + storage[i * 2].u = *(uint32_t *)&val->value.d[i]; + storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1); + break; case GLSL_TYPE_BOOL: storage[i].b = val->value.b[i] ? boolean_true : 0; break; @@ -200,6 +205,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, val->array_elements[0]->type->base_type; const unsigned int elements = val->array_elements[0]->type->components(); unsigned int idx = 0; + unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 
2 : 1; assert(val->type->length >= storage->array_elements); for (unsigned int i = 0; i < storage->array_elements; i++) { @@ -209,7 +215,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, elements, boolean_true); - idx += elements; + idx += elements * dmul; } } else { copy_constant_to_storage(storage->storage, diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp index de2f6c9ac..799c74bb9 100644 --- a/mesalib/src/glsl/link_uniforms.cpp +++ b/mesalib/src/glsl/link_uniforms.cpp @@ -169,6 +169,9 @@ program_resource_visitor::recursion(const glsl_type *t, char **name, if (record_type == NULL && t->is_record()) record_type = t; + if (t->is_record()) + this->enter_record(t, *name, row_major); + for (unsigned i = 0; i < t->length; i++) { const char *field = t->fields.structure[i].name; size_t new_length = name_length; @@ -208,6 +211,11 @@ program_resource_visitor::recursion(const glsl_type *t, char **name, */ record_type = NULL; } + + if (t->is_record()) { + (*name)[name_length] = '\0'; + this->leave_record(t, *name, row_major); + } } else if (t->is_array() && (t->fields.array->is_record() || t->fields.array->is_interface())) { if (record_type == NULL && t->fields.array->is_record()) @@ -249,6 +257,16 @@ program_resource_visitor::visit_field(const glsl_struct_field *field) /* empty */ } +void +program_resource_visitor::enter_record(const glsl_type *, const char *, bool) +{ +} + +void +program_resource_visitor::leave_record(const glsl_type *, const char *, bool) +{ +} + namespace { /** @@ -526,6 +544,20 @@ private: assert(!"Should not get here."); } + virtual void enter_record(const glsl_type *type, const char *name, + bool row_major) { + assert(type->is_record()); + this->ubo_byte_offset = glsl_align( + this->ubo_byte_offset, type->std140_base_alignment(row_major)); + } + + virtual void leave_record(const glsl_type *type, const char *name, + bool row_major) { + assert(type->is_record()); + this->ubo_byte_offset = 
glsl_align( + this->ubo_byte_offset, type->std140_base_alignment(row_major)); + } + virtual void visit_field(const glsl_type *type, const char *name, bool row_major, const glsl_type *record_type, bool last_field) @@ -590,16 +622,11 @@ private: if (this->ubo_block_index != -1) { this->uniforms[id].block_index = this->ubo_block_index; - const unsigned alignment = record_type - ? record_type->std140_base_alignment(row_major) - : type->std140_base_alignment(row_major); + const unsigned alignment = type->std140_base_alignment(row_major); this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment); this->uniforms[id].offset = this->ubo_byte_offset; this->ubo_byte_offset += type->std140_size(row_major); - if (last_field) - this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, 16); - if (type->is_array()) { this->uniforms[id].array_stride = glsl_align(type->fields.array->std140_size(row_major), 16); @@ -608,7 +635,12 @@ private: } if (type->without_array()->is_matrix()) { - this->uniforms[id].matrix_stride = 16; + const glsl_type *matrix = type->without_array(); + const unsigned N = matrix->base_type == GLSL_TYPE_DOUBLE ? 8 : 4; + const unsigned items = row_major ? 
matrix->matrix_columns : matrix->vector_elements; + + assert(items <= 4); + this->uniforms[id].matrix_stride = glsl_align(items * N, 16); this->uniforms[id].row_major = row_major; } else { this->uniforms[id].matrix_stride = 0; diff --git a/mesalib/src/glsl/linker.h b/mesalib/src/glsl/linker.h index 6ee585898..be4da5e0a 100644 --- a/mesalib/src/glsl/linker.h +++ b/mesalib/src/glsl/linker.h @@ -170,6 +170,12 @@ protected: */ virtual void visit_field(const glsl_struct_field *field); + virtual void enter_record(const glsl_type *type, const char *name, + bool row_major); + + virtual void leave_record(const glsl_type *type, const char *name, + bool row_major); + private: /** * \param name_length Length of the current name \b not including the diff --git a/mesalib/src/glsl/list.h b/mesalib/src/glsl/list.h index 995c666ea..ddb98f76f 100644 --- a/mesalib/src/glsl/list.h +++ b/mesalib/src/glsl/list.h @@ -51,6 +51,10 @@ * Therefore, if \c head->next is \c NULL or \c tail_prev->prev is \c NULL, * the list is empty. * + * Do note that this means that the list nodes will contain pointers into the + * list structure itself and as a result you may not \c realloc() an \c + * exec_list or any structure in which an \c exec_list is embedded. + * * To anyone familiar with "exec lists" on the Amiga, this structure should * be immediately recognizable. See the following link for the original Amiga * operating system documentation on the subject. @@ -534,9 +538,7 @@ exec_list_validate(const struct exec_list *list) * either require C++ or assume the exec_node is embedded in a structure * which is not the case for this function. 
*/ - for (node = exec_list_get_head_const(list); - !exec_node_is_tail_sentinel(node); - node = exec_node_get_next_const(node)) { + for (node = list->head; node->next != NULL; node = node->next) { assert(node->next->prev == node); assert(node->prev->next == node); } @@ -638,6 +640,12 @@ inline void exec_node::insert_before(exec_list *before) __next != NULL; \ __node = __next, __next = (__type *)__next->next) +#define foreach_in_list_reverse_safe(__type, __node, __list) \ + for (__type *__node = (__type *)(__list)->tail_pred, \ + *__prev = (__type *)__node->prev; \ + __prev != NULL; \ + __node = __prev, __prev = (__type *)__prev->prev) + #define foreach_in_list_use_after(__type, __inst, __list) \ __type *(__inst); \ for ((__inst) = (__type *)(__list)->head; \ @@ -665,6 +673,12 @@ inline void exec_node::insert_before(exec_list *before) (__node)->__field.next != NULL; \ (__node) = exec_node_data(__type, (__node)->__field.next, __field)) +#define foreach_list_typed_reverse(__type, __node, __field, __list) \ + for (__type * __node = \ + exec_node_data(__type, (__list)->tail_pred, __field); \ + (__node)->__field.prev != NULL; \ + (__node) = exec_node_data(__type, (__node)->__field.prev, __field)) + #define foreach_list_typed_safe(__type, __node, __field, __list) \ for (__type * __node = \ exec_node_data(__type, (__list)->head, __field), \ @@ -674,4 +688,13 @@ inline void exec_node::insert_before(exec_list *before) __node = __next, __next = \ exec_node_data(__type, (__next)->__field.next, __field)) +#define foreach_list_typed_safe_reverse(__type, __node, __field, __list) \ + for (__type * __node = \ + exec_node_data(__type, (__list)->tail_pred, __field), \ + * __prev = \ + exec_node_data(__type, (__node)->__field.prev, __field); \ + __prev != NULL; \ + __node = __prev, __prev = \ + exec_node_data(__type, (__prev)->__field.prev, __field)) + #endif /* LIST_CONTAINER_H */ diff --git a/mesalib/src/glsl/loop_controls.cpp b/mesalib/src/glsl/loop_controls.cpp index 
1c1d34fef..2459fc1c3 100644 --- a/mesalib/src/glsl/loop_controls.cpp +++ b/mesalib/src/glsl/loop_controls.cpp @@ -102,9 +102,10 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, return -1; if (!iter->type->is_integer()) { + const ir_expression_operation op = iter->type->is_double() + ? ir_unop_d2i : ir_unop_f2i; ir_rvalue *cast = - new(mem_ctx) ir_expression(ir_unop_f2i, glsl_type::int_type, iter, - NULL); + new(mem_ctx) ir_expression(op, glsl_type::int_type, iter, NULL); iter = cast->constant_expression_value(); } @@ -134,6 +135,9 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, case GLSL_TYPE_FLOAT: iter = new(mem_ctx) ir_constant(float(iter_value + bias[i])); break; + case GLSL_TYPE_DOUBLE: + iter = new(mem_ctx) ir_constant(double(iter_value + bias[i])); + break; default: unreachable(!"Unsupported type for loop iterator."); } diff --git a/mesalib/src/glsl/lower_instructions.cpp b/mesalib/src/glsl/lower_instructions.cpp index 684285350..4779de059 100644 --- a/mesalib/src/glsl/lower_instructions.cpp +++ b/mesalib/src/glsl/lower_instructions.cpp @@ -36,12 +36,14 @@ * - EXP_TO_EXP2 * - POW_TO_EXP2 * - LOG_TO_LOG2 - * - MOD_TO_FRACT + * - MOD_TO_FLOOR * - LDEXP_TO_ARITH + * - DFREXP_TO_ARITH * - BITFIELD_INSERT_TO_BFM_BFI * - CARRY_TO_ARITH * - BORROW_TO_ARITH * - SAT_TO_CLAMP + * - DOPS_TO_DFRAC * * SUB_TO_ADD_NEG: * --------------- @@ -77,17 +79,25 @@ * Many older GPUs don't have an x**y instruction. For these GPUs, convert * x**y to 2**(y * log2(x)). * - * MOD_TO_FRACT: + * MOD_TO_FLOOR: * ------------- - * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1)) + * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1)) * * Many GPUs don't have a MOD instruction (945 and 965 included), and * if we have to break it down like this anyway, it gives an * opportunity to do things like constant fold the (1.0 / op1) easily. 
* + * Note: before we used to implement this as op1 * fract(op / op1) but this + * implementation had significant precision errors. + * * LDEXP_TO_ARITH: * ------------- - * Converts ir_binop_ldexp to arithmetic and bit operations. + * Converts ir_binop_ldexp to arithmetic and bit operations for float sources. + * + * DFREXP_DLDEXP_TO_ARITH: + * --------------- + * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to + * arithmetic and bit ops for double arguments. * * BITFIELD_INSERT_TO_BFM_BFI: * --------------------------- @@ -109,9 +119,13 @@ * ------------- * Converts ir_unop_saturate into min(max(x, 0.0), 1.0) * + * DOPS_TO_DFRAC: + * -------------- + * Converts double trunc, ceil, floor, round to fract */ #include "main/core.h" /* for M_LOG2E */ +#include "program/prog_instruction.h" /* for swizzle */ #include "glsl_types.h" #include "ir.h" #include "ir_builder.h" @@ -136,15 +150,25 @@ private: void sub_to_add_neg(ir_expression *); void div_to_mul_rcp(ir_expression *); void int_div_to_mul_rcp(ir_expression *); - void mod_to_fract(ir_expression *); + void mod_to_floor(ir_expression *); void exp_to_exp2(ir_expression *); void pow_to_exp2(ir_expression *); void log_to_log2(ir_expression *); void bitfield_insert_to_bfm_bfi(ir_expression *); void ldexp_to_arith(ir_expression *); + void dldexp_to_arith(ir_expression *); + void dfrexp_sig_to_arith(ir_expression *); + void dfrexp_exp_to_arith(ir_expression *); void carry_to_arith(ir_expression *); void borrow_to_arith(ir_expression *); void sat_to_clamp(ir_expression *); + void double_dot_to_fma(ir_expression *); + void double_lrp(ir_expression *); + void dceil_to_dfrac(ir_expression *); + void dfloor_to_dfrac(ir_expression *); + void dround_even_to_dfrac(ir_expression *); + void dtrunc_to_dfrac(ir_expression *); + void dsign_to_csel(ir_expression *); }; } /* anonymous namespace */ @@ -175,7 +199,7 @@ lower_instructions_visitor::sub_to_add_neg(ir_expression *ir) void 
lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir) { - assert(ir->operands[1]->type->is_float()); + assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double()); /* New expression for the 1.0 / op1 */ ir_rvalue *expr; @@ -276,37 +300,50 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir) } void -lower_instructions_visitor::mod_to_fract(ir_expression *ir) +lower_instructions_visitor::mod_to_floor(ir_expression *ir) { - ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b", - ir_var_temporary); - this->base_ir->insert_before(temp); - - ir_assignment *const assign = - new(ir) ir_assignment(new(ir) ir_dereference_variable(temp), - ir->operands[1], NULL); - - this->base_ir->insert_before(assign); + ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x", + ir_var_temporary); + ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y", + ir_var_temporary); + this->base_ir->insert_before(x); + this->base_ir->insert_before(y); + + ir_assignment *const assign_x = + new(ir) ir_assignment(new(ir) ir_dereference_variable(x), + ir->operands[0], NULL); + ir_assignment *const assign_y = + new(ir) ir_assignment(new(ir) ir_dereference_variable(y), + ir->operands[1], NULL); + + this->base_ir->insert_before(assign_x); + this->base_ir->insert_before(assign_y); ir_expression *const div_expr = - new(ir) ir_expression(ir_binop_div, ir->operands[0]->type, - ir->operands[0], - new(ir) ir_dereference_variable(temp)); + new(ir) ir_expression(ir_binop_div, x->type, + new(ir) ir_dereference_variable(x), + new(ir) ir_dereference_variable(y)); /* Don't generate new IR that would need to be lowered in an additional * pass. 
*/ - if (lowering(DIV_TO_MUL_RCP)) + if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double())) div_to_mul_rcp(div_expr); - ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract, - ir->operands[0]->type, - div_expr, - NULL); + ir_expression *const floor_expr = + new(ir) ir_expression(ir_unop_floor, x->type, div_expr); - ir->operation = ir_binop_mul; - ir->operands[0] = new(ir) ir_dereference_variable(temp); - ir->operands[1] = expr; + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dfloor_to_dfrac(floor_expr); + + ir_expression *const mul_expr = + new(ir) ir_expression(ir_binop_mul, + new(ir) ir_dereference_variable(y), + floor_expr); + + ir->operation = ir_binop_sub; + ir->operands[0] = new(ir) ir_dereference_variable(x); + ir->operands[1] = mul_expr; this->progress = true; } @@ -455,6 +492,262 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir) } void +lower_instructions_visitor::dldexp_to_arith(ir_expression *ir) +{ + /* See ldexp_to_arith for structure. Uses frexp_exp to extract the exponent + * from the significand. 
+ */ + + const unsigned vec_elem = ir->type->vector_elements; + + /* Types */ + const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1); + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); + + /* Constants */ + ir_constant *zeroi = ir_constant::zero(ir, ivec); + + ir_constant *sign_mask = new(ir) ir_constant(0x80000000u); + + ir_constant *exp_shift = new(ir) ir_constant(20); + ir_constant *exp_width = new(ir) ir_constant(11); + ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem); + + /* Temporary variables */ + ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary); + ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary); + + ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x", + ir_var_temporary); + + ir_variable *extracted_biased_exp = + new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary); + ir_variable *resulting_biased_exp = + new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary); + + ir_variable *is_not_zero_or_underflow = + new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary); + + ir_instruction &i = *base_ir; + + /* Copy <x> and <exp> arguments. */ + i.insert_before(x); + i.insert_before(assign(x, ir->operands[0])); + i.insert_before(exp); + i.insert_before(assign(exp, ir->operands[1])); + + ir_expression *frexp_exp = expr(ir_unop_frexp_exp, x); + if (lowering(DFREXP_DLDEXP_TO_ARITH)) + dfrexp_exp_to_arith(frexp_exp); + + /* Extract the biased exponent from <x>. */ + i.insert_before(extracted_biased_exp); + i.insert_before(assign(extracted_biased_exp, add(frexp_exp, exp_bias))); + + i.insert_before(resulting_biased_exp); + i.insert_before(assign(resulting_biased_exp, + add(extracted_biased_exp, exp))); + + /* Test if result is ±0.0, subnormal, or underflow by checking if the + * resulting biased exponent would be less than 0x1. If so, the result is + * 0.0 with the sign of x. 
(Actually, invert the conditions so that + * immediate values are the second arguments, which is better for i965) + * TODO: Implement in a vector fashion. + */ + i.insert_before(zero_sign_x); + for (unsigned elem = 0; elem < vec_elem; elem++) { + ir_variable *unpacked = + new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); + i.insert_before(unpacked); + i.insert_before( + assign(unpacked, + expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1)))); + i.insert_before(assign(unpacked, bit_and(swizzle_y(unpacked), sign_mask->clone(ir, NULL)), + WRITEMASK_Y)); + i.insert_before(assign(unpacked, ir_constant::zero(ir, glsl_type::uint_type), WRITEMASK_X)); + i.insert_before(assign(zero_sign_x, + expr(ir_unop_pack_double_2x32, unpacked), + 1 << elem)); + } + i.insert_before(is_not_zero_or_underflow); + i.insert_before(assign(is_not_zero_or_underflow, + gequal(resulting_biased_exp, + new(ir) ir_constant(0x1, vec_elem)))); + i.insert_before(assign(x, csel(is_not_zero_or_underflow, + x, zero_sign_x))); + i.insert_before(assign(resulting_biased_exp, + csel(is_not_zero_or_underflow, + resulting_biased_exp, zeroi))); + + /* We could test for overflows by checking if the resulting biased exponent + * would be greater than 0xFE. Turns out we don't need to because the GLSL + * spec says: + * + * "If this product is too large to be represented in the + * floating-point type, the result is undefined." 
+ */ + + ir_rvalue *results[4] = {NULL}; + for (unsigned elem = 0; elem < vec_elem; elem++) { + ir_variable *unpacked = + new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); + i.insert_before(unpacked); + i.insert_before( + assign(unpacked, + expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1)))); + + ir_expression *bfi = bitfield_insert( + swizzle_y(unpacked), + i2u(swizzle(resulting_biased_exp, elem, 1)), + exp_shift->clone(ir, NULL), + exp_width->clone(ir, NULL)); + + if (lowering(BITFIELD_INSERT_TO_BFM_BFI)) + bitfield_insert_to_bfm_bfi(bfi); + + i.insert_before(assign(unpacked, bfi, WRITEMASK_Y)); + + results[elem] = expr(ir_unop_pack_double_2x32, unpacked); + } + + ir->operation = ir_quadop_vector; + ir->operands[0] = results[0]; + ir->operands[1] = results[1]; + ir->operands[2] = results[2]; + ir->operands[3] = results[3]; + + /* Don't generate new IR that would need to be lowered in an additional + * pass. + */ + + this->progress = true; +} + +void +lower_instructions_visitor::dfrexp_sig_to_arith(ir_expression *ir) +{ + const unsigned vec_elem = ir->type->vector_elements; + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); + + /* Double-precision floating-point values are stored as + * 1 sign bit; + * 11 exponent bits; + * 52 mantissa bits. + * + * We're just extracting the significand here, so we only need to modify + * the upper 32-bit uint. Unfortunately we must extract each double + * independently as there is no vector version of unpackDouble. + */ + + ir_instruction &i = *base_ir; + + ir_variable *is_not_zero = + new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary); + ir_rvalue *results[4] = {NULL}; + + ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem); + i.insert_before(is_not_zero); + i.insert_before( + assign(is_not_zero, + nequal(abs(ir->operands[0]->clone(ir, NULL)), dzero))); + + /* TODO: Remake this as more vector-friendly when int64 support is + * available. 
+ */ + for (unsigned elem = 0; elem < vec_elem; elem++) { + ir_constant *zero = new(ir) ir_constant(0u, 1); + ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x800fffffu, 1); + + /* Exponent of double floating-point values in the range [0.5, 1.0). */ + ir_constant *exponent_value = new(ir) ir_constant(0x3fe00000u, 1); + + ir_variable *bits = + new(ir) ir_variable(glsl_type::uint_type, "bits", ir_var_temporary); + ir_variable *unpacked = + new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); + + ir_rvalue *x = swizzle(ir->operands[0]->clone(ir, NULL), elem, 1); + + i.insert_before(bits); + i.insert_before(unpacked); + i.insert_before(assign(unpacked, expr(ir_unop_unpack_double_2x32, x))); + + /* Manipulate the high uint to remove the exponent and replace it with + * either the default exponent or zero. + */ + i.insert_before(assign(bits, swizzle_y(unpacked))); + i.insert_before(assign(bits, bit_and(bits, sign_mantissa_mask))); + i.insert_before(assign(bits, bit_or(bits, + csel(swizzle(is_not_zero, elem, 1), + exponent_value, + zero)))); + i.insert_before(assign(unpacked, bits, WRITEMASK_Y)); + results[elem] = expr(ir_unop_pack_double_2x32, unpacked); + } + + /* Put the dvec back together */ + ir->operation = ir_quadop_vector; + ir->operands[0] = results[0]; + ir->operands[1] = results[1]; + ir->operands[2] = results[2]; + ir->operands[3] = results[3]; + + this->progress = true; +} + +void +lower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir) +{ + const unsigned vec_elem = ir->type->vector_elements; + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); + const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1); + + /* Double-precision floating-point values are stored as + * 1 sign bit; + * 11 exponent bits; + * 52 mantissa bits. + * + * We're just extracting the exponent here, so we only care about the upper + * 32-bit uint. 
+ */ + + ir_instruction &i = *base_ir; + + ir_variable *is_not_zero = + new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary); + ir_variable *high_words = + new(ir) ir_variable(uvec, "high_words", ir_var_temporary); + ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem); + ir_constant *izero = new(ir) ir_constant(0, vec_elem); + + ir_rvalue *absval = abs(ir->operands[0]); + + i.insert_before(is_not_zero); + i.insert_before(high_words); + i.insert_before(assign(is_not_zero, nequal(absval->clone(ir, NULL), dzero))); + + /* Extract all of the upper uints. */ + for (unsigned elem = 0; elem < vec_elem; elem++) { + ir_rvalue *x = swizzle(absval->clone(ir, NULL), elem, 1); + + i.insert_before(assign(high_words, + swizzle_y(expr(ir_unop_unpack_double_2x32, x)), + 1 << elem)); + + } + ir_constant *exponent_shift = new(ir) ir_constant(20, vec_elem); + ir_constant *exponent_bias = new(ir) ir_constant(-1022, vec_elem); + + /* For non-zero inputs, shift the exponent down and apply bias. */ + ir->operation = ir_triop_csel; + ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero); + ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift))); + ir->operands[2] = izero; + + this->progress = true; +} + +void lower_instructions_visitor::carry_to_arith(ir_expression *ir) { /* Translates @@ -508,10 +801,211 @@ lower_instructions_visitor::sat_to_clamp(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::double_dot_to_fma(ir_expression *ir) +{ + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type->get_base_type(), "dot_res", + ir_var_temporary); + this->base_ir->insert_before(temp); + + int nc = ir->operands[0]->type->components(); + for (int i = nc - 1; i >= 1; i--) { + ir_assignment *assig; + if (i == (nc - 1)) { + assig = assign(temp, mul(swizzle(ir->operands[0]->clone(ir, NULL), i, 1), + swizzle(ir->operands[1]->clone(ir, NULL), i, 1))); + } else { + assig = assign(temp, fma(swizzle(ir->operands[0]->clone(ir, 
NULL), i, 1), + swizzle(ir->operands[1]->clone(ir, NULL), i, 1), + temp)); + } + this->base_ir->insert_before(assig); + } + + ir->operation = ir_triop_fma; + ir->operands[0] = swizzle(ir->operands[0], 0, 1); + ir->operands[1] = swizzle(ir->operands[1], 0, 1); + ir->operands[2] = new(ir) ir_dereference_variable(temp); + + this->progress = true; + +} + +void +lower_instructions_visitor::double_lrp(ir_expression *ir) +{ + int swizval; + ir_rvalue *op0 = ir->operands[0], *op2 = ir->operands[2]; + ir_constant *one = new(ir) ir_constant(1.0, op2->type->vector_elements); + + switch (op2->type->vector_elements) { + case 1: + swizval = SWIZZLE_XXXX; + break; + default: + assert(op0->type->vector_elements == op2->type->vector_elements); + swizval = SWIZZLE_XYZW; + break; + } + + ir->operation = ir_triop_fma; + ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements); + ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0); + + this->progress = true; +} + +void +lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir) +{ + /* + * frtemp = frac(x); + * temp = sub(x, frtemp); + * result = temp + ((frtemp != 0.0) ? 
1.0 : 0.0); + */ + ir_instruction &i = *base_ir; + ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); + ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", + ir_var_temporary); + + i.insert_before(frtemp); + i.insert_before(assign(frtemp, fract(ir->operands[0]))); + + ir->operation = ir_binop_add; + ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp); + ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL)); + + this->progress = true; +} + +void +lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir) +{ + /* + * frtemp = frac(x); + * result = sub(x, frtemp); + */ + ir->operation = ir_binop_sub; + ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL)); + + this->progress = true; +} +void +lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir) +{ + /* + * insane but works + * temp = x + 0.5; + * frtemp = frac(temp); + * t2 = sub(temp, frtemp); + * if (frac(x) == 0.5) + * result = frac(t2 * 0.5) == 0 ? 
t2 : t2 - 1; + * else + * result = t2; + + */ + ir_instruction &i = *base_ir; + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", + ir_var_temporary); + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", + ir_var_temporary); + ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2", + ir_var_temporary); + ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements); + ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); + ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); + + i.insert_before(temp); + i.insert_before(assign(temp, add(ir->operands[0], p5))); + + i.insert_before(frtemp); + i.insert_before(assign(frtemp, fract(temp))); + + i.insert_before(t2); + i.insert_before(assign(t2, sub(temp, frtemp))); + + ir->operation = ir_triop_csel; + ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)), + p5->clone(ir, NULL)); + ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))), + zero), + t2, + sub(t2, one)); + ir->operands[2] = new(ir) ir_dereference_variable(t2); + + this->progress = true; +} + +void +lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir) +{ + /* + * frtemp = frac(x); + * temp = sub(x, frtemp); + * result = x >= 0 ? temp : temp + (frtemp == 0.0) ? 
0 : 1; + */ + ir_rvalue *arg = ir->operands[0]; + ir_instruction &i = *base_ir; + + ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements); + ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements); + ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp", + ir_var_temporary); + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", + ir_var_temporary); + + i.insert_before(frtemp); + i.insert_before(assign(frtemp, fract(arg))); + i.insert_before(temp); + i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp))); + + ir->operation = ir_triop_csel; + ir->operands[0] = gequal(arg->clone(ir, NULL), zero); + ir->operands[1] = new (ir) ir_dereference_variable(temp); + ir->operands[2] = add(temp, + csel(equal(frtemp, zero->clone(ir, NULL)), + zero->clone(ir, NULL), + one)); + + this->progress = true; +} + +void +lower_instructions_visitor::dsign_to_csel(ir_expression *ir) +{ + /* + * temp = x > 0.0 ? 1.0 : 0.0; + * result = x < 0.0 ? 
-1.0 : temp; + */ + ir_rvalue *arg = ir->operands[0]; + ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements); + ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements); + ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements); + + ir->operation = ir_triop_csel; + ir->operands[0] = less(arg->clone(ir, NULL), + zero->clone(ir, NULL)); + ir->operands[1] = neg_one; + ir->operands[2] = csel(greater(arg, zero), + one, + zero->clone(ir, NULL)); + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { switch (ir->operation) { + case ir_binop_dot: + if (ir->operands[0]->type->is_double()) + double_dot_to_fma(ir); + break; + case ir_triop_lrp: + if (ir->operands[0]->type->is_double()) + double_lrp(ir); + break; case ir_binop_sub: if (lowering(SUB_TO_ADD_NEG)) sub_to_add_neg(ir); @@ -520,7 +1014,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) case ir_binop_div: if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP)) int_div_to_mul_rcp(ir); - else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP)) + else if ((ir->operands[1]->type->is_float() || + ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP)) div_to_mul_rcp(ir); break; @@ -535,8 +1030,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) break; case ir_binop_mod: - if (lowering(MOD_TO_FRACT) && ir->type->is_float()) - mod_to_fract(ir); + if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double())) + mod_to_floor(ir); break; case ir_binop_pow: @@ -550,8 +1045,20 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) break; case ir_binop_ldexp: - if (lowering(LDEXP_TO_ARITH)) + if (lowering(LDEXP_TO_ARITH) && ir->type->is_float()) ldexp_to_arith(ir); + if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->type->is_double()) + dldexp_to_arith(ir); + break; + + case ir_unop_frexp_exp: + if (lowering(DFREXP_DLDEXP_TO_ARITH) && 
ir->operands[0]->type->is_double()) + dfrexp_exp_to_arith(ir); + break; + + case ir_unop_frexp_sig: + if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double()) + dfrexp_sig_to_arith(ir); break; case ir_binop_carry: @@ -569,6 +1076,30 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) sat_to_clamp(ir); break; + case ir_unop_trunc: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dtrunc_to_dfrac(ir); + break; + + case ir_unop_ceil: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dceil_to_dfrac(ir); + break; + + case ir_unop_floor: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dfloor_to_dfrac(ir); + break; + + case ir_unop_round_even: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dround_even_to_dfrac(ir); + break; + + case ir_unop_sign: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dsign_to_csel(ir); + break; default: return visit_continue; } diff --git a/mesalib/src/glsl/lower_mat_op_to_vec.cpp b/mesalib/src/glsl/lower_mat_op_to_vec.cpp index 105ee0d3f..dda754f91 100644 --- a/mesalib/src/glsl/lower_mat_op_to_vec.cpp +++ b/mesalib/src/glsl/lower_mat_op_to_vec.cpp @@ -354,6 +354,8 @@ ir_mat_op_to_vec_visitor::visit_leave(ir_assignment *orig_assign) /* OK, time to break down this matrix operation. 
*/ switch (orig_expr->operation) { + case ir_unop_d2f: + case ir_unop_f2d: case ir_unop_neg: { /* Apply the operation to each column.*/ for (i = 0; i < matrix_columns; i++) { diff --git a/mesalib/src/glsl/lower_ubo_reference.cpp b/mesalib/src/glsl/lower_ubo_reference.cpp index 43dd067fa..4ea4ccb03 100644 --- a/mesalib/src/glsl/lower_ubo_reference.cpp +++ b/mesalib/src/glsl/lower_ubo_reference.cpp @@ -140,7 +140,8 @@ public: void handle_rvalue(ir_rvalue **rvalue); void emit_ubo_loads(ir_dereference *deref, ir_variable *base_offset, - unsigned int deref_offset, bool row_major); + unsigned int deref_offset, bool row_major, + int matrix_columns); ir_expression *ubo_load(const struct glsl_type *type, ir_rvalue *offset); @@ -265,6 +266,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) ir_rvalue *offset = new(mem_ctx) ir_constant(0u); unsigned const_offset = 0; bool row_major = is_dereferenced_thing_row_major(deref); + int matrix_columns = 1; /* Calculate the offset to the start of the region of the UBO * dereferenced by *rvalue. This may be a variable offset if an @@ -288,6 +290,9 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) * vector) is handled below in emit_ubo_loads. */ array_stride = 4; + if (deref_array->array->type->is_double()) + array_stride *= 2; + matrix_columns = deref_array->array->type->matrix_columns; } else if (deref_array->type->is_interface()) { /* We're processing an array dereference of an interface instance * array. The thing being dereferenced *must* be a variable @@ -334,15 +339,6 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) const glsl_type *struct_type = deref_record->record->type; unsigned intra_struct_offset = 0; - /* glsl_type::std140_base_alignment doesn't grok interfaces. Use - * 16-bytes for the alignment because that is the general minimum of - * std140. - */ - const unsigned struct_alignment = struct_type->is_interface() - ? 
16 - : struct_type->std140_base_alignment(row_major); - - for (unsigned int i = 0; i < struct_type->length; i++) { const glsl_type *type = struct_type->fields.structure[i].type; @@ -372,7 +368,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) */ if (type->without_array()->is_record()) { intra_struct_offset = glsl_align(intra_struct_offset, - struct_alignment); + field_align); } } @@ -405,7 +401,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) base_ir->insert_before(assign(load_offset, offset)); deref = new(mem_ctx) ir_dereference_variable(load_var); - emit_ubo_loads(deref, load_offset, const_offset, row_major); + emit_ubo_loads(deref, load_offset, const_offset, row_major, matrix_columns); *rvalue = deref; progress = true; @@ -436,7 +432,8 @@ void lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, ir_variable *base_offset, unsigned int deref_offset, - bool row_major) + bool row_major, + int matrix_columns) { if (deref->type->is_record()) { unsigned int field_offset = 0; @@ -453,7 +450,7 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, field->type->std140_base_alignment(row_major)); emit_ubo_loads(field_deref, base_offset, deref_offset + field_offset, - row_major); + row_major, 1); field_offset += field->type->std140_size(row_major); } @@ -472,7 +469,7 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, element); emit_ubo_loads(element_deref, base_offset, deref_offset + i * array_stride, - row_major); + row_major, 1); } return; } @@ -488,14 +485,18 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, /* For a row-major matrix, the next column starts at the next * element. */ - emit_ubo_loads(col_deref, base_offset, deref_offset + i * 4, - row_major); + int size_mul = deref->type->is_double() ? 
8 : 4; + emit_ubo_loads(col_deref, base_offset, deref_offset + i * size_mul, + row_major, deref->type->matrix_columns); } else { /* std140 always rounds the stride of arrays (and matrices) to a - * vec4, so matrices are always 16 between columns/rows. + * vec4, so matrices are always 16 between columns/rows. With + * doubles, they will be 32 apart when there are more than 2 rows. */ - emit_ubo_loads(col_deref, base_offset, deref_offset + i * 16, - row_major); + int size_mul = (deref->type->is_double() && + deref->type->vector_elements > 2) ? 32 : 16; + emit_ubo_loads(col_deref, base_offset, deref_offset + i * size_mul, + row_major, deref->type->matrix_columns); } } return; @@ -510,16 +511,24 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), ubo_load(deref->type, offset))); } else { + unsigned N = deref->type->is_double() ? 8 : 4; + /* We're dereffing a column out of a row-major matrix, so we * gather the vector from each stored row. */ - assert(deref->type->base_type == GLSL_TYPE_FLOAT); + assert(deref->type->base_type == GLSL_TYPE_FLOAT || + deref->type->base_type == GLSL_TYPE_DOUBLE); /* Matrices, row_major or not, are stored as if they were * arrays of vectors of the appropriate size in std140. * Arrays have their strides rounded up to a vec4, so the - * matrix stride is always 16. + * matrix stride is always 16. However a double matrix may either be 16 + * or 32 depending on the number of columns. */ - unsigned matrix_stride = 16; + assert(matrix_columns <= 4); + unsigned matrix_stride = glsl_align(matrix_columns * N, 16); + + const glsl_type *ubo_type = deref->type->base_type == GLSL_TYPE_FLOAT ? 
+ glsl_type::float_type : glsl_type::double_type; for (unsigned i = 0; i < deref->type->vector_elements; i++) { ir_rvalue *chan_offset = @@ -527,7 +536,7 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - ubo_load(glsl_type::float_type, + ubo_load(ubo_type, chan_offset), (1U << i))); } diff --git a/mesalib/src/glsl/nir/.gitignore b/mesalib/src/glsl/nir/.gitignore new file mode 100644 index 000000000..64828eba6 --- /dev/null +++ b/mesalib/src/glsl/nir/.gitignore @@ -0,0 +1,5 @@ +nir_builder_opcodes.h +nir_opt_algebraic.c +nir_opcodes.c +nir_opcodes.h +nir_constant_expressions.c diff --git a/mesalib/src/glsl/nir/README b/mesalib/src/glsl/nir/README new file mode 100644 index 000000000..2c81db9db --- /dev/null +++ b/mesalib/src/glsl/nir/README @@ -0,0 +1,118 @@ +New IR, or NIR, is an IR for Mesa intended to sit below GLSL IR and Mesa IR. +Its design inherits from the various IR's that Mesa has used in the past, as +well as Direct3D assembly, and it includes a few new ideas as well. It is a +flat (in terms of using instructions instead of expressions), typeless IR, +similar to TGSI and Mesa IR. It also supports SSA (although it doesn't require +it). + +Variables +========= + +NIR includes support for source-level GLSL variables through a structure mostly +copied from GLSL IR. These will be used for linking and conversion from GLSL IR +(and later, from an AST), but for the most part, they will be lowered to +registers (see below) and loads/stores. + +Registers +========= + +Registers are light-weight; they consist of a structure that only contains its +size, its index for liveness analysis, and an optional name for debugging. In +addition, registers can be local to a function or global to the entire shader; +the latter will be used in ARB_shader_subroutine for passing parameters and +getting return values from subroutines. 
Registers can also be an array, in which +case they can be accessed indirectly. Each ALU instruction (add, subtract, etc.) +works directly with registers or SSA values (see below). + +SSA +======== + +Everywhere a register can be loaded/stored, an SSA value can be used instead. +The only exception is that arrays/indirect addressing are not supported with +SSA; although research has been done on extensions of SSA to arrays before, it's +usually for the purpose of parallelization (which we're not interested in), and +adds some overhead in the form of adding copies or extra arrays (which is much +more expensive than introducing copies between non-array registers). SSA uses +point directly to their corresponding definitions, each of which in turn points to the +instruction it is part of. This creates an implicit use-def chain and avoids the +need for an external structure for each SSA register. + +Functions +========= + +Support for function calls is mostly similar to GLSL IR. Each shader contains a +list of functions, and each function has a list of overloads. Each overload +contains a list of parameters, and may contain an implementation which specifies +the variables that correspond to the parameters and return value. Inlining a +function, assuming it has a single return point, is as simple as copying its +instructions, registers, and local variables into the target function and then +inserting copies to and from the new parameters as appropriate. After functions +are inlined and any non-subroutine functions are deleted, parameters and return +variables will be converted to global variables and then global registers. We +don't do this lowering earlier (i.e. the fortranizer idea) for a few reasons: + +- If we want to do optimizations before link time, we need to have the function +signature available at link time.
+ +- If we do any inlining before link time, then we might wind up with the +inlined function and the non-inlined function using the same global +variables/registers, which would preclude optimization. + +Intrinsics +========= + +Any operation (other than function calls and textures) which touches a variable +or is not referentially transparent is represented by an intrinsic. Intrinsics +are similar to the idea of a "builtin function," i.e. a function declaration +whose implementation is provided by the backend, except they are more powerful +in the following ways: + +- They can also load and store registers when appropriate, which limits the +number of variables needed in later stages of the IR while obviating the need +for a separate load/store variable instruction. + +- Intrinsics can be marked as side-effect free, which permits them to be +treated like any other instruction when it comes to optimizations. This allows +load intrinsics to be represented as intrinsics while still being optimized +away by dead code elimination, common subexpression elimination, etc. + +Intrinsics are used for: + +- Atomic operations +- Memory barriers +- Subroutine calls +- Geometry shader emitVertex and endPrimitive +- Loading and storing variables (before lowering) +- Loading and storing uniforms, shader inputs and outputs, etc. (after lowering) +- Copying variables (cases where in GLSL the destination is a structure or +array) +- The kitchen sink +- ... + +Textures +========= + +Unfortunately, there are far too many texture operations to represent each one +of them with an intrinsic, so there's a special texture instruction similar to +the GLSL IR one. The biggest difference is that, while the texture instruction +has a sampler dereference field used just like in GLSL IR, this gets lowered to +a texture unit index (with a possible indirect offset) while the type +information of the original sampler is kept around for backends.
Also, all the +non-constant sources are stored in a single array to make it easier for +optimization passes to iterate over all the sources. + +Control Flow +========= + +Like in GLSL IR, control flow consists of a tree of "control flow nodes", which +include if statements and loops, and jump instructions (break, continue, and +return). Unlike GLSL IR, though, the leaves of the tree aren't statements but +basic blocks. Each basic block also keeps track of its successors and +predecessors, and function implementations keep track of the beginning basic +block (the first basic block of the function) and the ending basic block (a fake +basic block that every return statement points to). Together, these elements +make up the control flow graph, in this case a redundant piece of information on +top of the control flow tree that will be used by almost all the optimizations. +There are helper functions to add and remove control flow nodes that also update +the control flow graph, and so usually it doesn't need to be touched by passes +that modify control flow nodes. diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp new file mode 100644 index 000000000..544d0d932 --- /dev/null +++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp @@ -0,0 +1,1814 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "glsl_to_nir.h" +#include "ir_visitor.h" +#include "ir_hierarchical_visitor.h" +#include "ir.h" + +/* + * pass to lower GLSL IR to NIR + * + * This will lower variable dereferences to loads/stores of corresponding + * variables in NIR - the variables will be converted to registers in a later + * pass. + */ + +namespace { + +class nir_visitor : public ir_visitor +{ +public: + nir_visitor(nir_shader *shader, bool supports_ints); + ~nir_visitor(); + + virtual void visit(ir_variable *); + virtual void visit(ir_function *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_loop *); + virtual void visit(ir_if *); + virtual void visit(ir_discard *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_return *); + virtual void visit(ir_call *); + virtual void visit(ir_assignment *); + virtual void visit(ir_emit_vertex *); + virtual void visit(ir_end_primitive *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_texture *); + virtual void visit(ir_constant *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_dereference_array *); + + void create_function(ir_function *ir); + +private: + void create_overload(ir_function_signature *ir, nir_function *function); + void add_instr(nir_instr *instr, unsigned num_components); + nir_src evaluate_rvalue(ir_rvalue *ir); + + 
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src *srcs); + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1); + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1, + nir_src src2); + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1, + nir_src src2, nir_src src3); + + bool supports_ints; + + nir_shader *shader; + nir_function_impl *impl; + exec_list *cf_node_list; + nir_instr *result; /* result of the expression tree last visited */ + + /* the head of the dereference chain we're creating */ + nir_deref_var *deref_head; + /* the tail of the dereference chain we're creating */ + nir_deref *deref_tail; + + nir_variable *var; /* variable created by ir_variable visitor */ + + /* whether the IR we're operating on is per-function or global */ + bool is_global; + + /* map of ir_variable -> nir_variable */ + struct hash_table *var_table; + + /* map of ir_function_signature -> nir_function_overload */ + struct hash_table *overload_table; +}; + +/* + * This visitor runs before the main visitor, calling create_function() for + * each function so that the main visitor can resolve forward references in + * calls. 
+ */ + +class nir_function_visitor : public ir_hierarchical_visitor +{ +public: + nir_function_visitor(nir_visitor *v) : visitor(v) + { + } + virtual ir_visitor_status visit_enter(ir_function *); + +private: + nir_visitor *visitor; +}; + +}; /* end of anonymous namespace */ + +static const nir_shader_compiler_options default_options = { +}; + +nir_shader * +glsl_to_nir(exec_list *ir, _mesa_glsl_parse_state *state, + bool native_integers) +{ + const nir_shader_compiler_options *options; + + if (state) { + struct gl_context *ctx = state->ctx; + struct gl_shader_compiler_options *gl_options = + &ctx->Const.ShaderCompilerOptions[state->stage]; + + if (!gl_options->NirOptions) { + nir_shader_compiler_options *new_options = + rzalloc(ctx, nir_shader_compiler_options); + options = gl_options->NirOptions = new_options; + + if (gl_options->EmitNoPow) + new_options->lower_fpow = true; + } else { + options = gl_options->NirOptions; + } + } else { + options = &default_options; + } + + nir_shader *shader = nir_shader_create(NULL, options); + + if (state) { + shader->num_user_structures = state->num_user_structures; + shader->user_structures = ralloc_array(shader, glsl_type *, + shader->num_user_structures); + memcpy(shader->user_structures, state->user_structures, + shader->num_user_structures * sizeof(glsl_type *)); + } else { + shader->num_user_structures = 0; + shader->user_structures = NULL; + } + + nir_visitor v1(shader, native_integers); + nir_function_visitor v2(&v1); + v2.run(ir); + visit_exec_list(ir, &v1); + + return shader; +} + +nir_visitor::nir_visitor(nir_shader *shader, bool supports_ints) +{ + this->supports_ints = supports_ints; + this->shader = shader; + this->is_global = true; + this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + +nir_visitor::~nir_visitor() +{ + _mesa_hash_table_destroy(this->var_table, 
NULL); + _mesa_hash_table_destroy(this->overload_table, NULL); +} + +static nir_constant * +constant_copy(ir_constant *ir, void *mem_ctx) +{ + if (ir == NULL) + return NULL; + + nir_constant *ret = ralloc(mem_ctx, nir_constant); + + unsigned total_elems = ir->type->components(); + unsigned i; + switch (ir->type->base_type) { + case GLSL_TYPE_UINT: + for (i = 0; i < total_elems; i++) + ret->value.u[i] = ir->value.u[i]; + break; + + case GLSL_TYPE_INT: + for (i = 0; i < total_elems; i++) + ret->value.i[i] = ir->value.i[i]; + break; + + case GLSL_TYPE_FLOAT: + for (i = 0; i < total_elems; i++) + ret->value.f[i] = ir->value.f[i]; + break; + + case GLSL_TYPE_BOOL: + for (i = 0; i < total_elems; i++) + ret->value.b[i] = ir->value.b[i]; + break; + + case GLSL_TYPE_STRUCT: + ret->elements = ralloc_array(mem_ctx, nir_constant *, + ir->type->length); + i = 0; + foreach_in_list(ir_constant, field, &ir->components) { + ret->elements[i] = constant_copy(field, mem_ctx); + i++; + } + break; + + case GLSL_TYPE_ARRAY: + ret->elements = ralloc_array(mem_ctx, nir_constant *, + ir->type->length); + + for (i = 0; i < ir->type->length; i++) + ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx); + break; + + default: + unreachable("not reached"); + } + + return ret; +} + +void +nir_visitor::visit(ir_variable *ir) +{ + nir_variable *var = ralloc(shader, nir_variable); + var->type = ir->type; + var->name = ralloc_strdup(var, ir->name); + + if (ir->is_interface_instance() && ir->get_max_ifc_array_access() != NULL) { + unsigned size = ir->get_interface_type()->length; + var->max_ifc_array_access = ralloc_array(var, unsigned, size); + memcpy(var->max_ifc_array_access, ir->get_max_ifc_array_access(), + size * sizeof(unsigned)); + } else { + var->max_ifc_array_access = NULL; + } + + var->data.read_only = ir->data.read_only; + var->data.centroid = ir->data.centroid; + var->data.sample = ir->data.sample; + var->data.invariant = ir->data.invariant; + var->data.location = 
ir->data.location; + + switch(ir->data.mode) { + case ir_var_auto: + case ir_var_temporary: + if (is_global) + var->data.mode = nir_var_global; + else + var->data.mode = nir_var_local; + break; + + case ir_var_function_in: + case ir_var_function_out: + case ir_var_function_inout: + case ir_var_const_in: + var->data.mode = nir_var_local; + break; + + case ir_var_shader_in: + if (ir->data.location == VARYING_SLOT_FACE) { + /* For whatever reason, GLSL IR makes gl_FrontFacing an input */ + var->data.location = SYSTEM_VALUE_FRONT_FACE; + var->data.mode = nir_var_system_value; + } else { + var->data.mode = nir_var_shader_in; + } + break; + + case ir_var_shader_out: + var->data.mode = nir_var_shader_out; + break; + + case ir_var_uniform: + var->data.mode = nir_var_uniform; + break; + + + case ir_var_system_value: + var->data.mode = nir_var_system_value; + break; + + default: + unreachable("not reached"); + } + + var->data.interpolation = ir->data.interpolation; + var->data.origin_upper_left = ir->data.origin_upper_left; + var->data.pixel_center_integer = ir->data.pixel_center_integer; + var->data.explicit_location = ir->data.explicit_location; + var->data.explicit_index = ir->data.explicit_index; + var->data.explicit_binding = ir->data.explicit_binding; + var->data.has_initializer = ir->data.has_initializer; + var->data.is_unmatched_generic_inout = ir->data.is_unmatched_generic_inout; + var->data.location_frac = ir->data.location_frac; + var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array; + var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray; + + switch (ir->data.depth_layout) { + case ir_depth_layout_none: + var->data.depth_layout = nir_depth_layout_none; + break; + case ir_depth_layout_any: + var->data.depth_layout = nir_depth_layout_any; + break; + case ir_depth_layout_greater: + var->data.depth_layout = nir_depth_layout_greater; + break; + case ir_depth_layout_less: + var->data.depth_layout = nir_depth_layout_less; 
+ break; + case ir_depth_layout_unchanged: + var->data.depth_layout = nir_depth_layout_unchanged; + break; + default: + unreachable("not reached"); + } + + var->data.index = ir->data.index; + var->data.binding = ir->data.binding; + /* XXX Get rid of buffer_index */ + var->data.atomic.buffer_index = ir->data.binding; + var->data.atomic.offset = ir->data.atomic.offset; + var->data.image.read_only = ir->data.image_read_only; + var->data.image.write_only = ir->data.image_write_only; + var->data.image.coherent = ir->data.image_coherent; + var->data.image._volatile = ir->data.image_volatile; + var->data.image.restrict_flag = ir->data.image_restrict; + var->data.image.format = ir->data.image_format; + var->data.max_array_access = ir->data.max_array_access; + + var->num_state_slots = ir->get_num_state_slots(); + if (var->num_state_slots > 0) { + var->state_slots = ralloc_array(var, nir_state_slot, + var->num_state_slots); + + ir_state_slot *state_slots = ir->get_state_slots(); + for (unsigned i = 0; i < var->num_state_slots; i++) { + for (unsigned j = 0; j < 5; j++) + var->state_slots[i].tokens[j] = state_slots[i].tokens[j]; + var->state_slots[i].swizzle = state_slots[i].swizzle; + } + } else { + var->state_slots = NULL; + } + + var->constant_initializer = constant_copy(ir->constant_initializer, var); + + var->interface_type = ir->get_interface_type(); + + switch (var->data.mode) { + case nir_var_local: + exec_list_push_tail(&impl->locals, &var->node); + break; + + case nir_var_global: + exec_list_push_tail(&shader->globals, &var->node); + break; + + case nir_var_shader_in: + _mesa_hash_table_insert(shader->inputs, var->name, var); + break; + + case nir_var_shader_out: + _mesa_hash_table_insert(shader->outputs, var->name, var); + break; + + case nir_var_uniform: + _mesa_hash_table_insert(shader->uniforms, var->name, var); + break; + + case nir_var_system_value: + exec_list_push_tail(&shader->system_values, &var->node); + break; + + default: + unreachable("not reached"); + 
} + + _mesa_hash_table_insert(var_table, ir, var); + this->var = var; +} + +ir_visitor_status +nir_function_visitor::visit_enter(ir_function *ir) +{ + visitor->create_function(ir); + return visit_continue_with_parent; +} + + +void +nir_visitor::create_function(ir_function *ir) +{ + nir_function *func = nir_function_create(this->shader, ir->name); + foreach_in_list(ir_function_signature, sig, &ir->signatures) { + create_overload(sig, func); + } +} + + + +void +nir_visitor::create_overload(ir_function_signature *ir, nir_function *function) +{ + if (ir->is_intrinsic) + return; + + nir_function_overload *overload = nir_function_overload_create(function); + + unsigned num_params = ir->parameters.length(); + overload->num_params = num_params; + overload->params = ralloc_array(shader, nir_parameter, num_params); + + unsigned i = 0; + foreach_in_list(ir_variable, param, &ir->parameters) { + switch (param->data.mode) { + case ir_var_function_in: + overload->params[i].param_type = nir_parameter_in; + break; + + case ir_var_function_out: + overload->params[i].param_type = nir_parameter_out; + break; + + case ir_var_function_inout: + overload->params[i].param_type = nir_parameter_inout; + break; + + default: + unreachable("not reached"); + } + + overload->params[i].type = param->type; + i++; + } + + overload->return_type = ir->return_type; + + _mesa_hash_table_insert(this->overload_table, ir, overload); +} + +void +nir_visitor::visit(ir_function *ir) +{ + foreach_in_list(ir_function_signature, sig, &ir->signatures) + sig->accept(this); +} + +void +nir_visitor::visit(ir_function_signature *ir) +{ + if (ir->is_intrinsic) + return; + + struct hash_entry *entry = + _mesa_hash_table_search(this->overload_table, ir); + + assert(entry); + nir_function_overload *overload = (nir_function_overload *) entry->data; + + if (ir->is_defined) { + nir_function_impl *impl = nir_function_impl_create(overload); + this->impl = impl; + + unsigned num_params = overload->num_params; + 
impl->num_params = num_params; + impl->params = ralloc_array(this->shader, nir_variable *, num_params); + unsigned i = 0; + foreach_in_list(ir_variable, param, &ir->parameters) { + param->accept(this); + impl->params[i] = this->var; + i++; + } + + if (overload->return_type == glsl_type::void_type) { + impl->return_var = NULL; + } else { + impl->return_var = ralloc(this->shader, nir_variable); + impl->return_var->name = ralloc_strdup(impl->return_var, + "return_var"); + impl->return_var->type = overload->return_type; + } + + this->is_global = false; + + this->cf_node_list = &impl->body; + visit_exec_list(&ir->body, this); + + this->is_global = true; + } else { + overload->impl = NULL; + } +} + +void +nir_visitor::visit(ir_loop *ir) +{ + exec_list *old_list = this->cf_node_list; + + nir_loop *loop = nir_loop_create(this->shader); + nir_cf_node_insert_end(old_list, &loop->cf_node); + this->cf_node_list = &loop->body; + visit_exec_list(&ir->body_instructions, this); + + this->cf_node_list = old_list; +} + +void +nir_visitor::visit(ir_if *ir) +{ + nir_src condition = evaluate_rvalue(ir->condition); + + exec_list *old_list = this->cf_node_list; + + nir_if *if_stmt = nir_if_create(this->shader); + if_stmt->condition = condition; + nir_cf_node_insert_end(old_list, &if_stmt->cf_node); + + this->cf_node_list = &if_stmt->then_list; + visit_exec_list(&ir->then_instructions, this); + + this->cf_node_list = &if_stmt->else_list; + visit_exec_list(&ir->else_instructions, this); + + this->cf_node_list = old_list; +} + +void +nir_visitor::visit(ir_discard *ir) +{ + /* + * discards aren't treated as control flow, because before we lower them + * they can appear anywhere in the shader and the stuff after them may still + * be executed (yay, crazy GLSL rules!). However, after lowering, all the + * discards will be immediately followed by a return. 
+ */ + + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard); + nir_instr_insert_after_cf_list(this->cf_node_list, &discard->instr); +} + +void +nir_visitor::visit(ir_emit_vertex *ir) +{ + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex); + instr->const_index[0] = ir->stream_id(); + nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); +} + +void +nir_visitor::visit(ir_end_primitive *ir) +{ + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive); + instr->const_index[0] = ir->stream_id(); + nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); +} + +void +nir_visitor::visit(ir_loop_jump *ir) +{ + nir_jump_type type; + switch (ir->mode) { + case ir_loop_jump::jump_break: + type = nir_jump_break; + break; + case ir_loop_jump::jump_continue: + type = nir_jump_continue; + break; + default: + unreachable("not reached"); + } + + nir_jump_instr *instr = nir_jump_instr_create(this->shader, type); + nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); +} + +void +nir_visitor::visit(ir_return *ir) +{ + if (ir->value != NULL) { + ir->value->accept(this); + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); + + copy->variables[0] = nir_deref_var_create(this->shader, + this->impl->return_var); + copy->variables[1] = this->deref_head; + } + + nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return); + nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); +} + +void +nir_visitor::visit(ir_call *ir) +{ + if (ir->callee->is_intrinsic) { + nir_intrinsic_op op; + if (strcmp(ir->callee_name(), "__intrinsic_atomic_read") == 0) { + op = nir_intrinsic_atomic_counter_read_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_increment") == 0) { + op = nir_intrinsic_atomic_counter_inc_var; + } else if 
(strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) { + op = nir_intrinsic_atomic_counter_dec_var; + } else { + unreachable("not reached"); + } + + nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); + ir_dereference *param = + (ir_dereference *) ir->actual_parameters.get_head(); + param->accept(this); + instr->variables[0] = this->deref_head; + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + + nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); + + nir_intrinsic_instr *store_instr = + nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); + store_instr->num_components = 1; + + ir->return_deref->accept(this); + store_instr->variables[0] = this->deref_head; + store_instr->src[0].is_ssa = true; + store_instr->src[0].ssa = &instr->dest.ssa; + + nir_instr_insert_after_cf_list(this->cf_node_list, &store_instr->instr); + + return; + } + + struct hash_entry *entry = + _mesa_hash_table_search(this->overload_table, ir->callee); + assert(entry); + nir_function_overload *callee = (nir_function_overload *) entry->data; + + nir_call_instr *instr = nir_call_instr_create(this->shader, callee); + + unsigned i = 0; + foreach_in_list(ir_dereference, param, &ir->actual_parameters) { + param->accept(this); + instr->params[i] = this->deref_head; + i++; + } + + ir->return_deref->accept(this); + instr->return_deref = this->deref_head; + nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); +} + +void +nir_visitor::visit(ir_assignment *ir) +{ + unsigned num_components = ir->lhs->type->vector_elements; + + if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) && + (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) { + /* We're doing a plain-as-can-be copy, so emit a copy_var */ + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); + + ir->lhs->accept(this); + copy->variables[0] = this->deref_head; + + ir->rhs->accept(this); + copy->variables[1] = 
this->deref_head; + + + if (ir->condition) { + nir_if *if_stmt = nir_if_create(this->shader); + if_stmt->condition = evaluate_rvalue(ir->condition); + nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node); + nir_instr_insert_after_cf_list(&if_stmt->then_list, ©->instr); + } else { + nir_instr_insert_after_cf_list(this->cf_node_list, ©->instr); + } + return; + } + + assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector()); + + ir->lhs->accept(this); + nir_deref_var *lhs_deref = this->deref_head; + nir_src src = evaluate_rvalue(ir->rhs); + + if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) { + /* + * We have no good way to update only part of a variable, so just load + * the LHS and do a vec operation to combine the old with the new, and + * then store it + * back into the LHS. Copy propagation should get rid of the mess. + */ + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var); + load->num_components = ir->lhs->type->vector_elements; + nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); + load->variables[0] = lhs_deref; + nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr); + + nir_op vec_op; + switch (ir->lhs->type->vector_elements) { + case 1: vec_op = nir_op_imov; break; + case 2: vec_op = nir_op_vec2; break; + case 3: vec_op = nir_op_vec3; break; + case 4: vec_op = nir_op_vec4; break; + default: unreachable("Invalid number of components"); break; + } + nir_alu_instr *vec = nir_alu_instr_create(this->shader, vec_op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + vec->dest.write_mask = (1 << num_components) - 1; + + unsigned component = 0; + for (unsigned i = 0; i < ir->lhs->type->vector_elements; i++) { + if (ir->write_mask & (1 << i)) { + vec->src[i].src = src; + + /* GLSL IR will give us the input to the write-masked assignment + * in a single packed vector. 
So, for example, if the + * writemask is xzw, then we have to swizzle x -> x, y -> z, + * and z -> w and get the y component from the load. + */ + vec->src[i].swizzle[0] = component++; + } else { + vec->src[i].src.is_ssa = true; + vec->src[i].src.ssa = &load->dest.ssa; + vec->src[i].swizzle[0] = i; + } + } + + nir_instr_insert_after_cf_list(this->cf_node_list, &vec->instr); + + src.is_ssa = true; + src.ssa = &vec->dest.dest.ssa; + } + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); + store->num_components = ir->lhs->type->vector_elements; + nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref); + store->variables[0] = nir_deref_as_var(store_deref); + store->src[0] = src; + + if (ir->condition) { + nir_if *if_stmt = nir_if_create(this->shader); + if_stmt->condition = evaluate_rvalue(ir->condition); + nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node); + nir_instr_insert_after_cf_list(&if_stmt->then_list, &store->instr); + } else { + nir_instr_insert_after_cf_list(this->cf_node_list, &store->instr); + } +} + +/* + * Given an instruction, returns a pointer to its destination or NULL if there + * is no destination. + * + * Note that this only handles instructions we generate at this level. 
+ */ +static nir_dest * +get_instr_dest(nir_instr *instr) +{ + nir_alu_instr *alu_instr; + nir_intrinsic_instr *intrinsic_instr; + nir_tex_instr *tex_instr; + + switch (instr->type) { + case nir_instr_type_alu: + alu_instr = nir_instr_as_alu(instr); + return &alu_instr->dest.dest; + + case nir_instr_type_intrinsic: + intrinsic_instr = nir_instr_as_intrinsic(instr); + if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest) + return &intrinsic_instr->dest; + else + return NULL; + + case nir_instr_type_tex: + tex_instr = nir_instr_as_tex(instr); + return &tex_instr->dest; + + default: + unreachable("not reached"); + } + + return NULL; +} + +void +nir_visitor::add_instr(nir_instr *instr, unsigned num_components) +{ + nir_dest *dest = get_instr_dest(instr); + + nir_ssa_dest_init(instr, dest, num_components, NULL); + + nir_instr_insert_after_cf_list(this->cf_node_list, instr); + this->result = instr; +} + +nir_src +nir_visitor::evaluate_rvalue(ir_rvalue* ir) +{ + ir->accept(this); + if (ir->as_dereference() || ir->as_constant()) { + /* + * A dereference is being used on the right hand side, which means we + * must emit a variable load. 
+ */ + + nir_intrinsic_instr *load_instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var); + load_instr->num_components = ir->type->vector_elements; + load_instr->variables[0] = this->deref_head; + add_instr(&load_instr->instr, ir->type->vector_elements); + } + + nir_dest *dest = get_instr_dest(this->result); + + assert(dest->is_ssa); + nir_src src; + src.is_ssa = true; + src.ssa = &dest->ssa; + + return src; +} + +nir_alu_instr * +nir_visitor::emit(nir_op op, unsigned dest_size, nir_src *srcs) +{ + nir_alu_instr *instr = nir_alu_instr_create(this->shader, op); + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = srcs[i]; + instr->dest.write_mask = (1 << dest_size) - 1; + add_instr(&instr->instr, dest_size); + return instr; +} + +nir_alu_instr * +nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1) +{ + assert(nir_op_infos[op].num_inputs == 1); + return emit(op, dest_size, &src1); +} + +nir_alu_instr * +nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1, + nir_src src2) +{ + assert(nir_op_infos[op].num_inputs == 2); + nir_src srcs[] = { src1, src2 }; + return emit(op, dest_size, srcs); +} + +nir_alu_instr * +nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1, + nir_src src2, nir_src src3) +{ + assert(nir_op_infos[op].num_inputs == 3); + nir_src srcs[] = { src1, src2, src3 }; + return emit(op, dest_size, srcs); +} + +void +nir_visitor::visit(ir_expression *ir) +{ + /* Some special cases */ + switch (ir->operation) { + case ir_binop_ubo_load: { + ir_constant *const_index = ir->operands[1]->as_constant(); + + nir_intrinsic_op op; + if (const_index) { + op = nir_intrinsic_load_ubo; + } else { + op = nir_intrinsic_load_ubo_indirect; + } + nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op); + load->num_components = ir->type->vector_elements; + load->const_index[0] = const_index ? 
const_index->value.u[0] : 0; /* base offset */ + load->const_index[1] = 1; /* number of vec4's */ + load->src[0] = evaluate_rvalue(ir->operands[0]); + if (!const_index) + load->src[1] = evaluate_rvalue(ir->operands[1]); + add_instr(&load->instr, ir->type->vector_elements); + + /* + * In UBO's, a true boolean value is any non-zero value, but we consider + * a true boolean to be ~0. Fix this up with a != 0 comparison. + */ + + if (ir->type->base_type == GLSL_TYPE_BOOL) { + nir_load_const_instr *const_zero = nir_load_const_instr_create(shader, 1); + const_zero->value.u[0] = 0; + nir_instr_insert_after_cf_list(this->cf_node_list, &const_zero->instr); + + nir_alu_instr *compare = nir_alu_instr_create(shader, nir_op_ine); + compare->src[0].src.is_ssa = true; + compare->src[0].src.ssa = &load->dest.ssa; + compare->src[1].src.is_ssa = true; + compare->src[1].src.ssa = &const_zero->def; + for (unsigned i = 0; i < ir->type->vector_elements; i++) + compare->src[1].swizzle[i] = 0; + compare->dest.write_mask = (1 << ir->type->vector_elements) - 1; + + add_instr(&compare->instr, ir->type->vector_elements); + } + + return; + } + + case ir_unop_interpolate_at_centroid: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: { + ir_dereference *deref = ir->operands[0]->as_dereference(); + ir_swizzle *swizzle = NULL; + if (!deref) { + /* the api does not allow a swizzle here, but the varying packing code + * may have pushed one into here. 
+ */ + swizzle = ir->operands[0]->as_swizzle(); + assert(swizzle); + deref = swizzle->val->as_dereference(); + assert(deref); + } + + deref->accept(this); + + nir_intrinsic_op op; + if (this->deref_head->var->data.mode == nir_var_shader_in) { + switch (ir->operation) { + case ir_unop_interpolate_at_centroid: + op = nir_intrinsic_interp_var_at_centroid; + break; + case ir_binop_interpolate_at_offset: + op = nir_intrinsic_interp_var_at_offset; + break; + case ir_binop_interpolate_at_sample: + op = nir_intrinsic_interp_var_at_sample; + break; + default: + unreachable("Invalid interpolation intrinsic"); + } + } else { + /* This case can happen if the vertex shader does not write the + * given varying. In this case, the linker will lower it to a + * global variable. Since interpolating a variable makes no + * sense, we'll just turn it into a load which will probably + * eventually end up as an SSA definition. + */ + assert(this->deref_head->var->data.mode == nir_var_global); + op = nir_intrinsic_load_var; + } + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op); + intrin->num_components = deref->type->vector_elements; + intrin->variables[0] = this->deref_head; + + if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset || + intrin->intrinsic == nir_intrinsic_interp_var_at_sample) + intrin->src[0] = evaluate_rvalue(ir->operands[1]); + + add_instr(&intrin->instr, deref->type->vector_elements); + + if (swizzle) { + nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov); + mov->dest.write_mask = (1 << swizzle->type->vector_elements) - 1; + mov->src[0].src.is_ssa = true; + mov->src[0].src.ssa = &intrin->dest.ssa; + + mov->src[0].swizzle[0] = swizzle->mask.x; + mov->src[0].swizzle[1] = swizzle->mask.y; + mov->src[0].swizzle[2] = swizzle->mask.z; + mov->src[0].swizzle[3] = swizzle->mask.w; + for (unsigned i = deref->type->vector_elements; i < 4; i++) + mov->src[0].swizzle[i] = 0; + + add_instr(&mov->instr, swizzle->type->vector_elements); + } + 
+ return; + } + + default: + break; + } + + nir_src srcs[4]; + for (unsigned i = 0; i < ir->get_num_operands(); i++) + srcs[i] = evaluate_rvalue(ir->operands[i]); + + glsl_base_type types[4]; + for (unsigned i = 0; i < ir->get_num_operands(); i++) + if (supports_ints) + types[i] = ir->operands[i]->type->base_type; + else + types[i] = GLSL_TYPE_FLOAT; + + glsl_base_type out_type; + if (supports_ints) + out_type = ir->type->base_type; + else + out_type = GLSL_TYPE_FLOAT; + + unsigned dest_size = ir->type->vector_elements; + + nir_alu_instr *instr; + nir_op op; + + switch (ir->operation) { + case ir_unop_bit_not: emit(nir_op_inot, dest_size, srcs); break; + case ir_unop_logic_not: + emit(supports_ints ? nir_op_inot : nir_op_fnot, dest_size, srcs); + break; + case ir_unop_neg: + instr = emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fneg : nir_op_ineg, + dest_size, srcs); + break; + case ir_unop_abs: + instr = emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fabs : nir_op_iabs, + dest_size, srcs); + break; + case ir_unop_saturate: + assert(types[0] == GLSL_TYPE_FLOAT); + instr = emit(nir_op_fsat, dest_size, srcs); + break; + case ir_unop_sign: + emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fsign : nir_op_isign, + dest_size, srcs); + break; + case ir_unop_rcp: emit(nir_op_frcp, dest_size, srcs); break; + case ir_unop_rsq: emit(nir_op_frsq, dest_size, srcs); break; + case ir_unop_sqrt: emit(nir_op_fsqrt, dest_size, srcs); break; + case ir_unop_exp: emit(nir_op_fexp, dest_size, srcs); break; + case ir_unop_log: emit(nir_op_flog, dest_size, srcs); break; + case ir_unop_exp2: emit(nir_op_fexp2, dest_size, srcs); break; + case ir_unop_log2: emit(nir_op_flog2, dest_size, srcs); break; + case ir_unop_i2f: + emit(supports_ints ? nir_op_i2f : nir_op_fmov, dest_size, srcs); + break; + case ir_unop_u2f: + emit(supports_ints ? nir_op_u2f : nir_op_fmov, dest_size, srcs); + break; + case ir_unop_b2f: + emit(supports_ints ? 
nir_op_b2f : nir_op_fmov, dest_size, srcs); + break; + case ir_unop_f2i: emit(nir_op_f2i, dest_size, srcs); break; + case ir_unop_f2u: emit(nir_op_f2u, dest_size, srcs); break; + case ir_unop_f2b: emit(nir_op_f2b, dest_size, srcs); break; + case ir_unop_i2b: emit(nir_op_i2b, dest_size, srcs); break; + case ir_unop_b2i: emit(nir_op_b2i, dest_size, srcs); break; + case ir_unop_i2u: + case ir_unop_u2i: + case ir_unop_bitcast_i2f: + case ir_unop_bitcast_f2i: + case ir_unop_bitcast_u2f: + case ir_unop_bitcast_f2u: + /* no-op */ + emit(nir_op_imov, dest_size, srcs); + break; + case ir_unop_any: + switch (ir->operands[0]->type->vector_elements) { + case 2: + emit(supports_ints ? nir_op_bany2 : nir_op_fany2, + dest_size, srcs); + break; + case 3: + emit(supports_ints ? nir_op_bany3 : nir_op_fany3, + dest_size, srcs); + break; + case 4: + emit(supports_ints ? nir_op_bany4 : nir_op_fany4, + dest_size, srcs); + break; + default: + unreachable("not reached"); + } + break; + case ir_unop_trunc: emit(nir_op_ftrunc, dest_size, srcs); break; + case ir_unop_ceil: emit(nir_op_fceil, dest_size, srcs); break; + case ir_unop_floor: emit(nir_op_ffloor, dest_size, srcs); break; + case ir_unop_fract: emit(nir_op_ffract, dest_size, srcs); break; + case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break; + case ir_unop_sin: emit(nir_op_fsin, dest_size, srcs); break; + case ir_unop_cos: emit(nir_op_fcos, dest_size, srcs); break; + case ir_unop_sin_reduced: + emit(nir_op_fsin_reduced, dest_size, srcs); + break; + case ir_unop_cos_reduced: + emit(nir_op_fcos_reduced, dest_size, srcs); + break; + case ir_unop_dFdx: emit(nir_op_fddx, dest_size, srcs); break; + case ir_unop_dFdy: emit(nir_op_fddy, dest_size, srcs); break; + case ir_unop_dFdx_fine: emit(nir_op_fddx_fine, dest_size, srcs); break; + case ir_unop_dFdy_fine: emit(nir_op_fddy_fine, dest_size, srcs); break; + case ir_unop_dFdx_coarse: emit(nir_op_fddx_coarse, dest_size, srcs); break; + case ir_unop_dFdy_coarse: 
emit(nir_op_fddy_coarse, dest_size, srcs); break; + case ir_unop_pack_snorm_2x16: + emit(nir_op_pack_snorm_2x16, dest_size, srcs); + break; + case ir_unop_pack_snorm_4x8: + emit(nir_op_pack_snorm_4x8, dest_size, srcs); + break; + case ir_unop_pack_unorm_2x16: + emit(nir_op_pack_unorm_2x16, dest_size, srcs); + break; + case ir_unop_pack_unorm_4x8: + emit(nir_op_pack_unorm_4x8, dest_size, srcs); + break; + case ir_unop_pack_half_2x16: + emit(nir_op_pack_half_2x16, dest_size, srcs); + break; + case ir_unop_unpack_snorm_2x16: + emit(nir_op_unpack_snorm_2x16, dest_size, srcs); + break; + case ir_unop_unpack_snorm_4x8: + emit(nir_op_unpack_snorm_4x8, dest_size, srcs); + break; + case ir_unop_unpack_unorm_2x16: + emit(nir_op_unpack_unorm_2x16, dest_size, srcs); + break; + case ir_unop_unpack_unorm_4x8: + emit(nir_op_unpack_unorm_4x8, dest_size, srcs); + break; + case ir_unop_unpack_half_2x16: + emit(nir_op_unpack_half_2x16, dest_size, srcs); + break; + case ir_unop_unpack_half_2x16_split_x: + emit(nir_op_unpack_half_2x16_split_x, dest_size, srcs); + break; + case ir_unop_unpack_half_2x16_split_y: + emit(nir_op_unpack_half_2x16_split_y, dest_size, srcs); + break; + case ir_unop_bitfield_reverse: + emit(nir_op_bitfield_reverse, dest_size, srcs); + break; + case ir_unop_bit_count: + emit(nir_op_bit_count, dest_size, srcs); + break; + case ir_unop_find_msb: + switch (types[0]) { + case GLSL_TYPE_UINT: + emit(nir_op_ufind_msb, dest_size, srcs); + break; + case GLSL_TYPE_INT: + emit(nir_op_ifind_msb, dest_size, srcs); + break; + default: + unreachable("Invalid type for findMSB()"); + } + break; + case ir_unop_find_lsb: + emit(nir_op_find_lsb, dest_size, srcs); + break; + + case ir_unop_noise: + switch (ir->type->vector_elements) { + case 1: + switch (ir->operands[0]->type->vector_elements) { + case 1: emit(nir_op_fnoise1_1, dest_size, srcs); break; + case 2: emit(nir_op_fnoise1_2, dest_size, srcs); break; + case 3: emit(nir_op_fnoise1_3, dest_size, srcs); break; + case 4: 
emit(nir_op_fnoise1_4, dest_size, srcs); break; + default: unreachable("not reached"); + } + break; + case 2: + switch (ir->operands[0]->type->vector_elements) { + case 1: emit(nir_op_fnoise2_1, dest_size, srcs); break; + case 2: emit(nir_op_fnoise2_2, dest_size, srcs); break; + case 3: emit(nir_op_fnoise2_3, dest_size, srcs); break; + case 4: emit(nir_op_fnoise2_4, dest_size, srcs); break; + default: unreachable("not reached"); + } + break; + case 3: + switch (ir->operands[0]->type->vector_elements) { + case 1: emit(nir_op_fnoise3_1, dest_size, srcs); break; + case 2: emit(nir_op_fnoise3_2, dest_size, srcs); break; + case 3: emit(nir_op_fnoise3_3, dest_size, srcs); break; + case 4: emit(nir_op_fnoise3_4, dest_size, srcs); break; + default: unreachable("not reached"); + } + break; + case 4: + switch (ir->operands[0]->type->vector_elements) { + case 1: emit(nir_op_fnoise4_1, dest_size, srcs); break; + case 2: emit(nir_op_fnoise4_2, dest_size, srcs); break; + case 3: emit(nir_op_fnoise4_3, dest_size, srcs); break; + case 4: emit(nir_op_fnoise4_4, dest_size, srcs); break; + default: unreachable("not reached"); + } + break; + default: + unreachable("not reached"); + } + break; + case ir_binop_add: + case ir_binop_sub: + case ir_binop_mul: + case ir_binop_div: + case ir_binop_mod: + case ir_binop_min: + case ir_binop_max: + case ir_binop_pow: + case ir_binop_bit_and: + case ir_binop_bit_or: + case ir_binop_bit_xor: + case ir_binop_lshift: + case ir_binop_rshift: + switch (ir->operation) { + case ir_binop_add: + if (out_type == GLSL_TYPE_FLOAT) + op = nir_op_fadd; + else + op = nir_op_iadd; + break; + case ir_binop_sub: + if (out_type == GLSL_TYPE_FLOAT) + op = nir_op_fsub; + else + op = nir_op_isub; + break; + case ir_binop_mul: + if (out_type == GLSL_TYPE_FLOAT) + op = nir_op_fmul; + else + op = nir_op_imul; + break; + case ir_binop_div: + if (out_type == GLSL_TYPE_FLOAT) + op = nir_op_fdiv; + else if (out_type == GLSL_TYPE_INT) + op = nir_op_idiv; + else + op = 
nir_op_udiv; + break; + case ir_binop_mod: + if (out_type == GLSL_TYPE_FLOAT) + op = nir_op_fmod; + else + op = nir_op_umod; + break; + case ir_binop_min: + if (out_type == GLSL_TYPE_FLOAT) + op = nir_op_fmin; + else if (out_type == GLSL_TYPE_INT) + op = nir_op_imin; + else + op = nir_op_umin; + break; + case ir_binop_max: + if (out_type == GLSL_TYPE_FLOAT) + op = nir_op_fmax; + else if (out_type == GLSL_TYPE_INT) + op = nir_op_imax; + else + op = nir_op_umax; + break; + case ir_binop_bit_and: + op = nir_op_iand; + break; + case ir_binop_bit_or: + op = nir_op_ior; + break; + case ir_binop_bit_xor: + op = nir_op_ixor; + break; + case ir_binop_lshift: + op = nir_op_ishl; + break; + case ir_binop_rshift: + if (out_type == GLSL_TYPE_INT) + op = nir_op_ishr; + else + op = nir_op_ushr; + break; + case ir_binop_pow: + op = nir_op_fpow; + break; + + default: + unreachable("not reached"); + } + + instr = emit(op, dest_size, srcs); + + if (ir->operands[0]->type->vector_elements != 1 && + ir->operands[1]->type->vector_elements == 1) { + for (unsigned i = 0; i < ir->operands[0]->type->vector_elements; + i++) { + instr->src[1].swizzle[i] = 0; + } + } + + if (ir->operands[1]->type->vector_elements != 1 && + ir->operands[0]->type->vector_elements == 1) { + for (unsigned i = 0; i < ir->operands[1]->type->vector_elements; + i++) { + instr->src[0].swizzle[i] = 0; + } + } + + break; + case ir_binop_imul_high: + emit(out_type == GLSL_TYPE_UINT ? 
nir_op_umul_high : nir_op_imul_high, + dest_size, srcs); + break; + case ir_binop_carry: emit(nir_op_uadd_carry, dest_size, srcs); break; + case ir_binop_borrow: emit(nir_op_usub_borrow, dest_size, srcs); break; + case ir_binop_less: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + emit(nir_op_flt, dest_size, srcs); + else if (types[0] == GLSL_TYPE_INT) + emit(nir_op_ilt, dest_size, srcs); + else + emit(nir_op_ult, dest_size, srcs); + } else { + emit(nir_op_slt, dest_size, srcs); + } + break; + case ir_binop_greater: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + emit(nir_op_flt, dest_size, srcs[1], srcs[0]); + else if (types[0] == GLSL_TYPE_INT) + emit(nir_op_ilt, dest_size, srcs[1], srcs[0]); + else + emit(nir_op_ult, dest_size, srcs[1], srcs[0]); + } else { + emit(nir_op_slt, dest_size, srcs[1], srcs[0]); + } + break; + case ir_binop_lequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + emit(nir_op_fge, dest_size, srcs[1], srcs[0]); + else if (types[0] == GLSL_TYPE_INT) + emit(nir_op_ige, dest_size, srcs[1], srcs[0]); + else + emit(nir_op_uge, dest_size, srcs[1], srcs[0]); + } else { + emit(nir_op_slt, dest_size, srcs[1], srcs[0]); + } + break; + case ir_binop_gequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + emit(nir_op_fge, dest_size, srcs); + else if (types[0] == GLSL_TYPE_INT) + emit(nir_op_ige, dest_size, srcs); + else + emit(nir_op_uge, dest_size, srcs); + } else { + emit(nir_op_slt, dest_size, srcs); + } + break; + case ir_binop_equal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + emit(nir_op_feq, dest_size, srcs); + else + emit(nir_op_ieq, dest_size, srcs); + } else { + emit(nir_op_seq, dest_size, srcs); + } + break; + case ir_binop_nequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + emit(nir_op_fne, dest_size, srcs); + else + emit(nir_op_ine, dest_size, srcs); + } else { + emit(nir_op_sne, dest_size, srcs); + } + break; + case ir_binop_all_equal: + if (supports_ints) { 
+ if (types[0] == GLSL_TYPE_FLOAT) { + switch (ir->operands[0]->type->vector_elements) { + case 1: emit(nir_op_feq, dest_size, srcs); break; + case 2: emit(nir_op_ball_fequal2, dest_size, srcs); break; + case 3: emit(nir_op_ball_fequal3, dest_size, srcs); break; + case 4: emit(nir_op_ball_fequal4, dest_size, srcs); break; + default: + unreachable("not reached"); + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: emit(nir_op_ieq, dest_size, srcs); break; + case 2: emit(nir_op_ball_iequal2, dest_size, srcs); break; + case 3: emit(nir_op_ball_iequal3, dest_size, srcs); break; + case 4: emit(nir_op_ball_iequal4, dest_size, srcs); break; + default: + unreachable("not reached"); + } + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: emit(nir_op_seq, dest_size, srcs); break; + case 2: emit(nir_op_fall_equal2, dest_size, srcs); break; + case 3: emit(nir_op_fall_equal3, dest_size, srcs); break; + case 4: emit(nir_op_fall_equal4, dest_size, srcs); break; + default: + unreachable("not reached"); + } + } + break; + case ir_binop_any_nequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) { + switch (ir->operands[0]->type->vector_elements) { + case 1: emit(nir_op_fne, dest_size, srcs); break; + case 2: emit(nir_op_bany_fnequal2, dest_size, srcs); break; + case 3: emit(nir_op_bany_fnequal3, dest_size, srcs); break; + case 4: emit(nir_op_bany_fnequal4, dest_size, srcs); break; + default: + unreachable("not reached"); + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: emit(nir_op_ine, dest_size, srcs); break; + case 2: emit(nir_op_bany_inequal2, dest_size, srcs); break; + case 3: emit(nir_op_bany_inequal3, dest_size, srcs); break; + case 4: emit(nir_op_bany_inequal4, dest_size, srcs); break; + default: + unreachable("not reached"); + } + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: emit(nir_op_sne, dest_size, srcs); break; + case 2: emit(nir_op_fany_nequal2, 
dest_size, srcs); break; + case 3: emit(nir_op_fany_nequal3, dest_size, srcs); break; + case 4: emit(nir_op_fany_nequal4, dest_size, srcs); break; + default: + unreachable("not reached"); + } + } + break; + case ir_binop_logic_and: + if (supports_ints) + emit(nir_op_iand, dest_size, srcs); + else + emit(nir_op_fand, dest_size, srcs); + break; + case ir_binop_logic_or: + if (supports_ints) + emit(nir_op_ior, dest_size, srcs); + else + emit(nir_op_for, dest_size, srcs); + break; + case ir_binop_logic_xor: + if (supports_ints) + emit(nir_op_ixor, dest_size, srcs); + else + emit(nir_op_fxor, dest_size, srcs); + break; + case ir_binop_dot: + switch (ir->operands[0]->type->vector_elements) { + case 2: emit(nir_op_fdot2, dest_size, srcs); break; + case 3: emit(nir_op_fdot3, dest_size, srcs); break; + case 4: emit(nir_op_fdot4, dest_size, srcs); break; + default: + unreachable("not reached"); + } + break; + + case ir_binop_pack_half_2x16_split: + emit(nir_op_pack_half_2x16_split, dest_size, srcs); + break; + case ir_binop_bfm: emit(nir_op_bfm, dest_size, srcs); break; + case ir_binop_ldexp: emit(nir_op_ldexp, dest_size, srcs); break; + case ir_triop_fma: emit(nir_op_ffma, dest_size, srcs); break; + case ir_triop_lrp: + instr = emit(nir_op_flrp, dest_size, srcs); + if (ir->operands[0]->type->vector_elements != 1 && + ir->operands[2]->type->vector_elements == 1) { + for (unsigned i = 0; i < ir->operands[0]->type->vector_elements; + i++) { + instr->src[2].swizzle[i] = 0; + } + } + break; + case ir_triop_csel: + if (supports_ints) + emit(nir_op_bcsel, dest_size, srcs); + else + emit(nir_op_fcsel, dest_size, srcs); + break; + case ir_triop_bfi: + instr = emit(nir_op_bfi, dest_size, srcs); + for (unsigned i = 0; i < ir->operands[1]->type->vector_elements; i++) { + instr->src[0].swizzle[i] = 0; + } + break; + case ir_triop_bitfield_extract: + instr = emit(out_type == GLSL_TYPE_INT ? 
nir_op_ibitfield_extract : + nir_op_ubitfield_extract, dest_size, srcs); + for (unsigned i = 0; i < ir->operands[0]->type->vector_elements; i++) { + instr->src[1].swizzle[i] = 0; + instr->src[2].swizzle[i] = 0; + } + break; + case ir_quadop_bitfield_insert: + instr = emit(nir_op_bitfield_insert, dest_size, srcs); + for (unsigned i = 0; i < ir->operands[0]->type->vector_elements; i++) { + instr->src[2].swizzle[i] = 0; + instr->src[3].swizzle[i] = 0; + } + break; + case ir_quadop_vector: + switch (ir->type->vector_elements) { + case 2: emit(nir_op_vec2, dest_size, srcs); break; + case 3: emit(nir_op_vec3, dest_size, srcs); break; + case 4: emit(nir_op_vec4, dest_size, srcs); break; + default: unreachable("not reached"); + } + break; + + default: + unreachable("not reached"); + } +} + +void +nir_visitor::visit(ir_swizzle *ir) +{ + nir_alu_instr *instr = emit(supports_ints ? nir_op_imov : nir_op_fmov, + ir->type->vector_elements, + evaluate_rvalue(ir->val)); + + unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w }; + for (unsigned i = 0; i < ir->type->vector_elements; i++) + instr->src[0].swizzle[i] = swizzle[i]; +} + +void +nir_visitor::visit(ir_texture *ir) +{ + unsigned num_srcs; + nir_texop op; + switch (ir->op) { + case ir_tex: + op = nir_texop_tex; + num_srcs = 1; /* coordinate */ + break; + + case ir_txb: + case ir_txl: + op = (ir->op == ir_txb) ? 
nir_texop_txb : nir_texop_txl; + num_srcs = 2; /* coordinate, bias/lod */ + break; + + case ir_txd: + op = nir_texop_txd; /* coordinate, dPdx, dPdy */ + num_srcs = 3; + break; + + case ir_txf: + op = nir_texop_txf; + if (ir->lod_info.lod != NULL) + num_srcs = 2; /* coordinate, lod */ + else + num_srcs = 1; /* coordinate */ + break; + + case ir_txf_ms: + op = nir_texop_txf_ms; + num_srcs = 2; /* coordinate, sample_index */ + break; + + case ir_txs: + op = nir_texop_txs; + if (ir->lod_info.lod != NULL) + num_srcs = 1; /* lod */ + else + num_srcs = 0; + break; + + case ir_lod: + op = nir_texop_lod; + num_srcs = 1; /* coordinate */ + break; + + case ir_tg4: + op = nir_texop_tg4; + num_srcs = 1; /* coordinate */ + break; + + case ir_query_levels: + op = nir_texop_query_levels; + num_srcs = 0; + break; + + default: + unreachable("not reached"); + } + + if (ir->projector != NULL) + num_srcs++; + if (ir->shadow_comparitor != NULL) + num_srcs++; + if (ir->offset != NULL && ir->offset->as_constant() == NULL) + num_srcs++; + + nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); + + instr->op = op; + instr->sampler_dim = + (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality; + instr->is_array = ir->sampler->type->sampler_array; + instr->is_shadow = ir->sampler->type->sampler_shadow; + if (instr->is_shadow) + instr->is_new_style_shadow = (ir->type->vector_elements == 1); + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + instr->dest_type = nir_type_float; + break; + case GLSL_TYPE_INT: + instr->dest_type = nir_type_int; + break; + case GLSL_TYPE_UINT: + instr->dest_type = nir_type_unsigned; + break; + default: + unreachable("not reached"); + } + + ir->sampler->accept(this); + instr->sampler = this->deref_head; + + unsigned src_number = 0; + + if (ir->coordinate != NULL) { + instr->coord_components = ir->coordinate->type->vector_elements; + instr->src[src_number].src = evaluate_rvalue(ir->coordinate); + instr->src[src_number].src_type = 
nir_tex_src_coord; + src_number++; + } + + if (ir->projector != NULL) { + instr->src[src_number].src = evaluate_rvalue(ir->projector); + instr->src[src_number].src_type = nir_tex_src_projector; + src_number++; + } + + if (ir->shadow_comparitor != NULL) { + instr->src[src_number].src = evaluate_rvalue(ir->shadow_comparitor); + instr->src[src_number].src_type = nir_tex_src_comparitor; + src_number++; + } + + if (ir->offset != NULL) { + /* we don't support multiple offsets yet */ + assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); + + ir_constant *const_offset = ir->offset->as_constant(); + if (const_offset != NULL) { + for (unsigned i = 0; i < const_offset->type->vector_elements; i++) + instr->const_offset[i] = const_offset->value.i[i]; + } else { + instr->src[src_number].src = evaluate_rvalue(ir->offset); + instr->src[src_number].src_type = nir_tex_src_offset; + src_number++; + } + } + + switch (ir->op) { + case ir_txb: + instr->src[src_number].src = evaluate_rvalue(ir->lod_info.bias); + instr->src[src_number].src_type = nir_tex_src_bias; + src_number++; + break; + + case ir_txl: + case ir_txf: + case ir_txs: + if (ir->lod_info.lod != NULL) { + instr->src[src_number].src = evaluate_rvalue(ir->lod_info.lod); + instr->src[src_number].src_type = nir_tex_src_lod; + src_number++; + } + break; + + case ir_txd: + instr->src[src_number].src = evaluate_rvalue(ir->lod_info.grad.dPdx); + instr->src[src_number].src_type = nir_tex_src_ddx; + src_number++; + instr->src[src_number].src = evaluate_rvalue(ir->lod_info.grad.dPdy); + instr->src[src_number].src_type = nir_tex_src_ddy; + src_number++; + break; + + case ir_txf_ms: + instr->src[src_number].src = evaluate_rvalue(ir->lod_info.sample_index); + instr->src[src_number].src_type = nir_tex_src_ms_index; + src_number++; + break; + + case ir_tg4: + instr->component = ir->lod_info.component->as_constant()->value.u[0]; + break; + + default: + break; + } + + assert(src_number == num_srcs); + + 
add_instr(&instr->instr, nir_tex_instr_dest_size(instr)); +} + +void +nir_visitor::visit(ir_constant *ir) +{ + /* + * We don't know if this variable is an array or struct that gets + * dereferenced, so do the safe thing and make it a variable with a + * constant initializer and return a dereference. + */ + + nir_variable *var = ralloc(this->shader, nir_variable); + var->name = ralloc_strdup(var, "const_temp"); + var->type = ir->type; + var->data.mode = nir_var_local; + var->data.read_only = true; + var->constant_initializer = constant_copy(ir, var); + exec_list_push_tail(&this->impl->locals, &var->node); + + this->deref_head = nir_deref_var_create(this->shader, var); + this->deref_tail = &this->deref_head->deref; +} + +void +nir_visitor::visit(ir_dereference_variable *ir) +{ + struct hash_entry *entry = + _mesa_hash_table_search(this->var_table, ir->var); + assert(entry); + nir_variable *var = (nir_variable *) entry->data; + + nir_deref_var *deref = nir_deref_var_create(this->shader, var); + this->deref_head = deref; + this->deref_tail = &deref->deref; +} + +void +nir_visitor::visit(ir_dereference_record *ir) +{ + ir->record->accept(this); + + int field_index = this->deref_tail->type->field_index(ir->field); + assert(field_index >= 0); + + nir_deref_struct *deref = nir_deref_struct_create(this->shader, field_index); + deref->deref.type = ir->type; + this->deref_tail->child = &deref->deref; + this->deref_tail = &deref->deref; +} + +void +nir_visitor::visit(ir_dereference_array *ir) +{ + nir_deref_array *deref = nir_deref_array_create(this->shader); + deref->deref.type = ir->type; + + ir_constant *const_index = ir->array_index->as_constant(); + if (const_index != NULL) { + deref->deref_array_type = nir_deref_array_type_direct; + deref->base_offset = const_index->value.u[0]; + } else { + deref->deref_array_type = nir_deref_array_type_indirect; + deref->indirect = evaluate_rvalue(ir->array_index); + } + + ir->array->accept(this); + + this->deref_tail->child = 
&deref->deref; + this->deref_tail = &deref->deref; +} diff --git a/mesalib/src/glsl/nir/glsl_to_nir.h b/mesalib/src/glsl/nir/glsl_to_nir.h new file mode 100644 index 000000000..58b2cee6a --- /dev/null +++ b/mesalib/src/glsl/nir/glsl_to_nir.h @@ -0,0 +1,40 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include "../glsl_parser_extras.h" + +#ifdef __cplusplus +extern "C" { +#endif + +nir_shader *glsl_to_nir(exec_list * ir, _mesa_glsl_parse_state *state, + bool native_integers); + +#ifdef __cplusplus +} +#endif diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c new file mode 100644 index 000000000..5b0e4bc50 --- /dev/null +++ b/mesalib/src/glsl/nir/nir.c @@ -0,0 +1,2085 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include <assert.h> + +nir_shader * +nir_shader_create(void *mem_ctx, const nir_shader_compiler_options *options) +{ + nir_shader *shader = ralloc(mem_ctx, nir_shader); + + shader->uniforms = _mesa_hash_table_create(shader, _mesa_key_hash_string, + _mesa_key_string_equal); + shader->inputs = _mesa_hash_table_create(shader, _mesa_key_hash_string, + _mesa_key_string_equal); + shader->outputs = _mesa_hash_table_create(shader, _mesa_key_hash_string, + _mesa_key_string_equal); + + shader->options = options; + + shader->num_user_structures = 0; + shader->user_structures = NULL; + + exec_list_make_empty(&shader->functions); + exec_list_make_empty(&shader->registers); + exec_list_make_empty(&shader->globals); + exec_list_make_empty(&shader->system_values); + shader->reg_alloc = 0; + + shader->num_inputs = 0; + shader->num_outputs = 0; + shader->num_uniforms = 0; + + return shader; +} + +static nir_register * +reg_create(void *mem_ctx, struct exec_list *list) +{ + nir_register *reg = ralloc(mem_ctx, nir_register); + + reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + reg->num_components = 0; + reg->num_array_elems = 0; + reg->is_packed = false; + reg->name = NULL; + + exec_list_push_tail(list, &reg->node); + + return reg; +} + +nir_register * +nir_global_reg_create(nir_shader *shader) +{ + nir_register *reg = reg_create(shader, &shader->registers); + reg->index = shader->reg_alloc++; + reg->is_global = true; + + return reg; +} + +nir_register * +nir_local_reg_create(nir_function_impl *impl) +{ + nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers); + reg->index = impl->reg_alloc++; + reg->is_global = false; + + return reg; +} + +void 
+nir_reg_remove(nir_register *reg) +{ + exec_node_remove(&reg->node); +} + +nir_function * +nir_function_create(nir_shader *shader, const char *name) +{ + nir_function *func = ralloc(shader, nir_function); + + exec_list_push_tail(&shader->functions, &func->node); + exec_list_make_empty(&func->overload_list); + func->name = name; + func->shader = shader; + + return func; +} + +nir_function_overload * +nir_function_overload_create(nir_function *func) +{ + void *mem_ctx = ralloc_parent(func); + + nir_function_overload *overload = ralloc(mem_ctx, nir_function_overload); + + overload->num_params = 0; + overload->params = NULL; + overload->return_type = glsl_void_type(); + overload->impl = NULL; + + exec_list_push_tail(&func->overload_list, &overload->node); + overload->function = func; + + return overload; +} + +void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx) +{ + dest->is_ssa = src->is_ssa; + if (src->is_ssa) { + dest->ssa = src->ssa; + } else { + dest->reg.base_offset = src->reg.base_offset; + dest->reg.reg = src->reg.reg; + if (src->reg.indirect) { + dest->reg.indirect = ralloc(mem_ctx, nir_src); + nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx); + } else { + dest->reg.indirect = NULL; + } + } +} + +void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx) +{ + dest->is_ssa = src->is_ssa; + if (src->is_ssa) { + dest->ssa = src->ssa; + } else { + dest->reg.base_offset = src->reg.base_offset; + dest->reg.reg = src->reg.reg; + if (src->reg.indirect) { + dest->reg.indirect = ralloc(mem_ctx, nir_src); + nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx); + } else { + dest->reg.indirect = NULL; + } + } +} + +void +nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx) +{ + nir_src_copy(&dest->src, &src->src, mem_ctx); + dest->abs = src->abs; + dest->negate = src->negate; + for (unsigned i = 0; i < 4; i++) + dest->swizzle[i] = src->swizzle[i]; +} + +void +nir_alu_dest_copy(nir_alu_dest *dest, const 
nir_alu_dest *src, void *mem_ctx) +{ + nir_dest_copy(&dest->dest, &src->dest, mem_ctx); + dest->write_mask = src->write_mask; + dest->saturate = src->saturate; +} + +static inline void +block_add_pred(nir_block *block, nir_block *pred) +{ + _mesa_set_add(block->predecessors, pred); +} + +static void +cf_init(nir_cf_node *node, nir_cf_node_type type) +{ + exec_node_init(&node->node); + node->parent = NULL; + node->type = type; +} + +static void +link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2) +{ + pred->successors[0] = succ1; + block_add_pred(succ1, pred); + + pred->successors[1] = succ2; + if (succ2 != NULL) + block_add_pred(succ2, pred); +} + +static void +unlink_blocks(nir_block *pred, nir_block *succ) +{ + if (pred->successors[0] == succ) { + pred->successors[0] = pred->successors[1]; + pred->successors[1] = NULL; + } else { + assert(pred->successors[1] == succ); + pred->successors[1] = NULL; + } + + struct set_entry *entry = _mesa_set_search(succ->predecessors, pred); + + assert(entry); + + _mesa_set_remove(succ->predecessors, entry); +} + +static void +unlink_block_successors(nir_block *block) +{ + if (block->successors[0] != NULL) + unlink_blocks(block, block->successors[0]); + if (block->successors[1] != NULL) + unlink_blocks(block, block->successors[1]); +} + + +nir_function_impl * +nir_function_impl_create(nir_function_overload *overload) +{ + assert(overload->impl == NULL); + + void *mem_ctx = ralloc_parent(overload); + + nir_function_impl *impl = ralloc(mem_ctx, nir_function_impl); + + overload->impl = impl; + impl->overload = overload; + + cf_init(&impl->cf_node, nir_cf_node_function); + + exec_list_make_empty(&impl->body); + exec_list_make_empty(&impl->registers); + exec_list_make_empty(&impl->locals); + impl->num_params = 0; + impl->params = NULL; + impl->return_var = NULL; + impl->reg_alloc = 0; + impl->ssa_alloc = 0; + impl->valid_metadata = nir_metadata_none; + + /* create start & end blocks */ + nir_block *start_block = 
nir_block_create(mem_ctx); + nir_block *end_block = nir_block_create(mem_ctx); + start_block->cf_node.parent = &impl->cf_node; + end_block->cf_node.parent = &impl->cf_node; + impl->start_block = start_block; + impl->end_block = end_block; + + exec_list_push_tail(&impl->body, &start_block->cf_node.node); + + start_block->successors[0] = end_block; + block_add_pred(end_block, start_block); + + return impl; +} + +nir_block * +nir_block_create(void *mem_ctx) +{ + nir_block *block = ralloc(mem_ctx, nir_block); + + cf_init(&block->cf_node, nir_cf_node_block); + + block->successors[0] = block->successors[1] = NULL; + block->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + block->imm_dom = NULL; + block->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + exec_list_make_empty(&block->instr_list); + + return block; +} + +static inline void +src_init(nir_src *src) +{ + src->is_ssa = false; + src->reg.reg = NULL; + src->reg.indirect = NULL; + src->reg.base_offset = 0; +} + +nir_if * +nir_if_create(void *mem_ctx) +{ + nir_if *if_stmt = ralloc(mem_ctx, nir_if); + + cf_init(&if_stmt->cf_node, nir_cf_node_if); + src_init(&if_stmt->condition); + + nir_block *then = nir_block_create(mem_ctx); + exec_list_make_empty(&if_stmt->then_list); + exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node); + then->cf_node.parent = &if_stmt->cf_node; + + nir_block *else_stmt = nir_block_create(mem_ctx); + exec_list_make_empty(&if_stmt->else_list); + exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node); + else_stmt->cf_node.parent = &if_stmt->cf_node; + + return if_stmt; +} + +nir_loop * +nir_loop_create(void *mem_ctx) +{ + nir_loop *loop = ralloc(mem_ctx, nir_loop); + + cf_init(&loop->cf_node, nir_cf_node_loop); + + nir_block *body = nir_block_create(mem_ctx); + exec_list_make_empty(&loop->body); + exec_list_push_tail(&loop->body, &body->cf_node.node); + body->cf_node.parent = &loop->cf_node; 
+ + body->successors[0] = body; + block_add_pred(body, body); + + return loop; +} + +static void +instr_init(nir_instr *instr, nir_instr_type type) +{ + instr->type = type; + instr->block = NULL; + exec_node_init(&instr->node); +} + +static void +dest_init(nir_dest *dest) +{ + dest->is_ssa = false; + dest->reg.reg = NULL; + dest->reg.indirect = NULL; + dest->reg.base_offset = 0; +} + +static void +alu_dest_init(nir_alu_dest *dest) +{ + dest_init(&dest->dest); + dest->saturate = false; + dest->write_mask = 0xf; +} + +static void +alu_src_init(nir_alu_src *src) +{ + src_init(&src->src); + src->abs = src->negate = false; + src->swizzle[0] = 0; + src->swizzle[1] = 1; + src->swizzle[2] = 2; + src->swizzle[3] = 3; +} + +nir_alu_instr * +nir_alu_instr_create(void *mem_ctx, nir_op op) +{ + unsigned num_srcs = nir_op_infos[op].num_inputs; + nir_alu_instr *instr = + ralloc_size(mem_ctx, + sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); + + instr_init(&instr->instr, nir_instr_type_alu); + instr->op = op; + alu_dest_init(&instr->dest); + for (unsigned i = 0; i < num_srcs; i++) + alu_src_init(&instr->src[i]); + + return instr; +} + +nir_jump_instr * +nir_jump_instr_create(void *mem_ctx, nir_jump_type type) +{ + nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr); + instr_init(&instr->instr, nir_instr_type_jump); + instr->type = type; + return instr; +} + +nir_load_const_instr * +nir_load_const_instr_create(void *mem_ctx, unsigned num_components) +{ + nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr); + instr_init(&instr->instr, nir_instr_type_load_const); + + nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); + + return instr; +} + +nir_intrinsic_instr * +nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op) +{ + unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; + nir_intrinsic_instr *instr = + ralloc_size(mem_ctx, + sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); + + instr_init(&instr->instr, 
nir_instr_type_intrinsic); + instr->intrinsic = op; + + if (nir_intrinsic_infos[op].has_dest) + dest_init(&instr->dest); + + for (unsigned i = 0; i < num_srcs; i++) + src_init(&instr->src[i]); + + return instr; +} + +nir_call_instr * +nir_call_instr_create(void *mem_ctx, nir_function_overload *callee) +{ + nir_call_instr *instr = ralloc(mem_ctx, nir_call_instr); + instr_init(&instr->instr, nir_instr_type_call); + + instr->callee = callee; + instr->num_params = callee->num_params; + instr->params = ralloc_array(mem_ctx, nir_deref_var *, instr->num_params); + instr->return_deref = NULL; + + return instr; +} + +nir_tex_instr * +nir_tex_instr_create(void *mem_ctx, unsigned num_srcs) +{ + nir_tex_instr *instr = ralloc(mem_ctx, nir_tex_instr); + instr_init(&instr->instr, nir_instr_type_tex); + + dest_init(&instr->dest); + + instr->num_srcs = num_srcs; + instr->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs); + for (unsigned i = 0; i < num_srcs; i++) + src_init(&instr->src[i].src); + + instr->sampler_index = 0; + instr->sampler_array_size = 0; + instr->sampler = NULL; + + return instr; +} + +nir_phi_instr * +nir_phi_instr_create(void *mem_ctx) +{ + nir_phi_instr *instr = ralloc(mem_ctx, nir_phi_instr); + instr_init(&instr->instr, nir_instr_type_phi); + + dest_init(&instr->dest); + exec_list_make_empty(&instr->srcs); + return instr; +} + +nir_parallel_copy_instr * +nir_parallel_copy_instr_create(void *mem_ctx) +{ + nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr); + instr_init(&instr->instr, nir_instr_type_parallel_copy); + + exec_list_make_empty(&instr->entries); + + return instr; +} + +nir_ssa_undef_instr * +nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components) +{ + nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr); + instr_init(&instr->instr, nir_instr_type_ssa_undef); + + nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); + + return instr; +} + +nir_deref_var * +nir_deref_var_create(void 
*mem_ctx, nir_variable *var) +{ + nir_deref_var *deref = ralloc(mem_ctx, nir_deref_var); + deref->deref.deref_type = nir_deref_type_var; + deref->deref.child = NULL; + deref->deref.type = var->type; + deref->var = var; + return deref; +} + +nir_deref_array * +nir_deref_array_create(void *mem_ctx) +{ + nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array); + deref->deref.deref_type = nir_deref_type_array; + deref->deref.child = NULL; + deref->deref_array_type = nir_deref_array_type_direct; + src_init(&deref->indirect); + deref->base_offset = 0; + return deref; +} + +nir_deref_struct * +nir_deref_struct_create(void *mem_ctx, unsigned field_index) +{ + nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct); + deref->deref.deref_type = nir_deref_type_struct; + deref->deref.child = NULL; + deref->index = field_index; + return deref; +} + +static nir_deref_var * +copy_deref_var(void *mem_ctx, nir_deref_var *deref) +{ + nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var); + ret->deref.type = deref->deref.type; + if (deref->deref.child) + ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + return ret; +} + +static nir_deref_array * +copy_deref_array(void *mem_ctx, nir_deref_array *deref) +{ + nir_deref_array *ret = nir_deref_array_create(mem_ctx); + ret->base_offset = deref->base_offset; + ret->deref_array_type = deref->deref_array_type; + if (deref->deref_array_type == nir_deref_array_type_indirect) { + nir_src_copy(&ret->indirect, &deref->indirect, mem_ctx); + } + ret->deref.type = deref->deref.type; + if (deref->deref.child) + ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + return ret; +} + +static nir_deref_struct * +copy_deref_struct(void *mem_ctx, nir_deref_struct *deref) +{ + nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index); + ret->deref.type = deref->deref.type; + if (deref->deref.child) + ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + return ret; +} + +nir_deref * 
+nir_copy_deref(void *mem_ctx, nir_deref *deref) +{ + switch (deref->deref_type) { + case nir_deref_type_var: + return &copy_deref_var(mem_ctx, nir_deref_as_var(deref))->deref; + case nir_deref_type_array: + return &copy_deref_array(mem_ctx, nir_deref_as_array(deref))->deref; + case nir_deref_type_struct: + return &copy_deref_struct(mem_ctx, nir_deref_as_struct(deref))->deref; + default: + unreachable("Invalid dereference type"); + } + + return NULL; +} + + +/** + * \name Control flow modification + * + * These functions modify the control flow tree while keeping the control flow + * graph up-to-date. The invariants respected are: + * 1. Each then statement, else statement, or loop body must have at least one + * control flow node. + * 2. Each if-statement and loop must have one basic block before it and one + * after. + * 3. Two basic blocks cannot be directly next to each other. + * 4. If a basic block has a jump instruction, there must be only one and it + * must be at the end of the block. + * 5. The CFG must always be connected - this means that we must insert a fake + * CFG edge for loops with no break statement. + * + * The purpose of the second one is so that we have places to insert code during + * GCM, as well as eliminating the possibility of critical edges. + */ +/*@{*/ + +static void +link_non_block_to_block(nir_cf_node *node, nir_block *block) +{ + if (node->type == nir_cf_node_if) { + /* + * We're trying to link an if to a block after it; this just means linking + * the last block of the then and else branches. 
+ */ + + nir_if *if_stmt = nir_cf_node_as_if(node); + + nir_cf_node *last_then = nir_if_last_then_node(if_stmt); + assert(last_then->type == nir_cf_node_block); + nir_block *last_then_block = nir_cf_node_as_block(last_then); + + nir_cf_node *last_else = nir_if_last_else_node(if_stmt); + assert(last_else->type == nir_cf_node_block); + nir_block *last_else_block = nir_cf_node_as_block(last_else); + + if (exec_list_is_empty(&last_then_block->instr_list) || + nir_block_last_instr(last_then_block)->type != nir_instr_type_jump) { + unlink_block_successors(last_then_block); + link_blocks(last_then_block, block, NULL); + } + + if (exec_list_is_empty(&last_else_block->instr_list) || + nir_block_last_instr(last_else_block)->type != nir_instr_type_jump) { + unlink_block_successors(last_else_block); + link_blocks(last_else_block, block, NULL); + } + } else { + assert(node->type == nir_cf_node_loop); + + /* + * We can only get to this codepath if we're inserting a new loop, or + * at least a loop with no break statements; we can't insert break + * statements into a loop when we haven't inserted it into the CFG + * because we wouldn't know which block comes after the loop + * and therefore, which block should be the successor of the block with + * the break). Therefore, we need to insert a fake edge (see invariant + * #5). + */ + + nir_loop *loop = nir_cf_node_as_loop(node); + + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + + last_block->successors[1] = block; + block_add_pred(block, last_block); + } +} + +static void +link_block_to_non_block(nir_block *block, nir_cf_node *node) +{ + if (node->type == nir_cf_node_if) { + /* + * We're trying to link a block to an if after it; this just means linking + * the block to the first block of the then and else branches. 
+ */ + + nir_if *if_stmt = nir_cf_node_as_if(node); + + nir_cf_node *first_then = nir_if_first_then_node(if_stmt); + assert(first_then->type == nir_cf_node_block); + nir_block *first_then_block = nir_cf_node_as_block(first_then); + + nir_cf_node *first_else = nir_if_first_else_node(if_stmt); + assert(first_else->type == nir_cf_node_block); + nir_block *first_else_block = nir_cf_node_as_block(first_else); + + unlink_block_successors(block); + link_blocks(block, first_then_block, first_else_block); + } else { + /* + * For similar reasons as the corresponding case in + * link_non_block_to_block(), don't worry about if the loop header has + * any predecessors that need to be unlinked. + */ + + assert(node->type == nir_cf_node_loop); + + nir_loop *loop = nir_cf_node_as_loop(node); + + nir_cf_node *loop_header = nir_loop_first_cf_node(loop); + assert(loop_header->type == nir_cf_node_block); + nir_block *loop_header_block = nir_cf_node_as_block(loop_header); + + unlink_block_successors(block); + link_blocks(block, loop_header_block, NULL); + } + +} + +/** + * Takes a basic block and inserts a new empty basic block before it, making its + * predecessors point to the new block. This essentially splits the block into + * an empty header and a body so that another non-block CF node can be inserted + * between the two. Note that this does *not* link the two basic blocks, so + * some kind of cleanup *must* be performed after this call. 
+ */ + +static nir_block * +split_block_beginning(nir_block *block) +{ + nir_block *new_block = nir_block_create(ralloc_parent(block)); + new_block->cf_node.parent = block->cf_node.parent; + exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node); + + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + unlink_blocks(pred, block); + link_blocks(pred, new_block, NULL); + } + + return new_block; +} + +static void +rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src(phi, src) { + if (src->pred == old_pred) { + src->pred = new_pred; + break; + } + } + } +} + +/** + * Moves the successors of source to the successors of dest, leaving both + * successors of source NULL. + */ + +static void +move_successors(nir_block *source, nir_block *dest) +{ + nir_block *succ1 = source->successors[0]; + nir_block *succ2 = source->successors[1]; + + if (succ1) { + unlink_blocks(source, succ1); + rewrite_phi_preds(succ1, source, dest); + } + + if (succ2) { + unlink_blocks(source, succ2); + rewrite_phi_preds(succ2, source, dest); + } + + unlink_block_successors(dest); + link_blocks(dest, succ1, succ2); +} + +static nir_block * +split_block_end(nir_block *block) +{ + nir_block *new_block = nir_block_create(ralloc_parent(block)); + new_block->cf_node.parent = block->cf_node.parent; + exec_node_insert_after(&block->cf_node.node, &new_block->cf_node.node); + + move_successors(block, new_block); + + return new_block; +} + +/** + * Inserts a non-basic block between two basic blocks and links them together. 
+ */ + +static void +insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after) +{ + node->parent = before->cf_node.parent; + exec_node_insert_after(&before->cf_node.node, &node->node); + link_block_to_non_block(before, node); + link_non_block_to_block(node, after); +} + +/** + * Inserts a non-basic block before a basic block. + */ + +static void +insert_non_block_before_block(nir_cf_node *node, nir_block *block) +{ + /* split off the beginning of block into new_block */ + nir_block *new_block = split_block_beginning(block); + + /* insert our node in between new_block and block */ + insert_non_block(new_block, node, block); +} + +static void +insert_non_block_after_block(nir_block *block, nir_cf_node *node) +{ + /* split off the end of block into new_block */ + nir_block *new_block = split_block_end(block); + + /* insert our node in between block and new_block */ + insert_non_block(block, node, new_block); +} + +/* walk up the control flow tree to find the innermost enclosed loop */ +static nir_loop * +nearest_loop(nir_cf_node *node) +{ + while (node->type != nir_cf_node_loop) { + node = node->parent; + } + + return nir_cf_node_as_loop(node); +} + +nir_function_impl * +nir_cf_node_get_function(nir_cf_node *node) +{ + while (node->type != nir_cf_node_function) { + node = node->parent; + } + + return nir_cf_node_as_function(node); +} + +/* + * update the CFG after a jump instruction has been added to the end of a block + */ + +static void +handle_jump(nir_block *block) +{ + nir_instr *instr = nir_block_last_instr(block); + nir_jump_instr *jump_instr = nir_instr_as_jump(instr); + + unlink_block_successors(block); + + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_metadata_preserve(impl, nir_metadata_none); + + if (jump_instr->type == nir_jump_break || + jump_instr->type == nir_jump_continue) { + nir_loop *loop = nearest_loop(&block->cf_node); + + if (jump_instr->type == nir_jump_continue) { + nir_cf_node *first_node = 
nir_loop_first_cf_node(loop); + assert(first_node->type == nir_cf_node_block); + nir_block *first_block = nir_cf_node_as_block(first_node); + link_blocks(block, first_block, NULL); + } else { + nir_cf_node *after = nir_cf_node_next(&loop->cf_node); + assert(after->type == nir_cf_node_block); + nir_block *after_block = nir_cf_node_as_block(after); + link_blocks(block, after_block, NULL); + + /* If we inserted a fake link, remove it */ + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + if (last_block->successors[1] != NULL) + unlink_blocks(last_block, after_block); + } + } else { + assert(jump_instr->type == nir_jump_return); + link_blocks(block, impl->end_block, NULL); + } +} + +static void +handle_remove_jump(nir_block *block, nir_jump_type type) +{ + unlink_block_successors(block); + + if (exec_node_is_tail_sentinel(block->cf_node.node.next)) { + nir_cf_node *parent = block->cf_node.parent; + if (parent->type == nir_cf_node_if) { + nir_cf_node *next = nir_cf_node_next(parent); + assert(next->type == nir_cf_node_block); + nir_block *next_block = nir_cf_node_as_block(next); + + link_blocks(block, next_block, NULL); + } else { + assert(parent->type == nir_cf_node_loop); + nir_loop *loop = nir_cf_node_as_loop(parent); + + nir_cf_node *head = nir_loop_first_cf_node(loop); + assert(head->type == nir_cf_node_block); + nir_block *head_block = nir_cf_node_as_block(head); + + link_blocks(block, head_block, NULL); + } + } else { + nir_cf_node *next = nir_cf_node_next(&block->cf_node); + if (next->type == nir_cf_node_if) { + nir_if *next_if = nir_cf_node_as_if(next); + + nir_cf_node *first_then = nir_if_first_then_node(next_if); + assert(first_then->type == nir_cf_node_block); + nir_block *first_then_block = nir_cf_node_as_block(first_then); + + nir_cf_node *first_else = nir_if_first_else_node(next_if); + assert(first_else->type == nir_cf_node_block); + nir_block 
*first_else_block = nir_cf_node_as_block(first_else); + + link_blocks(block, first_then_block, first_else_block); + } else { + assert(next->type == nir_cf_node_loop); + nir_loop *next_loop = nir_cf_node_as_loop(next); + + nir_cf_node *first = nir_loop_first_cf_node(next_loop); + assert(first->type == nir_cf_node_block); + nir_block *first_block = nir_cf_node_as_block(first); + + link_blocks(block, first_block, NULL); + } + } + + if (type == nir_jump_break) { + nir_loop *loop = nearest_loop(&block->cf_node); + + nir_cf_node *next = nir_cf_node_next(&loop->cf_node); + assert(next->type == nir_cf_node_block); + nir_block *next_block = nir_cf_node_as_block(next); + + if (next_block->predecessors->entries == 0) { + /* insert fake link */ + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + + last_block->successors[1] = next_block; + block_add_pred(next_block, last_block); + } + } + + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_metadata_preserve(impl, nir_metadata_none); +} + +/** + * Inserts a basic block before another by merging the instructions. + * + * @param block the target of the insertion + * @param before the block to be inserted - must not have been inserted before + * @param has_jump whether \before has a jump instruction at the end + */ + +static void +insert_block_before_block(nir_block *block, nir_block *before, bool has_jump) +{ + assert(!has_jump || exec_list_is_empty(&block->instr_list)); + + foreach_list_typed(nir_instr, instr, node, &before->instr_list) { + instr->block = block; + } + + exec_list_prepend(&block->instr_list, &before->instr_list); + + if (has_jump) + handle_jump(block); +} + +/** + * Inserts a basic block after another by merging the instructions. 
+ * + * @param block the target of the insertion + * @param after the block to be inserted - must not have been inserted before + * @param has_jump whether \after has a jump instruction at the end + */ + +static void +insert_block_after_block(nir_block *block, nir_block *after, bool has_jump) +{ + foreach_list_typed(nir_instr, instr, node, &after->instr_list) { + instr->block = block; + } + + exec_list_append(&block->instr_list, &after->instr_list); + + if (has_jump) + handle_jump(block); +} + +static void +update_if_uses(nir_cf_node *node) +{ + if (node->type != nir_cf_node_if) + return; + + nir_if *if_stmt = nir_cf_node_as_if(node); + + struct set *if_uses_set = if_stmt->condition.is_ssa ? + if_stmt->condition.ssa->if_uses : + if_stmt->condition.reg.reg->uses; + + _mesa_set_add(if_uses_set, if_stmt); +} + +void +nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after) +{ + update_if_uses(after); + + if (after->type == nir_cf_node_block) { + /* + * either node or the one after it must be a basic block, by invariant #2; + * in either case, just merge the blocks together. + */ + nir_block *after_block = nir_cf_node_as_block(after); + + bool has_jump = !exec_list_is_empty(&after_block->instr_list) && + nir_block_last_instr(after_block)->type == nir_instr_type_jump; + + if (node->type == nir_cf_node_block) { + insert_block_after_block(nir_cf_node_as_block(node), after_block, + has_jump); + } else { + nir_cf_node *next = nir_cf_node_next(node); + assert(next->type == nir_cf_node_block); + nir_block *next_block = nir_cf_node_as_block(next); + + insert_block_before_block(next_block, after_block, has_jump); + } + } else { + if (node->type == nir_cf_node_block) { + insert_non_block_after_block(nir_cf_node_as_block(node), after); + } else { + /* + * We have to insert a non-basic block after a non-basic block. Since + * every non-basic block has a basic block after it, this is equivalent + * to inserting a non-basic block before a basic block. 
+ */ + + nir_cf_node *next = nir_cf_node_next(node); + assert(next->type == nir_cf_node_block); + nir_block *next_block = nir_cf_node_as_block(next); + + insert_non_block_before_block(after, next_block); + } + } + + nir_function_impl *impl = nir_cf_node_get_function(node); + nir_metadata_preserve(impl, nir_metadata_none); +} + +void +nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before) +{ + update_if_uses(before); + + if (before->type == nir_cf_node_block) { + nir_block *before_block = nir_cf_node_as_block(before); + + bool has_jump = !exec_list_is_empty(&before_block->instr_list) && + nir_block_last_instr(before_block)->type == nir_instr_type_jump; + + if (node->type == nir_cf_node_block) { + insert_block_before_block(nir_cf_node_as_block(node), before_block, + has_jump); + } else { + nir_cf_node *prev = nir_cf_node_prev(node); + assert(prev->type == nir_cf_node_block); + nir_block *prev_block = nir_cf_node_as_block(prev); + + insert_block_after_block(prev_block, before_block, has_jump); + } + } else { + if (node->type == nir_cf_node_block) { + insert_non_block_before_block(before, nir_cf_node_as_block(node)); + } else { + /* + * We have to insert a non-basic block before a non-basic block. This + * is equivalent to inserting a non-basic block after a basic block. 
+ */ + + nir_cf_node *prev_node = nir_cf_node_prev(node); + assert(prev_node->type == nir_cf_node_block); + nir_block *prev_block = nir_cf_node_as_block(prev_node); + + insert_non_block_after_block(prev_block, before); + } + } + + nir_function_impl *impl = nir_cf_node_get_function(node); + nir_metadata_preserve(impl, nir_metadata_none); +} + +void +nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node) +{ + nir_cf_node *begin = exec_node_data(nir_cf_node, list->head, node); + nir_cf_node_insert_before(begin, node); +} + +void +nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node) +{ + nir_cf_node *end = exec_node_data(nir_cf_node, list->tail_pred, node); + nir_cf_node_insert_after(end, node); +} + +/** + * Stitch two basic blocks together into one. The aggregate must have the same + * predecessors as the first and the same successors as the second. + */ + +static void +stitch_blocks(nir_block *before, nir_block *after) +{ + /* + * We move after into before, so we have to deal with up to 2 successors vs. + * possibly a large number of predecessors. + * + * TODO: special case when before is empty and after isn't? 
+ */ + + move_successors(after, before); + + foreach_list_typed(nir_instr, instr, node, &after->instr_list) { + instr->block = before; + } + + exec_list_append(&before->instr_list, &after->instr_list); + exec_node_remove(&after->cf_node.node); +} + +static void +remove_defs_uses(nir_instr *instr); + +static void +cleanup_cf_node(nir_cf_node *node) +{ + switch (node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(node); + /* We need to walk the instructions and clean up defs/uses */ + nir_foreach_instr(block, instr) + remove_defs_uses(instr); + break; + } + + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(node); + foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list) + cleanup_cf_node(child); + foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) + cleanup_cf_node(child); + + struct set *if_uses; + if (if_stmt->condition.is_ssa) { + if_uses = if_stmt->condition.ssa->if_uses; + } else { + if_uses = if_stmt->condition.reg.reg->if_uses; + } + + struct set_entry *entry = _mesa_set_search(if_uses, if_stmt); + assert(entry); + _mesa_set_remove(if_uses, entry); + break; + } + + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + foreach_list_typed(nir_cf_node, child, node, &loop->body) + cleanup_cf_node(child); + break; + } + case nir_cf_node_function: { + nir_function_impl *impl = nir_cf_node_as_function(node); + foreach_list_typed(nir_cf_node, child, node, &impl->body) + cleanup_cf_node(child); + break; + } + default: + unreachable("Invalid CF node type"); + } +} + +void +nir_cf_node_remove(nir_cf_node *node) +{ + nir_function_impl *impl = nir_cf_node_get_function(node); + nir_metadata_preserve(impl, nir_metadata_none); + + if (node->type == nir_cf_node_block) { + /* + * Basic blocks can't really be removed by themselves, since they act as + * padding between the non-basic blocks. So all we do here is empty the + * block of instructions. + * + * TODO: could we assert here? 
+       */
+
+      /*
+       * Drop the defs/uses of the block's instructions while they are still
+       * on the list; once the list has been emptied cleanup_cf_node() has
+       * nothing left to walk and the def/use sets would keep stale entries.
+       */
+      cleanup_cf_node(node);
+      exec_list_make_empty(&nir_cf_node_as_block(node)->instr_list);
+   } else {
+      nir_cf_node *before = nir_cf_node_prev(node);
+      assert(before->type == nir_cf_node_block);
+      nir_block *before_block = nir_cf_node_as_block(before);
+
+      nir_cf_node *after = nir_cf_node_next(node);
+      assert(after->type == nir_cf_node_block);
+      nir_block *after_block = nir_cf_node_as_block(after);
+
+      exec_node_remove(&node->node);
+      stitch_blocks(before_block, after_block);
+
+      /* the node is unlinked now; clean up everything nested inside it */
+      cleanup_cf_node(node);
+   }
+}
+
+/* nir_foreach_src callback: adds the instruction (state) to the uses set of
+ * the SSA def or register that src reads.
+ */
+static bool
+add_use_cb(nir_src *src, void *state)
+{
+   nir_instr *instr = state;
+
+   struct set *uses_set = src->is_ssa ? src->ssa->uses : src->reg.reg->uses;
+
+   _mesa_set_add(uses_set, instr);
+
+   return true;
+}
+
+/* nir_foreach_ssa_def callback: gives def a fresh index from the function's
+ * ssa_alloc counter if the instruction (state) is in a block and the def's
+ * index is still UINT_MAX.
+ */
+static bool
+add_ssa_def_cb(nir_ssa_def *def, void *state)
+{
+   nir_instr *instr = state;
+
+   if (instr->block && def->index == UINT_MAX) {
+      nir_function_impl *impl =
+         nir_cf_node_get_function(&instr->block->cf_node);
+
+      def->index = impl->ssa_alloc++;
+   }
+
+   return true;
+}
+
+/* nir_foreach_dest callback: for a register destination, adds the
+ * instruction (state) to that register's defs set.
+ */
+static bool
+add_reg_def_cb(nir_dest *dest, void *state)
+{
+   nir_instr *instr = state;
+
+   if (!dest->is_ssa)
+      _mesa_set_add(dest->reg.reg->defs, instr);
+
+   return true;
+}
+
+/* Registers all of instr's sources and destinations with the use/def
+ * bookkeeping via the three callbacks above.
+ */
+static void
+add_defs_uses(nir_instr *instr)
+{
+   nir_foreach_src(instr, add_use_cb, instr);
+   nir_foreach_dest(instr, add_reg_def_cb, instr);
+   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
+}
+
+/* Inserts the instruction before directly in front of instr, in instr's
+ * block; before must not be a jump.
+ */
+void
+nir_instr_insert_before(nir_instr *instr, nir_instr *before)
+{
+   assert(before->type != nir_instr_type_jump);
+   before->block = instr->block;
+   add_defs_uses(before);
+   exec_node_insert_node_before(&instr->node, &before->node);
+}
+
+void
+nir_instr_insert_after(nir_instr *instr, nir_instr *after)
+{
+   if (after->type == nir_instr_type_jump) {
+      assert(instr == nir_block_last_instr(instr->block));
+      assert(instr->type != nir_instr_type_jump);
+   }
+
+   after->block = instr->block;
+   add_defs_uses(after);
+   exec_node_insert_after(&instr->node, &after->node);
+
+   if (after->type == nir_instr_type_jump)
+
handle_jump(after->block); +} + +void +nir_instr_insert_before_block(nir_block *block, nir_instr *before) +{ + if (before->type == nir_instr_type_jump) + assert(exec_list_is_empty(&block->instr_list)); + + before->block = block; + add_defs_uses(before); + exec_list_push_head(&block->instr_list, &before->node); + + if (before->type == nir_instr_type_jump) + handle_jump(block); +} + +void +nir_instr_insert_after_block(nir_block *block, nir_instr *after) +{ + if (after->type == nir_instr_type_jump) { + assert(exec_list_is_empty(&block->instr_list) || + nir_block_last_instr(block)->type != nir_instr_type_jump); + } + + after->block = block; + add_defs_uses(after); + exec_list_push_tail(&block->instr_list, &after->node); + + if (after->type == nir_instr_type_jump) + handle_jump(block); +} + +void +nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) +{ + if (node->type == nir_cf_node_block) { + nir_instr_insert_before_block(nir_cf_node_as_block(node), before); + } else { + nir_cf_node *prev = nir_cf_node_prev(node); + assert(prev->type == nir_cf_node_block); + nir_block *prev_block = nir_cf_node_as_block(prev); + + nir_instr_insert_before_block(prev_block, before); + } +} + +void +nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) +{ + if (node->type == nir_cf_node_block) { + nir_instr_insert_after_block(nir_cf_node_as_block(node), after); + } else { + nir_cf_node *next = nir_cf_node_next(node); + assert(next->type == nir_cf_node_block); + nir_block *next_block = nir_cf_node_as_block(next); + + nir_instr_insert_before_block(next_block, after); + } +} + +void +nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) +{ + nir_cf_node *first_node = exec_node_data(nir_cf_node, + exec_list_get_head(list), node); + nir_instr_insert_before_cf(first_node, before); +} + +void +nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) +{ + nir_cf_node *last_node = exec_node_data(nir_cf_node, + exec_list_get_tail(list), 
node); + nir_instr_insert_after_cf(last_node, after); +} + +static bool +remove_use_cb(nir_src *src, void *state) +{ + nir_instr *instr = state; + + struct set *uses_set = src->is_ssa ? src->ssa->uses : src->reg.reg->uses; + + struct set_entry *entry = _mesa_set_search(uses_set, instr); + if (entry) + _mesa_set_remove(uses_set, entry); + + return true; +} + +static bool +remove_def_cb(nir_dest *dest, void *state) +{ + nir_instr *instr = state; + + if (dest->is_ssa) + return true; + + nir_register *reg = dest->reg.reg; + + struct set_entry *entry = _mesa_set_search(reg->defs, instr); + if (entry) + _mesa_set_remove(reg->defs, entry); + + return true; +} + +static void +remove_defs_uses(nir_instr *instr) +{ + nir_foreach_dest(instr, remove_def_cb, instr); + nir_foreach_src(instr, remove_use_cb, instr); +} + +void nir_instr_remove(nir_instr *instr) +{ + remove_defs_uses(instr); + exec_node_remove(&instr->node); + + if (instr->type == nir_instr_type_jump) { + nir_jump_instr *jump_instr = nir_instr_as_jump(instr); + handle_remove_jump(instr->block, jump_instr->type); + } +} + +/*@}*/ + +void +nir_index_local_regs(nir_function_impl *impl) +{ + unsigned index = 0; + foreach_list_typed(nir_register, reg, node, &impl->registers) { + reg->index = index++; + } + impl->reg_alloc = index; +} + +void +nir_index_global_regs(nir_shader *shader) +{ + unsigned index = 0; + foreach_list_typed(nir_register, reg, node, &shader->registers) { + reg->index = index++; + } + shader->reg_alloc = index; +} + +static bool +visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state) +{ + return cb(&instr->dest.dest, state); +} + +static bool +visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb, + void *state) +{ + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + return cb(&instr->dest, state); + + return true; +} + +static bool +visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb, + void *state) +{ + return cb(&instr->dest, state); +} + +static 
bool +visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state) +{ + return cb(&instr->dest, state); +} + +static bool +visit_parallel_copy_dest(nir_parallel_copy_instr *instr, + nir_foreach_dest_cb cb, void *state) +{ + nir_foreach_parallel_copy_entry(instr, entry) { + if (!cb(&entry->dest, state)) + return false; + } + + return true; +} + +bool +nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state) +{ + switch (instr->type) { + case nir_instr_type_alu: + return visit_alu_dest(nir_instr_as_alu(instr), cb, state); + case nir_instr_type_intrinsic: + return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state); + case nir_instr_type_tex: + return visit_texture_dest(nir_instr_as_tex(instr), cb, state); + case nir_instr_type_phi: + return visit_phi_dest(nir_instr_as_phi(instr), cb, state); + case nir_instr_type_parallel_copy: + return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr), + cb, state); + + case nir_instr_type_load_const: + case nir_instr_type_ssa_undef: + case nir_instr_type_call: + case nir_instr_type_jump: + break; + + default: + unreachable("Invalid instruction type"); + break; + } + + return true; +} + +struct foreach_ssa_def_state { + nir_foreach_ssa_def_cb cb; + void *client_state; +}; + +static inline bool +nir_ssa_def_visitor(nir_dest *dest, void *void_state) +{ + struct foreach_ssa_def_state *state = void_state; + + if (dest->is_ssa) + return state->cb(&dest->ssa, state->client_state); + else + return true; +} + +bool +nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state) +{ + switch (instr->type) { + case nir_instr_type_alu: + case nir_instr_type_tex: + case nir_instr_type_intrinsic: + case nir_instr_type_phi: + case nir_instr_type_parallel_copy: { + struct foreach_ssa_def_state foreach_state = {cb, state}; + return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state); + } + + case nir_instr_type_load_const: + return cb(&nir_instr_as_load_const(instr)->def, 
state); + case nir_instr_type_ssa_undef: + return cb(&nir_instr_as_ssa_undef(instr)->def, state); + case nir_instr_type_call: + case nir_instr_type_jump: + return true; + default: + unreachable("Invalid instruction type"); + } +} + +static bool +visit_src(nir_src *src, nir_foreach_src_cb cb, void *state) +{ + if (!cb(src, state)) + return false; + if (!src->is_ssa && src->reg.indirect) + return cb(src->reg.indirect, state); + return true; +} + +static bool +visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb, + void *state) +{ + if (deref->deref_array_type == nir_deref_array_type_indirect) + return visit_src(&deref->indirect, cb, state); + return true; +} + +static bool +visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state) +{ + nir_deref *cur = &deref->deref; + while (cur != NULL) { + if (cur->deref_type == nir_deref_type_array) + if (!visit_deref_array_src(nir_deref_as_array(cur), cb, state)) + return false; + + cur = cur->child; + } + + return true; +} + +static bool +visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state) +{ + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + if (!visit_src(&instr->src[i].src, cb, state)) + return false; + + return true; +} + +static bool +visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state) +{ + for (unsigned i = 0; i < instr->num_srcs; i++) + if (!visit_src(&instr->src[i].src, cb, state)) + return false; + + if (instr->sampler != NULL) + if (!visit_deref_src(instr->sampler, cb, state)) + return false; + + return true; +} + +static bool +visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb, + void *state) +{ + unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; + for (unsigned i = 0; i < num_srcs; i++) + if (!visit_src(&instr->src[i], cb, state)) + return false; + + unsigned num_vars = + nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) + if 
(!visit_deref_src(instr->variables[i], cb, state)) + return false; + + return true; +} + +static bool +visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state) +{ + return true; +} + +static bool +visit_load_const_src(nir_load_const_instr *instr, nir_foreach_src_cb cb, + void *state) +{ + return true; +} + +static bool +visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state) +{ + nir_foreach_phi_src(instr, src) { + if (!visit_src(&src->src, cb, state)) + return false; + } + + return true; +} + +static bool +visit_parallel_copy_src(nir_parallel_copy_instr *instr, + nir_foreach_src_cb cb, void *state) +{ + nir_foreach_parallel_copy_entry(instr, entry) { + if (!visit_src(&entry->src, cb, state)) + return false; + } + + return true; +} + +typedef struct { + void *state; + nir_foreach_src_cb cb; +} visit_dest_indirect_state; + +static bool +visit_dest_indirect(nir_dest *dest, void *_state) +{ + visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state; + + if (!dest->is_ssa && dest->reg.indirect) + return state->cb(dest->reg.indirect, state->state); + + return true; +} + +bool +nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) +{ + switch (instr->type) { + case nir_instr_type_alu: + if (!visit_alu_src(nir_instr_as_alu(instr), cb, state)) + return false; + break; + case nir_instr_type_intrinsic: + if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state)) + return false; + break; + case nir_instr_type_tex: + if (!visit_tex_src(nir_instr_as_tex(instr), cb, state)) + return false; + break; + case nir_instr_type_call: + if (!visit_call_src(nir_instr_as_call(instr), cb, state)) + return false; + break; + case nir_instr_type_load_const: + if (!visit_load_const_src(nir_instr_as_load_const(instr), cb, state)) + return false; + break; + case nir_instr_type_phi: + if (!visit_phi_src(nir_instr_as_phi(instr), cb, state)) + return false; + break; + case nir_instr_type_parallel_copy: + if 
(!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr), + cb, state)) + return false; + break; + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + return true; + + default: + unreachable("Invalid instruction type"); + break; + } + + visit_dest_indirect_state dest_state; + dest_state.state = state; + dest_state.cb = cb; + return nir_foreach_dest(instr, visit_dest_indirect, &dest_state); +} + +nir_const_value * +nir_src_as_const_value(nir_src src) +{ + if (!src.is_ssa) + return NULL; + + if (src.ssa->parent_instr->type != nir_instr_type_load_const) + return NULL; + + nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); + + return &load->value; +} + +bool +nir_srcs_equal(nir_src src1, nir_src src2) +{ + if (src1.is_ssa) { + if (src2.is_ssa) { + return src1.ssa == src2.ssa; + } else { + return false; + } + } else { + if (src2.is_ssa) { + return false; + } else { + if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL)) + return false; + + if (src1.reg.indirect) { + if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect)) + return false; + } + + return src1.reg.reg == src2.reg.reg && + src1.reg.base_offset == src2.reg.base_offset; + } + } +} + +static bool +src_does_not_use_def(nir_src *src, void *void_def) +{ + nir_ssa_def *def = void_def; + + if (src->is_ssa) { + return src->ssa != def; + } else { + return true; + } +} + +static bool +src_does_not_use_reg(nir_src *src, void *void_reg) +{ + nir_register *reg = void_reg; + + if (src->is_ssa) { + return true; + } else { + return src->reg.reg != reg; + } +} + +void +nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src) +{ + if (src->is_ssa) { + nir_ssa_def *old_ssa = src->ssa; + *src = new_src; + if (old_ssa && nir_foreach_src(instr, src_does_not_use_def, old_ssa)) { + struct set_entry *entry = _mesa_set_search(old_ssa->uses, instr); + assert(entry); + _mesa_set_remove(old_ssa->uses, entry); + } + } else { + if (src->reg.indirect) + 
nir_instr_rewrite_src(instr, src->reg.indirect, new_src); + + nir_register *old_reg = src->reg.reg; + *src = new_src; + if (old_reg && nir_foreach_src(instr, src_does_not_use_reg, old_reg)) { + struct set_entry *entry = _mesa_set_search(old_reg->uses, instr); + assert(entry); + _mesa_set_remove(old_reg->uses, entry); + } + } + + if (new_src.is_ssa) { + if (new_src.ssa) + _mesa_set_add(new_src.ssa->uses, instr); + } else { + if (new_src.reg.reg) + _mesa_set_add(new_src.reg.reg->uses, instr); + } +} + +void +nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, + unsigned num_components, const char *name) +{ + void *mem_ctx = ralloc_parent(instr); + + def->name = name; + def->parent_instr = instr; + def->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + def->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + def->num_components = num_components; + + if (instr->block) { + nir_function_impl *impl = + nir_cf_node_get_function(&instr->block->cf_node); + + def->index = impl->ssa_alloc++; + } else { + def->index = UINT_MAX; + } +} + +void +nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, + unsigned num_components, const char *name) +{ + dest->is_ssa = true; + nir_ssa_def_init(instr, &dest->ssa, num_components, name); +} + +struct ssa_def_rewrite_state { + void *mem_ctx; + nir_ssa_def *old; + nir_src new_src; +}; + +static bool +ssa_def_rewrite_uses_src(nir_src *src, void *void_state) +{ + struct ssa_def_rewrite_state *state = void_state; + + if (src->is_ssa && src->ssa == state->old) + nir_src_copy(src, &state->new_src, state->mem_ctx); + + return true; +} + +void +nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx) +{ + struct ssa_def_rewrite_state state; + state.mem_ctx = mem_ctx; + state.old = def; + state.new_src = new_src; + + assert(!new_src.is_ssa || def != new_src.ssa); + + struct set *new_uses, *new_if_uses; + if (new_src.is_ssa) { + new_uses = new_src.ssa->uses; + 
new_if_uses = new_src.ssa->if_uses; + } else { + new_uses = new_src.reg.reg->uses; + new_if_uses = new_src.reg.reg->if_uses; + } + + struct set_entry *entry; + set_foreach(def->uses, entry) { + nir_instr *instr = (nir_instr *)entry->key; + + _mesa_set_remove(def->uses, entry); + nir_foreach_src(instr, ssa_def_rewrite_uses_src, &state); + _mesa_set_add(new_uses, instr); + } + + set_foreach(def->if_uses, entry) { + nir_if *if_use = (nir_if *)entry->key; + + _mesa_set_remove(def->if_uses, entry); + nir_src_copy(&if_use->condition, &new_src, mem_ctx); + _mesa_set_add(new_if_uses, if_use); + } +} + + +static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, + bool reverse, void *state); + +static inline bool +foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state) +{ + if (reverse) { + foreach_list_typed_safe_reverse(nir_cf_node, node, node, + &if_stmt->else_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + + foreach_list_typed_safe_reverse(nir_cf_node, node, node, + &if_stmt->then_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } else { + foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->then_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + + foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->else_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } + + return true; +} + +static inline bool +foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state) +{ + if (reverse) { + foreach_list_typed_safe_reverse(nir_cf_node, node, node, &loop->body) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } else { + foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } + + return true; +} + +static bool +foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, + bool reverse, void 
*state) +{ + switch (node->type) { + case nir_cf_node_block: + return cb(nir_cf_node_as_block(node), state); + case nir_cf_node_if: + return foreach_if(nir_cf_node_as_if(node), cb, reverse, state); + case nir_cf_node_loop: + return foreach_loop(nir_cf_node_as_loop(node), cb, reverse, state); + break; + + default: + unreachable("Invalid CFG node type"); + break; + } + + return false; +} + +bool +nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state) +{ + foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) { + if (!foreach_cf_node(node, cb, false, state)) + return false; + } + + return cb(impl->end_block, state); +} + +bool +nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb, + void *state) +{ + if (!cb(impl->end_block, state)) + return false; + + foreach_list_typed_safe_reverse(nir_cf_node, node, node, &impl->body) { + if (!foreach_cf_node(node, cb, true, state)) + return false; + } + + return true; +} + +nir_if * +nir_block_get_following_if(nir_block *block) +{ + if (exec_node_is_tail_sentinel(&block->cf_node.node)) + return NULL; + + if (nir_cf_node_is_last(&block->cf_node)) + return NULL; + + nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); + + if (next_node->type != nir_cf_node_if) + return NULL; + + return nir_cf_node_as_if(next_node); +} + +static bool +index_block(nir_block *block, void *state) +{ + unsigned *index = state; + block->index = (*index)++; + return true; +} + +void +nir_index_blocks(nir_function_impl *impl) +{ + unsigned index = 0; + + if (impl->valid_metadata & nir_metadata_block_index) + return; + + nir_foreach_block(impl, index_block, &index); + + impl->num_blocks = index; +} + +static bool +index_ssa_def_cb(nir_ssa_def *def, void *state) +{ + unsigned *index = (unsigned *) state; + def->index = (*index)++; + + return true; +} + +static bool +index_ssa_block(nir_block *block, void *state) +{ + nir_foreach_instr(block, instr) + nir_foreach_ssa_def(instr, index_ssa_def_cb, 
state); + + return true; +} + +void +nir_index_ssa_defs(nir_function_impl *impl) +{ + unsigned index = 0; + nir_foreach_block(impl, index_ssa_block, &index); + impl->ssa_alloc = index; +} diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h new file mode 100644 index 000000000..d74caa959 --- /dev/null +++ b/mesalib/src/glsl/nir/nir.h @@ -0,0 +1,1618 @@ +/* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#pragma once + +#include "util/hash_table.h" +#include "../list.h" +#include "GL/gl.h" /* GLenum */ +#include "util/ralloc.h" +#include "util/set.h" +#include "util/bitset.h" +#include "nir_types.h" +#include <stdio.h> + +#include "nir_opcodes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct gl_program; +struct gl_shader_program; + +#define NIR_FALSE 0u +#define NIR_TRUE (~0u) + +/** Defines a cast function + * + * This macro defines a cast function from in_type to out_type where + * out_type is some structure type that contains a field of type in_type. + * + * Note that you have to be a bit careful as the generated cast function + * destroys constness. + */ +#define NIR_DEFINE_CAST(name, in_type, out_type, field) \ +static inline out_type * \ +name(const in_type *parent) \ +{ \ + return exec_node_data(out_type, parent, field); \ +} + +struct nir_function_overload; +struct nir_function; +struct nir_shader; + + +/** + * Description of built-in state associated with a uniform + * + * \sa nir_variable::state_slots + */ +typedef struct { + int tokens[5]; + int swizzle; +} nir_state_slot; + +typedef enum { + nir_var_shader_in, + nir_var_shader_out, + nir_var_global, + nir_var_local, + nir_var_uniform, + nir_var_system_value +} nir_variable_mode; + +/** + * Data stored in an nir_constant + */ +union nir_constant_data { + unsigned u[16]; + int i[16]; + float f[16]; + bool b[16]; +}; + +typedef struct nir_constant { + /** + * Value of the constant. + * + * The field used to back the values supplied by the constant is determined + * by the type associated with the \c nir_variable. Constants may be + * scalars, vectors, or matrices. + */ + union nir_constant_data value; + + /* Array elements / Structure Fields */ + struct nir_constant **elements; +} nir_constant; + +/** + * \brief Layout qualifiers for gl_FragDepth.
+ * + * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared + * with a layout qualifier. + */ +typedef enum { + nir_depth_layout_none, /**< No depth layout is specified. */ + nir_depth_layout_any, + nir_depth_layout_greater, + nir_depth_layout_less, + nir_depth_layout_unchanged +} nir_depth_layout; + +/** + * Either a uniform, global variable, shader input, or shader output. Based on + * ir_variable - it should be easy to translate between the two. + */ + +typedef struct { + struct exec_node node; + + /** + * Declared type of the variable + */ + const struct glsl_type *type; + + /** + * Declared name of the variable + */ + char *name; + + /** + * For variables which satisfy the is_interface_instance() predicate, this + * points to an array of integers such that if the ith member of the + * interface block is an array, max_ifc_array_access[i] is the maximum + * array element of that member that has been accessed. If the ith member + * of the interface block is not an array, max_ifc_array_access[i] is + * unused. + * + * For variables whose type is not an interface block, this pointer is + * NULL. + */ + unsigned *max_ifc_array_access; + + struct nir_variable_data { + + /** + * Is the variable read-only? + * + * This is set for variables declared as \c const, shader inputs, + * and uniforms. + */ + unsigned read_only:1; + unsigned centroid:1; + unsigned sample:1; + unsigned invariant:1; + + /** + * Storage class of the variable. + * + * \sa nir_variable_mode + */ + nir_variable_mode mode:4; + + /** + * Interpolation mode for shader inputs / outputs + * + * \sa glsl_interp_qualifier + */ + unsigned interpolation:2; + + /** + * \name ARB_fragment_coord_conventions + * @{ + */ + unsigned origin_upper_left:1; + unsigned pixel_center_integer:1; + /*@}*/ + + /** + * Was the location explicitly set in the shader?
+ * + * If the location is explicitly set in the shader, it \b cannot be changed + * by the linker or by the API (e.g., calls to \c glBindAttribLocation have + * no effect). + */ + unsigned explicit_location:1; + unsigned explicit_index:1; + + /** + * Was an initial binding explicitly set in the shader? + * + * If so, constant_initializer contains an integer nir_constant + * representing the initial binding point. + */ + unsigned explicit_binding:1; + + /** + * Does this variable have an initializer? + * + * This is used by the linker to cross-validate initializers of global + * variables. + */ + unsigned has_initializer:1; + + /** + * Is this variable a generic output or input that has not yet been matched + * up to a variable in another stage of the pipeline? + * + * This is used by the linker as scratch storage while assigning locations + * to generic inputs and outputs. + */ + unsigned is_unmatched_generic_inout:1; + + /** + * If non-zero, then this variable may be packed along with other variables + * into a single varying slot, so this offset should be applied when + * accessing components. For example, an offset of 1 means that the x + * component of this variable is actually stored in component y of the + * location specified by \c location. + */ + unsigned location_frac:2; + + /** + * Non-zero if this variable was created by lowering a named interface + * block which was not an array. + * + * Note that this variable and \c from_named_ifc_block_array will never + * both be non-zero. + */ + unsigned from_named_ifc_block_nonarray:1; + + /** + * Non-zero if this variable was created by lowering a named interface + * block which was an array. + * + * Note that this variable and \c from_named_ifc_block_nonarray will never + * both be non-zero. + */ + unsigned from_named_ifc_block_array:1; + + /** + * \brief Layout qualifier for gl_FragDepth.
+ * + * This is not equal to \c nir_depth_layout_none if and only if this + * variable is \c gl_FragDepth and a layout qualifier is specified. + */ + nir_depth_layout depth_layout; + + /** + * Storage location of the base of this variable + * + * The precise meaning of this field depends on the nature of the variable. + * + * - Vertex shader input: one of the values from \c gl_vert_attrib. + * - Vertex shader output: one of the values from \c gl_varying_slot. + * - Geometry shader input: one of the values from \c gl_varying_slot. + * - Geometry shader output: one of the values from \c gl_varying_slot. + * - Fragment shader input: one of the values from \c gl_varying_slot. + * - Fragment shader output: one of the values from \c gl_frag_result. + * - Uniforms: Per-stage uniform slot number for default uniform block. + * - Uniforms: Index within the uniform block definition for UBO members. + * - Other: This field is not currently used. + * + * If the variable is a uniform, shader input, or shader output, and the + * slot has not been assigned, the value will be -1. + */ + int location; + + /** + * The actual location of the variable in the IR. Only valid for inputs + * and outputs. + */ + unsigned int driver_location; + + /** + * output index for dual source blending. + */ + int index; + + /** + * Initial binding point for a sampler or UBO. + * + * For array types, this represents the binding point for the first element. + */ + int binding; + + /** + * Location an atomic counter is stored at. + */ + struct { + unsigned buffer_index; + unsigned offset; + } atomic; + + /** + * ARB_shader_image_load_store qualifiers. + */ + struct { + bool read_only; /**< "readonly" qualifier. */ + bool write_only; /**< "writeonly" qualifier. */ + bool coherent; + bool _volatile; + bool restrict_flag; + + /** Image internal format if specified explicitly, otherwise GL_NONE.
*/ + GLenum format; + } image; + + /** + * Highest element accessed with a constant expression array index + * + * Not used for non-array variables. + */ + unsigned max_array_access; + + } data; + + /** + * Built-in state that backs this uniform + * + * Once set at variable creation, \c state_slots must remain invariant. + * This is because, ideally, this array would be shared by all clones of + * this variable in the IR tree. In other words, we'd really like for it + * to be a fly-weight. + * + * If the variable is not a uniform, \c num_state_slots will be zero and + * \c state_slots will be \c NULL. + */ + /*@{*/ + unsigned num_state_slots; /**< Number of state slots used */ + nir_state_slot *state_slots; /**< State descriptors. */ + /*@}*/ + + /** + * Constant expression assigned in the initializer of the variable + */ + nir_constant *constant_initializer; + + /** + * For variables that are in an interface block or are an instance of an + * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. + * + * \sa ir_variable::location + */ + const struct glsl_type *interface_type; +} nir_variable; + +typedef struct { + struct exec_node node; + + unsigned num_components; /** < number of vector components */ + unsigned num_array_elems; /** < size of array (0 for no array) */ + + /** generic register index. */ + unsigned index; + + /** only for debug purposes, can be NULL */ + const char *name; + + /** whether this register is local (per-function) or global (per-shader) */ + bool is_global; + + /** + * If this flag is set to true, then accessing channels >= num_components + * is well-defined, and simply spills over to the next array element. This + * is useful for backends that can do per-component accessing, in + * particular scalar backends.
By setting this flag and making + * num_components equal to 1, structures can be packed tightly into + * registers and then registers can be accessed per-component to get to + * each structure member, even if it crosses vec4 boundaries. + */ + bool is_packed; + + /** set of nir_instr's where this register is used (read from) */ + struct set *uses; + + /** set of nir_instr's where this register is defined (written to) */ + struct set *defs; + + /** set of nir_if's where this register is used as a condition */ + struct set *if_uses; +} nir_register; + +typedef enum { + nir_instr_type_alu, + nir_instr_type_call, + nir_instr_type_tex, + nir_instr_type_intrinsic, + nir_instr_type_load_const, + nir_instr_type_jump, + nir_instr_type_ssa_undef, + nir_instr_type_phi, + nir_instr_type_parallel_copy, +} nir_instr_type; + +typedef struct { + struct exec_node node; + nir_instr_type type; + struct nir_block *block; + + /* A temporary for optimization and analysis passes to use for storing + * flags. For instance, DCE uses this to store the "dead/live" info. + */ + uint8_t pass_flags; +} nir_instr; + +static inline nir_instr * +nir_instr_next(nir_instr *instr) +{ + struct exec_node *next = exec_node_get_next(&instr->node); + if (exec_node_is_tail_sentinel(next)) + return NULL; + else + return exec_node_data(nir_instr, next, node); +} + +static inline nir_instr * +nir_instr_prev(nir_instr *instr) +{ + struct exec_node *prev = exec_node_get_prev(&instr->node); + if (exec_node_is_head_sentinel(prev)) + return NULL; + else + return exec_node_data(nir_instr, prev, node); +} + +typedef struct { + /** for debugging only, can be NULL */ + const char* name; + + /** generic SSA definition index.
*/ + unsigned index; + + /** Index into the live_in and live_out bitfields */ + unsigned live_index; + + nir_instr *parent_instr; + + /** set of nir_instr's where this SSA value is used (read from) */ + struct set *uses; + + /** set of nir_if's where this SSA value is used as a condition */ + struct set *if_uses; + + uint8_t num_components; +} nir_ssa_def; + +struct nir_src; + +typedef struct { + nir_register *reg; + struct nir_src *indirect; /** < NULL for no indirect offset */ + unsigned base_offset; + + /* TODO use-def chain goes here */ +} nir_reg_src; + +typedef struct { + nir_register *reg; + struct nir_src *indirect; /** < NULL for no indirect offset */ + unsigned base_offset; + + /* TODO def-use chain goes here */ +} nir_reg_dest; + +typedef struct nir_src { + union { + nir_reg_src reg; + nir_ssa_def *ssa; + }; + + bool is_ssa; +} nir_src; + +typedef struct { + union { + nir_reg_dest reg; + nir_ssa_def ssa; + }; + + bool is_ssa; +} nir_dest; + +static inline nir_src +nir_src_for_ssa(nir_ssa_def *def) +{ + nir_src src; + + src.is_ssa = true; + src.ssa = def; + + return src; +} + +static inline nir_src +nir_src_for_reg(nir_register *reg) +{ + nir_src src; + + src.is_ssa = false; + src.reg.reg = reg; + src.reg.indirect = NULL; + src.reg.base_offset = 0; + + return src; +} + +static inline nir_dest +nir_dest_for_reg(nir_register *reg) +{ + nir_dest dest; + + dest.is_ssa = false; + dest.reg.reg = reg; + dest.reg.indirect = NULL; + dest.reg.base_offset = 0; + + return dest; +} + +void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx); +void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx); + +typedef struct { + nir_src src; + + /** + * \name input modifiers + */ + /*@{*/ + /** + * For inputs interpreted as floating point, flips the sign bit. For + * inputs interpreted as integers, performs the two's complement negation.
+ */ + bool negate; + + /** + * Clears the sign bit for floating point values, and computes the integer + * absolute value for integers. Note that the negate modifier acts after + * the absolute value modifier, therefore if both are set then all inputs + * will become negative. + */ + bool abs; + /*@}*/ + + /** + * For each input component, says which component of the register it is + * chosen from. Note that which elements of the swizzle are used and which + * are ignored are based on the write mask for most opcodes - for example, + * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and + * a swizzle of {2, x, 1, 0} where x means "don't care." + */ + uint8_t swizzle[4]; +} nir_alu_src; + +typedef struct { + nir_dest dest; + + /** + * \name saturate output modifier + * + * Only valid for opcodes that output floating-point numbers. Clamps the + * output to between 0.0 and 1.0 inclusive. + */ + + bool saturate; + + unsigned write_mask : 4; /* ignored if dest.is_ssa is true */ +} nir_alu_dest; + +void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx); +void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, + void *mem_ctx); + +typedef enum { + nir_type_invalid = 0, /* Not a valid type */ + nir_type_float, + nir_type_int, + nir_type_unsigned, + nir_type_bool +} nir_alu_type; + +typedef enum { + NIR_OP_IS_COMMUTATIVE = (1 << 0), + NIR_OP_IS_ASSOCIATIVE = (1 << 1), +} nir_op_algebraic_property; + +typedef struct { + const char *name; + + unsigned num_inputs; + + /** + * The number of components in the output + * + * If non-zero, this is the size of the output and input sizes are + * explicitly given; swizzle and writemask are still in effect, but if + * the output component is masked out, then the input component may + * still be in use.
+ * + * If zero, the opcode acts in the standard, per-component manner; the + * operation is performed on each component (except the ones that are + * masked out) with the input being taken from the input swizzle for + * that component. + * + * The size of some of the inputs may be given (i.e. non-zero) even + * though output_size is zero; in that case, the inputs with a zero + * size act per-component, while the inputs with non-zero size don't. + */ + unsigned output_size; + + /** + * The type of vector that the instruction outputs. Note that the + * saturate modifier is only allowed on outputs with the float type. + */ + + nir_alu_type output_type; + + /** + * The number of components in each input + */ + unsigned input_sizes[4]; + + /** + * The type of vector that each input takes. Note that negate and + * absolute value are only allowed on inputs with int or float type and + * behave differently on the two. + */ + nir_alu_type input_types[4]; + + nir_op_algebraic_property algebraic_properties; +} nir_op_info; + +extern const nir_op_info nir_op_infos[nir_num_opcodes]; + +typedef struct nir_alu_instr { + nir_instr instr; + nir_op op; + nir_alu_dest dest; + nir_alu_src src[]; +} nir_alu_instr; + +/* is this source channel used?
*/ +static inline bool +nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel) +{ + if (nir_op_infos[instr->op].input_sizes[src] > 0) + return channel < nir_op_infos[instr->op].input_sizes[src]; + + return (instr->dest.write_mask >> channel) & 1; +} + +/* + * For instructions whose destinations are SSA, get the number of channels + * used for a source + */ +static inline unsigned +nir_ssa_alu_instr_src_components(nir_alu_instr *instr, unsigned src) +{ + assert(instr->dest.dest.is_ssa); + + if (nir_op_infos[instr->op].input_sizes[src] > 0) + return nir_op_infos[instr->op].input_sizes[src]; + + return instr->dest.dest.ssa.num_components; +} + +typedef enum { + nir_deref_type_var, + nir_deref_type_array, + nir_deref_type_struct +} nir_deref_type; + +typedef struct nir_deref { + nir_deref_type deref_type; + struct nir_deref *child; + const struct glsl_type *type; +} nir_deref; + +typedef struct { + nir_deref deref; + + nir_variable *var; +} nir_deref_var; + +/* This enum describes how the array is referenced. If the deref is + * direct then the base_offset is used. If the deref is indirect then the + * offset is given by base_offset + indirect. If the deref is a wildcard + * then the deref refers to all of the elements of the array at the same + * time. Wildcard dereferences are only ever allowed in copy_var + * intrinsics and the source and destination derefs must have matching + * wildcards.
+ */ +typedef enum { + nir_deref_array_type_direct, + nir_deref_array_type_indirect, + nir_deref_array_type_wildcard, +} nir_deref_array_type; + +typedef struct { + nir_deref deref; + + nir_deref_array_type deref_array_type; + unsigned base_offset; /* constant part of the array offset */ + nir_src indirect; /* added to base_offset when the deref is indirect */ +} nir_deref_array; + +typedef struct { + nir_deref deref; + + unsigned index; +} nir_deref_struct; + +NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref) +NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref) +NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref) + +typedef struct { + nir_instr instr; + + unsigned num_params; + nir_deref_var **params; /* presumably num_params entries - TODO confirm */ + nir_deref_var *return_deref; + + struct nir_function_overload *callee; +} nir_call_instr; +/* The two macros below turn nir_intrinsics.h (presumably a list of + * INTRINSIC()/LAST_INTRINSIC() invocations) into the enumerators of + * nir_intrinsic_op: only the name argument is used here, and both macros + * are undefined again right after the enum. + */ +#define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \ + num_variables, num_indices, flags) \ + nir_intrinsic_##name, + +#define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name, + +typedef enum { +#include "nir_intrinsics.h" + nir_num_intrinsics = nir_last_intrinsic + 1 +} nir_intrinsic_op; + +#undef INTRINSIC +#undef LAST_INTRINSIC + +/** Represents an intrinsic + * + * An intrinsic is an instruction type for handling things that are + * more-or-less regular operations but don't just consume and produce SSA + * values like ALU operations do. Intrinsics are not for things that have + * special semantic meaning such as phi nodes and parallel copies. + * Examples of intrinsics include variable load/store operations, system + * value loads, and the like. Even though texturing more-or-less falls + * under this category, texturing is its own instruction type because + * trying to represent texturing with intrinsics would lead to a + * combinatorial explosion of intrinsic opcodes. + * + * By having a single instruction type for handling a lot of different + * cases, optimization passes can look for intrinsics and, for the most + * part, completely ignore them.
Each intrinsic type also has a few + * possible flags that govern whether or not they can be reordered or + * eliminated. That way passes like dead code elimination can still work + * on intrinsics without understanding the meaning of each. + * + * Each intrinsic has some number of constant indices, some number of + * variables, and some number of sources. What these sources, variables, + * and indices mean depends on the intrinsic and is documented with the + * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture + * instructions are the only types of instruction that can operate on + * variables. + */ +typedef struct { + nir_instr instr; + + nir_intrinsic_op intrinsic; + + nir_dest dest; + + /** number of components if this is a vectorized intrinsic + * + * Similarly to ALU operations, some intrinsics are vectorized. + * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0. + * For vectorized intrinsics, the num_components field specifies the + * number of destination components and the number of source components + * for all sources with nir_intrinsic_infos.src_components[i] == 0. + */ + uint8_t num_components; + + int const_index[3]; + + nir_deref_var *variables[2]; + + nir_src src[]; +} nir_intrinsic_instr; + +/** + * \name NIR intrinsics semantic flags + * + * information about what the compiler can do with the intrinsics. + * + * \sa nir_intrinsic_info::flags + */ +typedef enum { + /** + * whether the intrinsic can be safely eliminated if none of its output + * values are being used. + */ + NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0), + + /** + * Whether the intrinsic can be reordered with respect to any other + * intrinsic, i.e. whether the only reordering dependencies of the + * intrinsic are due to the register reads/writes.
+ */ + NIR_INTRINSIC_CAN_REORDER = (1 << 1), +} nir_intrinsic_semantic_flag; + +#define NIR_INTRINSIC_MAX_INPUTS 4 + +typedef struct { + const char *name; + + unsigned num_srcs; /** < number of register/SSA inputs */ + + /** number of components of each input register + * + * If this value is 0, the number of components is given by the + * num_components field of nir_intrinsic_instr. + */ + unsigned src_components[NIR_INTRINSIC_MAX_INPUTS]; + + bool has_dest; + + /** number of components of the output register + * + * If this value is 0, the number of components is given by the + * num_components field of nir_intrinsic_instr. + */ + unsigned dest_components; + + /** the number of inputs/outputs that are variables */ + unsigned num_variables; + + /** the number of constant indices used by the intrinsic */ + unsigned num_indices; + + /** semantic flags for calls to this intrinsic */ + nir_intrinsic_semantic_flag flags; +} nir_intrinsic_info; + +extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; + +/** + * \group texture information + * + * This gives semantic information about textures which is useful to the + * frontend, the backend, and lowering passes, but not the optimizer.
+ */ + +typedef enum { + nir_tex_src_coord, + nir_tex_src_projector, + nir_tex_src_comparitor, /* shadow comparitor */ + nir_tex_src_offset, + nir_tex_src_bias, + nir_tex_src_lod, + nir_tex_src_ms_index, /* MSAA sample index */ + nir_tex_src_ddx, + nir_tex_src_ddy, + nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ + nir_num_tex_src_types +} nir_tex_src_type; + +typedef struct { + nir_src src; + nir_tex_src_type src_type; +} nir_tex_src; + +typedef enum { + nir_texop_tex, /**< Regular texture look-up */ + nir_texop_txb, /**< Texture look-up with LOD bias */ + nir_texop_txl, /**< Texture look-up with explicit LOD */ + nir_texop_txd, /**< Texture look-up with partial derivatives */ + nir_texop_txf, /**< Texel fetch with explicit LOD */ + nir_texop_txf_ms, /**< Multisample texture fetch */ + nir_texop_txs, /**< Texture size */ + nir_texop_lod, /**< Texture lod query */ + nir_texop_tg4, /**< Texture gather */ + nir_texop_query_levels /**< Texture levels query */ +} nir_texop; + +typedef struct { + nir_instr instr; + + enum glsl_sampler_dim sampler_dim; + nir_alu_type dest_type; + + nir_texop op; + nir_dest dest; + nir_tex_src *src; + unsigned num_srcs, coord_components; + bool is_array, is_shadow; + + /** + * If is_shadow is true, whether this is the old-style shadow that outputs 4 + * components or the new-style shadow that outputs 1 component. + */ + bool is_new_style_shadow; + + /* constant offset - must be 0 if the offset source is used */ + int const_offset[4]; + + /* gather component selector */ + unsigned component : 2; + + /** The sampler index + * + * If this texture instruction has a nir_tex_src_sampler_offset source, + * then the sampler index is given by sampler_index + sampler_offset.
+ */ + unsigned sampler_index; + + /** The size of the sampler array or 0 if it's not an array */ + unsigned sampler_array_size; + + nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */ +} nir_tex_instr; + +/** Number of components a texture instruction writes to its destination. + * + * txs yields 1, 2 or 3 size components depending on sampler_dim, plus one + * more when is_array is set. lod yields 2, query_levels yields 1, a + * new-style shadow look-up yields 1, and every other case yields 4. + */ +static inline unsigned +nir_tex_instr_dest_size(nir_tex_instr *instr) +{ + if (instr->op == nir_texop_txs) { + unsigned ret; + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + ret = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_CUBE: + case GLSL_SAMPLER_DIM_MS: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + ret = 2; + break; + case GLSL_SAMPLER_DIM_3D: + ret = 3; + break; + default: + unreachable("not reached"); + } + if (instr->is_array) + ret++; + return ret; + } + + /* GLSL textureQueryLod() returns a vec2 */ + if (instr->op == nir_texop_lod) + return 2; + + /* GLSL textureQueryLevels() returns a single int, not two components */ + if (instr->op == nir_texop_query_levels) + return 1; + + if (instr->is_shadow && instr->is_new_style_shadow) + return 1; + + return 4; +} + +/** Number of components of source number src of a texture instruction. + * + * The coordinate has coord_components components; offset, ddx and ddy have + * coord_components as well, or one fewer when is_array is set; every other + * source type has a single component. + */ +static inline unsigned +nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src) +{ + if (instr->src[src].src_type == nir_tex_src_coord) + return instr->coord_components; + + + if (instr->src[src].src_type == nir_tex_src_offset || + instr->src[src].src_type == nir_tex_src_ddx || + instr->src[src].src_type == nir_tex_src_ddy) { + if (instr->is_array) + return instr->coord_components - 1; + else + return instr->coord_components; + } + + return 1; +} + +/** Index in instr->src of the first source of the given type, or -1 if the + * instruction has no such source. + */ +static inline int +nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type) +{ + for (unsigned i = 0; i < instr->num_srcs; i++) + if (instr->src[i].src_type == type) + return (int) i; + + return -1; +} + +typedef struct { + union { + float f[4]; + int32_t i[4]; + uint32_t u[4]; + }; +} nir_const_value; + +typedef struct { + nir_instr instr; + + nir_const_value value; + + nir_ssa_def def; +} nir_load_const_instr; + +typedef enum { + nir_jump_return, + nir_jump_break, + nir_jump_continue, +} nir_jump_type; + +typedef struct { + nir_instr instr; + nir_jump_type type; +} nir_jump_instr; + +/* creates a new
SSA variable in an undefined state */ + +typedef struct { + nir_instr instr; + nir_ssa_def def; +} nir_ssa_undef_instr; +/* One (predecessor block, source) pair of a nir_phi_instr; linked into + * nir_phi_instr::srcs through node. + */ +typedef struct { + struct exec_node node; + + /* The predecessor block corresponding to this source */ + struct nir_block *pred; + + nir_src src; +} nir_phi_src; +/* Iterate over the nir_phi_src elements of phi->srcs, naming each one entry */ +#define nir_foreach_phi_src(phi, entry) \ + foreach_list_typed(nir_phi_src, entry, node, &(phi)->srcs) + +typedef struct { + nir_instr instr; + + struct exec_list srcs; /** < list of nir_phi_src */ + + nir_dest dest; +} nir_phi_instr; +/* One src -> dest copy belonging to a nir_parallel_copy_instr */ +typedef struct { + struct exec_node node; + nir_src src; + nir_dest dest; +} nir_parallel_copy_entry; +/* Iterate over the nir_parallel_copy_entry elements of pcopy->entries */ +#define nir_foreach_parallel_copy_entry(pcopy, entry) \ + foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries) + +typedef struct { + nir_instr instr; + + /* A list of nir_parallel_copy_entry's. The sources of all of the + * entries are copied to the corresponding destinations "in parallel". + * In other words, if we have two entries: a -> b and b -> a, the values + * get swapped. + */ + struct exec_list entries; +} nir_parallel_copy_instr; +/* Casts from the embedded nir_instr to the instruction struct containing it. + * NOTE(review): the generated functions do not look at nir_instr::type, so + * callers presumably have to check it first - confirm. + */ +NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr) +NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr) +NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr) +NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr) +NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr) +NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr) +NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr) +NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr) +NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, + nir_parallel_copy_instr, instr) + +/* + * Control flow + * + * Control flow consists of a tree of control flow nodes, which include + * if-statements and loops.
The leaves of the tree are basic blocks, lists of + * instructions that always run start-to-finish. Each basic block also keeps + * track of its successors (blocks which may run immediately after the current + * block) and predecessors (blocks which could have run immediately before the + * current block). Each function also has a start block and an end block which + * all return statements point to (which is always empty). Together, all the + * blocks with their predecessors and successors make up the control flow + * graph (CFG) of the function. There are helpers that modify the tree of + * control flow nodes while modifying the CFG appropriately; these should be + * used instead of modifying the tree directly. + */ + +typedef enum { + nir_cf_node_block, + nir_cf_node_if, + nir_cf_node_loop, + nir_cf_node_function +} nir_cf_node_type; + +typedef struct nir_cf_node { + struct exec_node node; + nir_cf_node_type type; + struct nir_cf_node *parent; +} nir_cf_node; + +typedef struct nir_block { + nir_cf_node cf_node; + + struct exec_list instr_list; /** < list of nir_instr */ + + /** generic block index; generated by nir_index_blocks */ + unsigned index; + + /* + * Each block can only have up to 2 successors, so we put them in a simple + * array - no need for anything more complicated. + */ + struct nir_block *successors[2]; + + /* Set of nir_block predecessors in the CFG */ + struct set *predecessors; + + /* + * this node's immediate dominator in the dominance tree - set to NULL for + * the start block. 
+ */ + struct nir_block *imm_dom; + + /* This node's children in the dominance tree */ + unsigned num_dom_children; + struct nir_block **dom_children; + + /* Set of nir_block's on the dominance frontier of this block */ + struct set *dom_frontier; + + /* + * These two indices have the property that dom_{pre,post}_index for each + * child of this block in the dominance tree will always be between + * dom_pre_index and dom_post_index for this block, which makes testing if + * a given block is dominated by another block an O(1) operation. + */ + unsigned dom_pre_index, dom_post_index; + + /* live in and out for this block; used for liveness analysis */ + BITSET_WORD *live_in; + BITSET_WORD *live_out; +} nir_block; + +static inline nir_instr * +nir_block_first_instr(nir_block *block) +{ + struct exec_node *head = exec_list_get_head(&block->instr_list); + return exec_node_data(nir_instr, head, node); +} + +static inline nir_instr * +nir_block_last_instr(nir_block *block) +{ + struct exec_node *tail = exec_list_get_tail(&block->instr_list); + return exec_node_data(nir_instr, tail, node); +} + +#define nir_foreach_instr(block, instr) \ + foreach_list_typed(nir_instr, instr, node, &(block)->instr_list) +#define nir_foreach_instr_reverse(block, instr) \ + foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) +#define nir_foreach_instr_safe(block, instr) \ + foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) + +typedef struct { + nir_cf_node cf_node; + nir_src condition; + + struct exec_list then_list; /** < list of nir_cf_node */ + struct exec_list else_list; /** < list of nir_cf_node */ +} nir_if; + +static inline nir_cf_node * +nir_if_first_then_node(nir_if *if_stmt) +{ + struct exec_node *head = exec_list_get_head(&if_stmt->then_list); + return exec_node_data(nir_cf_node, head, node); +} + +static inline nir_cf_node * +nir_if_last_then_node(nir_if *if_stmt) +{ + struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); + 
return exec_node_data(nir_cf_node, tail, node); +} + +static inline nir_cf_node * +nir_if_first_else_node(nir_if *if_stmt) +{ + struct exec_node *head = exec_list_get_head(&if_stmt->else_list); + return exec_node_data(nir_cf_node, head, node); +} + +static inline nir_cf_node * +nir_if_last_else_node(nir_if *if_stmt) +{ + struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); + return exec_node_data(nir_cf_node, tail, node); +} + +typedef struct { + nir_cf_node cf_node; + + struct exec_list body; /** < list of nir_cf_node */ +} nir_loop; + +static inline nir_cf_node * +nir_loop_first_cf_node(nir_loop *loop) +{ + return exec_node_data(nir_cf_node, exec_list_get_head(&loop->body), node); +} + +static inline nir_cf_node * +nir_loop_last_cf_node(nir_loop *loop) +{ + return exec_node_data(nir_cf_node, exec_list_get_tail(&loop->body), node); +} + +/** + * Various bits of metadata that may be created or required by + * optimization and analysis passes + */ +typedef enum { + nir_metadata_none = 0x0, + nir_metadata_block_index = 0x1, + nir_metadata_dominance = 0x2, + nir_metadata_live_variables = 0x4, +} nir_metadata; + +typedef struct { + nir_cf_node cf_node; + + /** pointer to the overload of which this is an implementation */ + struct nir_function_overload *overload; + + struct exec_list body; /** < list of nir_cf_node */ + + nir_block *start_block, *end_block; + + /** list for all local variables in the function */ + struct exec_list locals; + + /** array of variables used as parameters */ + unsigned num_params; + nir_variable **params; + + /** variable used to hold the result of the function */ + nir_variable *return_var; + + /** list of local registers in the function */ + struct exec_list registers; + + /** next available local register index */ + unsigned reg_alloc; + + /** next available SSA value index */ + unsigned ssa_alloc; + + /* total number of basic blocks, only valid when block_index_dirty = false (NOTE(review): no block_index_dirty field is visible in this struct; presumably this now means nir_metadata_block_index is set in valid_metadata -- confirm) */ + unsigned num_blocks; + + nir_metadata 
valid_metadata; +} nir_function_impl; + +static inline nir_cf_node * +nir_cf_node_next(nir_cf_node *node) +{ + struct exec_node *next = exec_node_get_next(&node->node); + if (exec_node_is_tail_sentinel(next)) + return NULL; + else + return exec_node_data(nir_cf_node, next, node); +} + +static inline nir_cf_node * +nir_cf_node_prev(nir_cf_node *node) +{ + struct exec_node *prev = exec_node_get_prev(&node->node); + if (exec_node_is_head_sentinel(prev)) + return NULL; + else + return exec_node_data(nir_cf_node, prev, node); +} + +static inline bool +nir_cf_node_is_first(const nir_cf_node *node) +{ + return exec_node_is_head_sentinel(node->node.prev); +} + +static inline bool +nir_cf_node_is_last(const nir_cf_node *node) +{ + return exec_node_is_tail_sentinel(node->node.next); +} + +NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node) +NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node) +NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node) +NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, nir_function_impl, cf_node) + +typedef enum { + nir_parameter_in, + nir_parameter_out, + nir_parameter_inout, +} nir_parameter_type; + +typedef struct { + nir_parameter_type param_type; + const struct glsl_type *type; +} nir_parameter; + +typedef struct nir_function_overload { + struct exec_node node; + + unsigned num_params; + nir_parameter *params; + const struct glsl_type *return_type; + + nir_function_impl *impl; /** < NULL if the overload is only declared yet */ + + /** pointer to the function of which this is an overload */ + struct nir_function *function; +} nir_function_overload; + +typedef struct nir_function { + struct exec_node node; + + struct exec_list overload_list; /** < list of nir_function_overload */ + const char *name; + struct nir_shader *shader; +} nir_function; + +#define nir_function_first_overload(func) \ + exec_node_data(nir_function_overload, \ + exec_list_get_head(&(func)->overload_list), node) + +typedef 
struct nir_shader_compiler_options { + bool lower_ffma; + bool lower_fpow; + bool lower_fsat; + bool lower_fsqrt; + /** lowers fneg and ineg to fsub and isub. */ + bool lower_negate; +} nir_shader_compiler_options; + +typedef struct nir_shader { + /** hash table of name -> uniform nir_variable */ + struct hash_table *uniforms; + + /** hash table of name -> input nir_variable */ + struct hash_table *inputs; + + /** hash table of name -> output nir_variable */ + struct hash_table *outputs; + + /** Set of driver-specific options for the shader. + * + * The memory for the options is expected to be kept in a single static + * copy by the driver. + */ + const struct nir_shader_compiler_options *options; + + /** list of global variables in the shader */ + struct exec_list globals; + + /** list of system value variables in the shader */ + struct exec_list system_values; + + struct exec_list functions; /** < list of nir_function */ + + /** list of global registers in the shader */ + struct exec_list registers; + + /** structures used in this shader */ + unsigned num_user_structures; + struct glsl_type **user_structures; + + /** next available global register index */ + unsigned reg_alloc; + + /** + * the highest index a load_input_*, load_uniform_*, etc. 
intrinsic can + * access plus one + */ + unsigned num_inputs, num_uniforms, num_outputs; +} nir_shader; + +#define nir_foreach_overload(shader, overload) \ + foreach_list_typed(nir_function, func, node, &(shader)->functions) \ + foreach_list_typed(nir_function_overload, overload, node, \ + &(func)->overload_list) + +nir_shader *nir_shader_create(void *mem_ctx, + const nir_shader_compiler_options *options); + +/** creates a register, including assigning it an index and adding it to the list */ +nir_register *nir_global_reg_create(nir_shader *shader); + +nir_register *nir_local_reg_create(nir_function_impl *impl); + +void nir_reg_remove(nir_register *reg); + +/** creates a function and adds it to the shader's list of functions */ +nir_function *nir_function_create(nir_shader *shader, const char *name); + +/** creates a null function returning null */ +nir_function_overload *nir_function_overload_create(nir_function *func); + +nir_function_impl *nir_function_impl_create(nir_function_overload *func); + +nir_block *nir_block_create(void *mem_ctx); +nir_if *nir_if_create(void *mem_ctx); +nir_loop *nir_loop_create(void *mem_ctx); + +nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); + +/** puts a control flow node immediately after another control flow node */ +void nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after); + +/** puts a control flow node immediately before another control flow node */ +void nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before); + +/** puts a control flow node at the beginning of a list from an if, loop, or function */ +void nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node); + +/** puts a control flow node at the end of a list from an if, loop, or function */ +void nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node); + +/** removes a control flow node, doing any cleanup necessary */ +void nir_cf_node_remove(nir_cf_node *node); + +/** requests that the given pieces of metadata 
be generated */ +void nir_metadata_require(nir_function_impl *impl, nir_metadata required); +/** dirties all but the preserved metadata */ +void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); + +/** creates an instruction with default swizzle/writemask/etc. with NULL registers */ +nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op); + +nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type); + +nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx, + unsigned num_components); + +nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx, + nir_intrinsic_op op); + +nir_call_instr *nir_call_instr_create(void *mem_ctx, + nir_function_overload *callee); + +nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs); + +nir_phi_instr *nir_phi_instr_create(void *mem_ctx); + +nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx); + +nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx, + unsigned num_components); + +nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var); +nir_deref_array *nir_deref_array_create(void *mem_ctx); +nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index); + +nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref); + +void nir_instr_insert_before(nir_instr *instr, nir_instr *before); +void nir_instr_insert_after(nir_instr *instr, nir_instr *after); + +void nir_instr_insert_before_block(nir_block *block, nir_instr *before); +void nir_instr_insert_after_block(nir_block *block, nir_instr *after); + +void nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before); +void nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after); + +void nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before); +void nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after); + +void nir_instr_remove(nir_instr *instr); + +typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void 
*state); +typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); +typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); +bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, + void *state); +bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); +bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); + +nir_const_value *nir_src_as_const_value(nir_src src); +bool nir_srcs_equal(nir_src src1, nir_src src2); +void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); + +void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, + unsigned num_components, const char *name); +void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, + unsigned num_components, const char *name); +void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx); + +/* visits basic blocks in source-code order */ +typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state); +bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, + void *state); +bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb, + void *state); + +/* If the following CF node is an if, this function returns that if. + * Otherwise, it returns NULL. 
+ */ +nir_if *nir_block_get_following_if(nir_block *block); + +void nir_index_local_regs(nir_function_impl *impl); +void nir_index_global_regs(nir_shader *shader); +void nir_index_ssa_defs(nir_function_impl *impl); + +void nir_index_blocks(nir_function_impl *impl); + +void nir_print_shader(nir_shader *shader, FILE *fp); +void nir_print_instr(const nir_instr *instr, FILE *fp); + +#ifdef DEBUG +void nir_validate_shader(nir_shader *shader); +#else +static inline void nir_validate_shader(nir_shader *shader) { } +#endif /* DEBUG */ + +void nir_calc_dominance_impl(nir_function_impl *impl); +void nir_calc_dominance(nir_shader *shader); + +nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); +bool nir_block_dominates(nir_block *parent, nir_block *child); + +void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); +void nir_dump_dom_tree(nir_shader *shader, FILE *fp); + +void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp); +void nir_dump_dom_frontier(nir_shader *shader, FILE *fp); + +void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); +void nir_dump_cfg(nir_shader *shader, FILE *fp); + +void nir_split_var_copies(nir_shader *shader); + +void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx); +void nir_lower_var_copies(nir_shader *shader); + +void nir_lower_global_vars_to_local(nir_shader *shader); + +void nir_lower_locals_to_regs(nir_shader *shader); + +void nir_lower_io(nir_shader *shader); + +void nir_lower_vars_to_ssa(nir_shader *shader); + +void nir_remove_dead_variables(nir_shader *shader); + +void nir_lower_vec_to_movs(nir_shader *shader); +void nir_lower_alu_to_scalar(nir_shader *shader); + +void nir_lower_phis_to_scalar(nir_shader *shader); + +void nir_lower_samplers(nir_shader *shader, + struct gl_shader_program *shader_program, + struct gl_program *prog); + +void nir_lower_system_values(nir_shader *shader); + +void nir_lower_atomics(nir_shader *shader); +void nir_lower_to_source_mods(nir_shader *shader); + +void 
nir_live_variables_impl(nir_function_impl *impl); +bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); + +void nir_convert_to_ssa_impl(nir_function_impl *impl); +void nir_convert_to_ssa(nir_shader *shader); +void nir_convert_from_ssa(nir_shader *shader); + +bool nir_opt_algebraic(nir_shader *shader); +bool nir_opt_constant_folding(nir_shader *shader); + +bool nir_opt_global_to_local(nir_shader *shader); + +bool nir_copy_prop_impl(nir_function_impl *impl); +bool nir_copy_prop(nir_shader *shader); + +bool nir_opt_cse(nir_shader *shader); + +bool nir_opt_dce_impl(nir_function_impl *impl); +bool nir_opt_dce(nir_shader *shader); + +void nir_opt_gcm(nir_shader *shader); + +bool nir_opt_peephole_select(nir_shader *shader); +bool nir_opt_peephole_ffma(nir_shader *shader); + +bool nir_opt_remove_phis(nir_shader *shader); + +#ifdef __cplusplus +} /* extern "C" */ +#endif diff --git a/mesalib/src/glsl/nir/nir_algebraic.py b/mesalib/src/glsl/nir/nir_algebraic.py new file mode 100644 index 000000000..afab1a008 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_algebraic.py @@ -0,0 +1,300 @@ +#! /usr/bin/env python +# +# Copyright (C) 2014 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +# Authors: +# Jason Ekstrand (jason@jlekstrand.net) + +import itertools +import struct +import sys +import mako.template +import re + +# Represents a set of variables, each with a unique id +class VarSet(object): + def __init__(self): + self.names = {} + self.ids = itertools.count() + self.immutable = False; + + def __getitem__(self, name): + if name not in self.names: + assert not self.immutable, "Unknown replacement variable: " + name + self.names[name] = self.ids.next() + + return self.names[name] + + def lock(self): + self.immutable = True + +class Value(object): + @staticmethod + def create(val, name_base, varset): + if isinstance(val, tuple): + return Expression(val, name_base, varset) + elif isinstance(val, Expression): + return val + elif isinstance(val, (str, unicode)): + return Variable(val, name_base, varset) + elif isinstance(val, (bool, int, long, float)): + return Constant(val, name_base) + + __template = mako.template.Template(""" +static const ${val.c_type} ${val.name} = { + { ${val.type_enum} }, +% if isinstance(val, Constant): + { ${hex(val)} /* ${val.value} */ }, +% elif isinstance(val, Variable): + ${val.index}, /* ${val.var_name} */ + ${'true' if val.is_constant else 'false'}, + nir_type_${ val.required_type or 'invalid' }, +% elif isinstance(val, Expression): + nir_op_${val.opcode}, + { ${', '.join(src.c_ptr for src in val.sources)} }, +% endif +};""") + + def __init__(self, name, type_str): + self.name = name + self.type_str = type_str + + @property + def 
type_enum(self): + return "nir_search_value_" + self.type_str + + @property + def c_type(self): + return "nir_search_" + self.type_str + + @property + def c_ptr(self): + return "&{0}.value".format(self.name) + + def render(self): + return self.__template.render(val=self, + Constant=Constant, + Variable=Variable, + Expression=Expression) + +class Constant(Value): + def __init__(self, val, name): + Value.__init__(self, name, "constant") + self.value = val + + def __hex__(self): + # Even if it's an integer, we still need to unpack as an unsigned + # int. This is because, without C99, we can only assign to the first + # element of a union in an initializer. + if isinstance(self.value, (bool)): + return 'NIR_TRUE' if self.value else 'NIR_FALSE' + if isinstance(self.value, (int, long)): + return hex(struct.unpack('I', struct.pack('i', self.value))[0]) + elif isinstance(self.value, float): + return hex(struct.unpack('I', struct.pack('f', self.value))[0]) + else: + assert False + +_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?") + +class Variable(Value): + def __init__(self, val, name, varset): + Value.__init__(self, name, "variable") + + m = _var_name_re.match(val) + assert m and m.group('name') is not None + + self.var_name = m.group('name') + self.is_constant = m.group('const') is not None + self.required_type = m.group('type') + + if self.required_type is not None: + assert self.required_type in ('float', 'bool', 'int', 'unsigned') + + self.index = varset[self.var_name] + +class Expression(Value): + def __init__(self, expr, name_base, varset): + Value.__init__(self, name_base, "expression") + assert isinstance(expr, tuple) + + self.opcode = expr[0] + self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset) + for (i, src) in enumerate(expr[1:]) ] + + def render(self): + srcs = "\n".join(src.render() for src in self.sources) + return srcs + super(Expression, self).render() + +_optimization_ids = itertools.count() + 
+condition_list = ['true'] + +class SearchAndReplace(object): + def __init__(self, transform): + self.id = _optimization_ids.next() + + search = transform[0] + replace = transform[1] + if len(transform) > 2: + self.condition = transform[2] + else: + self.condition = 'true' + + if self.condition not in condition_list: + condition_list.append(self.condition) + self.condition_index = condition_list.index(self.condition) + + varset = VarSet() + if isinstance(search, Expression): + self.search = search + else: + self.search = Expression(search, "search{0}".format(self.id), varset) + + varset.lock() + + if isinstance(replace, Value): + self.replace = replace + else: + self.replace = Value.create(replace, "replace{0}".format(self.id), varset) + +_algebraic_pass_template = mako.template.Template(""" +#include "nir.h" +#include "nir_search.h" + +struct transform { + const nir_search_expression *search; + const nir_search_value *replace; + unsigned condition_offset; +}; + +% for (opcode, xform_list) in xform_dict.iteritems(): +% for xform in xform_list: + ${xform.search.render()} + ${xform.replace.render()} +% endfor + +static const struct transform ${pass_name}_${opcode}_xforms[] = { +% for xform in xform_list: + { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} }, +% endfor +}; +% endfor + +struct opt_state { + void *mem_ctx; + bool progress; + const bool *condition_flags; +}; + +static bool +${pass_name}_block(nir_block *block, void *void_state) +{ + struct opt_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (!alu->dest.dest.is_ssa) + continue; + + switch (alu->op) { + % for opcode in xform_dict.keys(): + case nir_op_${opcode}: + for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) { + const struct transform *xform = &${pass_name}_${opcode}_xforms[i]; + if 
(state->condition_flags[xform->condition_offset] && + nir_replace_instr(alu, xform->search, xform->replace, + state->mem_ctx)) { + state->progress = true; + break; + } + } + break; + % endfor + default: + break; + } + } + + return true; +} + +static bool +${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags) +{ + struct opt_state state; + + state.mem_ctx = ralloc_parent(impl); + state.progress = false; + state.condition_flags = condition_flags; + + nir_foreach_block(impl, ${pass_name}_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return state.progress; +} + + +bool +${pass_name}(nir_shader *shader) +{ + bool progress = false; + bool condition_flags[${len(condition_list)}]; + const nir_shader_compiler_options *options = shader->options; + + % for index, condition in enumerate(condition_list): + condition_flags[${index}] = ${condition}; + % endfor + + nir_foreach_overload(shader, overload) { + if (overload->impl) + progress |= ${pass_name}_impl(overload->impl, condition_flags); + } + + return progress; +} +""") + +class AlgebraicPass(object): + def __init__(self, pass_name, transforms): + self.xform_dict = {} + self.pass_name = pass_name + + for xform in transforms: + if not isinstance(xform, SearchAndReplace): + xform = SearchAndReplace(xform) + + if xform.search.opcode not in self.xform_dict: + self.xform_dict[xform.search.opcode] = [] + + self.xform_dict[xform.search.opcode].append(xform) + + def render(self): + return _algebraic_pass_template.render(pass_name=self.pass_name, + xform_dict=self.xform_dict, + condition_list=condition_list) diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h new file mode 100644 index 000000000..7c4f7fd96 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_builder.h @@ -0,0 +1,130 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy 
of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef NIR_BUILDER_H +#define NIR_BUILDER_H + +struct exec_list; + +typedef struct nir_builder { + struct exec_list *cf_node_list; + nir_shader *shader; + nir_function_impl *impl; +} nir_builder; + +static inline void +nir_builder_init(nir_builder *build, nir_function_impl *impl) +{ + memset(build, 0, sizeof(*build)); + build->impl = impl; + build->shader = impl->overload->function->shader; +} + +static inline void +nir_builder_insert_after_cf_list(nir_builder *build, + struct exec_list *cf_node_list) +{ + build->cf_node_list = cf_node_list; +} + + +static inline nir_ssa_def * +nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, + nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) +{ + const nir_op_info *op_info = &nir_op_infos[op]; + nir_alu_instr *instr = nir_alu_instr_create(build->shader, op); + if (!instr) + return NULL; + + instr->src[0].src = nir_src_for_ssa(src0); + if (src1) + instr->src[1].src = nir_src_for_ssa(src1); + if (src2) + instr->src[2].src = nir_src_for_ssa(src2); + if (src3) + instr->src[3].src = nir_src_for_ssa(src3); + + /* Guess the number of components the destination temporary should have + * based on our input sizes, if it's not fixed for the op. + */ + unsigned num_components = op_info->output_size; + if (num_components == 0) { + for (unsigned i = 0; i < op_info->num_inputs; i++) { + if (op_info->input_sizes[i] == 0) + num_components = MAX2(num_components, + instr->src[i].src.ssa->num_components); + } + } + assert(num_components != 0); + + /* Make sure we don't swizzle from outside of our source vector (like if a + * scalar value was passed into a multiply with a vector). 
+ */ + for (unsigned i = 0; i < op_info->num_inputs; i++) { + for (unsigned j = instr->src[i].src.ssa->num_components; j < 4; j++) { + instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1; + } + } + + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); + instr->dest.write_mask = (1 << num_components) - 1; + + nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr); + + return &instr->dest.dest.ssa; +} + +#define ALU1(op) \ +static inline nir_ssa_def * \ +nir_##op(nir_builder *build, nir_ssa_def *src0) \ +{ \ + return nir_build_alu(build, nir_op_##op, src0, NULL, NULL, NULL); \ +} + +#define ALU2(op) \ +static inline nir_ssa_def * \ +nir_##op(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) \ +{ \ + return nir_build_alu(build, nir_op_##op, src0, src1, NULL, NULL); \ +} + +#define ALU3(op) \ +static inline nir_ssa_def * \ +nir_##op(nir_builder *build, nir_ssa_def *src0, \ + nir_ssa_def *src1, nir_ssa_def *src2) \ +{ \ + return nir_build_alu(build, nir_op_##op, src0, src1, src2, NULL); \ +} + +#define ALU4(op) \ +static inline nir_ssa_def * \ +nir_##op(nir_builder *build, nir_ssa_def *src0, \ + nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) \ +{ \ + return nir_build_alu(build, nir_op_##op, src0, src1, src2, src3); \ +} + +#include "nir_builder_opcodes.h" + +#endif /* NIR_BUILDER_H */ diff --git a/mesalib/src/glsl/nir/nir_builder_opcodes_h.py b/mesalib/src/glsl/nir/nir_builder_opcodes_h.py new file mode 100644 index 000000000..e27206ea8 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_builder_opcodes_h.py @@ -0,0 +1,38 @@ +#! 
/usr/bin/env python + +template = """\ +/* Copyright (C) 2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef _NIR_BUILDER_OPCODES_ +#define _NIR_BUILDER_OPCODES_ + +% for name, opcode in sorted(opcodes.iteritems()): +ALU${opcode.num_inputs}(${name}); +% endfor + +#endif /* _NIR_BUILDER_OPCODES_ */""" + +from nir_opcodes import opcodes +from mako.template import Template + +print Template(template).render(opcodes=opcodes) diff --git a/mesalib/src/glsl/nir/nir_constant_expressions.h b/mesalib/src/glsl/nir/nir_constant_expressions.h new file mode 100644 index 000000000..97997f2e5 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_constant_expressions.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, + nir_const_value *src); diff --git a/mesalib/src/glsl/nir/nir_constant_expressions.py b/mesalib/src/glsl/nir/nir_constant_expressions.py new file mode 100644 index 000000000..22bc4f095 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_constant_expressions.py @@ -0,0 +1,352 @@ +#! /usr/bin/python2 +template = """\ +/* + * Copyright (C) 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + */ + +#include <math.h> +#include "main/core.h" +#include "nir_constant_expressions.h" + +#if defined(_MSC_VER) && (_MSC_VER < 1800) +static int isnormal(double x) +{ + return _fpclass(x) == _FPCLASS_NN || _fpclass(x) == _FPCLASS_PN; +} +#elif defined(__SUNPRO_CC) +#include <ieeefp.h> +static int isnormal(double x) +{ + return fpclass(x) == FP_NORMAL; +} +#endif + +#if defined(_MSC_VER) +static double copysign(double x, double y) +{ + return _copysign(x, y); +} +#endif + +/** + * Evaluate one component of packSnorm4x8. + */ +static uint8_t +pack_snorm_1x8(float x) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packSnorm4x8 + * ------------ + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + * + * We must first cast the float to an int, because casting a negative + * float to a uint is undefined. + */ + return (uint8_t) (int8_t) + _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 127.0f); +} + +/** + * Evaluate one component of packSnorm2x16. + */ +static uint16_t +pack_snorm_1x16(float x) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packSnorm2x16 + * ------------- + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) + * + * We must first cast the float to an int, because casting a negative + * float to a uint is undefined. + */ + return (uint16_t) (int16_t) + _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f); +} + +/** + * Evaluate one component of unpackSnorm4x8. 
+ */ +static float +unpack_snorm_1x8(uint8_t u) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackSnorm4x8 + * -------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm4x8: clamp(f / 127.0, -1, +1) + */ + return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); +} + +/** + * Evaluate one component of unpackSnorm2x16. + */ +static float +unpack_snorm_1x16(uint16_t u) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackSnorm2x16 + * --------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm2x16: clamp(f / 32767.0, -1, +1) + */ + return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f); +} + +/** + * Evaluate one component packUnorm4x8. + */ +static uint8_t +pack_unorm_1x8(float x) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packUnorm4x8 + * ------------ + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) + */ + return (uint8_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 255.0f); +} + +/** + * Evaluate one component packUnorm2x16. + */ +static uint16_t +pack_unorm_1x16(float x) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packUnorm2x16 + * ------------- + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) + */ + return (uint16_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f); +} + +/** + * Evaluate one component of unpackUnorm4x8. + */ +static float +unpack_unorm_1x8(uint8_t u) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackUnorm4x8 + * -------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm4x8: f / 255.0 + */ + return (float) u / 255.0f; +} + +/** + * Evaluate one component of unpackUnorm2x16. 
+ */ +static float +unpack_unorm_1x16(uint16_t u) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackUnorm2x16 + * --------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm2x16: f / 65535.0 + */ + return (float) u / 65535.0f; +} + +/** + * Evaluate one component of packHalf2x16. + */ +static uint16_t +pack_half_1x16(float x) +{ + return _mesa_float_to_half(x); +} + +/** + * Evaluate one component of unpackHalf2x16. + */ +static float +unpack_half_1x16(uint16_t u) +{ + return _mesa_half_to_float(u); +} + +/* Some typed vector structures to make things like src0.y work */ +% for type in ["float", "int", "unsigned", "bool"]: +struct ${type}_vec { + ${type} x; + ${type} y; + ${type} z; + ${type} w; +}; +% endfor + +% for name, op in sorted(opcodes.iteritems()): +static nir_const_value +evaluate_${name}(unsigned num_components, nir_const_value *_src) +{ + nir_const_value _dst_val = { { {0, 0, 0, 0} } }; + + ## For each non-per-component input, create a variable srcN that + ## contains x, y, z, and w elements which are filled in with the + ## appropriately-typed values. + % for j in range(op.num_inputs): + % if op.input_sizes[j] == 0: + <% continue %> + % elif "src" + str(j) not in op.const_expr: + ## Avoid unused variable warnings + <% continue %> + %endif + + struct ${op.input_types[j]}_vec src${j} = { + % for k in range(op.input_sizes[j]): + % if op.input_types[j] == "bool": + _src[${j}].u[${k}] != 0, + % else: + _src[${j}].${op.input_types[j][:1]}[${k}], + % endif + % endfor + }; + % endfor + + % if op.output_size == 0: + ## For per-component instructions, we need to iterate over the + ## components and apply the constant expression one component + ## at a time. + for (unsigned _i = 0; _i < num_components; _i++) { + ## For each per-component input, create a variable srcN that + ## contains the value of the current (_i'th) component. 
+ % for j in range(op.num_inputs): + % if op.input_sizes[j] != 0: + <% continue %> + % elif "src" + str(j) not in op.const_expr: + ## Avoid unused variable warnings + <% continue %> + % elif op.input_types[j] == "bool": + bool src${j} = _src[${j}].u[_i] != 0; + % else: + ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i]; + % endif + % endfor + + ## Create an appropriately-typed variable dst and assign the + ## result of the const_expr to it. If const_expr already contains + ## writes to dst, just include const_expr directly. + % if "dst" in op.const_expr: + ${op.output_type} dst; + ${op.const_expr} + % else: + ${op.output_type} dst = ${op.const_expr}; + % endif + + ## Store the current component of the actual destination to the + ## value of dst. + % if op.output_type == "bool": + ## Sanitize the C value to a proper NIR bool + _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; + % else: + _dst_val.${op.output_type[:1]}[_i] = dst; + % endif + } + % else: + ## In the non-per-component case, create a struct dst with + ## appropriately-typed elements x, y, z, and w and assign the result + ## of the const_expr to all components of dst, or include the + ## const_expr directly if it writes to dst already. + struct ${op.output_type}_vec dst; + + % if "dst" in op.const_expr: + ${op.const_expr} + % else: + ## Splat the value to all components. This way expressions which + ## write the same value to all components don't need to explicitly + ## write to dest. One such example is fnoise which has a + ## const_expr of 0.0f. + dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; + % endif + + ## For each component in the destination, copy the value of dst to + ## the actual destination. + % for k in range(op.output_size): + % if op.output_type == "bool": + ## Sanitize the C value to a proper NIR bool + _dst_val.u[${k}] = dst.${"xyzw"[k]} ? 
NIR_TRUE : NIR_FALSE; + % else: + _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]}; + % endif + % endfor + % endif + + return _dst_val; +} +% endfor + +nir_const_value +nir_eval_const_opcode(nir_op op, unsigned num_components, + nir_const_value *src) +{ + switch (op) { +% for name in sorted(opcodes.iterkeys()): + case nir_op_${name}: { + return evaluate_${name}(num_components, src); + break; + } +% endfor + default: + unreachable("shouldn't get here"); + } +}""" + +from nir_opcodes import opcodes +from mako.template import Template + +print Template(template).render(opcodes=opcodes) diff --git a/mesalib/src/glsl/nir/nir_dominance.c b/mesalib/src/glsl/nir/nir_dominance.c new file mode 100644 index 000000000..2f50db1c1 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_dominance.c @@ -0,0 +1,349 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +/* + * Implements the algorithms for computing the dominance tree and the + * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper, + * Harvey, and Kennedy. + */ +/* State handed to the per-block dominance callbacks: the function being + * analyzed and a flag that calc_dominance_cb sets whenever it changes an + * immediate dominator. + */ +typedef struct { + nir_function_impl *impl; + bool progress; +} dom_state; +/* Block callback that resets dominance data: the start block becomes its own + * immediate dominator, every other block gets none, num_dom_children is + * zeroed and the dom_frontier set is emptied. + */ +static bool +init_block_cb(nir_block *block, void *_state) +{ + dom_state *state = (dom_state *) _state; + if (block == state->impl->start_block) + block->imm_dom = block; + else + block->imm_dom = NULL; + block->num_dom_children = 0; + + struct set_entry *entry; + set_foreach(block->dom_frontier, entry) { + _mesa_set_remove(block->dom_frontier, entry); + } + + return true; +} +/* Walks b1 and b2 up their imm_dom chains, always advancing the one with the + * larger index, until both are the same block, and returns that block. + */ +static nir_block * +intersect(nir_block *b1, nir_block *b2) +{ + while (b1 != b2) { + /* + * Note, the comparisons here are the opposite of what the paper says + * because we index blocks from beginning -> end (i.e. reverse + * post-order) instead of post-order like they assume. 
+ */ + while (b1->index > b2->index) + b1 = b1->imm_dom; + while (b2->index > b1->index) + b2 = b2->imm_dom; + } + + return b1; +} +/* Block callback for one sweep of the iteration: intersects every + * predecessor that already has an immediate dominator to obtain the new + * immediate dominator of the block and sets state->progress if it differs + * from the stored one. The start block is skipped. + */ +static bool +calc_dominance_cb(nir_block *block, void *_state) +{ + dom_state *state = (dom_state *) _state; + if (block == state->impl->start_block) + return true; + + nir_block *new_idom = NULL; + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + if (pred->imm_dom) { + if (new_idom) + new_idom = intersect(pred, new_idom); + else + new_idom = pred; + } + } + + assert(new_idom); + if (block->imm_dom != new_idom) { + block->imm_dom = new_idom; + state->progress = true; + } + + return true; +} +/* Block callback: for a block with more than one predecessor, walks up from + * each predecessor along imm_dom until reaching the immediate dominator of + * the block, adding the block to the dom_frontier set of every block visited. + */ +static bool +calc_dom_frontier_cb(nir_block *block, void *state) +{ + (void) state; + + if (block->predecessors->entries > 1) { + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *runner = (nir_block *) entry->key; + while (runner != block->imm_dom) { + _mesa_set_add(runner->dom_frontier, block); + runner = runner->imm_dom; + } + } + } + + return true; +} + +/* + * Compute each node's children in the dominance tree from the immediate + * dominator information. We do this in three stages: + * + * 1. Calculate the number of children each node has + * 2. Allocate arrays, setting the number of children to 0 again + * 3. For each node, add itself to its parent's list of children, using + * num_dom_children as an index - at the end of this step, num_dom_children + * for each node will be the same as it was at the end of step #1. 
+ */ + +static bool +block_count_children(nir_block *block, void *state) +{ + (void) state; + + if (block->imm_dom) + block->imm_dom->num_dom_children++; + + return true; +} + +static bool +block_alloc_children(nir_block *block, void *state) +{ + void *mem_ctx = state; + + block->dom_children = ralloc_array(mem_ctx, nir_block *, + block->num_dom_children); + block->num_dom_children = 0; + + return true; +} + +static bool +block_add_child(nir_block *block, void *state) +{ + (void) state; + + if (block->imm_dom) + block->imm_dom->dom_children[block->imm_dom->num_dom_children++] = block; + + return true; +} +/* Runs the three stages (count, allocate, fill) over all blocks of impl; the + * child arrays are allocated from the ralloc parent of impl. + */ +static void +calc_dom_children(nir_function_impl* impl) +{ + void *mem_ctx = ralloc_parent(impl); + + nir_foreach_block(impl, block_count_children, NULL); + nir_foreach_block(impl, block_alloc_children, mem_ctx); + nir_foreach_block(impl, block_add_child, NULL); +} +/* Depth-first walk of the dominance tree that stamps each block with a + * pre-order and a post-order value, both taken from the counter *index. + */ +static void +calc_dfs_indicies(nir_block *block, unsigned *index) +{ + block->dom_pre_index = (*index)++; + + for (unsigned i = 0; i < block->num_dom_children; i++) + calc_dfs_indicies(block->dom_children[i], index); + + block->dom_post_index = (*index)++; +} +/* Computes dominance information for one function implementation: immediate + * dominators (re-swept until no block changes), dominance frontiers, + * dominance-tree children and the DFS indices read by nir_block_dominates(). + * Returns early if impl->valid_metadata already has nir_metadata_dominance. + */ +void +nir_calc_dominance_impl(nir_function_impl *impl) +{ + if (impl->valid_metadata & nir_metadata_dominance) + return; + + nir_metadata_require(impl, nir_metadata_block_index); + + dom_state state; + state.impl = impl; + state.progress = true; + + nir_foreach_block(impl, init_block_cb, &state); + + while (state.progress) { + state.progress = false; + nir_foreach_block(impl, calc_dominance_cb, &state); + } + + nir_foreach_block(impl, calc_dom_frontier_cb, &state); + + impl->start_block->imm_dom = NULL; + + calc_dom_children(impl); + + unsigned dfs_index = 0; + calc_dfs_indicies(impl->start_block, &dfs_index); +} +/* Runs nir_calc_dominance_impl() on every overload of shader that has an + * implementation. + */ +void +nir_calc_dominance(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_calc_dominance_impl(overload->impl); + } +} + +/** + * Computes the least common ancestor of two blocks. 
If one of the blocks + * is null, the other block is returned. + */ +nir_block * +nir_dominance_lca(nir_block *b1, nir_block *b2) +{ + if (b1 == NULL) + return b2; + + if (b2 == NULL) + return b1; + + assert(nir_cf_node_get_function(&b1->cf_node) == + nir_cf_node_get_function(&b2->cf_node)); + + assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata & + nir_metadata_dominance); + + return intersect(b1, b2); +} + +/** + * Returns true if parent dominates child, i.e. if the interval + * [dom_pre_index, dom_post_index] of child lies within that of parent. + * Both blocks must belong to the same function and that function must have + * valid dominance metadata (asserted). + */ +bool +nir_block_dominates(nir_block *parent, nir_block *child) +{ + assert(nir_cf_node_get_function(&parent->cf_node) == + nir_cf_node_get_function(&child->cf_node)); + + assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata & + nir_metadata_dominance); + + return child->dom_pre_index >= parent->dom_pre_index && + child->dom_post_index <= parent->dom_post_index; +} + +static bool +dump_block_dom(nir_block *block, void *state) +{ + FILE *fp = state; + if (block->imm_dom) + fprintf(fp, "\t%u -> %u\n", block->imm_dom->index, block->index); + return true; +} +/* Writes the dominance tree of impl to fp in DOT digraph syntax: one edge + * from the immediate dominator to the block for every block that has one. + */ +void +nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp) +{ + fprintf(fp, "digraph doms_%s {\n", impl->overload->function->name); + nir_foreach_block(impl, dump_block_dom, fp); + fprintf(fp, "}\n\n"); +} + +void +nir_dump_dom_tree(nir_shader *shader, FILE *fp) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_dump_dom_tree_impl(overload->impl, fp); + } +} + +static bool +dump_block_dom_frontier(nir_block *block, void *state) +{ + FILE *fp = state; + + fprintf(fp, "DF(%u) = {", block->index); + struct set_entry *entry; + set_foreach(block->dom_frontier, entry) { + nir_block *df = (nir_block *) entry->key; + fprintf(fp, "%u, ", df->index); + } + fprintf(fp, "}\n"); + return true; +} +/* Prints the dominance frontier of every block of impl to fp, one DF(index) + * line per block listing the indices of the blocks in its frontier. + */ +void +nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp) +{ + nir_foreach_block(impl, dump_block_dom_frontier, fp); +} + +void +nir_dump_dom_frontier(nir_shader *shader, FILE *fp) +{ + 
nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_dump_dom_frontier_impl(overload->impl, fp); + } +} +/* Block callback: prints one edge line from the block to each of its + * non-NULL successors. + */ +static bool +dump_block_succs(nir_block *block, void *state) +{ + FILE *fp = state; + if (block->successors[0]) + fprintf(fp, "\t%u -> %u\n", block->index, block->successors[0]->index); + if (block->successors[1]) + fprintf(fp, "\t%u -> %u\n", block->index, block->successors[1]->index); + return true; +} +/* Writes the control-flow graph of impl to fp in DOT digraph syntax, named + * after the function that owns the implementation. + */ +void +nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp) +{ + fprintf(fp, "digraph cfg_%s {\n", impl->overload->function->name); + nir_foreach_block(impl, dump_block_succs, fp); + fprintf(fp, "}\n\n"); +} +/* Dumps the CFG of every overload of shader that has an implementation. */ +void +nir_dump_cfg(nir_shader *shader, FILE *fp) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_dump_cfg_impl(overload->impl, fp); + } +} diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c new file mode 100644 index 000000000..7c5009577 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_from_ssa.c @@ -0,0 +1,876 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * This file implements an out-of-SSA pass as described in "Revisiting + * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by + * Boissinot et al. + */ + +struct from_ssa_state { + void *mem_ctx; + void *dead_ctx; + struct hash_table *ssa_table; + struct hash_table *merge_node_table; + nir_instr *instr; + nir_function_impl *impl; +}; + +/* Returns true if a dominates b. A def with live_index 0 (an SSA undef) + * dominates everything, a def with a larger live_index than b never + * dominates it, defs in the same block are ordered by live_index, and for + * defs in different blocks the answer comes from nir_block_dominates(). + */ +static bool +ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b) +{ + if (a->live_index == 0) { + /* SSA undefs always dominate */ + return true; + } else if (b->live_index < a->live_index) { + return false; + } else if (a->parent_instr->block == b->parent_instr->block) { + return a->live_index <= b->live_index; + } else { + return nir_block_dominates(a->parent_instr->block, + b->parent_instr->block); + } +} + + +/* The following data structure, which I have named merge_set, is a way of + * representing a set of non-interfering registers. This is + * based on the concept of a "dominance forest" presented in "Fast Copy + * Coalescing and Live-Range Identification" by Budimlic et al. but the + * implementation concept is taken from "Revisiting Out-of-SSA Translation + * for Correctness, Code Quality, and Efficiency" by Boissinot et al. + * + * Each SSA definition is associated with a merge_node and the association + * is represented by a combination of a hash table and the "def" field + * in the merge_node structure. The merge_set stores a linked list of + * merge_nodes in dominance order of the ssa definitions. 
(Since the + * liveness analysis pass indexes the SSA values in dominance order for us, + * this is an easy thing to keep up.) It is assumed that no pair of the + * nodes in a given set interfere. Merging two sets or checking for + * interference can be done in a single linear-time merge-sort walk of the + * two lists of nodes. + */ +struct merge_set; + +typedef struct { + struct exec_node node; + struct merge_set *set; + nir_ssa_def *def; +} merge_node; + +typedef struct merge_set { + struct exec_list nodes; + unsigned size; + nir_register *reg; +} merge_set; + +#if 0 +static void +merge_set_dump(merge_set *set, FILE *fp) +{ + nir_ssa_def *dom[set->size]; + int dom_idx = -1; + + foreach_list_typed(merge_node, node, node, &set->nodes) { + while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def)) + dom_idx--; + + for (int i = 0; i <= dom_idx; i++) + fprintf(fp, " "); + + if (node->def->name) + fprintf(fp, "ssa_%d /* %s */\n", node->def->index, node->def->name); + else + fprintf(fp, "ssa_%d\n", node->def->index); + + dom[++dom_idx] = node->def; + } +} +#endif +/* Returns the merge_node recorded for def in state->merge_node_table. If + * there is none yet, a node and a fresh single-element merge_set are + * allocated from state->dead_ctx, entered into the table and returned. + */ +static merge_node * +get_merge_node(nir_ssa_def *def, struct from_ssa_state *state) +{ + struct hash_entry *entry = + _mesa_hash_table_search(state->merge_node_table, def); + if (entry) + return entry->data; + + merge_set *set = ralloc(state->dead_ctx, merge_set); + exec_list_make_empty(&set->nodes); + set->size = 1; + set->reg = NULL; + + merge_node *node = ralloc(state->dead_ctx, merge_node); + node->set = set; + node->def = def; + exec_list_push_head(&set->nodes, &node->node); + + _mesa_hash_table_insert(state->merge_node_table, def, node); + + return node; +} + +static bool +merge_nodes_interfere(merge_node *a, merge_node *b) +{ + return nir_ssa_defs_interfere(a->def, b->def); +} + +/* Merges b into a: every node of b is moved into the list of a at the + * position that keeps the list ordered by live_index, and its set pointer is + * redirected to a. Returns a; b ends up with size 0. + */ +static merge_set * +merge_merge_sets(merge_set *a, merge_set *b) +{ + struct exec_node *an = exec_list_get_head(&a->nodes); + struct exec_node *bn = exec_list_get_head(&b->nodes); + while 
(!exec_node_is_tail_sentinel(bn)) { + merge_node *a_node = exec_node_data(merge_node, an, node); + merge_node *b_node = exec_node_data(merge_node, bn, node); + + if (exec_node_is_tail_sentinel(an) || + a_node->def->live_index > b_node->def->live_index) { + struct exec_node *next = bn->next; + exec_node_remove(bn); + exec_node_insert_node_before(an, bn); + exec_node_data(merge_node, bn, node)->set = a; + bn = next; + } else { + an = an->next; + } + } + + a->size += b->size; + b->size = 0; + + return a; +} + +/* Checks for any interference between two merge sets + * + * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA + * Translation for Correctness, Code Quality, and Efficiency" by + * Boissinot et al. + * + * Both node lists are walked together in live_index order. dom[] is a + * stack of the nodes visited so far whose defs dominate the current one; + * the sets interfere as soon as the current node interferes with the node + * on top of that stack. + */ +static bool +merge_sets_interfere(merge_set *a, merge_set *b) +{ + merge_node *dom[a->size + b->size]; + int dom_idx = -1; + + struct exec_node *an = exec_list_get_head(&a->nodes); + struct exec_node *bn = exec_list_get_head(&b->nodes); + while (!exec_node_is_tail_sentinel(an) || + !exec_node_is_tail_sentinel(bn)) { + + merge_node *current; + if (exec_node_is_tail_sentinel(an)) { + current = exec_node_data(merge_node, bn, node); + bn = bn->next; + } else if (exec_node_is_tail_sentinel(bn)) { + current = exec_node_data(merge_node, an, node); + an = an->next; + } else { + merge_node *a_node = exec_node_data(merge_node, an, node); + merge_node *b_node = exec_node_data(merge_node, bn, node); + + if (a_node->def->live_index <= b_node->def->live_index) { + current = a_node; + an = an->next; + } else { + current = b_node; + bn = bn->next; + } + } + + while (dom_idx >= 0 && + !ssa_def_dominates(dom[dom_idx]->def, current->def)) + dom_idx--; + + if (dom_idx >= 0 && merge_nodes_interfere(current, dom[dom_idx])) + return true; + + dom[++dom_idx] = current; + } + + return false; +} +/* Block callback: if the first instruction of either successor is a phi, + * creates an empty parallel copy from state->dead_ctx and places it at the + * end of the block, before the final jump when there is one. + */ +static bool +add_parallel_copy_to_end_of_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + bool need_end_copy = false; + if 
(block->successors[0]) { + nir_instr *instr = nir_block_first_instr(block->successors[0]); + if (instr && instr->type == nir_instr_type_phi) + need_end_copy = true; + } + + if (block->successors[1]) { + nir_instr *instr = nir_block_first_instr(block->successors[1]); + if (instr && instr->type == nir_instr_type_phi) + need_end_copy = true; + } + + if (need_end_copy) { + /* If one of our successors has at least one phi node, we need to + * create a parallel copy at the end of the block but before the jump + * (if there is one). + */ + nir_parallel_copy_instr *pcopy = + nir_parallel_copy_instr_create(state->dead_ctx); + + nir_instr *last_instr = nir_block_last_instr(block); + if (last_instr && last_instr->type == nir_instr_type_jump) { + nir_instr_insert_before(last_instr, &pcopy->instr); + } else { + nir_instr_insert_after_block(block, &pcopy->instr); + } + } + + return true; +} +/* Returns the parallel copy that is the last instruction of block, or the + * one right before a trailing jump. Returns NULL if the block is empty or + * has no parallel copy in that position. + */ +static nir_parallel_copy_instr * +get_parallel_copy_at_end_of_block(nir_block *block) +{ + nir_instr *last_instr = nir_block_last_instr(block); + if (last_instr == NULL) + return NULL; + + /* The last instruction may be a jump in which case the parallel copy is + * right before it. + */ + if (last_instr->type == nir_instr_type_jump) + last_instr = nir_instr_prev(last_instr); + + if (last_instr && last_instr->type == nir_instr_type_parallel_copy) + return nir_instr_as_parallel_copy(last_instr); + else + return NULL; +} + +/** Isolate phi nodes with parallel copies + * + * In order to solve the dependency problems with the sources and + * destinations of phi nodes, we first isolate them by adding parallel + * copies to the beginnings and ends of basic blocks. For every block with + * phi nodes, we add a parallel copy immediately following the last phi + * node that copies the destinations of all of the phi nodes to new SSA + * values. 
We also add a parallel copy to the end of every block that has + * a successor with phi nodes that, for each phi node in each successor, + * copies the corresponding source of the phi node and adjusts the phi to + * use the destination of the parallel copy. + * + * In SSA form, each value has exactly one definition. What this does is + * ensure that each value used in a phi also has exactly one use. The + * destinations of phis are only used by the parallel copy immediately + * following the phi nodes. Thanks to the parallel copy at the end of + * the predecessor block, the sources of phi nodes are the only use of + * that value. This allows us to immediately assign all the sources and + * destinations of any given phi node to the same register without worrying + * about interference at all. We do coalescing to get rid of the parallel + * copies where possible. + * + * Before this pass can be run, we have to iterate over the blocks with + * add_parallel_copy_to_end_of_block to ensure that the parallel copies at + * the ends of blocks exist. We can create the ones at the beginnings as + * we go, but the ones at the ends of blocks need to be created ahead of + * time because of potential back-edges in the CFG. + */ +static bool +isolate_phi_nodes_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + nir_instr *last_phi_instr = NULL; + nir_foreach_instr(block, instr) { + /* Phi nodes only ever come at the start of a block */ + if (instr->type != nir_instr_type_phi) + break; + + last_phi_instr = instr; + } + + /* If we don't have any phis, then there's nothing for us to do. */ + if (last_phi_instr == NULL) + return true; + + /* If we have phi nodes, we need to create a parallel copy at the + * start of this block but after the phi nodes. 
+ */ + nir_parallel_copy_instr *block_pcopy = + nir_parallel_copy_instr_create(state->dead_ctx); + nir_instr_insert_after(last_phi_instr, &block_pcopy->instr); + + nir_foreach_instr(block, instr) { + /* Phi nodes only ever come at the start of a block */ + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + assert(phi->dest.is_ssa); + nir_foreach_phi_src(phi, src) { + nir_parallel_copy_instr *pcopy = + get_parallel_copy_at_end_of_block(src->pred); + assert(pcopy); + + nir_parallel_copy_entry *entry = ralloc(state->dead_ctx, + nir_parallel_copy_entry); + exec_list_push_tail(&pcopy->entries, &entry->node); + + nir_src_copy(&entry->src, &src->src, state->dead_ctx); + _mesa_set_add(src->src.ssa->uses, &pcopy->instr); + + nir_ssa_dest_init(&pcopy->instr, &entry->dest, + phi->dest.ssa.num_components, src->src.ssa->name); + + struct set_entry *use_entry = + _mesa_set_search(src->src.ssa->uses, instr); + if (use_entry) + /* It is possible that a phi node can use the same source twice + * but for different basic blocks. If that happens, use_entry + * will be NULL because we already deleted it. This is safe + * because, by the time the loop is done, we will have deleted + * all of the sources of the phi from their respective use sets + * and moved them to the parallel copy definitions. 
+ */ + _mesa_set_remove(src->src.ssa->uses, use_entry); + + src->src.ssa = &entry->dest.ssa; + _mesa_set_add(entry->dest.ssa.uses, instr); + } + + nir_parallel_copy_entry *entry = ralloc(state->dead_ctx, + nir_parallel_copy_entry); + exec_list_push_tail(&block_pcopy->entries, &entry->node); + + nir_ssa_dest_init(&block_pcopy->instr, &entry->dest, + phi->dest.ssa.num_components, phi->dest.ssa.name); + nir_ssa_def_rewrite_uses(&phi->dest.ssa, + nir_src_for_ssa(&entry->dest.ssa), + state->mem_ctx); + + entry->src.is_ssa = true; + entry->src.ssa = &phi->dest.ssa; + _mesa_set_add(phi->dest.ssa.uses, &block_pcopy->instr); + } + + return true; +} +/* Block callback: for every phi at the start of the block, merges the merge + * set of each source into the merge set of the phi destination. + */ +static bool +coalesce_phi_nodes_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + nir_foreach_instr(block, instr) { + /* Phi nodes only ever come at the start of a block */ + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + + assert(phi->dest.is_ssa); + merge_node *dest_node = get_merge_node(&phi->dest.ssa, state); + + nir_foreach_phi_src(phi, src) { + assert(src->src.is_ssa); + merge_node *src_node = get_merge_node(src->src.ssa, state); + if (src_node->set != dest_node->set) + merge_merge_sets(dest_node->set, src_node->set); + } + } + + return true; +} +/* Tries to coalesce every entry of a parallel copy: when the source is an + * SSA value that does not come from a load_const and has the same component + * count as the destination, the two merge sets are merged unless they + * interfere. + */ +static void +agressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy, + struct from_ssa_state *state) +{ + nir_foreach_parallel_copy_entry(pcopy, entry) { + if (!entry->src.is_ssa) + continue; + + /* Since load_const instructions are SSA only, we can't replace their + * destinations with registers and, therefore, can't coalesce them. 
+ */ + if (entry->src.ssa->parent_instr->type == nir_instr_type_load_const) + continue; + + /* Don't try to coalesce a copy whose source and destination have + * different component counts. + */ + if (entry->dest.ssa.num_components != entry->src.ssa->num_components) + continue; + + merge_node *src_node = get_merge_node(entry->src.ssa, state); + merge_node *dest_node = get_merge_node(&entry->dest.ssa, state); + + if (src_node->set == dest_node->set) + continue; + + if (!merge_sets_interfere(src_node->set, dest_node->set)) + merge_merge_sets(src_node->set, dest_node->set); + } +} +/* Block callback: runs agressive_coalesce_parallel_copy() on the parallel + * copy that directly follows the phis, if there is one, and on the parallel + * copy at the end of the block when that is a different instruction. + */ +static bool +agressive_coalesce_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + nir_parallel_copy_instr *start_pcopy = NULL; + nir_foreach_instr(block, instr) { + /* Phi nodes only ever come at the start of a block */ + if (instr->type != nir_instr_type_phi) { + if (instr->type != nir_instr_type_parallel_copy) + break; /* The parallel copy must be right after the phis */ + + start_pcopy = nir_instr_as_parallel_copy(instr); + + agressive_coalesce_parallel_copy(start_pcopy, state); + + break; + } + } + + nir_parallel_copy_instr *end_pcopy = + get_parallel_copy_at_end_of_block(block); + + if (end_pcopy && end_pcopy != start_pcopy) + agressive_coalesce_parallel_copy(end_pcopy, state); + + return true; +} +/* Returns the register that replaces def. A def found in merge_node_table + * uses the register of its merge set, created on first request. Any other + * def gets a register of its own, cached in ssa_table. Returns NULL for a + * load_const def that is in neither table. + */ +static nir_register * +get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state) +{ + struct hash_entry *entry = + _mesa_hash_table_search(state->merge_node_table, def); + if (entry) { + merge_node *node = (merge_node *)entry->data; + + /* If it doesn't have a register yet, create one. Note that all of + * the things in the merge set should be the same so it doesn't + * matter which node's definition we use. 
+ */ + if (node->set->reg == NULL) { + node->set->reg = nir_local_reg_create(state->impl); + node->set->reg->name = def->name; + node->set->reg->num_components = def->num_components; + node->set->reg->num_array_elems = 0; + } + + return node->set->reg; + } + + entry = _mesa_hash_table_search(state->ssa_table, def); + if (entry) { + return (nir_register *)entry->data; + } else { + /* We leave load_const SSA values alone. They act as immediates to + * the backend. If it got coalesced into a phi, that's ok. + */ + if (def->parent_instr->type == nir_instr_type_load_const) + return NULL; + + nir_register *reg = nir_local_reg_create(state->impl); + reg->name = def->name; + reg->num_components = def->num_components; + reg->num_array_elems = 0; + + _mesa_hash_table_insert(state->ssa_table, def, reg); + return reg; + } +} +/* Source callback: replaces an SSA source by a reference to the register + * chosen for its def and adds state->instr to the uses set of that register. + * A source whose def has no register is left untouched. + */ +static bool +rewrite_ssa_src(nir_src *src, void *void_state) +{ + struct from_ssa_state *state = void_state; + + if (src->is_ssa) { + nir_register *reg = get_register_for_ssa_def(src->ssa, state); + + if (reg == NULL) { + assert(src->ssa->parent_instr->type == nir_instr_type_load_const); + return true; + } + + memset(src, 0, sizeof *src); + src->reg.reg = reg; + + /* We don't need to remove it from the uses set because that is going + * away. We just need to add it to the one for the register. */ + _mesa_set_add(reg->uses, state->instr); + } + + return true; +} +/* Destination callback: destroys the uses and if_uses sets of an SSA + * destination, replaces it by the register chosen for the def and adds + * state->instr to the defs set of that register. A destination whose def + * has no register is left untouched. + */ +static bool +rewrite_ssa_dest(nir_dest *dest, void *void_state) +{ + struct from_ssa_state *state = void_state; + + if (dest->is_ssa) { + nir_register *reg = get_register_for_ssa_def(&dest->ssa, state); + + if (reg == NULL) { + assert(dest->ssa.parent_instr->type == nir_instr_type_load_const); + return true; + } + + _mesa_set_destroy(dest->ssa.uses, NULL); + _mesa_set_destroy(dest->ssa.if_uses, NULL); + + memset(dest, 0, sizeof *dest); + dest->reg.reg = reg; + + _mesa_set_add(reg->defs, state->instr); + } + + return true; +} + +/* Resolves ssa definitions to registers. 
While we're at it, we also + * remove phi nodes and ssa_undef instructions, and rewrite the condition of + * an if that follows the block when that condition is an SSA value. + */ +static bool +resolve_registers_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + state->instr = instr; + nir_foreach_src(instr, rewrite_ssa_src, state); + nir_foreach_dest(instr, rewrite_ssa_dest, state); + + if (instr->type == nir_instr_type_ssa_undef || + instr->type == nir_instr_type_phi) { + nir_instr_remove(instr); + ralloc_steal(state->dead_ctx, instr); + } + } + state->instr = NULL; + + nir_if *following_if = nir_block_get_following_if(block); + if (following_if && following_if->condition.is_ssa) { + nir_register *reg = get_register_for_ssa_def(following_if->condition.ssa, + state); + if (reg) { + memset(&following_if->condition, 0, sizeof following_if->condition); + following_if->condition.reg.reg = reg; + + _mesa_set_add(reg->if_uses, following_if); + } else { + /* FIXME: We really shouldn't hit this. We should be doing + * constant control flow propagation. 
+ */ + assert(following_if->condition.ssa->parent_instr->type == nir_instr_type_load_const); + } + } + + return true; +} + +static void +emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src, + void *mem_ctx) +{ + assert(!dest_src.is_ssa && + dest_src.reg.indirect == NULL && + dest_src.reg.base_offset == 0); + + if (src.is_ssa) + assert(src.ssa->num_components >= dest_src.reg.reg->num_components); + else + assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components); + + nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov); + nir_src_copy(&mov->src[0].src, &src, mem_ctx); + mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg); + mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1; + + nir_instr_insert_before(&pcopy->instr, &mov->instr); +} + +/* Resolves a single parallel copy operation into a sequence of mov's + * + * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for + * Correctness, Code Quality, and Efficiency" by Boissinot et. al.. + * However, I never got the algorithm to work as written, so this version + * is slightly modified. + * + * The algorithm works by playing this little shell game with the values. + * We start by recording where every source value is and which source value + * each destination value should recieve. We then grab any copy whose + * destination is "empty", i.e. not used as a source, and do the following: + * - Find where its source value currently lives + * - Emit the move instruction + * - Set the location of the source value to the destination + * - Mark the location containing the source value + * - Mark the destination as no longer needing to be copied + * + * When we run out of "empty" destinations, we have a cycle and so we + * create a temporary register, copy to that register, and mark the value + * we copied as living in that temporary. Now, the cycle is broken, so we + * can continue with the above steps. 
+ */ +static void +resolve_parallel_copy(nir_parallel_copy_instr *pcopy, + struct from_ssa_state *state) +{ + unsigned num_copies = 0; + nir_foreach_parallel_copy_entry(pcopy, entry) { + /* Sources may be SSA */ + if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg) + continue; + + num_copies++; + } + + if (num_copies == 0) { + /* Hooray, we don't need any copies! */ + nir_instr_remove(&pcopy->instr); + return; + } + + /* The register/source corresponding to the given index */ + nir_src values[num_copies * 2]; + memset(values, 0, sizeof values); + + /* The current location of a given piece of data */ + int loc[num_copies * 2]; + + /* The piece of data that the given piece of data is to be copied from */ + int pred[num_copies * 2]; + + /* Initialize loc and pred. We will use -1 for "null" */ + memset(loc, -1, sizeof loc); + memset(pred, -1, sizeof pred); + + /* The destinations we have yet to properly fill */ + int to_do[num_copies * 2]; + int to_do_idx = -1; + + /* Now we set everything up: + * - All values get assigned a temporary index + * - Current locations are set from sources + * - Predicessors are recorded from sources and destinations + */ + int num_vals = 0; + nir_foreach_parallel_copy_entry(pcopy, entry) { + /* Sources may be SSA */ + if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg) + continue; + + int src_idx = -1; + for (int i = 0; i < num_vals; ++i) { + if (nir_srcs_equal(values[i], entry->src)) + src_idx = i; + } + if (src_idx < 0) { + src_idx = num_vals++; + values[src_idx] = entry->src; + } + + nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg); + + int dest_idx = -1; + for (int i = 0; i < num_vals; ++i) { + if (nir_srcs_equal(values[i], dest_src)) { + /* Each destination of a parallel copy instruction should be + * unique. A destination may get used as a source, so we still + * have to walk the list. However, the predecessor should not, + * at this point, be set yet, so we should have -1 here. 
+ */ + assert(pred[i] == -1); + dest_idx = i; + } + } + if (dest_idx < 0) { + dest_idx = num_vals++; + values[dest_idx] = dest_src; + } + + loc[src_idx] = src_idx; + pred[dest_idx] = src_idx; + + to_do[++to_do_idx] = dest_idx; + } + + /* Currently empty destinations we can go ahead and fill */ + int ready[num_copies * 2]; + int ready_idx = -1; + + /* Mark the ones that are ready for copying. We know an index is a + * destination if it has a predecessor and it's ready for copying if + * it's not marked as containing data. + */ + for (int i = 0; i < num_vals; i++) { + if (pred[i] != -1 && loc[i] == -1) + ready[++ready_idx] = i; + } + + while (to_do_idx >= 0) { + while (ready_idx >= 0) { + int b = ready[ready_idx--]; + int a = pred[b]; + emit_copy(pcopy, values[loc[a]], values[b], state->mem_ctx); + + /* If any other copies want a they can find it at b */ + loc[a] = b; + + /* b has been filled, mark it as not needing to be copied */ + pred[b] = -1; + + /* If a needs to be filled, it's ready for copying now */ + if (pred[a] != -1) + ready[++ready_idx] = a; + } + int b = to_do[to_do_idx--]; + if (pred[b] == -1) + continue; + + /* If we got here, then we don't have any more trivial copies that we + * can do. We have to break a cycle, so we create a new temporary + * register for that purpose. Normally, if going out of SSA after + * register allocation, you would want to avoid creating temporary + * registers. However, we are going out of SSA before register + * allocation, so we would rather not create extra register + * dependencies for the backend to deal with. If it wants, the + * backend can coalesce the (possibly multiple) temporaries. 
+ */ + assert(num_vals < num_copies * 2); + nir_register *reg = nir_local_reg_create(state->impl); + reg->name = "copy_temp"; + reg->num_array_elems = 0; + if (values[b].is_ssa) + reg->num_components = values[b].ssa->num_components; + else + reg->num_components = values[b].reg.reg->num_components; + values[num_vals].is_ssa = false; + values[num_vals].reg.reg = reg; + + emit_copy(pcopy, values[b], values[num_vals], state->mem_ctx); + loc[b] = num_vals; + ready[++ready_idx] = b; + num_vals++; + } + + nir_instr_remove(&pcopy->instr); +} + +/* Resolves the parallel copies in a block. Each block can have at most + * two: One at the beginning, right after all the phi noces, and one at + * the end (or right before the final jump if it exists). + */ +static bool +resolve_parallel_copies_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + /* At this point, we have removed all of the phi nodes. If a parallel + * copy existed right after the phi nodes in this block, it is now the + * first instruction. + */ + nir_instr *first_instr = nir_block_first_instr(block); + if (first_instr == NULL) + return true; /* Empty, nothing to do. */ + + if (first_instr->type == nir_instr_type_parallel_copy) { + nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr); + + resolve_parallel_copy(pcopy, state); + } + + /* It's possible that the above code already cleaned up the end parallel + * copy. However, doing so removed it form the instructions list so we + * won't find it here. Therefore, it's safe to go ahead and just look + * for one and clean it up if it exists. 
+ */ + nir_parallel_copy_instr *end_pcopy = + get_parallel_copy_at_end_of_block(block); + if (end_pcopy) + resolve_parallel_copy(end_pcopy, state); + + return true; +} + +static void +nir_convert_from_ssa_impl(nir_function_impl *impl) +{ + struct from_ssa_state state; + + state.mem_ctx = ralloc_parent(impl); + state.dead_ctx = ralloc_context(NULL); + state.impl = impl; + state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state); + nir_foreach_block(impl, isolate_phi_nodes_block, &state); + + /* Mark metadata as dirty before we ask for liveness analysis */ + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + nir_metadata_require(impl, nir_metadata_live_variables | + nir_metadata_dominance); + + nir_foreach_block(impl, coalesce_phi_nodes_block, &state); + nir_foreach_block(impl, agressive_coalesce_block, &state); + + state.ssa_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + nir_foreach_block(impl, resolve_registers_block, &state); + + nir_foreach_block(impl, resolve_parallel_copies_block, &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + /* Clean up dead instructions and the hash tables */ + _mesa_hash_table_destroy(state.ssa_table, NULL); + _mesa_hash_table_destroy(state.merge_node_table, NULL); + ralloc_free(state.dead_ctx); +} + +void +nir_convert_from_ssa(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_convert_from_ssa_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_intrinsics.c b/mesalib/src/glsl/nir/nir_intrinsics.c new file mode 100644 index 000000000..a7c868c39 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_intrinsics.c @@ -0,0 +1,49 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * 
copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +#define OPCODE(name) nir_intrinsic_##name + +#define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \ + _dest_components, _num_variables, _num_indices, _flags) \ +{ \ + .name = #_name, \ + .num_srcs = _num_srcs, \ + .src_components = _src_components, \ + .has_dest = _has_dest, \ + .dest_components = _dest_components, \ + .num_variables = _num_variables, \ + .num_indices = _num_indices, \ + .flags = _flags \ +}, + +#define LAST_INTRINSIC(name) + +const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = { +#include "nir_intrinsics.h" +};
\ No newline at end of file diff --git a/mesalib/src/glsl/nir/nir_intrinsics.h b/mesalib/src/glsl/nir/nir_intrinsics.h new file mode 100644 index 000000000..d94866c85 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_intrinsics.h @@ -0,0 +1,140 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +/** + * This header file defines all the available intrinsics in one place. It + * expands to a list of macros of the form: + * + * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, + * num_variables, num_indices, flags) + * + * Which should correspond one-to-one with the nir_intrinsic_info structure. 
It + * is included in both ir.h to create the nir_intrinsic enum (with members of + * the form nir_intrinsic_(name)) and and in opcodes.c to create + * nir_intrinsic_infos, which is a const array of nir_intrinsic_info structures + * for each intrinsic. + */ + +#define ARR(...) { __VA_ARGS__ } + + +INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0) +INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) + +/* + * Interpolation of input. The interp_var_at* intrinsics are similar to the + * load_var intrinsic acting an a shader input except that they interpolate + * the input differently. The at_sample and at_offset intrinsics take an + * aditional source that is a integer sample id or a vec2 position offset + * respectively. + */ + +INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + +/* + * a barrier is an intrinsic with no inputs/outputs but which can't be moved + * around/optimized in general + */ +#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0) + +BARRIER(discard) + +INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0) +INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0) + +/* + * Atomic counters + * + * The *_var variants take an atomic_uint nir_variable, while the other, + * lowered, variants take a constant buffer index and register offset. 
+ */ + +#define ATOMIC(name, flags) \ + INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \ + INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags) + +ATOMIC(inc, 0) +ATOMIC(dec, 0) +ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE) + +#define SYSTEM_VALUE(name, components) \ + INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \ + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + +SYSTEM_VALUE(front_face, 1) +SYSTEM_VALUE(vertex_id, 1) +SYSTEM_VALUE(instance_id, 1) +SYSTEM_VALUE(sample_id, 1) +SYSTEM_VALUE(sample_pos, 2) +SYSTEM_VALUE(sample_mask_in, 1) +SYSTEM_VALUE(invocation_id, 1) + +/* + * The first index is the address to load from, and the second index is the + * number of array elements to load. Indirect loads have an additional + * register input, which is added to the constant address to compute the + * final address to load from. For UBO's (and SSBO's), the first source is + * the (possibly constant) UBO buffer index and the indirect (if it exists) + * is the second source. + * + * For vector backends, the address is in terms of one vec4, and so each array + * element is +4 scalar components from the previous array element. For scalar + * backends, the address is in terms of a single 4-byte float/int and arrays + * elements begin immediately after the previous array element. + */ + +#define LOAD(name, extra_srcs, flags) \ + INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \ + INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ + true, 0, 0, 2, flags) + +LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* LOAD(ssbo, 1, 0) */ + +/* + * Stores work the same way as loads, except now the first register input is + * the value or array to store and the optional second input is the indirect + * offset. 
+ */ + +#define STORE(name, num_indices, flags) \ + INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \ + INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \ + num_indices, flags) \ + +STORE(output, 2, 0) +/* STORE(ssbo, 3, 0) */ + +LAST_INTRINSIC(store_output_indirect) diff --git a/mesalib/src/glsl/nir/nir_live_variables.c b/mesalib/src/glsl/nir/nir_live_variables.c new file mode 100644 index 000000000..7402dc087 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_live_variables.c @@ -0,0 +1,296 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + */ + +#include "nir.h" +#include "nir_worklist.h" + +/* + * Basic liveness analysis. This works only in SSA form. 
+ * + * This liveness pass treats phi nodes as being melded to the space between + * blocks so that the destinations of a phi are in the livein of the block + * in which it resides and the sources are in the liveout of the + * corresponding block. By formulating the liveness information in this + * way, we ensure that the definition of any variable dominates its entire + * live range. This is true because the only way that the definition of an + * SSA value may not dominate a use is if the use is in a phi node and the + * uses in phi no are in the live-out of the corresponding predecessor + * block but not in the live-in of the block containing the phi node. + */ + +struct live_variables_state { + unsigned num_ssa_defs; + unsigned bitset_words; + + nir_block_worklist worklist; +}; + +static bool +index_ssa_def(nir_ssa_def *def, void *void_state) +{ + struct live_variables_state *state = void_state; + + if (def->parent_instr->type == nir_instr_type_ssa_undef) + def->live_index = 0; + else + def->live_index = state->num_ssa_defs++; + + return true; +} + +static bool +index_ssa_definitions_block(nir_block *block, void *state) +{ + nir_foreach_instr(block, instr) + nir_foreach_ssa_def(instr, index_ssa_def, state); + + return true; +} + +/* Initialize the liveness data to zero and add the given block to the + * worklist. 
+ */ +static bool +init_liveness_block(nir_block *block, void *void_state) +{ + struct live_variables_state *state = void_state; + + block->live_in = reralloc(block, block->live_in, BITSET_WORD, + state->bitset_words); + memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD)); + + block->live_out = reralloc(block, block->live_out, BITSET_WORD, + state->bitset_words); + memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD)); + + nir_block_worklist_push_head(&state->worklist, block); + + return true; +} + +static bool +set_src_live(nir_src *src, void *void_live) +{ + BITSET_WORD *live = void_live; + + if (!src->is_ssa) + return true; + + if (src->ssa->live_index == 0) + return true; /* undefined variables are never live */ + + BITSET_SET(live, src->ssa->live_index); + + return true; +} + +static bool +set_ssa_def_dead(nir_ssa_def *def, void *void_live) +{ + BITSET_WORD *live = void_live; + + BITSET_CLEAR(live, def->live_index); + + return true; +} + +/** Propagates the live in of succ across the edge to the live out of pred + * + * Phi nodes exist "between" blocks and all the phi nodes at the start of a + * block act "in parallel". When we propagate from the live_in of one + * block to the live out of the other, we have to kill any writes from phis + * and make live any sources. 
+ * + * Returns true if updating live out of pred added anything + */ +static bool +propagate_across_edge(nir_block *pred, nir_block *succ, + struct live_variables_state *state) +{ + BITSET_WORD live[state->bitset_words]; + memcpy(live, succ->live_in, sizeof live); + + nir_foreach_instr(succ, instr) { + if (instr->type != nir_instr_type_phi) + break; + nir_phi_instr *phi = nir_instr_as_phi(instr); + + assert(phi->dest.is_ssa); + set_ssa_def_dead(&phi->dest.ssa, live); + } + + nir_foreach_instr(succ, instr) { + if (instr->type != nir_instr_type_phi) + break; + nir_phi_instr *phi = nir_instr_as_phi(instr); + + nir_foreach_phi_src(phi, src) { + if (src->pred == pred) { + set_src_live(&src->src, live); + break; + } + } + } + + BITSET_WORD progress = 0; + for (unsigned i = 0; i < state->bitset_words; ++i) { + progress |= live[i] & ~pred->live_out[i]; + pred->live_out[i] |= live[i]; + } + return progress != 0; +} + +void +nir_live_variables_impl(nir_function_impl *impl) +{ + struct live_variables_state state; + + /* We start at 1 because we reserve the index value of 0 for ssa_undef + * instructions. Those are never live, so their liveness information + * can be compacted into a single bit. + */ + state.num_ssa_defs = 1; + nir_foreach_block(impl, index_ssa_definitions_block, &state); + + nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL); + + /* We now know how many unique ssa definitions we have and we can go + * ahead and allocate live_in and live_out sets and add all of the + * blocks to the worklist. + */ + state.bitset_words = BITSET_WORDS(state.num_ssa_defs); + nir_foreach_block(impl, init_liveness_block, &state); + + /* We're now ready to work through the worklist and update the liveness + * sets of each of the blocks. By the time we get to this point, every + * block in the function implementation has been pushed onto the + * worklist in reverse order. As long as we keep the worklist + * up-to-date as we go, everything will get covered. 
+ */ + while (!nir_block_worklist_is_empty(&state.worklist)) { + /* We pop them off in the reverse order we pushed them on. This way + * the first walk of the instructions is backwards so we only walk + * once in the case of no control flow. + */ + nir_block *block = nir_block_worklist_pop_head(&state.worklist); + + memcpy(block->live_in, block->live_out, + state.bitset_words * sizeof(BITSET_WORD)); + + nir_if *following_if = nir_block_get_following_if(block); + if (following_if) + set_src_live(&following_if->condition, block->live_in); + + nir_foreach_instr_reverse(block, instr) { + /* Phi nodes are handled seperately so we want to skip them. Since + * we are going backwards and they are at the beginning, we can just + * break as soon as we see one. + */ + if (instr->type == nir_instr_type_phi) + break; + + nir_foreach_ssa_def(instr, set_ssa_def_dead, block->live_in); + nir_foreach_src(instr, set_src_live, block->live_in); + } + + /* Walk over all of the predecessors of the current block updating + * their live in with the live out of this one. If anything has + * changed, add the predecessor to the work list so that we ensure + * that the new information is used. 
+ */ + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *)entry->key; + if (propagate_across_edge(pred, block, &state)) + nir_block_worklist_push_tail(&state.worklist, pred); + } + } + + nir_block_worklist_fini(&state.worklist); +} + +static bool +src_does_not_use_def(nir_src *src, void *def) +{ + return !src->is_ssa || src->ssa != (nir_ssa_def *)def; +} + +static bool +search_for_use_after_instr(nir_instr *start, nir_ssa_def *def) +{ + /* Only look for a use strictly after the given instruction */ + struct exec_node *node = start->node.next; + while (!exec_node_is_tail_sentinel(node)) { + nir_instr *instr = exec_node_data(nir_instr, node, node); + if (!nir_foreach_src(instr, src_does_not_use_def, def)) + return true; + node = node->next; + } + return false; +} + +/* Returns true if def is live at instr assuming that def comes before + * instr in a pre DFS search of the dominance tree. + */ +static bool +nir_ssa_def_is_live_at(nir_ssa_def *def, nir_instr *instr) +{ + if (BITSET_TEST(instr->block->live_out, def->live_index)) { + /* Since def dominates instr, if def is in the liveout of the block, + * it's live at instr + */ + return true; + } else { + if (BITSET_TEST(instr->block->live_in, def->live_index) || + def->parent_instr->block == instr->block) { + /* In this case it is either live coming into instr's block or it + * is defined in the same block. In this case, we simply need to + * see if it is used after instr. + */ + return search_for_use_after_instr(instr, def); + } else { + return false; + } + } +} + +bool +nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b) +{ + if (a->parent_instr == b->parent_instr) { + /* Two variables defined at the same time interfere assuming at + * least one isn't dead. 
+ */ + return true; + } else if (a->live_index == 0 || b->live_index == 0) { + /* If either variable is an ssa_undef, then there's no interference */ + return false; + } else if (a->live_index < b->live_index) { + return nir_ssa_def_is_live_at(a, b->parent_instr); + } else { + return nir_ssa_def_is_live_at(b, a->parent_instr); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_alu_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_alu_to_scalar.c new file mode 100644 index 000000000..25bba4ef0 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_alu_to_scalar.c @@ -0,0 +1,186 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" + +/** @file nir_lower_alu_to_scalar.c + * + * Replaces nir_alu_instr operations with more than one channel used in the + * arguments with individual per-channel operations. 
+ */ + +static void +nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components) +{ + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); + instr->dest.write_mask = (1 << num_components) - 1; +} + +static void +lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op, + void *mem_ctx) +{ + unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; + + nir_ssa_def *last = NULL; + for (unsigned i = 0; i < num_components; i++) { + nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op); + nir_alu_ssa_dest_init(chan, 1); + nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx); + chan->src[0].swizzle[0] = chan->src[0].swizzle[i]; + if (nir_op_infos[chan_op].num_inputs > 1) { + assert(nir_op_infos[chan_op].num_inputs == 2); + nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx); + chan->src[1].swizzle[0] = chan->src[1].swizzle[i]; + } + + nir_instr_insert_before(&instr->instr, &chan->instr); + + if (i == 0) { + last = &chan->dest.dest.ssa; + } else { + nir_alu_instr *merge = nir_alu_instr_create(mem_ctx, merge_op); + nir_alu_ssa_dest_init(merge, 1); + merge->dest.write_mask = 1; + merge->src[0].src = nir_src_for_ssa(last); + merge->src[1].src = nir_src_for_ssa(&chan->dest.dest.ssa); + nir_instr_insert_before(&instr->instr, &merge->instr); + last = &merge->dest.dest.ssa; + } + } + + assert(instr->dest.write_mask == 1); + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last), + mem_ctx); + nir_instr_remove(&instr->instr); +} + +static void +lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx) +{ + unsigned num_src = nir_op_infos[instr->op].num_inputs; + unsigned i, chan; + + assert(instr->dest.dest.is_ssa); + assert(instr->dest.write_mask != 0); + +#define LOWER_REDUCTION(name, chan, merge) \ + case name##2: \ + case name##3: \ + case name##4: \ + lower_reduction(instr, chan, merge, mem_ctx); \ + break; + + switch (instr->op) { + case nir_op_vec4: + case nir_op_vec3: + case nir_op_vec2: 
+ /* We don't need to scalarize these ops, they're the ones generated to + * group up outputs into a value that can be SSAed. + */ + return; + + LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd); + LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand); + LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand); + LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior); + LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior); + LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand); + LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for); + LOWER_REDUCTION(nir_op_ball, nir_op_imov, nir_op_iand); + LOWER_REDUCTION(nir_op_bany, nir_op_imov, nir_op_ior); + LOWER_REDUCTION(nir_op_fall, nir_op_fmov, nir_op_fand); + LOWER_REDUCTION(nir_op_fany, nir_op_fmov, nir_op_for); + + default: + break; + } + + if (instr->dest.dest.ssa.num_components == 1) + return; + + unsigned num_components = instr->dest.dest.ssa.num_components; + static const nir_op nir_op_map[] = {nir_op_vec2, nir_op_vec3, nir_op_vec4}; + nir_alu_instr *vec_instr = + nir_alu_instr_create(mem_ctx, nir_op_map[num_components - 2]); + nir_alu_ssa_dest_init(vec_instr, num_components); + + for (chan = 0; chan < 4; chan++) { + if (!(instr->dest.write_mask & (1 << chan))) + continue; + + nir_alu_instr *lower = nir_alu_instr_create(mem_ctx, instr->op); + for (i = 0; i < num_src; i++) { + /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar + * args (input_sizes[] == 1). + */ + assert(nir_op_infos[instr->op].input_sizes[i] < 2); + unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ? 
+ 0 : chan); + + nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx); + for (int j = 0; j < 4; j++) + lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan]; + } + + nir_alu_ssa_dest_init(lower, 1); + lower->dest.saturate = instr->dest.saturate; + vec_instr->src[chan].src = nir_src_for_ssa(&lower->dest.dest.ssa); + + nir_instr_insert_before(&instr->instr, &lower->instr); + } + + nir_instr_insert_before(&instr->instr, &vec_instr->instr); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, + nir_src_for_ssa(&vec_instr->dest.dest.ssa), + mem_ctx); + + nir_instr_remove(&instr->instr); +} + +static bool +lower_alu_to_scalar_block(nir_block *block, void *data) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_alu) + lower_alu_instr_scalar((nir_alu_instr *)instr, data); + } + + return true; +} + +static void +nir_lower_alu_to_scalar_impl(nir_function_impl *impl) +{ + nir_foreach_block(impl, lower_alu_to_scalar_block, ralloc_parent(impl)); +} + +void +nir_lower_alu_to_scalar(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_lower_alu_to_scalar_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_atomics.c b/mesalib/src/glsl/nir/nir_lower_atomics.c new file mode 100644 index 000000000..e82df0169 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_atomics.c @@ -0,0 +1,148 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be 
included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include "main/config.h" +#include <assert.h> + +/* + * replace atomic counter intrinsics that use a variable with intrinsics + * that directly store the buffer index and byte offset + */ + +static void +lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) +{ + nir_intrinsic_op op; + switch (instr->intrinsic) { + case nir_intrinsic_atomic_counter_read_var: + op = nir_intrinsic_atomic_counter_read; + break; + + case nir_intrinsic_atomic_counter_inc_var: + op = nir_intrinsic_atomic_counter_inc; + break; + + case nir_intrinsic_atomic_counter_dec_var: + op = nir_intrinsic_atomic_counter_dec; + break; + + default: + return; + } + + if (instr->variables[0]->var->data.mode != nir_var_uniform) + return; /* atomics passed as function arguments can't be lowered */ + + void *mem_ctx = ralloc_parent(instr); + + nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); + new_instr->const_index[0] = + (int) instr->variables[0]->var->data.atomic.buffer_index; + + nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1); + offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset; + + nir_instr_insert_before(&instr->instr, &offset_const->instr); + + nir_ssa_def *offset_def = &offset_const->def; + + if (instr->variables[0]->deref.child != NULL) { + 
assert(instr->variables[0]->deref.child->deref_type == + nir_deref_type_array); + nir_deref_array *deref_array = + nir_deref_as_array(instr->variables[0]->deref.child); + assert(deref_array->deref.child == NULL); + + offset_const->value.u[0] += deref_array->base_offset; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_load_const_instr *atomic_counter_size = + nir_load_const_instr_create(mem_ctx, 1); + atomic_counter_size->value.u[0] = ATOMIC_COUNTER_SIZE; + nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr); + + nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul); + nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); + mul->dest.write_mask = 0x1; + nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx); + mul->src[1].src.is_ssa = true; + mul->src[1].src.ssa = &atomic_counter_size->def; + nir_instr_insert_before(&instr->instr, &mul->instr); + + nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd); + nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); + add->dest.write_mask = 0x1; + add->src[0].src.is_ssa = true; + add->src[0].src.ssa = &mul->dest.dest.ssa; + add->src[1].src.is_ssa = true; + add->src[1].src.ssa = &offset_const->def; + nir_instr_insert_before(&instr->instr, &add->instr); + + offset_def = &add->dest.dest.ssa; + } + } + + new_instr->src[0].is_ssa = true; + new_instr->src[0].ssa = offset_def;; + + if (instr->dest.is_ssa) { + nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, + instr->dest.ssa.num_components, NULL); + nir_ssa_def_rewrite_uses(&instr->dest.ssa, + nir_src_for_ssa(&new_instr->dest.ssa), + mem_ctx); + } else { + nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx); + } + + nir_instr_insert_before(&instr->instr, &new_instr->instr); + nir_instr_remove(&instr->instr); +} + +static bool +lower_block(nir_block *block, void *state) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_intrinsic) + 
lower_instr(nir_instr_as_intrinsic(instr), state); + } + + return true; +} + +void +nir_lower_atomics(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) { + nir_foreach_block(overload->impl, lower_block, overload->impl); + nir_metadata_preserve(overload->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_global_vars_to_local.c b/mesalib/src/glsl/nir/nir_lower_global_vars_to_local.c new file mode 100644 index 000000000..0cd8740cc --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_global_vars_to_local.c @@ -0,0 +1,106 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +/* + * This lowering pass detects when a global variable is only being used by + * one function and makes it local to that function + */ + +#include "nir.h" + +struct global_to_local_state { + nir_function_impl *impl; + /* A hash table keyed on variable pointers that stores the unique + * nir_function_impl that uses the given variable. If a variable is + * used in multiple functions, the data for the given key will be NULL. + */ + struct hash_table *var_func_table; +}; + +static bool +mark_global_var_uses_block(nir_block *block, void *void_state) +{ + struct global_to_local_state *state = void_state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + unsigned num_vars = nir_intrinsic_infos[intrin->intrinsic].num_variables; + + for (unsigned i = 0; i < num_vars; i++) { + nir_variable *var = intrin->variables[i]->var; + if (var->data.mode != nir_var_global) + continue; + + struct hash_entry *entry = + _mesa_hash_table_search(state->var_func_table, var); + + if (entry) { + if (entry->data != state->impl) + entry->data = NULL; + } else { + _mesa_hash_table_insert(state->var_func_table, var, state->impl); + } + } + } + + return true; +} + +void +nir_lower_global_vars_to_local(nir_shader *shader) +{ + struct global_to_local_state state; + + state.var_func_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + nir_foreach_overload(shader, overload) { + if (overload->impl) { + state.impl = overload->impl; + nir_foreach_block(overload->impl, mark_global_var_uses_block, &state); + } + } + + struct hash_entry *entry; + hash_table_foreach(state.var_func_table, entry) { + nir_variable *var = (void *)entry->key; + nir_function_impl *impl = entry->data; + + assert(var->data.mode == nir_var_global); + + if (impl != NULL) { + exec_node_remove(&var->node); + 
var->data.mode = nir_var_local; + exec_list_push_tail(&impl->locals, &var->node); + } + } + + _mesa_hash_table_destroy(state.var_func_table, NULL); +} diff --git a/mesalib/src/glsl/nir/nir_lower_io.c b/mesalib/src/glsl/nir/nir_lower_io.c new file mode 100644 index 000000000..207f8daa1 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_io.c @@ -0,0 +1,316 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +/* + * This lowering pass converts references to input/output variables with + * loads/stores to actual input/output intrinsics. + * + * NOTE: This pass really only works for scalar backends at the moment due + * to the way it packs the input/output data. 
+ */ + +#include "nir.h" + +struct lower_io_state { + void *mem_ctx; +}; + +static unsigned +type_size(const struct glsl_type *type) +{ + unsigned int size, i; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + return glsl_get_components(type); + case GLSL_TYPE_ARRAY: + return type_size(glsl_get_array_element(type)) * glsl_get_length(type); + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < glsl_get_length(type); i++) { + size += type_size(glsl_get_struct_field(type, i)); + } + return size; + case GLSL_TYPE_SAMPLER: + return 0; + case GLSL_TYPE_ATOMIC_UINT: + return 0; + case GLSL_TYPE_INTERFACE: + return 0; + case GLSL_TYPE_IMAGE: + return 0; + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_DOUBLE: + unreachable("not reached"); + } + + return 0; +} + +static void +assign_var_locations(struct hash_table *ht, unsigned *size) +{ + unsigned location = 0; + + struct hash_entry *entry; + hash_table_foreach(ht, entry) { + nir_variable *var = (nir_variable *) entry->data; + + /* + * UBO's have their own address spaces, so don't count them towards the + * number of global uniforms + */ + if (var->data.mode == nir_var_uniform && var->interface_type != NULL) + continue; + + var->data.driver_location = location; + location += type_size(var->type); + } + + *size = location; +} + +static void +assign_var_locations_shader(nir_shader *shader) +{ + assign_var_locations(shader->inputs, &shader->num_inputs); + assign_var_locations(shader->outputs, &shader->num_outputs); + assign_var_locations(shader->uniforms, &shader->num_uniforms); +} + +static bool +deref_has_indirect(nir_deref_var *deref) +{ + for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { + if (tail->deref_type == nir_deref_type_array) { + nir_deref_array *arr = nir_deref_as_array(tail); + if (arr->deref_array_type == nir_deref_array_type_indirect) + return true; + } + } + + return false; +} + +static 
unsigned +get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect, + struct lower_io_state *state) +{ + bool found_indirect = false; + unsigned base_offset = 0; + + nir_deref *tail = &deref->deref; + while (tail->child != NULL) { + const struct glsl_type *parent_type = tail->type; + tail = tail->child; + + if (tail->deref_type == nir_deref_type_array) { + nir_deref_array *deref_array = nir_deref_as_array(tail); + unsigned size = type_size(tail->type); + + base_offset += size * deref_array->base_offset; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_load_const_instr *load_const = + nir_load_const_instr_create(state->mem_ctx, 1); + load_const->value.u[0] = size; + nir_instr_insert_before(instr, &load_const->instr); + + nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx, + nir_op_imul); + mul->src[0].src.is_ssa = true; + mul->src[0].src.ssa = &load_const->def; + nir_src_copy(&mul->src[1].src, &deref_array->indirect, + state->mem_ctx); + mul->dest.write_mask = 1; + nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); + nir_instr_insert_before(instr, &mul->instr); + + if (found_indirect) { + nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx, + nir_op_iadd); + add->src[0].src = *indirect; + add->src[1].src.is_ssa = true; + add->src[1].src.ssa = &mul->dest.dest.ssa; + add->dest.write_mask = 1; + nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); + nir_instr_insert_before(instr, &add->instr); + + indirect->is_ssa = true; + indirect->ssa = &add->dest.dest.ssa; + } else { + indirect->is_ssa = true; + indirect->ssa = &mul->dest.dest.ssa; + found_indirect = true; + } + } + } else if (tail->deref_type == nir_deref_type_struct) { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + + for (unsigned i = 0; i < deref_struct->index; i++) + base_offset += type_size(glsl_get_struct_field(parent_type, i)); + } + } + + return base_offset; +} + +static bool +nir_lower_io_block(nir_block *block, void 
*void_state) +{ + struct lower_io_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: { + nir_variable_mode mode = intrin->variables[0]->var->data.mode; + if (mode != nir_var_shader_in && mode != nir_var_uniform) + continue; + + bool has_indirect = deref_has_indirect(intrin->variables[0]); + + /* Figure out the opcode */ + nir_intrinsic_op load_op; + switch (mode) { + case nir_var_shader_in: + load_op = has_indirect ? nir_intrinsic_load_input_indirect : + nir_intrinsic_load_input; + break; + case nir_var_uniform: + load_op = has_indirect ? nir_intrinsic_load_uniform_indirect : + nir_intrinsic_load_uniform; + break; + default: + unreachable("Unknown variable mode"); + } + + nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, + load_op); + load->num_components = intrin->num_components; + + nir_src indirect; + unsigned offset = get_io_offset(intrin->variables[0], + &intrin->instr, &indirect, state); + offset += intrin->variables[0]->var->data.driver_location; + + load->const_index[0] = offset; + load->const_index[1] = 1; + + if (has_indirect) + load->src[0] = indirect; + + if (intrin->dest.is_ssa) { + nir_ssa_dest_init(&load->instr, &load->dest, + intrin->num_components, NULL); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&load->dest.ssa), + state->mem_ctx); + } else { + nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx); + } + + nir_instr_insert_before(&intrin->instr, &load->instr); + nir_instr_remove(&intrin->instr); + break; + } + + case nir_intrinsic_store_var: { + if (intrin->variables[0]->var->data.mode != nir_var_shader_out) + continue; + + bool has_indirect = deref_has_indirect(intrin->variables[0]); + + nir_intrinsic_op store_op; + if (has_indirect) { + store_op = nir_intrinsic_store_output_indirect; + } else { + 
store_op = nir_intrinsic_store_output; + } + + nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, + store_op); + store->num_components = intrin->num_components; + + nir_src indirect; + unsigned offset = get_io_offset(intrin->variables[0], + &intrin->instr, &indirect, state); + offset += intrin->variables[0]->var->data.driver_location; + + store->const_index[0] = offset; + store->const_index[1] = 1; + + nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx); + + if (has_indirect) + store->src[1] = indirect; + + nir_instr_insert_before(&intrin->instr, &store->instr); + nir_instr_remove(&intrin->instr); + break; + } + + default: + break; + } + } + + return true; +} + +static void +nir_lower_io_impl(nir_function_impl *impl) +{ + struct lower_io_state state; + + state.mem_ctx = ralloc_parent(impl); + + nir_foreach_block(impl, nir_lower_io_block, &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_lower_io(nir_shader *shader) +{ + assign_var_locations_shader(shader); + + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_lower_io_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c new file mode 100644 index 000000000..8c5df7be6 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c @@ -0,0 +1,284 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the 
next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +struct locals_to_regs_state { + void *mem_ctx; + nir_function_impl *impl; + + /* A hash table mapping derefs to registers */ + struct hash_table *regs_table; +}; + +/* The following two functions implement a hash and equality check for + * variable dereferences. When the hash or equality function encounters an + * array, it ignores the offset and whether it is direct or indirect + * entirely. 
+ */ +static uint32_t +hash_deref(const void *void_deref) +{ + uint32_t hash = _mesa_fnv32_1a_offset_bias; + + const nir_deref_var *deref_var = void_deref; + hash = _mesa_fnv32_1a_accumulate(hash, deref_var->var); + + for (const nir_deref *deref = deref_var->deref.child; + deref; deref = deref->child) { + if (deref->deref_type == nir_deref_type_struct) { + const nir_deref_struct *deref_struct = nir_deref_as_struct(deref); + hash = _mesa_fnv32_1a_accumulate(hash, deref_struct->index); + } + } + + return hash; +} + +static bool +derefs_equal(const void *void_a, const void *void_b) +{ + const nir_deref_var *a_var = void_a; + const nir_deref_var *b_var = void_b; + + if (a_var->var != b_var->var) + return false; + + for (const nir_deref *a = a_var->deref.child, *b = b_var->deref.child; + a != NULL; a = a->child, b = b->child) { + if (a->deref_type != b->deref_type) + return false; + + if (a->deref_type == nir_deref_type_struct) { + if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index) + return false; + } + /* Do nothing for arrays. They're all the same. */ + + assert((a->child == NULL) == (b->child == NULL)); + if((a->child == NULL) != (b->child == NULL)) + return false; + } + + return true; +} + +static nir_register * +get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state) +{ + uint32_t hash = hash_deref(deref); + + struct hash_entry *entry = + _mesa_hash_table_search_pre_hashed(state->regs_table, hash, deref); + if (entry) + return entry->data; + + unsigned array_size = 1; + nir_deref *tail = &deref->deref; + while (tail->child) { + if (tail->child->deref_type == nir_deref_type_array) { + /* Multiply by the parent's type. 
*/ + if (glsl_type_is_matrix(tail->type)) { + array_size *= glsl_get_matrix_columns(tail->type); + } else { + assert(glsl_get_length(tail->type) > 0); + array_size *= glsl_get_length(tail->type); + } + } + tail = tail->child; + } + + assert(glsl_type_is_vector(tail->type) || glsl_type_is_scalar(tail->type)); + + nir_register *reg = nir_local_reg_create(state->impl); + reg->num_components = glsl_get_vector_elements(tail->type); + reg->num_array_elems = array_size > 1 ? array_size : 0; + + _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg); + + return reg; +} + +static nir_src +get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, + struct locals_to_regs_state *state) +{ + nir_src src; + + src.is_ssa = false; + src.reg.reg = get_reg_for_deref(deref, state); + src.reg.base_offset = 0; + src.reg.indirect = NULL; + + nir_deref *tail = &deref->deref; + while (tail->child != NULL) { + const struct glsl_type *parent_type = tail->type; + tail = tail->child; + + if (tail->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *deref_array = nir_deref_as_array(tail); + + src.reg.base_offset *= glsl_get_length(parent_type); + src.reg.base_offset += deref_array->base_offset; + + if (src.reg.indirect) { + nir_load_const_instr *load_const = + nir_load_const_instr_create(state->mem_ctx, 1); + load_const->value.u[0] = glsl_get_length(parent_type); + nir_instr_insert_before(instr, &load_const->instr); + + nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx, nir_op_imul); + mul->src[0].src = *src.reg.indirect; + mul->src[1].src.is_ssa = true; + mul->src[1].src.ssa = &load_const->def; + mul->dest.write_mask = 1; + nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); + nir_instr_insert_before(instr, &mul->instr); + + src.reg.indirect->is_ssa = true; + src.reg.indirect->ssa = &mul->dest.dest.ssa; + } + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + if (src.reg.indirect == NULL) { + src.reg.indirect = 
ralloc(state->mem_ctx, nir_src); + nir_src_copy(src.reg.indirect, &deref_array->indirect, + state->mem_ctx); + } else { + nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx, + nir_op_iadd); + add->src[0].src = *src.reg.indirect; + nir_src_copy(&add->src[1].src, &deref_array->indirect, + state->mem_ctx); + add->dest.write_mask = 1; + nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); + nir_instr_insert_before(instr, &add->instr); + + src.reg.indirect->is_ssa = true; + src.reg.indirect->ssa = &add->dest.dest.ssa; + } + } + } + + return src; +} + +static bool +lower_locals_to_regs_block(nir_block *block, void *void_state) +{ + struct locals_to_regs_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: { + if (intrin->variables[0]->var->data.mode != nir_var_local) + continue; + + nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov); + mov->src[0].src = get_deref_reg_src(intrin->variables[0], + &intrin->instr, state); + mov->dest.write_mask = (1 << intrin->num_components) - 1; + if (intrin->dest.is_ssa) { + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, + intrin->num_components, NULL); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&mov->dest.dest.ssa), + state->mem_ctx); + } else { + nir_dest_copy(&mov->dest.dest, &intrin->dest, state->mem_ctx); + } + nir_instr_insert_before(&intrin->instr, &mov->instr); + + nir_instr_remove(&intrin->instr); + break; + } + + case nir_intrinsic_store_var: { + if (intrin->variables[0]->var->data.mode != nir_var_local) + continue; + + nir_src reg_src = get_deref_reg_src(intrin->variables[0], + &intrin->instr, state); + + nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov); + nir_src_copy(&mov->src[0].src, &intrin->src[0], state->mem_ctx); + mov->dest.write_mask = (1 << 
intrin->num_components) - 1; + mov->dest.dest.is_ssa = false; + mov->dest.dest.reg.reg = reg_src.reg.reg; + mov->dest.dest.reg.base_offset = reg_src.reg.base_offset; + mov->dest.dest.reg.indirect = reg_src.reg.indirect; + + nir_instr_insert_before(&intrin->instr, &mov->instr); + + nir_instr_remove(&intrin->instr); + break; + } + + case nir_intrinsic_copy_var: + unreachable("There should be no copies whatsoever at this point"); + break; + + default: + continue; + } + } + + return true; +} + +static void +nir_lower_locals_to_regs_impl(nir_function_impl *impl) +{ + struct locals_to_regs_state state; + + state.mem_ctx = ralloc_parent(impl); + state.impl = impl; + state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal); + + nir_foreach_block(impl, lower_locals_to_regs_block, &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + _mesa_hash_table_destroy(state.regs_table, NULL); +} + +void +nir_lower_locals_to_regs(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_lower_locals_to_regs_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c new file mode 100644 index 000000000..7cd93ea0a --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c @@ -0,0 +1,290 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in 
all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements a pass that lowers vector phi nodes to scalar phi nodes when + * we don't think it will hurt anything. + */ + +struct lower_phis_to_scalar_state { + void *mem_ctx; + void *dead_ctx; + + /* Hash table marking which phi nodes are scalarizable. The key is + * pointers to phi instructions and the entry is either NULL for not + * scalarizable or non-null for scalarizable. + */ + struct hash_table *phi_table; +}; + +static bool +should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state); + +static bool +is_phi_src_scalarizable(nir_phi_src *src, + struct lower_phis_to_scalar_state *state) +{ + /* Don't know what to do with non-ssa sources */ + if (!src->src.is_ssa) + return false; + + nir_instr *src_instr = src->src.ssa->parent_instr; + switch (src_instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *src_alu = nir_instr_as_alu(src_instr); + + /* ALU operations with output_size == 0 should be scalarized. We + * will also see a bunch of vecN operations from scalarizing ALU + * operations and, since they can easily be copy-propagated, they + * are ok too. 
+ */ + return nir_op_infos[src_alu->op].output_size == 0 || + src_alu->op == nir_op_vec2 || + src_alu->op == nir_op_vec3 || + src_alu->op == nir_op_vec4; + } + + case nir_instr_type_phi: + /* A phi is scalarizable if we're going to lower it */ + return should_lower_phi(nir_instr_as_phi(src_instr), state); + + case nir_instr_type_load_const: + /* These are trivially scalarizable */ + return true; + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr); + + switch (src_intrin->intrinsic) { + case nir_intrinsic_load_var: + return src_intrin->variables[0]->var->data.mode == nir_var_shader_in || + src_intrin->variables[0]->var->data.mode == nir_var_uniform; + + case nir_intrinsic_interp_var_at_centroid: + case nir_intrinsic_interp_var_at_sample: + case nir_intrinsic_interp_var_at_offset: + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_uniform_indirect: + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ubo_indirect: + case nir_intrinsic_load_input: + case nir_intrinsic_load_input_indirect: + return true; + default: + break; + } + } + + default: + /* We can't scalarize this type of instruction */ + return false; + } +} + +/** + * Determines if the given phi node should be lowered. The only phi nodes + * we will scalarize at the moment are those where all of the sources are + * scalarizable. + * + * The reason for this comes down to coalescing. Since phi sources can't + * swizzle, swizzles on phis have to be resolved by inserting a mov right + * before the phi. The choice then becomes between movs to pick off + * components for a scalar phi or potentially movs to recombine components + * for a vector phi. The problem is that the movs generated to pick off + * the components are almost uncoalescable. 
We can't coalesce them in NIR + * because we need them to pick off components and we can't coalesce them + * in the backend because the source register is a vector and the + * destination is a scalar that may be used at other places in the program. + * On the other hand, if we have a bunch of scalars going into a vector + * phi, the situation is much better. In this case, if the SSA def is + * generated in the predecessor block to the corresponding phi source, the + * backend code will be an ALU op into a temporary and then a mov into the + * given vector component; this move can almost certainly be coalesced + * away. + */ +static bool +should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state) +{ + /* Already scalar */ + if (phi->dest.ssa.num_components == 1) + return false; + + struct hash_entry *entry = _mesa_hash_table_search(state->phi_table, phi); + if (entry) + return entry->data != NULL; + + /* Insert an entry and mark it as scalarizable for now. That way + * we don't recurse forever and a cycle in the dependence graph + * won't automatically make us fail to scalarize. + */ + entry = _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1); + + bool scalarizable = true; + + nir_foreach_phi_src(phi, src) { + scalarizable = is_phi_src_scalarizable(src, state); + if (!scalarizable) + break; + } + + entry->data = (void *)(intptr_t)scalarizable; + + return scalarizable; +} + +static bool +lower_phis_to_scalar_block(nir_block *block, void *void_state) +{ + struct lower_phis_to_scalar_state *state = void_state; + + /* Find the last phi node in the block */ + nir_phi_instr *last_phi = NULL; + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + last_phi = nir_instr_as_phi(instr); + } + + /* We have to handle the phi nodes in their own pass due to the way + * we're modifying the linked list of instructions. 
+ */ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + + if (!should_lower_phi(phi, state)) + continue; + + /* Create a vecN operation to combine the results. Most of these + * will be redundant, but copy propagation should clean them up for + * us. No need to add the complexity here. + */ + nir_op vec_op; + switch (phi->dest.ssa.num_components) { + case 2: vec_op = nir_op_vec2; break; + case 3: vec_op = nir_op_vec3; break; + case 4: vec_op = nir_op_vec4; break; + default: unreachable("Invalid number of components"); + } + + nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, + phi->dest.ssa.num_components, NULL); + vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; + + for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) { + nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx); + nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL); + + vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa); + + nir_foreach_phi_src(phi, src) { + /* We need to insert a mov to grab the i'th component of src */ + nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, + nir_op_imov); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL); + mov->dest.write_mask = 1; + nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx); + mov->src[0].swizzle[0] = i; + + /* Insert at the end of the predecessor but before the jump */ + nir_instr *pred_last_instr = nir_block_last_instr(src->pred); + if (pred_last_instr && pred_last_instr->type == nir_instr_type_jump) + nir_instr_insert_before(pred_last_instr, &mov->instr); + else + nir_instr_insert_after_block(src->pred, &mov->instr); + + nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src); + new_src->pred = src->pred; + new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa); + + exec_list_push_tail(&new_phi->srcs, &new_src->node); + } + + 
nir_instr_insert_before(&phi->instr, &new_phi->instr); + } + + nir_instr_insert_after(&last_phi->instr, &vec->instr); + + nir_ssa_def_rewrite_uses(&phi->dest.ssa, + nir_src_for_ssa(&vec->dest.dest.ssa), + state->mem_ctx); + + ralloc_steal(state->dead_ctx, phi); + nir_instr_remove(&phi->instr); + + /* We're using the safe iterator and inserting all the newly + * scalarized phi nodes before their non-scalarized version so that's + * ok. However, we are also inserting vec operations after all of + * the last phi node so once we get here, we can't trust even the + * safe iterator to stop properly. We have to break manually. + */ + if (instr == &last_phi->instr) + break; + } + + return true; +} + +static void +lower_phis_to_scalar_impl(nir_function_impl *impl) +{ + struct lower_phis_to_scalar_state state; + + state.mem_ctx = ralloc_parent(impl); + state.dead_ctx = ralloc_context(NULL); + state.phi_table = _mesa_hash_table_create(state.dead_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + nir_foreach_block(impl, lower_phis_to_scalar_block, &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + ralloc_free(state.dead_ctx); +} + +/** A pass that lowers vector phi nodes to scalar + * + * This pass loops through the blocks and lowers looks for vector phi nodes + * it can lower to scalar phi nodes. Not all phi nodes are lowered. For + * instance, if one of the sources is a non-scalarizable vector, then we + * don't bother lowering because that would generate hard-to-coalesce movs. 
+ */ +void +nir_lower_phis_to_scalar(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + lower_phis_to_scalar_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp new file mode 100644 index 000000000..3015dbd09 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nir.h" +#include "../program.h" +#include "program/hash_table.h" +#include "ir_uniform.h" + +extern "C" { +#include "main/compiler.h" +#include "main/mtypes.h" +#include "program/prog_parameter.h" +#include "program/program.h" +} + +static unsigned +get_sampler_index(struct gl_shader_program *shader_program, const char *name, + const struct gl_program *prog) +{ + GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); + + unsigned location; + if (!shader_program->UniformHash->get(location, name)) { + linker_error(shader_program, + "failed to find sampler named %s.\n", name); + return 0; + } + + if (!shader_program->UniformStorage[location].sampler[shader].active) { + assert(0 && "cannot return a sampler"); + linker_error(shader_program, + "cannot return a sampler named %s, because it is not " + "used in this shader stage. This is a driver bug.\n", + name); + return 0; + } + + return shader_program->UniformStorage[location].sampler[shader].index; +} + +static void +lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, + const struct gl_program *prog, void *mem_ctx) +{ + if (instr->sampler == NULL) + return; + + /* Get the name and the offset */ + instr->sampler_index = 0; + char *name = ralloc_strdup(mem_ctx, instr->sampler->var->name); + + for (nir_deref *deref = &instr->sampler->deref; + deref->child; deref = deref->child) { + switch (deref->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(deref->child); + + /* XXX: We're assuming here that the indirect is the last array + * thing we have. This should be ok for now as we don't support + * arrays_of_arrays yet. 
+ */ + + instr->sampler_index *= glsl_get_length(deref->type); + switch (deref_array->deref_array_type) { + case nir_deref_array_type_direct: + instr->sampler_index += deref_array->base_offset; + if (deref_array->deref.child) + ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset); + break; + case nir_deref_array_type_indirect: { + instr->src = reralloc(mem_ctx, instr->src, nir_tex_src, + instr->num_srcs + 1); + memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src); + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; + instr->num_srcs++; + + nir_instr_rewrite_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + deref_array->indirect); + + instr->sampler_array_size = glsl_get_length(deref->type); + + nir_src empty; + memset(&empty, 0, sizeof empty); + nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, empty); + + if (deref_array->deref.child) + ralloc_strcat(&name, "[0]"); + break; + } + + case nir_deref_array_type_wildcard: + unreachable("Cannot copy samplers"); + default: + unreachable("Invalid deref array type"); + } + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(deref->child); + const char *field = glsl_get_struct_elem_name(deref->type, + deref_struct->index); + ralloc_asprintf_append(&name, ".%s", field); + break; + } + + default: + unreachable("Invalid deref type"); + break; + } + } + + instr->sampler_index += get_sampler_index(shader_program, name, prog); + + instr->sampler = NULL; +} + +typedef struct { + void *mem_ctx; + struct gl_shader_program *shader_program; + struct gl_program *prog; +} lower_state; + +static bool +lower_block_cb(nir_block *block, void *_state) +{ + lower_state *state = (lower_state *) _state; + + nir_foreach_instr(block, instr) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex_instr = nir_instr_as_tex(instr); + lower_sampler(tex_instr, state->shader_program, state->prog, + state->mem_ctx); + } + } + + return true; 
+} + +static void +lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program, + struct gl_program *prog) +{ + lower_state state; + + state.mem_ctx = ralloc_parent(impl); + state.shader_program = shader_program; + state.prog = prog; + + nir_foreach_block(impl, lower_block_cb, &state); +} + +extern "C" void +nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program, + struct gl_program *prog) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + lower_impl(overload->impl, shader_program, prog); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_system_values.c b/mesalib/src/glsl/nir/nir_lower_system_values.c new file mode 100644 index 000000000..328d4f1ab --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_system_values.c @@ -0,0 +1,117 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include "main/mtypes.h" + +static void +convert_instr(nir_intrinsic_instr *instr) +{ + if (instr->intrinsic != nir_intrinsic_load_var) + return; + + nir_variable *var = instr->variables[0]->var; + if (var->data.mode != nir_var_system_value) + return; + + void *mem_ctx = ralloc_parent(instr); + + nir_intrinsic_op op; + + switch (var->data.location) { + case SYSTEM_VALUE_FRONT_FACE: + op = nir_intrinsic_load_front_face; + break; + case SYSTEM_VALUE_VERTEX_ID: + op = nir_intrinsic_load_vertex_id; + break; + case SYSTEM_VALUE_INSTANCE_ID: + op = nir_intrinsic_load_instance_id; + break; + case SYSTEM_VALUE_SAMPLE_ID: + op = nir_intrinsic_load_sample_id; + break; + case SYSTEM_VALUE_SAMPLE_POS: + op = nir_intrinsic_load_sample_pos; + break; + case SYSTEM_VALUE_SAMPLE_MASK_IN: + op = nir_intrinsic_load_sample_mask_in; + break; + case SYSTEM_VALUE_INVOCATION_ID: + op = nir_intrinsic_load_invocation_id; + break; + default: + unreachable("not reached"); + } + + nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); + + if (instr->dest.is_ssa) { + nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, + instr->dest.ssa.num_components, NULL); + nir_ssa_def_rewrite_uses(&instr->dest.ssa, + nir_src_for_ssa(&new_instr->dest.ssa), + mem_ctx); + } else { + nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx); + } + + nir_instr_insert_before(&instr->instr, &new_instr->instr); + nir_instr_remove(&instr->instr); +} + +static bool +convert_block(nir_block *block, void *state) +{ + (void) state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_intrinsic) + convert_instr(nir_instr_as_intrinsic(instr)); + } + + return true; +} + +static void +convert_impl(nir_function_impl *impl) +{ + nir_foreach_block(impl, convert_block, NULL); + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void 
+nir_lower_system_values(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + convert_impl(overload->impl); + } + + exec_list_make_empty(&shader->system_values); +} diff --git a/mesalib/src/glsl/nir/nir_lower_to_source_mods.c b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c new file mode 100644 index 000000000..d6bf77f17 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c @@ -0,0 +1,185 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * This pass lowers the neg, abs, and sat operations to source modifiers on + * ALU operations to make things nicer for the backend. It's just much + * easier to not have them when we're doing optimizations. 
+ */ + +static bool +nir_lower_to_source_mods_block(nir_block *block, void *state) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + if (!alu->src[i].src.is_ssa) + continue; + + if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *parent = nir_instr_as_alu(alu->src[i].src.ssa->parent_instr); + + if (parent->dest.saturate) + continue; + + switch (nir_op_infos[alu->op].input_types[i]) { + case nir_type_float: + if (parent->op != nir_op_fmov) + continue; + break; + case nir_type_int: + if (parent->op != nir_op_imov) + continue; + break; + default: + continue; + } + + nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src); + if (alu->src[i].abs) { + /* abs trumps both neg and abs, do nothing */ + } else { + alu->src[i].negate = (alu->src[i].negate != parent->src[0].negate); + alu->src[i].abs |= parent->src[0].abs; + } + + for (int j = 0; j < 4; ++j) { + if (!nir_alu_instr_channel_used(alu, i, j)) + continue; + alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]]; + } + + if (parent->dest.dest.ssa.uses->entries == 0 && + parent->dest.dest.ssa.if_uses->entries == 0) + nir_instr_remove(&parent->instr); + } + + switch (alu->op) { + case nir_op_fsat: + alu->op = nir_op_fmov; + alu->dest.saturate = true; + break; + case nir_op_ineg: + alu->op = nir_op_imov; + alu->src[0].negate = !alu->src[0].negate; + break; + case nir_op_fneg: + alu->op = nir_op_fmov; + alu->src[0].negate = !alu->src[0].negate; + break; + case nir_op_iabs: + alu->op = nir_op_imov; + alu->src[0].abs = true; + alu->src[0].negate = false; + break; + case nir_op_fabs: + alu->op = nir_op_fmov; + alu->src[0].abs = true; + alu->src[0].negate = false; + break; + default: + break; + } + + /* We've covered sources. Now we're going to try and saturate the + * destination if we can. 
+ */ + + if (!alu->dest.dest.is_ssa) + continue; + + /* We can only saturate float destinations */ + if (nir_op_infos[alu->op].output_type != nir_type_float) + continue; + + if (alu->dest.dest.ssa.if_uses->entries != 0) + continue; + + bool all_children_are_sat = true; + struct set_entry *entry; + set_foreach(alu->dest.dest.ssa.uses, entry) { + const nir_instr *child = entry->key; + if (child->type != nir_instr_type_alu) { + all_children_are_sat = false; + continue; + } + + nir_alu_instr *child_alu = nir_instr_as_alu(child); + if (child_alu->src[0].negate || child_alu->src[0].abs) { + all_children_are_sat = false; + continue; + } + + if (child_alu->op != nir_op_fsat && + !(child_alu->op == nir_op_fmov && child_alu->dest.saturate)) { + all_children_are_sat = false; + continue; + } + } + + if (!all_children_are_sat) + continue; + + alu->dest.saturate = true; + + set_foreach(alu->dest.dest.ssa.uses, entry) { + nir_alu_instr *child_alu = nir_instr_as_alu((nir_instr *)entry->key); + child_alu->op = nir_op_fmov; + child_alu->dest.saturate = false; + /* We could propagate the dest of our instruction to the + * destinations of the uses here. However, one quick round of + * copy propagation will clean that all up and then we don't have + * the complexity. 
+ */ + } + } + + return true; +} + +static void +nir_lower_to_source_mods_impl(nir_function_impl *impl) +{ + nir_foreach_block(impl, nir_lower_to_source_mods_block, NULL); +} + +void +nir_lower_to_source_mods(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_lower_to_source_mods_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c new file mode 100644 index 000000000..85ebb281c --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c @@ -0,0 +1,222 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" +#include "nir_types.h" + +/* + * Lowers all copy intrinsics to sequences of load/store intrinsics. 
+ */ + +/* Walks down the deref chain and returns the next deref in the chain whose + * child is a wildcard. In other words, given the chain a[1].foo[*].bar, + * this function will return the deref to foo. Calling it a second time + * with the [*].bar, it will return NULL. + */ +static nir_deref * +deref_next_wildcard_parent(nir_deref *deref) +{ + for (nir_deref *tail = deref; tail->child; tail = tail->child) { + if (tail->child->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *arr = nir_deref_as_array(tail->child); + + if (arr->deref_array_type == nir_deref_array_type_wildcard) + return tail; + } + + return NULL; +} + +/* Returns the last deref in the chain. + */ +static nir_deref * +get_deref_tail(nir_deref *deref) +{ + while (deref->child) + deref = deref->child; + + return deref; +} + +static int +type_get_length(const struct glsl_type *type) +{ + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + return glsl_get_length(type); + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(type)) + return glsl_get_matrix_columns(type); + else + return glsl_get_vector_elements(type); + default: + unreachable("Invalid deref base type"); + } +} + +/* This function recursively walks the given deref chain and replaces the + * given copy instruction with an equivalent sequence load/store + * operations. 
+ * + * @copy_instr The copy instruction to replace; new instructions will be + * inserted before this one + * + * @dest_head The head of the destination variable deref chain + * + * @src_head The head of the source variable deref chain + * + * @dest_tail The current tail of the destination variable deref chain; + * this is used for recursion and external callers of this + * function should call it with tail == head + * + * @src_tail The current tail of the source variable deref chain; + * this is used for recursion and external callers of this + * function should call it with tail == head + * + * @state The current variable lowering state + */ +static void +emit_copy_load_store(nir_intrinsic_instr *copy_instr, + nir_deref_var *dest_head, nir_deref_var *src_head, + nir_deref *dest_tail, nir_deref *src_tail, void *mem_ctx) +{ + /* Find the next pair of wildcards */ + nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail); + nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail); + + if (src_arr_parent || dest_arr_parent) { + /* Wildcards had better come in matched pairs */ + assert(dest_arr_parent && dest_arr_parent); + + nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child); + nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child); + + unsigned length = type_get_length(src_arr_parent->type); + /* The wildcards should represent the same number of elements */ + assert(length == type_get_length(dest_arr_parent->type)); + assert(length > 0); + + /* Walk over all of the elements that this wildcard refers to and + * call emit_copy_load_store on each one of them */ + src_arr->deref_array_type = nir_deref_array_type_direct; + dest_arr->deref_array_type = nir_deref_array_type_direct; + for (unsigned i = 0; i < length; i++) { + src_arr->base_offset = i; + dest_arr->base_offset = i; + emit_copy_load_store(copy_instr, dest_head, src_head, + &dest_arr->deref, &src_arr->deref, mem_ctx); + } + src_arr->deref_array_type = 
nir_deref_array_type_wildcard; + dest_arr->deref_array_type = nir_deref_array_type_wildcard; + } else { + /* In this case, we have no wildcards anymore, so all we have to do + * is just emit the load and store operations. */ + src_tail = get_deref_tail(src_tail); + dest_tail = get_deref_tail(dest_tail); + + assert(src_tail->type == dest_tail->type); + + unsigned num_components = glsl_get_vector_elements(src_tail->type); + + nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref); + nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var); + load->num_components = num_components; + load->variables[0] = nir_deref_as_var(src_deref); + nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); + + nir_instr_insert_before(©_instr->instr, &load->instr); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var); + store->num_components = num_components; + store->variables[0] = nir_deref_as_var(dest_deref); + store->src[0].is_ssa = true; + store->src[0].ssa = &load->dest.ssa; + + nir_instr_insert_before(©_instr->instr, &store->instr); + } +} + +/* Lowers a copy instruction to a sequence of load/store instructions + * + * The new instructions are placed before the copy instruction in the IR. 
+ */ +void +nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx) +{ + assert(copy->intrinsic == nir_intrinsic_copy_var); + emit_copy_load_store(copy, copy->variables[0], copy->variables[1], + &copy->variables[0]->deref, + &copy->variables[1]->deref, mem_ctx); +} + +static bool +lower_var_copies_block(nir_block *block, void *mem_ctx) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr); + if (copy->intrinsic != nir_intrinsic_copy_var) + continue; + + nir_lower_var_copy_instr(copy, mem_ctx); + + nir_instr_remove(&copy->instr); + ralloc_free(copy); + } + + return true; +} + +static void +lower_var_copies_impl(nir_function_impl *impl) +{ + nir_foreach_block(impl, lower_var_copies_block, ralloc_parent(impl)); +} + +/* Lowers every copy_var instruction in the program to a sequence of + * load/store instructions. + */ +void +nir_lower_var_copies(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + lower_var_copies_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c new file mode 100644 index 000000000..8af753029 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -0,0 +1,1102 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of
the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +struct deref_node { + struct deref_node *parent; + const struct glsl_type *type; + + bool lower_to_ssa; + + struct set *loads; + struct set *stores; + struct set *copies; + + nir_ssa_def **def_stack; + nir_ssa_def **def_stack_tail; + + struct deref_node *wildcard; + struct deref_node *indirect; + struct deref_node *children[0]; +}; + +struct lower_variables_state { + void *mem_ctx; + void *dead_ctx; + nir_function_impl *impl; + + /* A hash table mapping variables to deref_node data */ + struct hash_table *deref_var_nodes; + + /* A hash table mapping fully-qualified direct dereferences, i.e. + * dereferences with no indirect or wildcard array dereferences, to + * deref_node data. + * + * At the moment, we only lower loads, stores, and copies that can be + * trivially lowered to loads and stores, i.e. copies with no indirects + * and no wildcards. If a part of a variable that is being loaded from + * and/or stored into is also involved in a copy operation with + * wildcards, then we lower that copy operation to loads and stores, but + * otherwise we leave copies with wildcards alone. Since the only derefs + * used in these loads, stores, and trivial copies are ones with no + * wildcards and no indirects, these are precisely the derefs that we + * can actually consider lowering. 
+ */ + struct hash_table *direct_deref_nodes; + + /* Controls whether get_deref_node will add variables to the + * direct_deref_nodes table. This is turned on when we are initially + * scanning for load/store instructions. It is then turned off so we + * don't accidentally change the direct_deref_nodes table while we're + * iterating throug it. + */ + bool add_to_direct_deref_nodes; + + /* A hash table mapping phi nodes to deref_state data */ + struct hash_table *phi_table; +}; + +/* The following two functions implement a hash and equality check for + * variable dreferences. When the hash or equality function encounters an + * array, all indirects are treated as equal and are never equal to a + * direct dereference or a wildcard. + */ +static uint32_t +hash_deref(const void *void_deref) +{ + uint32_t hash = _mesa_fnv32_1a_offset_bias; + + const nir_deref_var *deref_var = void_deref; + hash = _mesa_fnv32_1a_accumulate(hash, deref_var->var); + + for (const nir_deref *deref = deref_var->deref.child; + deref; deref = deref->child) { + switch (deref->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(deref); + + hash = _mesa_fnv32_1a_accumulate(hash, deref_array->deref_array_type); + + if (deref_array->deref_array_type == nir_deref_array_type_direct) + hash = _mesa_fnv32_1a_accumulate(hash, deref_array->base_offset); + break; + } + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(deref); + hash = _mesa_fnv32_1a_accumulate(hash, deref_struct->index); + break; + } + default: + assert("Invalid deref chain"); + } + } + + return hash; +} + +static bool +derefs_equal(const void *void_a, const void *void_b) +{ + const nir_deref_var *a_var = void_a; + const nir_deref_var *b_var = void_b; + + if (a_var->var != b_var->var) + return false; + + for (const nir_deref *a = a_var->deref.child, *b = b_var->deref.child; + a != NULL; a = a->child, b = b->child) { + if (a->deref_type != b->deref_type) + 
return false; + + switch (a->deref_type) { + case nir_deref_type_array: { + nir_deref_array *a_arr = nir_deref_as_array(a); + nir_deref_array *b_arr = nir_deref_as_array(b); + + if (a_arr->deref_array_type != b_arr->deref_array_type) + return false; + + if (a_arr->deref_array_type == nir_deref_array_type_direct && + a_arr->base_offset != b_arr->base_offset) + return false; + break; + } + case nir_deref_type_struct: + if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index) + return false; + break; + default: + assert("Invalid deref chain"); + return false; + } + + assert((a->child == NULL) == (b->child == NULL)); + if((a->child == NULL) != (b->child == NULL)) + return false; + } + + return true; +} + +static int +type_get_length(const struct glsl_type *type) +{ + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + return glsl_get_length(type); + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(type)) + return glsl_get_matrix_columns(type); + else + return glsl_get_vector_elements(type); + default: + unreachable("Invalid deref base type"); + } +} + +static struct deref_node * +deref_node_create(struct deref_node *parent, + const struct glsl_type *type, void *mem_ctx) +{ + size_t size = sizeof(struct deref_node) + + type_get_length(type) * sizeof(struct deref_node *); + + struct deref_node *node = rzalloc_size(mem_ctx, size); + node->type = type; + node->parent = parent; + + return node; +} + +/* Returns the deref node associated with the given variable. This will be + * the root of the tree representing all of the derefs of the given variable. 
+ */ +static struct deref_node * +get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state) +{ + struct deref_node *node; + + struct hash_entry *var_entry = + _mesa_hash_table_search(state->deref_var_nodes, var); + + if (var_entry) { + return var_entry->data; + } else { + node = deref_node_create(NULL, var->type, state->dead_ctx); + _mesa_hash_table_insert(state->deref_var_nodes, var, node); + return node; + } +} + +/* Gets the deref_node for the given deref chain and creates it if it + * doesn't yet exist. If the deref is fully-qualified and direct and + * state->add_to_direct_deref_nodes is true, it will be added to the hash + * table of of fully-qualified direct derefs. + */ +static struct deref_node * +get_deref_node(nir_deref_var *deref, struct lower_variables_state *state) +{ + bool is_direct = true; + + /* Start at the base of the chain. */ + struct deref_node *node = get_deref_node_for_var(deref->var, state); + assert(deref->deref.type == node->type); + + for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { + switch (tail->deref_type) { + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + + assert(deref_struct->index < type_get_length(node->type)); + + if (node->children[deref_struct->index] == NULL) + node->children[deref_struct->index] = + deref_node_create(node, tail->type, state->dead_ctx); + + node = node->children[deref_struct->index]; + break; + } + + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(tail); + + switch (arr->deref_array_type) { + case nir_deref_array_type_direct: + /* This is possible if a loop unrolls and generates an + * out-of-bounds offset. We need to handle this at least + * somewhat gracefully. 
+ */ + if (arr->base_offset >= type_get_length(node->type)) + return NULL; + + if (node->children[arr->base_offset] == NULL) + node->children[arr->base_offset] = + deref_node_create(node, tail->type, state->dead_ctx); + + node = node->children[arr->base_offset]; + break; + + case nir_deref_array_type_indirect: + if (node->indirect == NULL) + node->indirect = deref_node_create(node, tail->type, + state->dead_ctx); + + node = node->indirect; + is_direct = false; + break; + + case nir_deref_array_type_wildcard: + if (node->wildcard == NULL) + node->wildcard = deref_node_create(node, tail->type, + state->dead_ctx); + + node = node->wildcard; + is_direct = false; + break; + + default: + unreachable("Invalid array deref type"); + } + break; + } + default: + unreachable("Invalid deref type"); + } + } + + assert(node); + + if (is_direct && state->add_to_direct_deref_nodes) + _mesa_hash_table_insert(state->direct_deref_nodes, deref, node); + + return node; +} + +/* \sa foreach_deref_node_match */ +static bool +foreach_deref_node_worker(struct deref_node *node, nir_deref *deref, + bool (* cb)(struct deref_node *node, + struct lower_variables_state *state), + struct lower_variables_state *state) +{ + if (deref->child == NULL) { + return cb(node, state); + } else { + switch (deref->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(deref->child); + assert(arr->deref_array_type == nir_deref_array_type_direct); + if (node->children[arr->base_offset] && + !foreach_deref_node_worker(node->children[arr->base_offset], + deref->child, cb, state)) + return false; + + if (node->wildcard && + !foreach_deref_node_worker(node->wildcard, + deref->child, cb, state)) + return false; + + return true; + } + + case nir_deref_type_struct: { + nir_deref_struct *str = nir_deref_as_struct(deref->child); + return foreach_deref_node_worker(node->children[str->index], + deref->child, cb, state); + } + + default: + unreachable("Invalid deref child type"); + 
} + } +} + +/* Walks over every "matching" deref_node and calls the callback. A node + * is considered to "match" if either refers to that deref or matches up t + * a wildcard. In other words, the following would match a[6].foo[3].bar: + * + * a[6].foo[3].bar + * a[*].foo[3].bar + * a[6].foo[*].bar + * a[*].foo[*].bar + * + * The given deref must be a full-length and fully qualified (no wildcards + * or indirects) deref chain. + */ +static bool +foreach_deref_node_match(nir_deref_var *deref, + bool (* cb)(struct deref_node *node, + struct lower_variables_state *state), + struct lower_variables_state *state) +{ + nir_deref_var var_deref = *deref; + var_deref.deref.child = NULL; + struct deref_node *node = get_deref_node(&var_deref, state); + + if (node == NULL) + return false; + + return foreach_deref_node_worker(node, &deref->deref, cb, state); +} + +/* \sa deref_may_be_aliased */ +static bool +deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref, + struct lower_variables_state *state) +{ + if (deref->child == NULL) { + return false; + } else { + switch (deref->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(deref->child); + if (arr->deref_array_type == nir_deref_array_type_indirect) + return true; + + assert(arr->deref_array_type == nir_deref_array_type_direct); + + if (node->children[arr->base_offset] && + deref_may_be_aliased_node(node->children[arr->base_offset], + deref->child, state)) + return true; + + if (node->wildcard && + deref_may_be_aliased_node(node->wildcard, deref->child, state)) + return true; + + return false; + } + + case nir_deref_type_struct: { + nir_deref_struct *str = nir_deref_as_struct(deref->child); + if (node->children[str->index]) { + return deref_may_be_aliased_node(node->children[str->index], + deref->child, state); + } else { + return false; + } + } + + default: + unreachable("Invalid nir_deref child type"); + } + } +} + +/* Returns true if there are no indirects that can 
ever touch this deref. + * + * For example, if the given deref is a[6].foo, then any uses of a[i].foo + * would cause this to return false, but a[i].bar would not affect it + * because it's a different structure member. A var_copy involving of + * a[*].bar also doesn't affect it because that can be lowered to entirely + * direct load/stores. + * + * We only support asking this question about fully-qualified derefs. + * Obviously, it's pointless to ask this about indirects, but we also + * rule-out wildcards. Handling Wildcard dereferences would involve + * checking each array index to make sure that there aren't any indirect + * references. + */ +static bool +deref_may_be_aliased(nir_deref_var *deref, + struct lower_variables_state *state) +{ + return deref_may_be_aliased_node(get_deref_node_for_var(deref->var, state), + &deref->deref, state); +} + +static void +register_load_instr(nir_intrinsic_instr *load_instr, + struct lower_variables_state *state) +{ + struct deref_node *node = get_deref_node(load_instr->variables[0], state); + if (node == NULL) + return; + + if (node->loads == NULL) + node->loads = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + _mesa_set_add(node->loads, load_instr); +} + +static void +register_store_instr(nir_intrinsic_instr *store_instr, + struct lower_variables_state *state) +{ + struct deref_node *node = get_deref_node(store_instr->variables[0], state); + if (node == NULL) + return; + + if (node->stores == NULL) + node->stores = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + _mesa_set_add(node->stores, store_instr); +} + +static void +register_copy_instr(nir_intrinsic_instr *copy_instr, + struct lower_variables_state *state) +{ + for (unsigned idx = 0; idx < 2; idx++) { + struct deref_node *node = + get_deref_node(copy_instr->variables[idx], state); + + if (node == NULL) + continue; + + if (node->copies == NULL) + node->copies = _mesa_set_create(state->dead_ctx, 
_mesa_hash_pointer, + _mesa_key_pointer_equal); + + _mesa_set_add(node->copies, copy_instr); + } +} + +/* Registers all variable uses in the given block. */ +static bool +register_variable_uses_block(nir_block *block, void *void_state) +{ + struct lower_variables_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: + register_load_instr(intrin, state); + break; + + case nir_intrinsic_store_var: + register_store_instr(intrin, state); + break; + + case nir_intrinsic_copy_var: + register_copy_instr(intrin, state); + break; + + default: + continue; + } + } + + return true; +} + +/* Walks over all of the copy instructions to or from the given deref_node + * and lowers them to load/store intrinsics. + */ +static bool +lower_copies_to_load_store(struct deref_node *node, + struct lower_variables_state *state) +{ + if (!node->copies) + return true; + + struct set_entry *copy_entry; + set_foreach(node->copies, copy_entry) { + nir_intrinsic_instr *copy = (void *)copy_entry->key; + + nir_lower_var_copy_instr(copy, state->mem_ctx); + + for (unsigned i = 0; i < 2; ++i) { + struct deref_node *arg_node = + get_deref_node(copy->variables[i], state); + + if (arg_node == NULL) + continue; + + struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy); + assert(arg_entry); + _mesa_set_remove(node->copies, arg_entry); + } + + nir_instr_remove(©->instr); + } + + return true; +} + +/* Returns a load_const instruction that represents the constant + * initializer for the given deref chain. The caller is responsible for + * ensuring that there actually is a constant initializer. 
+ */ +static nir_load_const_instr * +get_const_initializer_load(const nir_deref_var *deref, + struct lower_variables_state *state) +{ + nir_constant *constant = deref->var->constant_initializer; + const nir_deref *tail = &deref->deref; + unsigned matrix_offset = 0; + while (tail->child) { + switch (tail->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(tail->child); + assert(arr->deref_array_type == nir_deref_array_type_direct); + if (glsl_type_is_matrix(tail->type)) { + assert(arr->deref.child == NULL); + matrix_offset = arr->base_offset; + } else { + constant = constant->elements[arr->base_offset]; + } + break; + } + + case nir_deref_type_struct: { + constant = constant->elements[nir_deref_as_struct(tail->child)->index]; + break; + } + + default: + unreachable("Invalid deref child type"); + } + + tail = tail->child; + } + + nir_load_const_instr *load = + nir_load_const_instr_create(state->mem_ctx, + glsl_get_vector_elements(tail->type)); + + matrix_offset *= load->def.num_components; + for (unsigned i = 0; i < load->def.num_components; i++) { + switch (glsl_get_base_type(tail->type)) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + load->value.u[i] = constant->value.u[matrix_offset + i]; + break; + case GLSL_TYPE_BOOL: + load->value.u[i] = constant->value.b[matrix_offset + i] ? + NIR_TRUE : NIR_FALSE; + break; + default: + unreachable("Invalid immediate type"); + } + } + + return load; +} + +/** Pushes an SSA def onto the def stack for the given node + * + * Each node is potentially associated with a stack of SSA definitions. + * This stack is used for determining what SSA definition reaches a given + * point in the program for variable renaming. The stack is always kept in + * dominance-order with at most one SSA def per block. If the SSA + * definition on the top of the stack is in the same block as the one being + * pushed, the top element is replaced. 
 */
static void
def_stack_push(struct deref_node *node, nir_ssa_def *def,
               struct lower_variables_state *state)
{
   /* The stack is allocated lazily with one slot per block, which is enough
    * because it holds at most one def per block.  An empty stack is
    * represented by the tail pointing one slot before the array.
    */
   if (node->def_stack == NULL) {
      node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *,
                                     state->impl->num_blocks);
      node->def_stack_tail = node->def_stack - 1;
   }

   if (node->def_stack_tail >= node->def_stack) {
      nir_ssa_def *top_def = *node->def_stack_tail;

      if (def->parent_instr->block == top_def->parent_instr->block) {
         /* They're in the same block, just replace the top */
         *node->def_stack_tail = def;
         return;
      }
   }

   *(++node->def_stack_tail) = def;
}

/* Pop the top of the def stack if it's in the given block */
static void
def_stack_pop_if_in_block(struct deref_node *node, nir_block *block)
{
   /* If we're popping, then we have presumably pushed at some time in the
    * past so this should exist.
    */
   assert(node->def_stack != NULL);

   /* The stack is already empty.  Do nothing. */
   if (node->def_stack_tail < node->def_stack)
      return;

   nir_ssa_def *def = *node->def_stack_tail;
   if (def->parent_instr->block == block)
      node->def_stack_tail--;
}

/** Retrieves the SSA definition on the top of the stack for the given
 * node, if one exists.  If the stack is empty, a new SSA undef is created,
 * inserted at the start of the function body, pushed onto the stack and
 * returned.  (A constant initializer, if any, is expected to have been
 * pushed onto the stack beforehand by the caller of this pass.)
 */
static nir_ssa_def *
get_ssa_def_for_block(struct deref_node *node, nir_block *block,
                      struct lower_variables_state *state)
{
   /* If we have something on the stack, go ahead and return it.  We're
    * assuming that the top of the stack dominates the given block.
    */
   if (node->def_stack && node->def_stack_tail >= node->def_stack)
      return *node->def_stack_tail;

   /* If we got here then we don't have a definition that dominates the
    * given block.  This means that we need to add an undef and use that.
    */
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(state->mem_ctx,
                                 glsl_get_vector_elements(node->type));
   nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr);
   def_stack_push(node, &undef->def, state);
   return &undef->def;
}

/* Given a block and one of its predecessors, this function fills in the
 * sources of the phi nodes to take SSA defs from the given predecessor.
 * This function must be called exactly once per block/predecessor pair.
 */
static void
add_phi_sources(nir_block *block, nir_block *pred,
                struct lower_variables_state *state)
{
   nir_foreach_instr(block, instr) {
      /* Stop at the first non-phi instruction. */
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);

      /* Phis that are not in phi_table were not created by this pass. */
      struct hash_entry *entry =
            _mesa_hash_table_search(state->phi_table, phi);
      if (!entry)
         continue;

      struct deref_node *node = entry->data;

      nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src);
      src->pred = pred;
      src->src.is_ssa = true;
      src->src.ssa = get_ssa_def_for_block(node, pred, state);

      _mesa_set_add(src->src.ssa->uses, instr);

      exec_list_push_tail(&phi->srcs, &src->node);
   }
}

/* Performs variable renaming by doing a DFS of the dominance tree
 *
 * This algorithm is very similar to the one outlined in "Efficiently
 * Computing Static Single Assignment Form and the Control Dependence
 * Graph" by Cytron et al.  The primary difference is that we only put one
 * SSA def on the stack per block.
 *
 * The function works in three steps: rewrite the block's own loads and
 * stores (pushing new defs), fill in phi sources of the successors and
 * recurse into the dominance-tree children, then pop everything this block
 * pushed and delete the lowered stores.
 */
static bool
rename_variables_block(nir_block *block, struct lower_variables_state *state)
{
   nir_foreach_instr_safe(block, instr) {
      if (instr->type == nir_instr_type_phi) {
         nir_phi_instr *phi = nir_instr_as_phi(instr);

         struct hash_entry *entry =
            _mesa_hash_table_search(state->phi_table, phi);

         /* This can happen if we already have phi nodes in the program
          * that were not created in this pass.
          */
         if (!entry)
            continue;

         struct deref_node *node = entry->data;

         def_stack_push(node, &phi->dest.ssa, state);
      } else if (instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_var: {
            struct deref_node *node =
               get_deref_node(intrin->variables[0], state);

            if (node == NULL) {
               /* If we hit this path then we are referencing an invalid
                * value.  Most likely, we unrolled something and are
                * reading past the end of some array.  In any case, this
                * should result in an undefined value.
                */
               nir_ssa_undef_instr *undef =
                  nir_ssa_undef_instr_create(state->mem_ctx,
                                             intrin->num_components);

               nir_instr_insert_before(&intrin->instr, &undef->instr);
               nir_instr_remove(&intrin->instr);

               nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                        nir_src_for_ssa(&undef->def),
                                        state->mem_ctx);
               continue;
            }

            if (!node->lower_to_ssa)
               continue;

            /* Replace the load with a move from the reaching definition. */
            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
                                                      nir_op_imov);
            mov->src[0].src.is_ssa = true;
            mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state);
            /* Point the unused swizzle channels at component 0. */
            for (unsigned i = intrin->num_components; i < 4; i++)
               mov->src[0].swizzle[i] = 0;

            assert(intrin->dest.is_ssa);

            mov->dest.write_mask = (1 << intrin->num_components) - 1;
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
                              intrin->num_components, NULL);

            nir_instr_insert_before(&intrin->instr, &mov->instr);
            nir_instr_remove(&intrin->instr);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&mov->dest.dest.ssa),
                                     state->mem_ctx);
            break;
         }

         case nir_intrinsic_store_var: {
            struct deref_node *node =
               get_deref_node(intrin->variables[0], state);

            if (node == NULL) {
               /* Probably an out-of-bounds array store.  That should be a
                * no-op. */
               nir_instr_remove(&intrin->instr);
               continue;
            }

            if (!node->lower_to_ssa)
               continue;

            assert(intrin->num_components ==
                   glsl_get_vector_elements(node->type));

            assert(intrin->src[0].is_ssa);

            /* The stored value becomes the new reaching definition, wrapped
             * in a move inserted at the position of the store.
             */
            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
                                                      nir_op_imov);
            mov->src[0].src.is_ssa = true;
            mov->src[0].src.ssa = intrin->src[0].ssa;
            for (unsigned i = intrin->num_components; i < 4; i++)
               mov->src[0].swizzle[i] = 0;

            mov->dest.write_mask = (1 << intrin->num_components) - 1;
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
                              intrin->num_components, NULL);

            nir_instr_insert_before(&intrin->instr, &mov->instr);

            def_stack_push(node, &mov->dest.dest.ssa, state);

            /* We'll wait to remove the instruction until the next pass
             * where we pop the node we just pushed back off the stack.
             */
            break;
         }

         default:
            break;
         }
      }
   }

   if (block->successors[0])
      add_phi_sources(block->successors[0], block, state);
   if (block->successors[1])
      add_phi_sources(block->successors[1], block, state);

   for (unsigned i = 0; i < block->num_dom_children; ++i)
      rename_variables_block(block->dom_children[i], state);

   /* Now we iterate over the instructions and pop off any SSA defs that we
    * pushed in the first loop.
    */
   nir_foreach_instr_safe(block, instr) {
      if (instr->type == nir_instr_type_phi) {
         nir_phi_instr *phi = nir_instr_as_phi(instr);

         struct hash_entry *entry =
            _mesa_hash_table_search(state->phi_table, phi);

         /* This can happen if we already have phi nodes in the program
          * that were not created in this pass.
          */
         if (!entry)
            continue;

         struct deref_node *node = entry->data;

         def_stack_pop_if_in_block(node, block);
      } else if (instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         if (intrin->intrinsic != nir_intrinsic_store_var)
            continue;

         struct deref_node *node = get_deref_node(intrin->variables[0], state);
         if (!node)
            continue;

         if (!node->lower_to_ssa)
            continue;

         def_stack_pop_if_in_block(node, block);
         nir_instr_remove(&intrin->instr);
      }
   }

   return true;
}

/* Inserts phi nodes for all variables marked lower_to_ssa
 *
 * This is the same algorithm as presented in "Efficiently Computing Static
 * Single Assignment Form and the Control Dependence Graph" by Cytron et
 * al.
 *
 * Every phi created here is recorded in state->phi_table together with the
 * deref_node it stands for.
 */
static void
insert_phi_nodes(struct lower_variables_state *state)
{
   /* Per-block markers, indexed by block index.  Both hold the value of
    * iter_count from the last iteration that touched the block, so they
    * never need to be cleared between variables.
    */
   unsigned work[state->impl->num_blocks];
   unsigned has_already[state->impl->num_blocks];

   /*
    * Since the work flags already prevent us from inserting a node that has
    * ever been inserted into W, we don't need to use a set to represent W.
    * Also, since no block can ever be inserted into W more than once, we know
    * that the maximum size of W is the number of basic blocks in the
    * function.  So all we need to handle W is an array and a pointer to the
    * next element to be inserted and the next element to be removed.
    */
   nir_block *W[state->impl->num_blocks];

   memset(work, 0, sizeof work);
   memset(has_already, 0, sizeof has_already);

   unsigned w_start, w_end;
   unsigned iter_count = 0;

   struct hash_entry *deref_entry;
   hash_table_foreach(state->direct_deref_nodes, deref_entry) {
      struct deref_node *node = deref_entry->data;

      /* A variable that is never stored to needs no phi nodes. */
      if (node->stores == NULL)
         continue;

      if (!node->lower_to_ssa)
         continue;

      w_start = w_end = 0;
      iter_count++;

      /* Seed the worklist with every block that stores to this node. */
      struct set_entry *store_entry;
      set_foreach(node->stores, store_entry) {
         nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key;
         if (work[store->instr.block->index] < iter_count)
            W[w_end++] = store->instr.block;
         work[store->instr.block->index] = iter_count;
      }

      while (w_start != w_end) {
         nir_block *cur = W[w_start++];
         struct set_entry *dom_entry;
         set_foreach(cur->dom_frontier, dom_entry) {
            nir_block *next = (nir_block *) dom_entry->key;

            /*
             * If there's more than one return statement, then the end block
             * can be a join point for some definitions. However, there are
             * no instructions in the end block, so nothing would use those
             * phi nodes. Of course, we couldn't place those phi nodes
             * anyways due to the restriction of having no instructions in the
             * end block...
             */
            if (next == state->impl->end_block)
               continue;

            if (has_already[next->index] < iter_count) {
               nir_phi_instr *phi = nir_phi_instr_create(state->mem_ctx);
               nir_ssa_dest_init(&phi->instr, &phi->dest,
                                 glsl_get_vector_elements(node->type), NULL);
               nir_instr_insert_before_block(next, &phi->instr);

               _mesa_hash_table_insert(state->phi_table, phi, node);

               has_already[next->index] = iter_count;
               /* The phi is itself a definition, so its block has to be
                * processed as well.
                */
               if (work[next->index] < iter_count) {
                  work[next->index] = iter_count;
                  W[w_end++] = next;
               }
            }
         }
      }
   }
}


/** Implements a pass to lower variable uses to SSA values
 *
 * This pass walks the list of instructions and tries to lower as many
 * local variable load/store operations to SSA defs and uses as it can.
 * The process involves four passes:
 *
 *  1) Iterate over all of the instructions and mark where each local
 *     variable deref is used in a load, store, or copy.  While we're at
 *     it, we keep track of all of the fully-qualified (no wildcards) and
 *     fully-direct references we see and store them in the
 *     direct_deref_nodes hash table.
 *
 *  2) Walk over the list of fully-qualified direct derefs generated in
 *     the previous pass.  For each deref, we determine if it can ever be
 *     aliased, i.e. if there is an indirect reference anywhere that may
 *     refer to it.  If it cannot be aliased, we mark it for lowering to an
 *     SSA value.  At this point, we lower any var_copy instructions that
 *     use the given deref to load/store operations and, if the deref has a
 *     constant initializer, we go ahead and add a load_const value at the
 *     beginning of the function with the initialized value.
 *
 *  3) Walk over the list of derefs we plan to lower to SSA values and
 *     insert phi nodes as needed.
 *
 *  4) Perform "variable renaming" by replacing the load/store instructions
 *     with SSA definitions and SSA uses.
+ */ +static bool +nir_lower_vars_to_ssa_impl(nir_function_impl *impl) +{ + struct lower_variables_state state; + + state.mem_ctx = ralloc_parent(impl); + state.dead_ctx = ralloc_context(state.mem_ctx); + state.impl = impl; + + state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + state.direct_deref_nodes = _mesa_hash_table_create(state.dead_ctx, + hash_deref, derefs_equal); + state.phi_table = _mesa_hash_table_create(state.dead_ctx, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + /* Build the initial deref structures and direct_deref_nodes table */ + state.add_to_direct_deref_nodes = true; + nir_foreach_block(impl, register_variable_uses_block, &state); + + struct set *outputs = _mesa_set_create(state.dead_ctx, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + bool progress = false; + + nir_metadata_require(impl, nir_metadata_block_index); + + /* We're about to iterate through direct_deref_nodes. Don't modify it. */ + state.add_to_direct_deref_nodes = false; + + struct hash_entry *entry; + hash_table_foreach(state.direct_deref_nodes, entry) { + nir_deref_var *deref = (void *)entry->key; + struct deref_node *node = entry->data; + + if (deref->var->data.mode != nir_var_local) { + _mesa_hash_table_remove(state.direct_deref_nodes, entry); + continue; + } + + if (deref_may_be_aliased(deref, &state)) { + _mesa_hash_table_remove(state.direct_deref_nodes, entry); + continue; + } + + node->lower_to_ssa = true; + progress = true; + + if (deref->var->constant_initializer) { + nir_load_const_instr *load = get_const_initializer_load(deref, &state); + nir_ssa_def_init(&load->instr, &load->def, + glsl_get_vector_elements(node->type), NULL); + nir_instr_insert_before_cf_list(&impl->body, &load->instr); + def_stack_push(node, &load->def, &state); + } + + if (deref->var->data.mode == nir_var_shader_out) + _mesa_set_add(outputs, node); + + foreach_deref_node_match(deref, lower_copies_to_load_store, &state); + } 
+ + if (!progress) + return false; + + nir_metadata_require(impl, nir_metadata_dominance); + + /* We may have lowered some copy instructions to load/store + * instructions. The uses from the copy instructions hav already been + * removed but we need to rescan to ensure that the uses from the newly + * added load/store instructions are registered. We need this + * information for phi node insertion below. + */ + nir_foreach_block(impl, register_variable_uses_block, &state); + + insert_phi_nodes(&state); + rename_variables_block(impl->start_block, &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + ralloc_free(state.dead_ctx); + + return progress; +} + +void +nir_lower_vars_to_ssa(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_lower_vars_to_ssa_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_lower_vec_to_movs.c b/mesalib/src/glsl/nir/nir_lower_vec_to_movs.c new file mode 100644 index 000000000..602853ea6 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_lower_vec_to_movs.c @@ -0,0 +1,155 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements a simple pass that lowers vecN instructions to a series of + * moves with partial writes. + */ + +static bool +src_matches_dest_reg(nir_dest *dest, nir_src *src) +{ + if (dest->is_ssa || src->is_ssa) + return false; + + return (dest->reg.reg == src->reg.reg && + dest->reg.base_offset == src->reg.base_offset && + !dest->reg.indirect && + !src->reg.indirect); +} + +/** + * For a given starting writemask channel and corresponding source index in + * the vec instruction, insert a MOV to the vec instruction's dest of all the + * writemask channels that get read from the same src reg. + * + * Returns the writemask of our MOV, so the parent loop calling this knows + * which ones have been processed. 
+ */ +static unsigned +insert_mov(nir_alu_instr *vec, unsigned start_channel, + unsigned start_src_idx, void *mem_ctx) +{ + unsigned src_idx = start_src_idx; + assert(src_idx < nir_op_infos[vec->op].num_inputs); + + nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov); + nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mem_ctx); + nir_alu_dest_copy(&mov->dest, &vec->dest, mem_ctx); + + mov->dest.write_mask = (1u << start_channel); + mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0]; + src_idx++; + + for (unsigned i = start_channel + 1; i < 4; i++) { + if (!(vec->dest.write_mask & (1 << i))) + continue; + + if (nir_srcs_equal(vec->src[src_idx].src, vec->src[start_src_idx].src)) { + mov->dest.write_mask |= (1 << i); + mov->src[0].swizzle[i] = vec->src[src_idx].swizzle[0]; + } + src_idx++; + } + + nir_instr_insert_before(&vec->instr, &mov->instr); + + return mov->dest.write_mask; +} + +static bool +lower_vec_to_movs_block(nir_block *block, void *mem_ctx) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *vec = (nir_alu_instr *)instr; + + switch (vec->op) { + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + break; + default: + continue; /* The loop */ + } + + /* Since we insert multiple MOVs, we have to be non-SSA. */ + assert(!vec->dest.dest.is_ssa); + + unsigned finished_write_mask = 0; + + /* First, emit a MOV for all the src channels that are in the + * destination reg, in case other values we're populating in the dest + * might overwrite them. + */ + for (unsigned i = 0, src_idx = 0; i < 4; i++) { + if (!(vec->dest.write_mask & (1 << i))) + continue; + + if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) { + finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx); + break; + } + src_idx++; + } + + /* Now, emit MOVs for all the other src channels. 
*/ + for (unsigned i = 0, src_idx = 0; i < 4; i++) { + if (!(vec->dest.write_mask & (1 << i))) + continue; + + if (!(finished_write_mask & (1 << i))) + finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx); + + src_idx++; + } + + nir_instr_remove(&vec->instr); + ralloc_free(vec); + } + + return true; +} + +static void +nir_lower_vec_to_movs_impl(nir_function_impl *impl) +{ + nir_foreach_block(impl, lower_vec_to_movs_block, ralloc_parent(impl)); +} + +void +nir_lower_vec_to_movs(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_lower_vec_to_movs_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_metadata.c b/mesalib/src/glsl/nir/nir_metadata.c new file mode 100644 index 000000000..a03e12456 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_metadata.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + */ + +#include "nir.h" + +/* + * Handles management of the metadata. + */ + +void +nir_metadata_require(nir_function_impl *impl, nir_metadata required) +{ +#define NEEDS_UPDATE(X) ((required & ~impl->valid_metadata) & (X)) + + if (NEEDS_UPDATE(nir_metadata_block_index)) + nir_index_blocks(impl); + if (NEEDS_UPDATE(nir_metadata_dominance)) + nir_calc_dominance_impl(impl); + if (NEEDS_UPDATE(nir_metadata_live_variables)) + nir_live_variables_impl(impl); + +#undef NEEDS_UPDATE + + impl->valid_metadata |= required; +} + +void +nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved) +{ + impl->valid_metadata &= preserved; +} diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py new file mode 100644 index 000000000..77f3bb826 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opcodes.py @@ -0,0 +1,591 @@ +#! /usr/bin/env python +# +# Copyright (C) 2014 Connor Abbott +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +# Authors: +# Connor Abbott (cwabbott0@gmail.com) + + +# Class that represents all the information we have about the opcode +# NOTE: this must be kept in sync with nir_op_info + +class Opcode(object): + """Class that represents all the information we have about the opcode + NOTE: this must be kept in sync with nir_op_info + """ + def __init__(self, name, output_size, output_type, input_sizes, + input_types, algebraic_properties, const_expr): + """Parameters: + + - name is the name of the opcode (prepend nir_op_ for the enum name) + - all types are strings that get nir_type_ prepended to them + - input_types is a list of types + - algebraic_properties is a space-seperated string, where nir_op_is_ is + prepended before each entry + - const_expr is an expression or series of statements that computes the + constant value of the opcode given the constant values of its inputs. + + Constant expressions are formed from the variables src0, src1, ..., + src(N-1), where N is the number of arguments. The output of the + expression should be stored in the dst variable. Per-component input + and output variables will be scalars and non-per-component input and + output variables will be a struct with fields named x, y, z, and w + all of the correct type. Input and output variables can be assumed + to already be of the correct type and need no conversion. In + particular, the conversion from the C bool type to/from NIR_TRUE and + NIR_FALSE happens automatically. 
+ + For per-component instructions, the entire expression will be + executed once for each component. For non-per-component + instructions, the expression is expected to store the correct values + in dst.x, dst.y, etc. If "dst" does not exist anywhere in the + constant expression, an assignment to dst will happen automatically + and the result will be equivalent to "dst = <expression>" for + per-component instructions and "dst.x = dst.y = ... = <expression>" + for non-per-component instructions. + """ + assert isinstance(name, str) + assert isinstance(output_size, int) + assert isinstance(output_type, str) + assert isinstance(input_sizes, list) + assert isinstance(input_sizes[0], int) + assert isinstance(input_types, list) + assert isinstance(input_types[0], str) + assert isinstance(algebraic_properties, str) + assert isinstance(const_expr, str) + assert len(input_sizes) == len(input_types) + assert 0 <= output_size <= 4 + for size in input_sizes: + assert 0 <= size <= 4 + if output_size != 0: + assert size != 0 + self.name = name + self.num_inputs = len(input_sizes) + self.output_size = output_size + self.output_type = output_type + self.input_sizes = input_sizes + self.input_types = input_types + self.algebraic_properties = algebraic_properties + self.const_expr = const_expr + +# helper variables for strings +tfloat = "float" +tint = "int" +tbool = "bool" +tunsigned = "unsigned" + +commutative = "commutative " +associative = "associative " + +# global dictionary of opcodes +opcodes = {} + +def opcode(name, output_size, output_type, input_sizes, input_types, + algebraic_properties, const_expr): + assert name not in opcodes + opcodes[name] = Opcode(name, output_size, output_type, input_sizes, + input_types, algebraic_properties, const_expr) + +def unop_convert(name, in_type, out_type, const_expr): + opcode(name, 0, out_type, [0], [in_type], "", const_expr) + +def unop(name, ty, const_expr): + opcode(name, 0, ty, [0], [ty], "", const_expr) + +def unop_horiz(name, 
output_size, output_type, input_size, input_type, + const_expr): + opcode(name, output_size, output_type, [input_size], [input_type], "", + const_expr) + +def unop_reduce(name, output_size, output_type, input_type, prereduce_expr, + reduce_expr, final_expr): + def prereduce(src): + return "(" + prereduce_expr.format(src=src) + ")" + def final(src): + return final_expr.format(src="(" + src + ")") + def reduce_(src0, src1): + return reduce_expr.format(src0=src0, src1=src1) + src0 = prereduce("src0.x") + src1 = prereduce("src0.y") + src2 = prereduce("src0.z") + src3 = prereduce("src0.w") + unop_horiz(name + "2", output_size, output_type, 2, input_type, + final(reduce_(src0, src1))) + unop_horiz(name + "3", output_size, output_type, 3, input_type, + final(reduce_(reduce_(src0, src1), src2))) + unop_horiz(name + "4", output_size, output_type, 4, input_type, + final(reduce_(reduce_(src0, src1), reduce_(src2, src3)))) + + +# These two move instructions differ in what modifiers they support and what +# the negate modifier means. Otherwise, they are identical. +unop("fmov", tfloat, "src0") +unop("imov", tint, "src0") + +unop("ineg", tint, "-src0") +unop("fneg", tfloat, "-src0") +unop("inot", tint, "~src0") # invert every bit of the integer +unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f") +unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)") +unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)") +unop("iabs", tint, "(src0 < 0) ? -src0 : src0") +unop("fabs", tfloat, "fabsf(src0)") +unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)") +unop("frcp", tfloat, "1.0f / src0") +unop("frsq", tfloat, "1.0f / sqrtf(src0)") +unop("fsqrt", tfloat, "sqrtf(src0)") +unop("fexp", tfloat, "expf(src0)") # < e^x +unop("flog", tfloat, "logf(src0)") # log base e +unop("fexp2", tfloat, "exp2f(src0)") +unop("flog2", tfloat, "log2f(src0)") +unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. 
# Numeric and boolean conversion opcodes.  unop_convert takes
# (name, in_type, out_type, const_expr).
unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion: any non-zero value is true.
unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
# Boolean-to-float conversion
unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion: any non-zero value is true.
unop_convert("i2b", tint, tbool, "src0 != 0")
# Boolean-to-int conversion: true becomes 1 and false becomes 0.
unop_convert("b2i", tbool, tint, "src0 ? 1 : 0")
unop_convert("u2f", tunsigned, tfloat, "src0") # Unsigned-to-float conversion.

# Horizontal any/all reductions over 2-, 3- and 4-component vectors.
unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}")
unop_reduce("fany", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} || {src1}",
            "{src} ? 1.0f : 0.0f")
unop_reduce("fall", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} && {src1}",
            "{src} ? 1.0f : 0.0f")

# Unary floating-point rounding operations.


unop("ftrunc", tfloat, "truncf(src0)")
unop("fceil", tfloat, "ceilf(src0)")
unop("ffloor", tfloat, "floorf(src0)")
unop("ffract", tfloat, "src0 - floorf(src0)")
unop("fround_even", tfloat, "_mesa_round_to_even(src0)")


# Trigonometric operations.


unop("fsin", tfloat, "sinf(src0)")
unop("fcos", tfloat, "cosf(src0)")
unop("fsin_reduced", tfloat, "sinf(src0)")
unop("fcos_reduced", tfloat, "cosf(src0)")


# Partial derivatives.


unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
unop("fddy", tfloat, "0.0f")
unop("fddx_fine", tfloat, "0.0f")
unop("fddy_fine", tfloat, "0.0f")
unop("fddx_coarse", tfloat, "0.0f")
unop("fddy_coarse", tfloat, "0.0f")


# Floating point pack and unpack operations.
def pack_2x16(fmt):
    """Declare pack_<fmt>_2x16: two floats packed into one 32-bit word.

    src0.x fills the low 16 bits and src0.y the high 16 bits.  Every
    occurrence of "fmt" in the C snippet is replaced by the format name,
    so the per-component helper called is pack_<fmt>_1x16.
    """
    unop_horiz("pack_" + fmt + "_2x16", 1, tunsigned, 2, tfloat, """
dst.x = (uint32_t) pack_fmt_1x16(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
""".replace("fmt", fmt))

def pack_4x8(fmt):
    """Declare pack_<fmt>_4x8: four floats packed into one 32-bit word.

    Components x, y, z, w fill bytes 0..3 (least significant first).
    """
    unop_horiz("pack_" + fmt + "_4x8", 1, tunsigned, 4, tfloat, """
dst.x = (uint32_t) pack_fmt_1x8(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
""".replace("fmt", fmt))

def unpack_2x16(fmt):
    """Declare unpack_<fmt>_2x16: one 32-bit word unpacked into two floats.

    dst.x comes from the low 16 bits and dst.y from the high 16 bits,
    mirroring pack_2x16.
    """
    # The high half must be shifted *down*: (uint16_t)(x << 16) is always 0.
    unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tunsigned, """
dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
dst.y = unpack_fmt_1x16((uint16_t)(src0.x >> 16));
""".replace("fmt", fmt))

def unpack_4x8(fmt):
    """Declare unpack_<fmt>_4x8: one 32-bit word unpacked into four floats.

    Bytes 0..3 (least significant first) become components x, y, z, w.
    """
    unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tunsigned, """
dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
dst.w = unpack_fmt_1x8((uint8_t)(src0.x >> 24));
""".replace("fmt", fmt))


pack_2x16("snorm")
pack_4x8("snorm")
pack_2x16("unorm")
pack_4x8("unorm")
pack_2x16("half")
unpack_2x16("snorm")
unpack_4x8("snorm")
unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")


# Lowered floating point unpacking operations.


unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tunsigned,
           "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tunsigned,
           "unpack_half_1x16((uint16_t)(src0.x >> 16))")


# Bit operations, part of ARB_gpu_shader5.
unop("bitfield_reverse", tunsigned, """
/* we're not winning any awards for speed here, but that's ok */
dst = 0;
for (unsigned bit = 0; bit < 32; bit++)
   dst |= ((src0 >> bit) & 1) << (31 - bit);
""")
unop("bit_count", tunsigned, """
dst = 0;
for (unsigned bit = 0; bit < 32; bit++) {
   if ((src0 >> bit) & 1)
      dst++;
}
""")

# Index of the most significant set bit, or -1 when no bit is set.  The
# scan has to include bit 0, otherwise an input of 1 would yield -1.
unop_convert("ufind_msb", tunsigned, tint, """
dst = -1;
for (int bit = 31; bit >= 0; bit--) {
   if ((src0 >> bit) & 1) {
      dst = bit;
      break;
   }
}
""")

unop("ifind_msb", tint, """
dst = -1;
for (int bit = 31; bit >= 0; bit--) {
   /* If src0 < 0, we're looking for the first 0 bit.
    * if src0 >= 0, we're looking for the first 1 bit.
    */
   if ((((src0 >> bit) & 1) && (src0 >= 0)) ||
      (!((src0 >> bit) & 1) && (src0 < 0))) {
      dst = bit;
      break;
   }
}
""")

unop("find_lsb", tint, """
dst = -1;
for (unsigned bit = 0; bit < 32; bit++) {
   if ((src0 >> bit) & 1) {
      dst = bit;
      break;
   }
}
""")


# fnoise<i>_<j>: i output components from j input components; the constant
# expression always evaluates to 0.0f.
for i in xrange(1, 5):
    for j in xrange(1, 5):
        unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")

def binop_convert(name, out_type, in_type, alg_props, const_expr):
    """Declare a two-source opcode (all sizes 0) whose sources share in_type
    and whose result has out_type."""
    opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr)

def binop(name, ty, alg_props, const_expr):
    """Declare a two-source opcode whose sources and result all have type ty."""
    binop_convert(name, ty, ty, alg_props, const_expr)

def binop_compare(name, ty, alg_props, const_expr):
    """Declare a two-source comparison: sources of type ty, boolean result."""
    binop_convert(name, tbool, ty, alg_props, const_expr)

def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
                src2_type, const_expr):
    """Declare a two-source opcode with explicit sizes and types and no
    algebraic properties."""
    opcode(name, out_size, out_type, [src1_size, src2_size], [src1_type, src2_type],
           "", const_expr)

def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
                 reduce_expr, final_expr):
    """Declare name2, name3 and name4: two-source reductions over vectors.

    prereduce_expr combines matching components of the two sources
    ({src0}, {src1}), reduce_expr folds two such partial results together,
    and final_expr ({src}) post-processes the folded value.  All three
    generated opcodes are marked commutative.
    """
    def final(src):
        return final_expr.format(src= "(" + src + ")")
    def reduce_(src0, src1):
        return reduce_expr.format(src0=src0, src1=src1)
    def prereduce(src0, src1):
        return "(" + prereduce_expr.format(src0=src0, src1=src1) + ")"
    src0 = prereduce("src0.x", "src1.x")
    src1 = prereduce("src0.y", "src1.y")
    src2 = prereduce("src0.z", "src1.z")
    src3 = prereduce("src0.w", "src1.w")
    opcode(name + "2", output_size, output_type,
           [2, 2], [src_type, src_type], commutative,
           final(reduce_(src0, src1)))
    opcode(name + "3", output_size, output_type,
           [3, 3], [src_type, src_type], commutative,
           final(reduce_(reduce_(src0, src1), src2)))
    opcode(name + "4", output_size, output_type,
           [4, 4], [src_type, src_type], commutative,
           final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))

binop("fadd", tfloat, commutative + associative, "src0 + src1")
binop("iadd", tint, commutative + associative, "src0 + src1")
binop("fsub", tfloat, "", "src0 - src1")
binop("isub", tint, "", "src0 - src1")

binop("fmul", tfloat, commutative + associative, "src0 * src1")
# low 32-bits of signed/unsigned integer multiply
binop("imul", tint, commutative + associative, "src0 * src1")
# high 32-bits of signed integer multiply
binop("imul_high", tint, commutative,
      "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
# high 32-bits of unsigned integer multiply
binop("umul_high", tunsigned, commutative,
      "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")

binop("fdiv", tfloat, "", "src0 / src1")
binop("idiv", tint, "", "src0 / src1")
binop("udiv", tunsigned, "", "src0 / src1")

# returns a boolean representing the carry resulting from the addition of
# the two unsigned arguments.

binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0")

# returns a boolean representing the borrow resulting from the subtraction
# of the two unsigned arguments.  src0 - src1 borrows exactly when
# src0 < src1.

binop_convert("usub_borrow", tbool, tunsigned, "", "src0 < src1")

binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")

#
# Comparisons
#


# these integer-aware comparisons return a boolean (0 or ~0)

binop_compare("flt", tfloat, "", "src0 < src1")
binop_compare("fge", tfloat, "", "src0 >= src1")
binop_compare("feq", tfloat, commutative, "src0 == src1")
binop_compare("fne", tfloat, commutative, "src0 != src1")
binop_compare("ilt", tint, "", "src0 < src1")
binop_compare("ige", tint, "", "src0 >= src1")
binop_compare("ieq", tint, commutative, "src0 == src1")
binop_compare("ine", tint, commutative, "src0 != src1")
binop_compare("ult", tunsigned, "", "src0 < src1")
binop_compare("uge", tunsigned, "", "src0 >= src1")

# integer-aware GLSL-style comparisons that compare floats and ints

binop_reduce("ball_fequal", 1, tbool, tfloat, "{src0} == {src1}",
             "{src0} && {src1}", "{src}")
binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
             "{src0} || {src1}", "{src}")
binop_reduce("ball_iequal", 1, tbool, tint, "{src0} == {src1}",
             "{src0} && {src1}", "{src}")
binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
             "{src0} || {src1}", "{src}")

# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0

binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}",
             "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
             "{src0} || {src1}", "{src} ? 1.0f : 0.0f")

# These comparisons for integer-less hardware return 1.0 and 0.0 for true
# and false respectively

binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal


binop("ishl", tint, "", "src0 << src1")
binop("ishr", tint, "", "src0 >> src1")
binop("ushr", tunsigned, "", "src0 >> src1")

# bitwise logic operators
#
# These are also used as boolean and, or, xor for hardware supporting
# integers.


binop("iand", tunsigned, commutative + associative, "src0 & src1")
binop("ior", tunsigned, commutative + associative, "src0 | src1")
binop("ixor", tunsigned, commutative + associative, "src0 ^ src1")


# floating point logic operators
#
# These use (src != 0.0) for testing the truth of the input, and output 1.0
# for true and 0.0 for false

binop("fand", tfloat, commutative,
      "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
binop("for", tfloat, commutative,
      "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
binop("fxor", tfloat, commutative,
      "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")

binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
             "{src}")

binop("fmin", tfloat, "", "fminf(src0, src1)")
binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
binop("fmax", tfloat, "", "fmaxf(src0, src1)")
binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")

binop("fpow", tfloat, "", "powf(src0, src1)")

binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
            "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")

binop_convert("bfm", tunsigned, tint, "", """
int offset = src0, bits = src1;
if (offset < 0 || bits < 0 || offset + bits > 32)
   dst = 0; /* undefined per the spec */
else
   dst = ((1 << bits)- 1) << offset;
""")

opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
dst = ldexp(src0, src1);
/* flush denormals to zero. */
if (!isnormal(dst))
   dst = copysign(0.0f, src0);
""")

# Combines the first component of each input to make a 2-component vector.

binop_horiz("vec2", 2, tunsigned, 1, tunsigned, 1, tunsigned, """
dst.x = src0.x;
dst.y = src1.x;
""")

def triop(name, ty, const_expr):
    """Declare a three-source opcode (all sizes 0) whose sources and result
    all have type ty."""
    opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr)
def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
    """Declare a three-source opcode with explicit sizes; every source and
    the result are typed unsigned."""
    opcode(name, output_size, tunsigned,
           [src1_size, src2_size, src3_size],
           [tunsigned, tunsigned, tunsigned], "", const_expr)

triop("ffma", tfloat, "src0 * src1 + src2")

triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")

# Conditional Select
#
# A vector conditional select instruction (like ?:, but operating per-
# component on vectors). There are two versions, one for floating point
# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).


triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
opcode("bcsel", 0, tunsigned, [0, 0, 0],
       [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2")

triop("bfi", tunsigned, """
unsigned mask = src0, insert = src1 & mask, base = src2;
if (mask == 0) {
   dst = base;
} else {
   unsigned tmp = mask;
   while (!(tmp & 1)) {
      tmp >>= 1;
      insert <<= 1;
   }
   dst = (base & ~mask) | insert;
}
""")

opcode("ubitfield_extract", 0, tunsigned,
       [0, 1, 1], [tunsigned, tint, tint], "", """
unsigned base = src0;
int offset = src1.x, bits = src2.x;
if (bits == 0) {
   dst = 0;
} else if (bits < 0 || offset < 0 || offset + bits > 32) {
   dst = 0; /* undefined per the spec */
} else {
   dst = (base >> offset) & ((1 << bits) - 1);
}
""")
# Signed extract: the left shift moves the field's top bit to bit 31, then
# the right shift by (32 - bits) brings the field down to bit 0 while
# replicating its sign bit.
opcode("ibitfield_extract", 0, tint,
       [0, 1, 1], [tint, tint, tint], "", """
int base = src0;
int offset = src1.x, bits = src2.x;
if (bits == 0) {
   dst = 0;
} else if (offset < 0 || bits < 0 || offset + bits > 32) {
   dst = 0;
} else {
   dst = (base << (32 - offset - bits)) >> (32 - bits); /* use sign-extending shift */
}
""")

# Combines the first component of each input to make a 3-component vector.
+ +triop_horiz("vec3", 3, 1, 1, 1, """ +dst.x = src0.x; +dst.y = src1.x; +dst.z = src2.x; +""") + +def quadop_horiz(name, output_size, src1_size, src2_size, src3_size, + src4_size, const_expr): + opcode(name, output_size, tunsigned, + [src1_size, src2_size, src3_size, src4_size], + [tunsigned, tunsigned, tunsigned, tunsigned], + "", const_expr) + +opcode("bitfield_insert", 0, tunsigned, [0, 0, 1, 1], + [tunsigned, tunsigned, tint, tint], "", """ +unsigned base = src0, insert = src1; +int offset = src2.x, bits = src3.x; +if (bits == 0) { + dst = 0; +} else if (offset < 0 || bits < 0 || bits + offset > 32) { + dst = 0; +} else { + unsigned mask = ((1 << bits) - 1) << offset; + dst = (base & ~mask) | ((insert << bits) & mask); +} +""") + +quadop_horiz("vec4", 4, 1, 1, 1, 1, """ +dst.x = src0.x; +dst.y = src1.x; +dst.z = src2.x; +dst.w = src3.x; +""") + + diff --git a/mesalib/src/glsl/nir/nir_opcodes_c.py b/mesalib/src/glsl/nir/nir_opcodes_c.py new file mode 100644 index 000000000..7049c5be6 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opcodes_c.py @@ -0,0 +1,55 @@ +#! /usr/bin/env python +# +# Copyright (C) 2014 Connor Abbott +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +# Authors: +# Connor Abbott (cwabbott0@gmail.com) + +from nir_opcodes import opcodes +from mako.template import Template + +template = Template(""" +#include "nir.h" + +const nir_op_info nir_op_infos[nir_num_opcodes] = { +% for name, opcode in sorted(opcodes.iteritems()): +{ + .name = "${name}", + .num_inputs = ${opcode.num_inputs}, + .output_size = ${opcode.output_size}, + .output_type = ${"nir_type_" + opcode.output_type}, + .input_sizes = { + ${ ", ".join(str(size) for size in opcode.input_sizes) } + }, + .input_types = { + ${ ", ".join("nir_type_" + type for type in opcode.input_types) } + }, + .algebraic_properties = + ${ "0" if opcode.algebraic_properties == "" else " | ".join( + "NIR_OP_IS_" + prop.upper() for prop in + opcode.algebraic_properties.strip().split(" ")) } +}, +% endfor +}; +""") + +print template.render(opcodes=opcodes) diff --git a/mesalib/src/glsl/nir/nir_opcodes_h.py b/mesalib/src/glsl/nir/nir_opcodes_h.py new file mode 100644 index 000000000..be15a96d2 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opcodes_h.py @@ -0,0 +1,47 @@ +#! 
/usr/bin/env python + +template = """\ +/* Copyright (C) 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + */ + +#ifndef _NIR_OPCODES_ +#define _NIR_OPCODES_ + +<% opcode_names = sorted(opcodes.iterkeys()) %> + +typedef enum { +% for name in opcode_names: + nir_op_${name}, +% endfor + nir_last_opcode = nir_op_${opcode_names[-1]}, + nir_num_opcodes = nir_last_opcode + 1 +} nir_op; + +#endif /* _NIR_OPCODES_ */""" + +from nir_opcodes import opcodes +from mako.template import Template + +print Template(template).render(opcodes=opcodes) diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py new file mode 100644 index 000000000..7bf643134 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py @@ -0,0 +1,188 @@ +#! 
/usr/bin/env python +# +# Copyright (C) 2014 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +# Authors: +# Jason Ekstrand (jason@jlekstrand.net) + +import nir_algebraic + +# Convenience variables +a = 'a' +b = 'b' +c = 'c' +d = 'd' + +# Written in the form (<search>, <replace>) where <search> is an expression +# and <replace> is either an expression or a value. An expression is +# defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>) +# where each source is either an expression or a value. A value can be +# either a numeric constant or a string representing a variable name. +# +# Variable names are specified as "[#]name[@type]" where "#" inicates that +# the given variable will only match constants and the type indicates that +# the given variable will only match values from ALU instructions with the +# given output type. 
#
# For constants, you have to be careful to make sure that it is the right
# type because python is unaware of the source and destination types of the
# opcodes.

# Each entry is (<search>, <replace>) or (<search>, <replace>, <condition>),
# where <condition> is a C expression string such as 'options->lower_ffma'.
optimizations = [
    (('fneg', ('fneg', a)), a),
    (('ineg', ('ineg', a)), a),
    (('fabs', ('fabs', a)), ('fabs', a)),
    (('fabs', ('fneg', a)), ('fabs', a)),
    (('iabs', ('iabs', a)), ('iabs', a)),
    (('iabs', ('ineg', a)), ('iabs', a)),
    (('fadd', a, 0.0), a),
    (('iadd', a, 0), a),
    (('fmul', a, 0.0), 0.0),
    (('imul', a, 0), 0),
    (('fmul', a, 1.0), a),
    (('imul', a, 1), a),
    (('fmul', a, -1.0), ('fneg', a)),
    (('imul', a, -1), ('ineg', a)),
    (('ffma', 0.0, a, b), b),
    (('ffma', a, 0.0, b), b),
    (('ffma', a, b, 0.0), ('fmul', a, b)),
    (('ffma', a, 1.0, b), ('fadd', a, b)),
    (('ffma', 1.0, a, b), ('fadd', a, b)),
    (('flrp', a, b, 0.0), a),
    (('flrp', a, b, 1.0), b),
    (('flrp', a, a, b), a),
    (('flrp', 0.0, a, b), ('fmul', a, b)),
    (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
    (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
    # Comparison simplifications
    (('inot', ('flt', a, b)), ('fge', a, b)),
    (('inot', ('fge', a, b)), ('flt', a, b)),
    (('inot', ('ilt', a, b)), ('ige', a, b)),
    (('inot', ('ige', a, b)), ('ilt', a, b)),
    (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
    (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
    (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
    (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
    (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
    (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
    (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
    (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)),
    (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
    (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
    (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
    (('fsat', ('fsat', a)), ('fsat', a)),
    (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
    # Comparison with the same args.  Note that these are not done for
    # the float versions because NaN always returns false on float
    # inequalities.
    (('ilt', a, a), False),
    (('ige', a, a), True),
    (('ieq', a, a), True),
    (('ine', a, a), False),
    (('ult', a, a), False),
    (('uge', a, a), True),
    # Logical and bit operations
    (('fand', a, 0.0), 0.0),
    (('iand', a, a), a),
    (('iand', a, 0), 0),
    (('ior', a, a), a),
    (('ior', a, 0), a),
    (('fxor', a, a), 0.0),
    (('ixor', a, a), 0),
    (('inot', ('inot', a)), a),
    # DeMorgan's Laws
    (('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))),
    (('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),
    # Shift optimizations: shifting zero gives zero, and shifting by zero
    # leaves the value unchanged for all three shift opcodes.
    (('ishl', 0, a), 0),
    (('ishl', a, 0), a),
    (('ishr', 0, a), 0),
    (('ishr', a, 0), a),
    (('ushr', 0, a), 0),
    (('ushr', a, 0), a),
    # Exponential/logarithmic identities
    (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
    (('fexp', ('flog', a)), a), # e^ln(a) = a
    (('flog2', ('fexp2', a)), a), # lg2(2^a) = a
    (('flog', ('fexp', a)), a), # ln(e^a) = a
    (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
    (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
    (('fexp', ('fmul', ('flog', a), b)), ('fpow', a, b), '!options->lower_fpow'), # e^(ln(a)*b) = a^b
    (('fpow', a, 1.0), a),
    (('fpow', a, 2.0), ('fmul', a, a)),
    (('fpow', 2.0, a), ('fexp2', a)),
    # Division and reciprocal
    (('fdiv', 1.0, a), ('frcp', a)),
    (('frcp', ('frcp', a)), a),
    (('frcp', ('fsqrt', a)), ('frsq', a)),
    (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
    (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
    # Boolean simplifications
    (('ine', 'a@bool', 0), 'a'),
    (('ieq', 'a@bool', 0), ('inot', 'a')),
    (('bcsel', a, True, False), ('ine', a, 0)),
    (('bcsel', a, False, True), ('ieq', a, 0)),
    (('bcsel', True, b, c), b),
    (('bcsel', False, b, c), c),
    # The result of this should be hit by constant propagation and, in the
    # next round of opt_algebraic, get picked up by one of the above two.
    (('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)),

    (('bcsel', a, b, b), b),
    (('fcsel', a, b, b), b),

    # Subtracts
    (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
    (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
    (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
    (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
    (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
    (('iadd', a, ('isub', 0, b)), ('isub', a, b)),
    (('fabs', ('fsub', 0.0, a)), ('fabs', a)),
    (('iabs', ('isub', 0, a)), ('iabs', a)),

    # This one may not be exact.  It repeats the identical feq/fadd rule
    # listed under "Comparison simplifications" above.
    (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
]

# Add optimizations to handle the case where the result of a ternary is
# compared to a constant.  This way we can take things like
#
# (a ? 0 : 1) > 0
#
# and turn it into
#
# a ? (0 > 0) : (1 > 0)
#
# which constant folding will eat for lunch.  The resulting ternary will
# further get cleaned up by the boolean reductions above and we will be
# left with just the original variable "a".
+for op in ['flt', 'fge', 'feq', 'fne', + 'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']: + optimizations += [ + ((op, ('bcsel', 'a', '#b', '#c'), '#d'), + ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))), + ((op, '#d', ('bcsel', a, '#b', '#c')), + ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))), + ] + +print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render() diff --git a/mesalib/src/glsl/nir/nir_opt_constant_folding.c b/mesalib/src/glsl/nir/nir_opt_constant_folding.c new file mode 100644 index 000000000..85c09fc48 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_constant_folding.c @@ -0,0 +1,201 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir_constant_expressions.h" +#include <math.h> + +/* + * Implements SSA-based constant folding. 
+ */ + +struct constant_fold_state { + void *mem_ctx; + nir_function_impl *impl; + bool progress; +}; + +static bool +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) +{ + nir_const_value src[4]; + + if (!instr->dest.dest.is_ssa) + return false; + + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + if (!instr->src[i].src.is_ssa) + return false; + + nir_instr *src_instr = instr->src[i].src.ssa->parent_instr; + + if (src_instr->type != nir_instr_type_load_const) + return false; + nir_load_const_instr* load_const = nir_instr_as_load_const(src_instr); + + for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i); + j++) { + src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]]; + } + + /* We shouldn't have any source modifiers in the optimization loop. */ + assert(!instr->src[i].abs && !instr->src[i].negate); + } + + /* We shouldn't have any saturate modifiers in the optimization loop. */ + assert(!instr->dest.saturate); + + nir_const_value dest = + nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components, + src); + + nir_load_const_instr *new_instr = + nir_load_const_instr_create(mem_ctx, + instr->dest.dest.ssa.num_components); + + new_instr->value = dest; + + nir_instr_insert_before(&instr->instr, &new_instr->instr); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(&new_instr->def), + mem_ctx); + + nir_instr_remove(&instr->instr); + ralloc_free(instr); + + return true; +} + +static bool +constant_fold_deref(nir_instr *instr, nir_deref_var *deref) +{ + bool progress = false; + + for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { + if (tail->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *arr = nir_deref_as_array(tail); + + if (arr->deref_array_type == nir_deref_array_type_indirect && + arr->indirect.is_ssa && + arr->indirect.ssa->parent_instr->type == nir_instr_type_load_const) { + nir_load_const_instr *indirect = + 
nir_instr_as_load_const(arr->indirect.ssa->parent_instr); + + arr->base_offset += indirect->value.u[0]; + + /* Clear out the source */ + nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL)); + + arr->deref_array_type = nir_deref_array_type_direct; + + progress = true; + } + } + + return progress; +} + +static bool +constant_fold_intrinsic_instr(nir_intrinsic_instr *instr) +{ + bool progress = false; + + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) { + progress |= constant_fold_deref(&instr->instr, instr->variables[i]); + } + + return progress; +} + +static bool +constant_fold_tex_instr(nir_tex_instr *instr) +{ + if (instr->sampler) + return constant_fold_deref(&instr->instr, instr->sampler); + else + return false; +} + +static bool +constant_fold_block(nir_block *block, void *void_state) +{ + struct constant_fold_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_alu: + state->progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), + state->mem_ctx); + break; + case nir_instr_type_intrinsic: + state->progress |= + constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_tex: + state->progress |= constant_fold_tex_instr(nir_instr_as_tex(instr)); + break; + default: + /* Don't know how to constant fold */ + break; + } + } + + return true; +} + +static bool +nir_opt_constant_folding_impl(nir_function_impl *impl) +{ + struct constant_fold_state state; + + state.mem_ctx = ralloc_parent(impl); + state.impl = impl; + state.progress = false; + + nir_foreach_block(impl, constant_fold_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return state.progress; +} + +bool +nir_opt_constant_folding(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) { + if (overload->impl) + 
progress |= nir_opt_constant_folding_impl(overload->impl); + } + + return progress; +} diff --git a/mesalib/src/glsl/nir/nir_opt_copy_propagate.c b/mesalib/src/glsl/nir/nir_opt_copy_propagate.c new file mode 100644 index 000000000..dd0ec01ef --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_copy_propagate.c @@ -0,0 +1,317 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include <main/imports.h> + +/** + * SSA-based copy propagation + */ + +static bool is_move(nir_alu_instr *instr) +{ + if (instr->op != nir_op_fmov && + instr->op != nir_op_imov) + return false; + + if (instr->dest.saturate) + return false; + + /* we handle modifiers in a separate pass */ + + if (instr->src[0].abs || instr->src[0].negate) + return false; + + if (!instr->src[0].src.is_ssa) + return false; + + return true; + +} + +static bool +is_swizzleless_move(nir_alu_instr *instr) +{ + if (!is_move(instr)) + return false; + + for (unsigned i = 0; i < 4; i++) { + if (!((instr->dest.write_mask >> i) & 1)) + break; + if (instr->src[0].swizzle[i] != i) + return false; + } + + return true; +} + +static bool is_vec(nir_alu_instr *instr) +{ + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + if (!instr->src[i].src.is_ssa) + return false; + + return instr->op == nir_op_vec2 || + instr->op == nir_op_vec3 || + instr->op == nir_op_vec4; +} + +typedef struct { + nir_ssa_def *def; + bool found; +} search_def_state; + +static bool +search_def(nir_src *src, void *_state) +{ + search_def_state *state = (search_def_state *) _state; + + if (src->is_ssa && src->ssa == state->def) + state->found = true; + + return true; +} + +static void +rewrite_src_instr(nir_src *src, nir_ssa_def *new_def, nir_instr *parent_instr) +{ + nir_ssa_def *old_def = src->ssa; + + src->ssa = new_def; + + /* + * The instruction could still use the old definition in one of its other + * sources, so only remove the instruction from the uses if there are no + * more uses left. 
+ */ + + search_def_state search_state; + search_state.def = old_def; + search_state.found = false; + nir_foreach_src(parent_instr, search_def, &search_state); + if (!search_state.found) { + struct set_entry *entry = _mesa_set_search(old_def->uses, parent_instr); + assert(entry); + _mesa_set_remove(old_def->uses, entry); + } + + _mesa_set_add(new_def->uses, parent_instr); +} + +static void +rewrite_src_if(nir_if *if_stmt, nir_ssa_def *new_def) +{ + nir_ssa_def *old_def = if_stmt->condition.ssa; + + if_stmt->condition.ssa = new_def; + + struct set_entry *entry = _mesa_set_search(old_def->if_uses, if_stmt); + assert(entry); + _mesa_set_remove(old_def->if_uses, entry); + + _mesa_set_add(new_def->if_uses, if_stmt); +} + +static bool +copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) +{ + if (!src->is_ssa) { + if (src->reg.indirect) + return copy_prop_src(src, parent_instr, parent_if); + return false; + } + + nir_instr *src_instr = src->ssa->parent_instr; + if (src_instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr); + if (!is_swizzleless_move(alu_instr)) + return false; + + /* Don't let copy propagation land us with a phi that has more + * components in its source than it has in its destination. That badly + * messes up out-of-ssa. 
+ */ + if (parent_instr && parent_instr->type == nir_instr_type_phi) { + nir_phi_instr *phi = nir_instr_as_phi(parent_instr); + assert(phi->dest.is_ssa); + if (phi->dest.ssa.num_components != + alu_instr->src[0].src.ssa->num_components) + return false; + } + + if (parent_instr) + rewrite_src_instr(src, alu_instr->src[0].src.ssa, parent_instr); + else + rewrite_src_if(parent_if, alu_instr->src[0].src.ssa); + + return true; +} + +static bool +copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) +{ + nir_alu_src *src = &parent_alu_instr->src[index]; + if (!src->src.is_ssa) { + if (src->src.reg.indirect) + return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr, + NULL); + return false; + } + + nir_instr *src_instr = src->src.ssa->parent_instr; + if (src_instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr); + if (!is_move(alu_instr) && !is_vec(alu_instr)) + return false; + + nir_ssa_def *def; + unsigned new_swizzle[4] = {0, 0, 0, 0}; + + if (alu_instr->op == nir_op_fmov || + alu_instr->op == nir_op_imov) { + for (unsigned i = 0; i < 4; i++) + new_swizzle[i] = alu_instr->src[0].swizzle[src->swizzle[i]]; + def = alu_instr->src[0].src.ssa; + } else { + def = NULL; + + for (unsigned i = 0; i < 4; i++) { + if (!nir_alu_instr_channel_used(parent_alu_instr, index, i)) + continue; + + nir_ssa_def *new_def = alu_instr->src[src->swizzle[i]].src.ssa; + if (def == NULL) + def = new_def; + else { + if (def != new_def) + return false; + } + new_swizzle[i] = alu_instr->src[src->swizzle[i]].swizzle[0]; + } + } + + for (unsigned i = 0; i < 4; i++) + src->swizzle[i] = new_swizzle[i]; + + rewrite_src_instr(&src->src, def, &parent_alu_instr->instr); + + return true; +} + +typedef struct { + nir_instr *parent_instr; + bool progress; +} copy_prop_state; + +static bool +copy_prop_src_cb(nir_src *src, void *_state) +{ + copy_prop_state *state = (copy_prop_state *) _state; + while (copy_prop_src(src, 
state->parent_instr, NULL)) + state->progress = true; + + return true; +} + +static bool +copy_prop_instr(nir_instr *instr) +{ + if (instr->type == nir_instr_type_alu) { + nir_alu_instr *alu_instr = nir_instr_as_alu(instr); + bool progress = false; + + for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) + while (copy_prop_alu_src(alu_instr, i)) + progress = true; + + if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect) + while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL)) + progress = true; + + return progress; + } + + copy_prop_state state; + state.parent_instr = instr; + state.progress = false; + nir_foreach_src(instr, copy_prop_src_cb, &state); + + return state.progress; +} + +static bool +copy_prop_if(nir_if *if_stmt) +{ + return copy_prop_src(&if_stmt->condition, NULL, if_stmt); +} + +static bool +copy_prop_block(nir_block *block, void *_state) +{ + bool *progress = (bool *) _state; + + nir_foreach_instr(block, instr) { + if (copy_prop_instr(instr)) + *progress = true; + } + + if (block->cf_node.node.next != NULL && /* check that we aren't the end node */ + !nir_cf_node_is_last(&block->cf_node) && + nir_cf_node_next(&block->cf_node)->type == nir_cf_node_if) { + nir_if *if_stmt = nir_cf_node_as_if(nir_cf_node_next(&block->cf_node)); + if (copy_prop_if(if_stmt)) + *progress = true; + } + + return true; +} + +bool +nir_copy_prop_impl(nir_function_impl *impl) +{ + bool progress = false; + + nir_foreach_block(impl, copy_prop_block, &progress); + return progress; +} + +bool +nir_copy_prop(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) { + if (overload->impl && nir_copy_prop_impl(overload->impl)) + progress = true; + } + + return progress; +} diff --git a/mesalib/src/glsl/nir/nir_opt_cse.c b/mesalib/src/glsl/nir/nir_opt_cse.c new file mode 100644 index 000000000..9b383202d --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_cse.c @@ -0,0 +1,297 @@ +/* + * Copyright © 2014 
Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements common subexpression elimination + */ + +struct cse_state { + void *mem_ctx; + bool progress; +}; + +static bool +nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask) +{ + if (src1.abs != src2.abs || src1.negate != src2.negate) + return false; + + for (int i = 0; i < 4; ++i) { + if (!(read_mask & (1 << i))) + continue; + + if (src1.swizzle[i] != src2.swizzle[i]) + return false; + } + + return nir_srcs_equal(src1.src, src2.src); +} + +static bool +nir_instrs_equal(nir_instr *instr1, nir_instr *instr2) +{ + if (instr1->type != instr2->type) + return false; + + switch (instr1->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu1 = nir_instr_as_alu(instr1); + nir_alu_instr *alu2 = nir_instr_as_alu(instr2); + + if (alu1->op != alu2->op) + return false; + + /* TODO: We can probably acutally do something more inteligent such + * as allowing different numbers and taking a maximum or something + * here */ + if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components) + return false; + + for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { + if (!nir_alu_srcs_equal(alu1->src[i], alu2->src[i], + (1 << alu1->dest.dest.ssa.num_components) - 1)) + return false; + } + return true; + } + case nir_instr_type_tex: + return false; + case nir_instr_type_load_const: { + nir_load_const_instr *load1 = nir_instr_as_load_const(instr1); + nir_load_const_instr *load2 = nir_instr_as_load_const(instr2); + + if (load1->def.num_components != load2->def.num_components) + return false; + + return memcmp(load1->value.f, load2->value.f, + load1->def.num_components * sizeof(*load2->value.f)) == 0; + } + case nir_instr_type_phi: { + nir_phi_instr *phi1 = nir_instr_as_phi(instr1); + nir_phi_instr *phi2 = nir_instr_as_phi(instr2); + + if (phi1->instr.block != phi2->instr.block) + return false; + + nir_foreach_phi_src(phi1, src1) { + 
nir_foreach_phi_src(phi2, src2) { + if (src1->pred == src2->pred) { + if (!nir_srcs_equal(src1->src, src2->src)) + return false; + + break; + } + } + } + + return true; + } + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1); + nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2); + const nir_intrinsic_info *info = + &nir_intrinsic_infos[intrinsic1->intrinsic]; + + if (intrinsic1->intrinsic != intrinsic2->intrinsic || + intrinsic1->num_components != intrinsic2->num_components) + return false; + + if (info->has_dest && intrinsic1->dest.ssa.num_components != + intrinsic2->dest.ssa.num_components) + return false; + + for (unsigned i = 0; i < info->num_srcs; i++) { + if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i])) + return false; + } + + assert(info->num_variables == 0); + + for (unsigned i = 0; i < info->num_indices; i++) { + if (intrinsic1->const_index[i] != intrinsic2->const_index[i]) + return false; + } + + return true; + } + case nir_instr_type_call: + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + case nir_instr_type_parallel_copy: + default: + unreachable("Invalid instruction type"); + } + + return false; +} + +static bool +src_is_ssa(nir_src *src, void *data) +{ + return src->is_ssa; +} + +static bool +dest_is_ssa(nir_dest *dest, void *data) +{ + return dest->is_ssa; +} + +static bool +nir_instr_can_cse(nir_instr *instr) +{ + /* We only handle SSA. 
*/ + if (!nir_foreach_dest(instr, dest_is_ssa, NULL) || + !nir_foreach_src(instr, src_is_ssa, NULL)) + return false; + + switch (instr->type) { + case nir_instr_type_alu: + case nir_instr_type_load_const: + case nir_instr_type_phi: + return true; + case nir_instr_type_tex: + return false; /* TODO */ + case nir_instr_type_intrinsic: { + const nir_intrinsic_info *info = + &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic]; + return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && + (info->flags & NIR_INTRINSIC_CAN_REORDER) && + info->num_variables == 0; /* not implemented yet */ + } + case nir_instr_type_call: + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + return false; + case nir_instr_type_parallel_copy: + default: + unreachable("Invalid instruction type"); + } + + return false; +} + +static nir_ssa_def * +nir_instr_get_dest_ssa_def(nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + assert(nir_instr_as_alu(instr)->dest.dest.is_ssa); + return &nir_instr_as_alu(instr)->dest.dest.ssa; + case nir_instr_type_load_const: + return &nir_instr_as_load_const(instr)->def; + case nir_instr_type_phi: + assert(nir_instr_as_phi(instr)->dest.is_ssa); + return &nir_instr_as_phi(instr)->dest.ssa; + case nir_instr_type_intrinsic: + assert(nir_instr_as_intrinsic(instr)->dest.is_ssa); + return &nir_instr_as_intrinsic(instr)->dest.ssa; + default: + unreachable("We never ask for any of these"); + } +} + +static void +nir_opt_cse_instr(nir_instr *instr, struct cse_state *state) +{ + if (!nir_instr_can_cse(instr)) + return; + + for (struct exec_node *node = instr->node.prev; + !exec_node_is_head_sentinel(node); node = node->prev) { + nir_instr *other = exec_node_data(nir_instr, node, node); + if (nir_instrs_equal(instr, other)) { + nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other); + nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr), + nir_src_for_ssa(other_def), + state->mem_ctx); + nir_instr_remove(instr); + 
state->progress = true; + return; + } + } + + for (nir_block *block = instr->block->imm_dom; + block != NULL; block = block->imm_dom) { + nir_foreach_instr_reverse(block, other) { + if (nir_instrs_equal(instr, other)) { + nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other); + nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr), + nir_src_for_ssa(other_def), + state->mem_ctx); + nir_instr_remove(instr); + state->progress = true; + return; + } + } + } +} + +static bool +nir_opt_cse_block(nir_block *block, void *void_state) +{ + struct cse_state *state = void_state; + + nir_foreach_instr_safe(block, instr) + nir_opt_cse_instr(instr, state); + + return true; +} + +static bool +nir_opt_cse_impl(nir_function_impl *impl) +{ + struct cse_state state; + + state.mem_ctx = ralloc_parent(impl); + state.progress = false; + + nir_metadata_require(impl, nir_metadata_dominance); + + nir_foreach_block(impl, nir_opt_cse_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return state.progress; +} + +bool +nir_opt_cse(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) { + if (overload->impl) + progress |= nir_opt_cse_impl(overload->impl); + } + + return progress; +} diff --git a/mesalib/src/glsl/nir/nir_opt_dce.c b/mesalib/src/glsl/nir/nir_opt_dce.c new file mode 100644 index 000000000..e0ebdc61c --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_dce.c @@ -0,0 +1,183 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +/* SSA-based mark-and-sweep dead code elimination */ + +typedef struct { + struct exec_node node; + nir_instr *instr; +} worklist_elem; + +static void +worklist_push(struct exec_list *worklist, nir_instr *instr) +{ + worklist_elem *elem = ralloc(worklist, worklist_elem); + elem->instr = instr; + instr->pass_flags = 1; + exec_list_push_tail(worklist, &elem->node); +} + +static nir_instr * +worklist_pop(struct exec_list *worklist) +{ + struct exec_node *node = exec_list_pop_head(worklist); + worklist_elem *elem = exec_node_data(worklist_elem, node, node); + return elem->instr; +} + +static bool +mark_live_cb(nir_src *src, void *_state) +{ + struct exec_list *worklist = (struct exec_list *) _state; + + if (src->is_ssa && !src->ssa->parent_instr->pass_flags) { + worklist_push(worklist, src->ssa->parent_instr); + } + + return true; +} + +static void +init_instr(nir_instr *instr, struct exec_list *worklist) +{ + nir_alu_instr *alu_instr; + nir_intrinsic_instr *intrin_instr; + nir_tex_instr *tex_instr; + + /* We use the pass_flags to store the live/dead information. In DCE, we + * just treat it as a zero/non-zerl boolean for whether or not the + * instruction is live. 
+ */ + instr->pass_flags = 0; + + switch (instr->type) { + case nir_instr_type_call: + case nir_instr_type_jump: + worklist_push(worklist, instr); + break; + + case nir_instr_type_alu: + alu_instr = nir_instr_as_alu(instr); + if (!alu_instr->dest.dest.is_ssa) + worklist_push(worklist, instr); + break; + + case nir_instr_type_intrinsic: + intrin_instr = nir_instr_as_intrinsic(instr); + if (nir_intrinsic_infos[intrin_instr->intrinsic].flags & + NIR_INTRINSIC_CAN_ELIMINATE) { + if (nir_intrinsic_infos[intrin_instr->intrinsic].has_dest && + !intrin_instr->dest.is_ssa) { + worklist_push(worklist, instr); + } + } else { + worklist_push(worklist, instr); + } + break; + + case nir_instr_type_tex: + tex_instr = nir_instr_as_tex(instr); + if (!tex_instr->dest.is_ssa) + worklist_push(worklist, instr); + break; + + default: + break; + } +} + +static bool +init_block_cb(nir_block *block, void *_state) +{ + struct exec_list *worklist = (struct exec_list *) _state; + + nir_foreach_instr(block, instr) + init_instr(instr, worklist); + + nir_if *following_if = nir_block_get_following_if(block); + if (following_if) { + if (following_if->condition.is_ssa && + !following_if->condition.ssa->parent_instr->pass_flags) + worklist_push(worklist, following_if->condition.ssa->parent_instr); + } + + return true; +} + +static bool +delete_block_cb(nir_block *block, void *_state) +{ + bool *progress = (bool *) _state; + + nir_foreach_instr_safe(block, instr) { + if (!instr->pass_flags) { + nir_instr_remove(instr); + *progress = true; + } + } + + return true; +} + +bool +nir_opt_dce_impl(nir_function_impl *impl) +{ + struct exec_list *worklist = ralloc(NULL, struct exec_list); + exec_list_make_empty(worklist); + + nir_foreach_block(impl, init_block_cb, worklist); + + while (!exec_list_is_empty(worklist)) { + nir_instr *instr = worklist_pop(worklist); + nir_foreach_src(instr, mark_live_cb, worklist); + } + + ralloc_free(worklist); + + bool progress = false; + nir_foreach_block(impl, 
delete_block_cb, &progress); + + if (progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return progress; +} + +bool +nir_opt_dce(nir_shader *shader) +{ + bool progress = false; + nir_foreach_overload(shader, overload) { + if (overload->impl && nir_opt_dce_impl(overload->impl)) + progress = true; + } + + return progress; +} diff --git a/mesalib/src/glsl/nir/nir_opt_gcm.c b/mesalib/src/glsl/nir/nir_opt_gcm.c new file mode 100644 index 000000000..bf565b969 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_gcm.c @@ -0,0 +1,492 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements Global Code Motion. A description of GCM can be found in + * "Global Code Motion; Global Value Numbering" by Cliff Click. 
+ * Unfortunately, the algorithm presented in the paper is broken in a + * number of ways. The algorithm used here differs substantially from the + * one in the paper but it is, in my opinion, much easier to read and + * verify correcness. + */ + +struct gcm_block_info { + /* Number of loops this block is inside */ + unsigned loop_depth; + + /* The last instruction inserted into this block. This is used as we + * traverse the instructions and insert them back into the program to + * put them in the right order. + */ + nir_instr *last_instr; +}; + +/* Flags used in the instr->pass_flags field for various instruction states */ +enum { + GCM_INSTR_PINNED = (1 << 0), + GCM_INSTR_SCHEDULED_EARLY = (1 << 1), + GCM_INSTR_SCHEDULED_LATE = (1 << 2), + GCM_INSTR_PLACED = (1 << 3), +}; + +struct gcm_state { + nir_function_impl *impl; + nir_instr *instr; + + /* The list of non-pinned instructions. As we do the late scheduling, + * we pull non-pinned instructions out of their blocks and place them in + * this list. This saves us from having linked-list problems when we go + * to put instructions back in their blocks. 
+ */ + struct exec_list instrs; + + struct gcm_block_info *blocks; +}; + +/* Recursively walks the CFG and builds the block_info structure */ +static void +gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state, + unsigned loop_depth) +{ + foreach_list_typed(nir_cf_node, node, node, cf_list) { + switch (node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(node); + state->blocks[block->index].loop_depth = loop_depth; + break; + } + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(node); + gcm_build_block_info(&if_stmt->then_list, state, loop_depth); + gcm_build_block_info(&if_stmt->else_list, state, loop_depth); + break; + } + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + gcm_build_block_info(&loop->body, state, loop_depth + 1); + break; + } + default: + unreachable("Invalid CF node type"); + } + } +} + +/* Walks the instruction list and marks immovable instructions as pinned + * + * This function also serves to initialize the instr->pass_flags field. + * After this is completed, all instructions' pass_flags fields will be set + * to either GCM_INSTR_PINNED or 0. 
+ */ +static bool +gcm_pin_instructions_block(nir_block *block, void *void_state) +{ + struct gcm_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_alu: + switch (nir_instr_as_alu(instr)->op) { + case nir_op_fddx: + case nir_op_fddy: + case nir_op_fddx_fine: + case nir_op_fddy_fine: + case nir_op_fddx_coarse: + case nir_op_fddy_coarse: + /* These can only go in uniform control flow; pin them for now */ + instr->pass_flags = GCM_INSTR_PINNED; + + default: + instr->pass_flags = 0; + } + break; + + case nir_instr_type_tex: + switch (nir_instr_as_tex(instr)->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_lod: + /* These two take implicit derivatives so they need to be pinned */ + instr->pass_flags = GCM_INSTR_PINNED; + + default: + instr->pass_flags = 0; + } + break; + + case nir_instr_type_load_const: + instr->pass_flags = 0; + break; + + case nir_instr_type_intrinsic: { + const nir_intrinsic_info *info = + &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic]; + + if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && + (info->flags & NIR_INTRINSIC_CAN_REORDER)) { + instr->pass_flags = 0; + } else { + instr->pass_flags = GCM_INSTR_PINNED; + } + break; + } + + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + case nir_instr_type_phi: + instr->pass_flags = GCM_INSTR_PINNED; + break; + + default: + unreachable("Invalid instruction type in GCM"); + } + + if (!(instr->pass_flags & GCM_INSTR_PINNED)) { + /* If this is an unpinned instruction, go ahead and pull it out of + * the program and put it on the instrs list. This has a couple + * of benifits. First, it makes the scheduling algorithm more + * efficient because we can avoid walking over basic blocks and + * pinned instructions. Second, it keeps us from causing linked + * list confusion when we're trying to put everything in its + * proper place at the end of the pass. 
+ * + * Note that we don't use nir_instr_remove here because that also + * cleans up uses and defs and we want to keep that information. + */ + exec_node_remove(&instr->node); + exec_list_push_tail(&state->instrs, &instr->node); + } + } + + return true; +} + +static void +gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state); + +/** Update an instructions schedule for the given source + * + * This function is called iteratively as we walk the sources of an + * instruction. It ensures that the given source instruction has been + * scheduled and then update this instruction's block if the source + * instruction is lower down the tree. + */ +static bool +gcm_schedule_early_src(nir_src *src, void *void_state) +{ + struct gcm_state *state = void_state; + nir_instr *instr = state->instr; + + assert(src->is_ssa); + + gcm_schedule_early_instr(src->ssa->parent_instr, void_state); + + /* While the index isn't a proper dominance depth, it does have the + * property that if A dominates B then A->index <= B->index. Since we + * know that this instruction must have been dominated by all of its + * sources at some point (even if it's gone through value-numbering), + * all of the sources must lie on the same branch of the dominance tree. + * Therefore, we can just go ahead and just compare indices. + */ + if (instr->block->index < src->ssa->parent_instr->block->index) + instr->block = src->ssa->parent_instr->block; + + /* We need to restore the state instruction because it may have been + * changed through the gcm_schedule_early_instr call above. Since we + * may still be iterating through sources and future calls to + * gcm_schedule_early_src for the same instruction will still need it. 
+ */ + state->instr = instr; + + return true; +} + +/** Schedules an instruction early + * + * This function performs a recursive depth-first search starting at the + * given instruction and proceeding through the sources to schedule + * instructions as early as they can possibly go in the dominance tree. + * The instructions are "scheduled" by updating their instr->block field. + */ +static void +gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state) +{ + if (instr->pass_flags & GCM_INSTR_SCHEDULED_EARLY) + return; + + instr->pass_flags |= GCM_INSTR_SCHEDULED_EARLY; + + /* Pinned instructions are already scheduled so we don't need to do + * anything. Also, bailing here keeps us from ever following the + * sources of phi nodes which can be back-edges. + */ + if (instr->pass_flags & GCM_INSTR_PINNED) + return; + + /* Start with the instruction at the top. As we iterate over the + * sources, it will get moved down as needed. + */ + instr->block = state->impl->start_block; + state->instr = instr; + + nir_foreach_src(instr, gcm_schedule_early_src, state); +} + +static void +gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state); + +/** Schedules the instruction associated with the given SSA def late + * + * This function works by first walking all of the uses of the given SSA + * definition, ensuring that they are scheduled, and then computing the LCA + * (least common ancestor) of its uses. It then schedules this instruction + * as close to the LCA as possible while trying to stay out of loops. + */ +static bool +gcm_schedule_late_def(nir_ssa_def *def, void *void_state) +{ + struct gcm_state *state = void_state; + + nir_block *lca = NULL; + + struct set_entry *entry; + set_foreach(def->uses, entry) { + nir_instr *use_instr = (nir_instr *)entry->key; + + gcm_schedule_late_instr(use_instr, state); + + /* Phi instructions are a bit special. 
SSA definitions don't have to + * dominate the sources of the phi nodes that use them; instead, they + * have to dominate the predecessor block corresponding to the phi + * source. We handle this by looking through the sources, finding + * any that are usingg this SSA def, and using those blocks instead + * of the one the phi lives in. + */ + if (use_instr->type == nir_instr_type_phi) { + nir_phi_instr *phi = nir_instr_as_phi(use_instr); + + nir_foreach_phi_src(phi, phi_src) { + if (phi_src->src.ssa == def) + lca = nir_dominance_lca(lca, phi_src->pred); + } + } else { + lca = nir_dominance_lca(lca, use_instr->block); + } + } + + set_foreach(def->if_uses, entry) { + nir_if *if_stmt = (nir_if *)entry->key; + + /* For if statements, we consider the block to be the one immediately + * preceding the if CF node. + */ + nir_block *pred_block = + nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node)); + + lca = nir_dominance_lca(lca, pred_block); + } + + /* Some instructions may never be used. We'll just leave them scheduled + * early and let dead code clean them up. + */ + if (lca == NULL) + return true; + + /* We know have the LCA of all of the uses. If our invariants hold, + * this is dominated by the block that we chose when scheduling early. + * We now walk up the dominance tree and pick the lowest block that is + * as far outside loops as we can get. + */ + nir_block *best = lca; + while (lca != def->parent_instr->block) { + assert(lca); + if (state->blocks[lca->index].loop_depth < + state->blocks[best->index].loop_depth) + best = lca; + lca = lca->imm_dom; + } + def->parent_instr->block = best; + + return true; +} + +/** Schedules an instruction late + * + * This function performs a depth-first search starting at the given + * instruction and proceeding through its uses to schedule instructions as + * late as they can reasonably go in the dominance tree. The instructions + * are "scheduled" by updating their instr->block field. 
+ * + * The name of this function is actually a bit of a misnomer as it doesn't + * schedule them "as late as possible" as the paper implies. Instead, it + * first finds the lates possible place it can schedule the instruction and + * then possibly schedules it earlier than that. The actual location is as + * far down the tree as we can go while trying to stay out of loops. + */ +static void +gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state) +{ + if (instr->pass_flags & GCM_INSTR_SCHEDULED_LATE) + return; + + instr->pass_flags |= GCM_INSTR_SCHEDULED_LATE; + + /* Pinned instructions are already scheduled so we don't need to do + * anything. Also, bailing here keeps us from ever following phi nodes + * which can be back-edges. + */ + if (instr->pass_flags & GCM_INSTR_PINNED) + return; + + nir_foreach_ssa_def(instr, gcm_schedule_late_def, state); +} + +static void +gcm_place_instr(nir_instr *instr, struct gcm_state *state); + +static bool +gcm_place_instr_def(nir_ssa_def *def, void *state) +{ + struct set_entry *entry; + set_foreach(def->uses, entry) + gcm_place_instr((nir_instr *)entry->key, state); + + return false; +} + +/** Places an instrution back into the program + * + * The earlier passes of GCM simply choose blocks for each instruction and + * otherwise leave them alone. This pass actually places the instructions + * into their chosen blocks. + * + * To do so, we use a standard post-order depth-first search linearization + * algorithm. We walk over the uses of the given instruction and ensure + * that they are placed and then place this instruction. Because we are + * working on multiple blocks at a time, we keep track of the last inserted + * instruction per-block in the state structure's block_info array. When + * we insert an instruction in a block we insert it before the last + * instruction inserted in that block rather than the last instruction + * inserted globally. 
+ */ +static void +gcm_place_instr(nir_instr *instr, struct gcm_state *state) +{ + if (instr->pass_flags & GCM_INSTR_PLACED) + return; + + instr->pass_flags |= GCM_INSTR_PLACED; + + /* Phi nodes are our once source of back-edges. Since right now we are + * only doing scheduling within blocks, we don't need to worry about + * them since they are always at the top. Just skip them completely. + */ + if (instr->type == nir_instr_type_phi) { + assert(instr->pass_flags & GCM_INSTR_PINNED); + return; + } + + nir_foreach_ssa_def(instr, gcm_place_instr_def, state); + + if (instr->pass_flags & GCM_INSTR_PINNED) { + /* Pinned instructions have an implicit dependence on the pinned + * instructions that come after them in the block. Since the pinned + * instructions will naturally "chain" together, we only need to + * explicitly visit one of them. + */ + for (nir_instr *after = nir_instr_next(instr); + after; + after = nir_instr_next(after)) { + if (after->pass_flags & GCM_INSTR_PINNED) { + gcm_place_instr(after, state); + break; + } + } + } + + struct gcm_block_info *block_info = &state->blocks[instr->block->index]; + if (!(instr->pass_flags & GCM_INSTR_PINNED)) { + exec_node_remove(&instr->node); + + if (block_info->last_instr) { + exec_node_insert_node_before(&block_info->last_instr->node, + &instr->node); + } else { + /* Schedule it at the end of the block */ + nir_instr *jump_instr = nir_block_last_instr(instr->block); + if (jump_instr && jump_instr->type == nir_instr_type_jump) { + exec_node_insert_node_before(&jump_instr->node, &instr->node); + } else { + exec_list_push_tail(&instr->block->instr_list, &instr->node); + } + } + } + + block_info->last_instr = instr; +} + +static void +opt_gcm_impl(nir_function_impl *impl) +{ + struct gcm_state state; + + state.impl = impl; + state.instr = NULL; + exec_list_make_empty(&state.instrs); + state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks); + + nir_metadata_require(impl, nir_metadata_block_index | + 
nir_metadata_dominance); + + gcm_build_block_info(&impl->body, &state, 0); + nir_foreach_block(impl, gcm_pin_instructions_block, &state); + + foreach_list_typed(nir_instr, instr, node, &state.instrs) + gcm_schedule_early_instr(instr, &state); + + foreach_list_typed(nir_instr, instr, node, &state.instrs) + gcm_schedule_late_instr(instr, &state); + + while (!exec_list_is_empty(&state.instrs)) { + nir_instr *instr = exec_node_data(nir_instr, + state.instrs.tail_pred, node); + gcm_place_instr(instr, &state); + } + + ralloc_free(state.blocks); +} + +void +nir_opt_gcm(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + opt_gcm_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_opt_global_to_local.c b/mesalib/src/glsl/nir/nir_opt_global_to_local.c new file mode 100644 index 000000000..00db37ba7 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_global_to_local.c @@ -0,0 +1,103 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +static bool +global_to_local(nir_register *reg) +{ + nir_function_impl *impl = NULL; + + assert(reg->is_global); + + struct set_entry *entry; + set_foreach(reg->defs, entry) { + nir_instr *instr = (nir_instr *) entry->key; + nir_function_impl *instr_impl = + nir_cf_node_get_function(&instr->block->cf_node); + if (impl != NULL) { + if (impl != instr_impl) + return false; + } else { + impl = instr_impl; + } + } + + set_foreach(reg->uses, entry) { + nir_instr *instr = (nir_instr *) entry->key; + nir_function_impl *instr_impl = + nir_cf_node_get_function(&instr->block->cf_node); + if (impl != NULL) { + if (impl != instr_impl) + return false; + } else { + impl = instr_impl; + } + } + + set_foreach(reg->if_uses, entry) { + nir_if *if_stmt = (nir_if *) entry->key; + nir_function_impl *if_impl = nir_cf_node_get_function(&if_stmt->cf_node); + if (impl != NULL) { + if (impl != if_impl) + return false; + } else { + impl = if_impl; + } + } + + if (impl == NULL) { + /* this instruction is never used/defined, delete it */ + nir_reg_remove(reg); + return true; + } + + /* + * if we've gotten to this point, the register is always used/defined in + * the same implementation so we can move it to be local to that + * implementation. 
+ */ + + exec_node_remove(®->node); + exec_list_push_tail(&impl->registers, ®->node); + reg->index = impl->reg_alloc++; + reg->is_global = false; + return true; +} + +bool +nir_opt_global_to_local(nir_shader *shader) +{ + bool progress = false; + + foreach_list_typed_safe(nir_register, reg, node, &shader->registers) { + if (global_to_local(reg)) + progress = true; + } + + return progress; +} diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c new file mode 100644 index 000000000..ab08f286f --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c @@ -0,0 +1,208 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements a small peephole optimization that looks for + * + * if (cond) { + * <empty> + * } else { + * <empty> + * } + * phi + * ... + * phi + * + * and replaces it with a series of selects. It can also handle the case + * where, instead of being empty, the if may contain some move operations + * whose only use is one of the following phi nodes. This happens all the + * time when the SSA form comes from a conditional assignment with a + * swizzle. + */ + +struct peephole_select_state { + void *mem_ctx; + bool progress; +}; + +static bool +are_all_move_to_phi(nir_block *block) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_alu) + return false; + + /* It must be a move operation */ + nir_alu_instr *mov = nir_instr_as_alu(instr); + if (mov->op != nir_op_fmov && mov->op != nir_op_imov) + return false; + + /* Can't handle saturate */ + if (mov->dest.saturate) + return false; + + /* It must be SSA */ + if (!mov->dest.dest.is_ssa) + return false; + + /* It cannot have any if-uses */ + if (mov->dest.dest.ssa.if_uses->entries != 0) + return false; + + /* The only uses of this definition must be phi's in the successor */ + struct set_entry *entry; + set_foreach(mov->dest.dest.ssa.uses, entry) { + const nir_instr *dest_instr = entry->key; + if (dest_instr->type != nir_instr_type_phi || + dest_instr->block != block->successors[0]) + return false; + } + } + + return true; +} + +static bool +nir_opt_peephole_select_block(nir_block *block, void *void_state) +{ + struct peephole_select_state *state = void_state; + + /* If the block is empty, then it certainly doesn't have any phi nodes, + * so we can skip it. This also ensures that we do an early skip on the + * end block of the function which isn't actually attached to the CFG. 
+ */ + if (exec_list_is_empty(&block->instr_list)) + return true; + + if (nir_cf_node_is_first(&block->cf_node)) + return true; + + nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node); + if (prev_node->type != nir_cf_node_if) + return true; + + nir_if *if_stmt = nir_cf_node_as_if(prev_node); + nir_cf_node *then_node = nir_if_first_then_node(if_stmt); + nir_cf_node *else_node = nir_if_first_else_node(if_stmt); + + /* We can only have one block in each side ... */ + if (nir_if_last_then_node(if_stmt) != then_node || + nir_if_last_else_node(if_stmt) != else_node) + return true; + + nir_block *then_block = nir_cf_node_as_block(then_node); + nir_block *else_block = nir_cf_node_as_block(else_node); + + /* ... and those blocks must only contain move-to-phi. */ + if (!are_all_move_to_phi(then_block) || !are_all_move_to_phi(else_block)) + return true; + + /* At this point, we know that the previous CFG node is an if-then + * statement containing only moves to phi nodes in this block. We can + * just remove that entire CF node and replace all of the phi nodes with + * selects. + */ + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel); + nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx); + /* Splat the condition to all channels */ + memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle); + + assert(exec_list_length(&phi->srcs) == 2); + nir_foreach_phi_src(phi, src) { + assert(src->pred == then_block || src->pred == else_block); + assert(src->src.is_ssa); + + unsigned idx = src->pred == then_block ? 1 : 2; + + if (src->src.ssa->parent_instr->block == src->pred) { + /* We already know that this instruction must be a move with + * this phi's in this block as its only users. 
+ */ + nir_alu_instr *mov = nir_instr_as_alu(src->src.ssa->parent_instr); + assert(mov->instr.type == nir_instr_type_alu); + assert(mov->op == nir_op_fmov || mov->op == nir_op_imov); + + nir_alu_src_copy(&sel->src[idx], &mov->src[0], state->mem_ctx); + } else { + nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx); + } + } + + nir_ssa_dest_init(&sel->instr, &sel->dest.dest, + phi->dest.ssa.num_components, phi->dest.ssa.name); + sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; + + nir_ssa_def_rewrite_uses(&phi->dest.ssa, + nir_src_for_ssa(&sel->dest.dest.ssa), + state->mem_ctx); + + nir_instr_insert_before(&phi->instr, &sel->instr); + nir_instr_remove(&phi->instr); + } + + nir_cf_node_remove(&if_stmt->cf_node); + state->progress = true; + + return true; +} + +static bool +nir_opt_peephole_select_impl(nir_function_impl *impl) +{ + struct peephole_select_state state; + + state.mem_ctx = ralloc_parent(impl); + state.progress = false; + + nir_foreach_block(impl, nir_opt_peephole_select_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_none); + + return state.progress; +} + +bool +nir_opt_peephole_select(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) { + if (overload->impl) + progress |= nir_opt_peephole_select_impl(overload->impl); + } + + return progress; +} diff --git a/mesalib/src/glsl/nir/nir_opt_remove_phis.c b/mesalib/src/glsl/nir/nir_opt_remove_phis.c new file mode 100644 index 000000000..7896584b4 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_opt_remove_phis.c @@ -0,0 +1,109 @@ +/* + * Copyright © 2015 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, 
and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +/* + * This is a pass for removing phi nodes that look like: + * a = phi(b, b, b, ...) + * + * Note that we can't ignore undef sources here, or else we may create a + * situation where the definition of b isn't dominated by its uses. We're + * allowed to do this since the definition of b must dominate all of the + * phi node's predecessors, which means it must dominate the phi node as well + * as all of the phi node's uses. In essence, the phi node acts as a copy + * instruction. b can't be another phi node in the same block, since the only + * time when phi nodes can source other phi nodes defined in the same block is + * at the loop header, and in that case one of the sources of the phi has to + * be from before the loop and that source can't be b. 
+ */ + +static bool +remove_phis_block(nir_block *block, void *state) +{ + bool *progress = state; + + void *mem_ctx = ralloc_parent(block); + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + + nir_ssa_def *def = NULL; + bool srcs_same = true; + + nir_foreach_phi_src(phi, src) { + assert(src->src.is_ssa); + + if (def == NULL) { + def = src->src.ssa; + } else { + if (src->src.ssa != def) { + srcs_same = false; + break; + } + } + } + + if (!srcs_same) + continue; + + assert(phi->dest.is_ssa); + nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def), + mem_ctx); + nir_instr_remove(instr); + + *progress = true; + } + + return true; +} + +static bool +remove_phis_impl(nir_function_impl *impl) +{ + bool progress = false; + + nir_foreach_block(impl, remove_phis_block, &progress); + + return progress; +} + +bool +nir_opt_remove_phis(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) + if (overload->impl) + progress = remove_phis_impl(overload->impl) || progress; + + return progress; +} + diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c new file mode 100644 index 000000000..6a3c6a027 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_print.c @@ -0,0 +1,888 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the 
+ * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include <stdio.h> +#include <stdlib.h> + +static void +print_tabs(unsigned num_tabs, FILE *fp) +{ + for (unsigned i = 0; i < num_tabs; i++) + fprintf(fp, "\t"); +} + +typedef struct { + /** map from nir_variable -> printable name */ + struct hash_table *ht; + + /** set of names used so far for nir_variables */ + struct set *syms; + + /* an index used to make new non-conflicting names */ + unsigned index; +} print_var_state; + +static void +print_register(nir_register *reg, FILE *fp) +{ + if (reg->name != NULL) + fprintf(fp, "/* %s */ ", reg->name); + if (reg->is_global) + fprintf(fp, "gr%u", reg->index); + else + fprintf(fp, "r%u", reg->index); +} + +static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" }; + +static void +print_register_decl(nir_register *reg, FILE *fp) +{ + fprintf(fp, "decl_reg %s ", sizes[reg->num_components]); + if (reg->is_packed) + fprintf(fp, "(packed) "); + print_register(reg, fp); + if (reg->num_array_elems != 0) + fprintf(fp, "[%u]", reg->num_array_elems); + fprintf(fp, "\n"); +} + +static void +print_ssa_def(nir_ssa_def *def, FILE *fp) +{ + if (def->name != NULL) + fprintf(fp, "/* %s */ ", def->name); + fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index); +} + +static void +print_ssa_use(nir_ssa_def *def, FILE *fp) +{ + if (def->name != NULL) + fprintf(fp, "/* %s */ ", def->name); + fprintf(fp, "ssa_%u", def->index); +} + 
+static void print_src(nir_src *src, FILE *fp); + +static void +print_reg_src(nir_reg_src *src, FILE *fp) +{ + print_register(src->reg, fp); + if (src->reg->num_array_elems != 0) { + fprintf(fp, "[%u", src->base_offset); + if (src->indirect != NULL) { + fprintf(fp, " + "); + print_src(src->indirect, fp); + } + fprintf(fp, "]"); + } +} + +static void +print_reg_dest(nir_reg_dest *dest, FILE *fp) +{ + print_register(dest->reg, fp); + if (dest->reg->num_array_elems != 0) { + fprintf(fp, "[%u", dest->base_offset); + if (dest->indirect != NULL) { + fprintf(fp, " + "); + print_src(dest->indirect, fp); + } + fprintf(fp, "]"); + } +} + +static void +print_src(nir_src *src, FILE *fp) +{ + if (src->is_ssa) + print_ssa_use(src->ssa, fp); + else + print_reg_src(&src->reg, fp); +} + +static void +print_dest(nir_dest *dest, FILE *fp) +{ + if (dest->is_ssa) + print_ssa_def(&dest->ssa, fp); + else + print_reg_dest(&dest->reg, fp); +} + +static void +print_alu_src(nir_alu_src *src, FILE *fp) +{ + if (src->negate) + fprintf(fp, "-"); + if (src->abs) + fprintf(fp, "abs("); + + print_src(&src->src, fp); + + if (src->swizzle[0] != 0 || + src->swizzle[1] != 1 || + src->swizzle[2] != 2 || + src->swizzle[3] != 3) { + fprintf(fp, "."); + for (unsigned i = 0; i < 4; i++) + fprintf(fp, "%c", "xyzw"[src->swizzle[i]]); + } + + if (src->abs) + fprintf(fp, ")"); +} + +static void +print_alu_dest(nir_alu_dest *dest, FILE *fp) +{ + /* we're going to print the saturate modifier later, after the opcode */ + + print_dest(&dest->dest, fp); + + if (!dest->dest.is_ssa && + dest->write_mask != (1 << dest->dest.reg.reg->num_components) - 1) { + fprintf(fp, "."); + for (unsigned i = 0; i < 4; i++) + if ((dest->write_mask >> i) & 1) + fprintf(fp, "%c", "xyzw"[i]); + } +} + +static void +print_alu_instr(nir_alu_instr *instr, FILE *fp) +{ + print_alu_dest(&instr->dest, fp); + + fprintf(fp, " = %s", nir_op_infos[instr->op].name); + if (instr->dest.saturate) + fprintf(fp, ".sat"); + fprintf(fp, " "); + + for 
(unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_alu_src(&instr->src[i], fp); + } +} + +static void +print_var_decl(nir_variable *var, print_var_state *state, FILE *fp) +{ + fprintf(fp, "decl_var "); + + const char *const cent = (var->data.centroid) ? "centroid " : ""; + const char *const samp = (var->data.sample) ? "sample " : ""; + const char *const inv = (var->data.invariant) ? "invariant " : ""; + const char *const mode[] = { "shader_in ", "shader_out ", "", "", + "uniform ", "system " }; + const char *const interp[] = { "", "smooth", "flat", "noperspective" }; + + fprintf(fp, "%s%s%s%s%s ", + cent, samp, inv, mode[var->data.mode], interp[var->data.interpolation]); + + glsl_print_type(var->type, fp); + + struct set_entry *entry = NULL; + if (state) + entry = _mesa_set_search(state->syms, var->name); + + char *name; + + if (entry != NULL) { + /* we have a collision with another name, append an @ + a unique index */ + name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++); + } else { + name = var->name; + } + + fprintf(fp, " %s", name); + + if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out || + var->data.mode == nir_var_uniform) { + fprintf(fp, " (%u)", var->data.driver_location); + } + + fprintf(fp, "\n"); + + if (state) { + _mesa_set_add(state->syms, name); + _mesa_hash_table_insert(state->ht, var, name); + } +} + +static void +print_var(nir_variable *var, print_var_state *state, FILE *fp) +{ + const char *name; + if (state) { + struct hash_entry *entry = _mesa_hash_table_search(state->ht, var); + + assert(entry != NULL); + name = entry->data; + } else { + name = var->name; + } + + fprintf(fp, "%s", name); +} + +static void +print_deref_var(nir_deref_var *deref, print_var_state *state, FILE *fp) +{ + print_var(deref->var, state, fp); +} + +static void +print_deref_array(nir_deref_array *deref, print_var_state *state, FILE *fp) +{ + fprintf(fp, "["); + 
switch (deref->deref_array_type) { + case nir_deref_array_type_direct: + fprintf(fp, "%u", deref->base_offset); + break; + case nir_deref_array_type_indirect: + if (deref->base_offset != 0) + fprintf(fp, "%u + ", deref->base_offset); + print_src(&deref->indirect, fp); + break; + case nir_deref_array_type_wildcard: + fprintf(fp, "*"); + break; + } + fprintf(fp, "]"); +} + +static void +print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type, + print_var_state *state, FILE *fp) +{ + fprintf(fp, ".%s", glsl_get_struct_elem_name(parent_type, deref->index)); +} + +static void +print_deref(nir_deref_var *deref, print_var_state *state, FILE *fp) +{ + nir_deref *tail = &deref->deref; + nir_deref *pretail = NULL; + while (tail != NULL) { + switch (tail->deref_type) { + case nir_deref_type_var: + assert(pretail == NULL); + assert(tail == &deref->deref); + print_deref_var(deref, state, fp); + break; + + case nir_deref_type_array: + assert(pretail != NULL); + print_deref_array(nir_deref_as_array(tail), state, fp); + break; + + case nir_deref_type_struct: + assert(pretail != NULL); + print_deref_struct(nir_deref_as_struct(tail), + pretail->type, state, fp); + break; + + default: + unreachable("Invalid deref type"); + } + + pretail = tail; + tail = pretail->child; + } +} + +static void +print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state, + FILE *fp) +{ + unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; + + if (nir_intrinsic_infos[instr->intrinsic].has_dest) { + print_dest(&instr->dest, fp); + fprintf(fp, " = "); + } + + fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name); + + for (unsigned i = 0; i < num_srcs; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_src(&instr->src[i], fp); + } + + fprintf(fp, ") ("); + + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + + for (unsigned i = 0; i < num_vars; i++) { + if (i != 0) + fprintf(fp, ", "); + + 
print_deref(instr->variables[i], state, fp);
+   }
+
+   fprintf(fp, ") (");
+
+   unsigned num_indices = nir_intrinsic_infos[instr->intrinsic].num_indices;
+
+   for (unsigned i = 0; i < num_indices; i++) {
+      if (i != 0)
+         fprintf(fp, ", ");
+
+      fprintf(fp, "%u", instr->const_index[i]);
+   }
+
+   fprintf(fp, ")");
+}
+
+/**
+ * Prints a texture instruction as
+ * "<dest> = <op> <src> (<src type>), ... <sampler> (sampler)".
+ *
+ * The constant offset is printed only when at least one of its four
+ * components is non-zero, and the gather component only for tg4.  The
+ * sampler is printed as a deref when instr->sampler is set and as the
+ * numeric sampler_index otherwise.
+ */
+static void
+print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
+{
+   print_dest(&instr->dest, fp);
+
+   fprintf(fp, " = ");
+
+   switch (instr->op) {
+   case nir_texop_tex:
+      fprintf(fp, "tex ");
+      break;
+   case nir_texop_txb:
+      fprintf(fp, "txb ");
+      break;
+   case nir_texop_txl:
+      fprintf(fp, "txl ");
+      break;
+   case nir_texop_txd:
+      fprintf(fp, "txd ");
+      break;
+   case nir_texop_txf:
+      fprintf(fp, "txf ");
+      break;
+   case nir_texop_txf_ms:
+      fprintf(fp, "txf_ms ");
+      break;
+   case nir_texop_txs:
+      fprintf(fp, "txs ");
+      break;
+   case nir_texop_lod:
+      fprintf(fp, "lod ");
+      break;
+   case nir_texop_tg4:
+      fprintf(fp, "tg4 ");
+      break;
+   case nir_texop_query_levels:
+      fprintf(fp, "query_levels ");
+      break;
+
+   default:
+      unreachable("Invalid texture operation");
+      break;
+   }
+
+   for (unsigned i = 0; i < instr->num_srcs; i++) {
+      print_src(&instr->src[i].src, fp);
+
+      fprintf(fp, " ");
+
+      switch(instr->src[i].src_type) {
+      case nir_tex_src_coord:
+         fprintf(fp, "(coord)");
+         break;
+      case nir_tex_src_projector:
+         fprintf(fp, "(projector)");
+         break;
+      case nir_tex_src_comparitor:
+         fprintf(fp, "(comparitor)");
+         break;
+      case nir_tex_src_offset:
+         fprintf(fp, "(offset)");
+         break;
+      case nir_tex_src_bias:
+         fprintf(fp, "(bias)");
+         break;
+      case nir_tex_src_lod:
+         fprintf(fp, "(lod)");
+         break;
+      case nir_tex_src_ms_index:
+         fprintf(fp, "(ms_index)");
+         break;
+      case nir_tex_src_ddx:
+         fprintf(fp, "(ddx)");
+         break;
+      case nir_tex_src_ddy:
+         fprintf(fp, "(ddy)");
+         break;
+      case nir_tex_src_sampler_offset:
+         fprintf(fp, "(sampler_offset)");
+         break;
+
+      default:
+         unreachable("Invalid texture source type");
+         break;
+      }
+
+      fprintf(fp, ", ");
+   }
+
+   bool has_nonzero_offset = false;
+   for (unsigned i = 0; i < 4; i++) {
+      if (instr->const_offset[i] != 0) {
+         has_nonzero_offset = true;
+         break;
+      }
+   }
+
+   if (has_nonzero_offset) {
+      fprintf(fp, "[%i %i %i %i] (offset), ",
+              instr->const_offset[0], instr->const_offset[1],
+              instr->const_offset[2], instr->const_offset[3]);
+   }
+
+   if (instr->op == nir_texop_tg4) {
+      fprintf(fp, "%u (gather_component), ", instr->component);
+   }
+
+   if (instr->sampler) {
+      print_deref(instr->sampler, state, fp);
+   } else {
+      fprintf(fp, "%u", instr->sampler_index);
+   }
+
+   fprintf(fp, " (sampler)");
+}
+
+/**
+ * Prints "call <callee name> <param deref>, ..." followed, when the call has
+ * a return deref, by "returning <deref>".
+ */
+static void
+print_call_instr(nir_call_instr *instr, print_var_state *state, FILE *fp)
+{
+   fprintf(fp, "call %s ", instr->callee->function->name);
+
+   for (unsigned i = 0; i < instr->num_params; i++) {
+      if (i != 0)
+         fprintf(fp, ", ");
+
+      print_deref(instr->params[i], state, fp);
+   }
+
+   if (instr->return_deref != NULL) {
+      if (instr->num_params != 0)
+         fprintf(fp, ", ");
+      fprintf(fp, "returning ");
+      print_deref(instr->return_deref, state, fp);
+   }
+}
+
+/**
+ * Prints "<def> = load_const (<c0>, <c1>, ..." with one entry per component
+ * of the definition.  The tabs parameter is not used by this function.
+ */
+static void
+print_load_const_instr(nir_load_const_instr *instr, unsigned tabs, FILE *fp)
+{
+   print_ssa_def(&instr->def, fp);
+
+   fprintf(fp, " = load_const (");
+
+   for (unsigned i = 0; i < instr->def.num_components; i++) {
+      if (i != 0)
+         fprintf(fp, ", ");
+
+      /*
+       * we don't really know the type of the constant (if it will be used as a
+       * float or an int), so just print the raw constant in hex for fidelity
+       * and then print the float in a comment for readability.
+       */
+
+      fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]);
+   }
+}
+
+/** Prints the keyword ("break", "continue" or "return") for a jump. */
+static void
+print_jump_instr(nir_jump_instr *instr, FILE *fp)
+{
+   switch (instr->type) {
+   case nir_jump_break:
+      fprintf(fp, "break");
+      break;
+
+   case nir_jump_continue:
+      fprintf(fp, "continue");
+      break;
+
+   case nir_jump_return:
+      fprintf(fp, "return");
+      break;
+
+   default:
+      /* Every other switch in this printer traps unknown values; do the
+       * same here instead of silently printing nothing.
+       */
+      unreachable("Invalid jump type");
+      break;
+   }
+}
+
+/** Prints "<def> = undefined". */
+static void
+print_ssa_undef_instr(nir_ssa_undef_instr* instr, FILE *fp)
+{
+   print_ssa_def(&instr->def, fp);
+   fprintf(fp, " = undefined");
+}
+
+/**
+ * Prints "<dest> = phi block_<N>: <src>, ..." with one entry per phi source,
+ * labelled with the index of the predecessor block it comes from.
+ */
+static void
+print_phi_instr(nir_phi_instr *instr, FILE *fp)
+{
+   print_dest(&instr->dest, fp);
+   fprintf(fp, " = phi ");
+   nir_foreach_phi_src(instr, src) {
+      /* Separator before every entry except the list head */
+      if (&src->node != exec_list_get_head(&instr->srcs))
+         fprintf(fp, ", ");
+
+      fprintf(fp, "block_%u: ", src->pred->index);
+      print_src(&src->src, fp);
+   }
+}
+
+/** Prints the entries of a parallel copy as "<dest> = <src>; <dest> = <src>". */
+static void
+print_parallel_copy_instr(nir_parallel_copy_instr *instr, FILE *fp)
+{
+   nir_foreach_parallel_copy_entry(instr, entry) {
+      /* Separator before every entry except the list head */
+      if (&entry->node != exec_list_get_head(&instr->entries))
+         fprintf(fp, "; ");
+
+      print_dest(&entry->dest, fp);
+      fprintf(fp, " = ");
+      print_src(&entry->src, fp);
+   }
+}
+
+/**
+ * Prints the indentation for one instruction and then dispatches on the
+ * instruction type to the matching printer.  No trailing newline is written.
+ */
+static void
+print_instr(const nir_instr *instr, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   print_tabs(tabs, fp);
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      print_alu_instr(nir_instr_as_alu(instr), fp);
+      break;
+
+   case nir_instr_type_call:
+      print_call_instr(nir_instr_as_call(instr), state, fp);
+      break;
+
+   case nir_instr_type_intrinsic:
+      print_intrinsic_instr(nir_instr_as_intrinsic(instr), state, fp);
+      break;
+
+   case nir_instr_type_tex:
+      print_tex_instr(nir_instr_as_tex(instr), state, fp);
+      break;
+
+   case nir_instr_type_load_const:
+      print_load_const_instr(nir_instr_as_load_const(instr), tabs, fp);
+      break;
+
+   case nir_instr_type_jump:
+      print_jump_instr(nir_instr_as_jump(instr), fp);
+      break;
+
+   case nir_instr_type_ssa_undef:
+      print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), fp);
+      break;
+
+   case nir_instr_type_phi:
+      print_phi_instr(nir_instr_as_phi(instr), fp);
+      break;
+
+   case nir_instr_type_parallel_copy:
+      print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), fp);
+      break;
+
+   default:
+      unreachable("Invalid instruction type");
+      break;
+   }
+}
+
+/**
+ * qsort() comparator ordering an array of nir_block pointers by ascending
+ * block index.  Uses comparisons rather than a subtraction so the result
+ * cannot overflow for any pair of unsigned indices.
+ */
+static int
+compare_block_index(const void *p1, const void *p2)
+{
+   const nir_block *block1 = *((const nir_block *const *) p1);
+   const nir_block *block2 = *((const nir_block *const *) p2);
+
+   return (block1->index > block2->index) - (block1->index < block2->index);
+}
+
+static void print_cf_node(nir_cf_node *node, print_var_state *state,
+                          unsigned tabs, FILE *fp);
+
+/**
+ * Prints a basic block: a "block block_<N>:" header, a comment listing the
+ * predecessors, one line per instruction, and a comment listing the
+ * successors.
+ */
+static void
+print_block(nir_block *block, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   print_tabs(tabs, fp);
+   fprintf(fp, "block block_%u:\n", block->index);
+
+   /* sort the predecessors by index so we consistently print the same thing */
+
+   const unsigned num_preds = block->predecessors->entries;
+   nir_block **preds = malloc(num_preds * sizeof(nir_block *));
+
+   struct set_entry *entry;
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "/* preds: ");
+   if (preds != NULL) {
+      unsigned count = 0;
+      set_foreach(block->predecessors, entry) {
+         preds[count++] = (nir_block *) entry->key;
+      }
+
+      qsort(preds, count, sizeof(nir_block *), compare_block_index);
+
+      for (unsigned p = 0; p < count; p++) {
+         fprintf(fp, "block_%u ", preds[p]->index);
+      }
+   } else {
+      /* No scratch array: either the allocation failed or the set is empty
+       * and malloc(0) returned NULL.  Fall back to set order rather than
+       * writing through a NULL pointer.
+       */
+      set_foreach(block->predecessors, entry) {
+         fprintf(fp, "block_%u ", ((nir_block *) entry->key)->index);
+      }
+   }
+   fprintf(fp, "*/\n");
+
+   free(preds);
+
+   nir_foreach_instr(block, instr) {
+      print_instr(instr, state, tabs, fp);
+      fprintf(fp, "\n");
+   }
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "/* succs: ");
+   for (unsigned s = 0; s < 2; s++) {
+      if (block->successors[s]) {
+         fprintf(fp, "block_%u ", block->successors[s]->index);
+      }
+   }
+   fprintf(fp, "*/\n");
+}
+
+/**
+ * Prints "if <condition> { ... } else { ... }" with both branch lists
+ * indented one level deeper than the if itself.
+ */
+static void
+print_if(nir_if *if_stmt, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   print_tabs(tabs, fp);
+   fprintf(fp, "if ");
+   print_src(&if_stmt->condition, fp);
+   fprintf(fp, " {\n");
+   foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list) {
+      print_cf_node(node, state, tabs + 1, fp);
+   }
+   print_tabs(tabs, fp);
+   fprintf(fp, "} else {\n");
+   foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list) {
+      print_cf_node(node, state, tabs + 1, fp);
+   }
+   print_tabs(tabs, fp);
+   fprintf(fp, "}\n");
+}
+
+/** Prints "loop { ... }" with the body indented one level deeper. */
+static void
+print_loop(nir_loop *loop, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   print_tabs(tabs, fp);
+   fprintf(fp, "loop {\n");
+   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
+      print_cf_node(node, state, tabs + 1, fp);
+   }
+   print_tabs(tabs, fp);
+   fprintf(fp, "}\n");
+}
+
+/** Dispatches a control-flow node to the block, if or loop printer. */
+static void
+print_cf_node(nir_cf_node *node, print_var_state *state, unsigned int tabs,
+              FILE *fp)
+{
+   switch (node->type) {
+   case nir_cf_node_block:
+      print_block(nir_cf_node_as_block(node), state, tabs, fp);
+      break;
+
+   case nir_cf_node_if:
+      print_if(nir_cf_node_as_if(node), state, tabs, fp);
+      break;
+
+   case nir_cf_node_loop:
+      print_loop(nir_cf_node_as_loop(node), state, tabs, fp);
+      break;
+
+   default:
+      unreachable("Invalid CFG node type");
+   }
+}
+
+/**
+ * Prints a function implementation: the "impl <name>" line with parameters
+ * and optional return variable, the local variable and register
+ * declarations, the body, and finally the label of the end block.
+ *
+ * nir_index_blocks() is called before the body is printed; presumably this
+ * refreshes the block indices that the printers emit -- confirm against its
+ * definition.
+ */
+static void
+print_function_impl(nir_function_impl *impl, print_var_state *state, FILE *fp)
+{
+   fprintf(fp, "\nimpl %s ", impl->overload->function->name);
+
+   for (unsigned i = 0; i < impl->num_params; i++) {
+      if (i != 0)
+         fprintf(fp, ", ");
+
+      print_var(impl->params[i], state, fp);
+   }
+
+   if (impl->return_var != NULL) {
+      if (impl->num_params != 0)
+         fprintf(fp, ", ");
+      fprintf(fp, "returning ");
+      print_var(impl->return_var, state, fp);
+   }
+
+   fprintf(fp, "{\n");
+
+   foreach_list_typed(nir_variable, var, node, &impl->locals) {
+      fprintf(fp, "\t");
+      print_var_decl(var, state, fp);
+   }
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      fprintf(fp, "\t");
+      print_register_decl(reg, fp);
+   }
+
+   nir_index_blocks(impl);
+
+   foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+      print_cf_node(node, state, 1, fp);
+   }
+
+   fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index);
+}
+
+/**
+ * Prints "decl_overload <name> <in|out|inout> <type>, ..." plus an optional
+ * "returning <type>", and then the implementation when the overload has one.
+ */
+static void
+print_function_overload(nir_function_overload *overload,
+                        print_var_state *state, FILE *fp)
+{
+   fprintf(fp, "decl_overload %s ", overload->function->name);
+
+   for (unsigned i = 0; i < overload->num_params; i++) {
+      if (i != 0)
+         fprintf(fp, ", ");
+
+      switch (overload->params[i].param_type) {
+      case nir_parameter_in:
+         fprintf(fp, "in ");
+         break;
+      case nir_parameter_out:
+         fprintf(fp, "out ");
+         break;
+      case nir_parameter_inout:
+         fprintf(fp, "inout ");
+         break;
+      default:
+         unreachable("Invalid parameter type");
+      }
+
+      glsl_print_type(overload->params[i].type, fp);
+   }
+
+   if (overload->return_type != NULL) {
+      if (overload->num_params != 0)
+         fprintf(fp, ", ");
+      fprintf(fp, "returning ");
+      glsl_print_type(overload->return_type, fp);
+   }
+
+   fprintf(fp, "\n");
+
+   if (overload->impl != NULL)
+      print_function_impl(overload->impl, state, fp);
+}
+
+/** Prints every overload of a function. */
+static void
+print_function(nir_function *func, print_var_state *state, FILE *fp)
+{
+   foreach_list_typed(nir_function_overload, overload, node, &func->overload_list) {
+      print_function_overload(overload, state, fp);
+   }
+}
+
+/**
+ * Creates the pointer-keyed hash table and the string-keyed set held by the
+ * print state and resets its index counter to zero.
+ */
+static void
+init_print_state(print_var_state *state)
+{
+   state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
+   state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
+                                  _mesa_key_string_equal);
+   state->index = 0;
+}
+
+/** Destroys the containers created by init_print_state(). */
+static void
+destroy_print_state(print_var_state *state)
+{
+   _mesa_hash_table_destroy(state->ht, NULL);
+   _mesa_set_destroy(state->syms, NULL);
+}
+
+/**
+ * Prints a whole shader to fp: user structures, then the declarations of
+ * uniforms, inputs, outputs, globals, system values and global registers,
+ * and finally every function.
+ */
+void
+nir_print_shader(nir_shader *shader, FILE *fp)
+{
+   print_var_state state;
+   init_print_state(&state);
+
+   for (unsigned i = 0; i < shader->num_user_structures; i++) {
+      glsl_print_struct(shader->user_structures[i], fp);
+   }
+
+   struct hash_entry *entry;
+
+   hash_table_foreach(shader->uniforms, entry) {
+      print_var_decl((nir_variable *) entry->data, &state, fp);
+   }
+
+   hash_table_foreach(shader->inputs, entry) {
+      print_var_decl((nir_variable *) entry->data, &state, fp);
+   }
+
+   hash_table_foreach(shader->outputs, entry) {
+      print_var_decl((nir_variable *) entry->data, &state, fp);
+   }
+
+   foreach_list_typed(nir_variable, var, node, &shader->globals) {
+      print_var_decl(var, &state, fp);
+   }
+
+   foreach_list_typed(nir_variable, var, node, &shader->system_values) {
+      print_var_decl(var, &state, fp);
+   }
+
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      print_register_decl(reg, fp);
+   }
+
+   foreach_list_typed(nir_function, func, node, &shader->functions) {
+      print_function(func, &state, fp);
+   }
+
+   destroy_print_state(&state);
+}
+
+/**
+ * Prints a single instruction to fp with no indentation and no trailing
+ * newline.
+ *
+ * NOTE(review): the print state is passed as NULL here, and it is forwarded
+ * to print_deref() for call, intrinsic and texture instructions.  The helpers
+ * that consume it are defined outside this part of the file -- confirm that
+ * they accept a NULL state.
+ */
+void
+nir_print_instr(const nir_instr *instr, FILE *fp)
+{
+   print_instr(instr, NULL, 0, fp);
+}
diff --git a/mesalib/src/glsl/nir/nir_remove_dead_variables.c b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
new file mode 100644
index 000000000..e7f8aeacb
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/* Adds the variable behind each deref operand of an intrinsic to the set. */
+static void
+add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live)
+{
+   const unsigned count = nir_intrinsic_infos[instr->intrinsic].num_variables;
+   unsigned idx = 0;
+
+   while (idx < count) {
+      _mesa_set_add(live, instr->variables[idx]->var);
+      idx++;
+   }
+}
+
+/* Adds the return variable (when present) and every parameter variable of a
+ * call to the set.
+ */
+static void
+add_var_use_call(nir_call_instr *instr, struct set *live)
+{
+   if (instr->return_deref != NULL)
+      _mesa_set_add(live, instr->return_deref->var);
+
+   for (unsigned p = 0; p != instr->num_params; p++)
+      _mesa_set_add(live, instr->params[p]->var);
+}
+
+/* Adds the sampler variable of a texture instruction, if it has one. */
+static void
+add_var_use_tex(nir_tex_instr *instr, struct set *live)
+{
+   if (instr->sampler == NULL)
+      return;
+
+   _mesa_set_add(live, instr->sampler->var);
+}
+
+/* Per-block callback: records the variables used by the intrinsic, call and
+ * texture instructions of the block.  `state` is the set being filled.
+ * Always returns true.
+ */
+static bool
+add_var_use_block(nir_block *block, void *state)
+{
+   struct set *live_vars = state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic) {
+         add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live_vars);
+      } else if (instr->type == nir_instr_type_call) {
+         add_var_use_call(nir_instr_as_call(instr), live_vars);
+      } else if (instr->type == nir_instr_type_tex) {
+         add_var_use_tex(nir_instr_as_tex(instr), live_vars);
+      }
+   }
+
+   return true;
+}
+
+/* Collects variable uses from every overload that has an implementation. */
+static void
+add_var_use_shader(nir_shader *shader, struct set *live)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl != NULL) {
+         nir_foreach_block(overload->impl, add_var_use_block, live);
+      }
+   }
+}
+
+/* Unlinks from impl->locals every variable that is not in the set. */
+static void
+remove_dead_local_vars(nir_function_impl *impl, struct set *live)
+{
+   foreach_list_typed_safe(nir_variable, var, node, &impl->locals) {
+      const bool is_live = _mesa_set_search(live, var) != NULL;
+
+      if (!is_live) {
+         exec_node_remove(&var->node);
+      }
+   }
+}
+
+/* Unlinks from shader->globals every variable that is not in the set. */
+static void
+remove_dead_global_vars(nir_shader *shader, struct set *live)
+{
+   foreach_list_typed_safe(nir_variable, var, node, &shader->globals) {
+      const bool is_live = _mesa_set_search(live, var) != NULL;
+
+      if (!is_live) {
+         exec_node_remove(&var->node);
+      }
+   }
+}
+
+/* Removes global and function-local variables that no intrinsic, call or
+ * texture instruction in the shader refers to.
+ */
+void
+nir_remove_dead_variables(nir_shader *shader)
+{
+   struct set *live_vars =
+      _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+
+   add_var_use_shader(shader, live_vars);
+
+   remove_dead_global_vars(shader, live_vars);
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl != NULL) {
+         remove_dead_local_vars(overload->impl, live_vars);
+      }
+   }
+
+   _mesa_set_destroy(live_vars, NULL);
+}
diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c
new file mode 100644
index 000000000..73a802be7
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_search.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir_search.h"
+
+/**
+ * Bindings collected while matching a search expression.
+ *
+ * Bit N of variables_seen is set once search variable N has been bound;
+ * variables[N] then holds the ALU source (with swizzle) it was bound to.
+ */
+struct match_state {
+   unsigned variables_seen;
+   nir_alu_src variables[NIR_SEARCH_MAX_VARIABLES];
+};
+
+static bool
+match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
+                 unsigned num_components, const uint8_t *swizzle,
+                 struct match_state *state);
+
+static const uint8_t identity_swizzle[] = { 0, 1, 2, 3 };
+
+static bool alu_instr_is_bool(nir_alu_instr *instr);
+
+/**
+ * Returns true when the source is an SSA value produced by an ALU
+ * instruction that alu_instr_is_bool() accepts.
+ */
+static bool
+src_is_bool(nir_src src)
+{
+   if (!src.is_ssa)
+      return false;
+   if (src.ssa->parent_instr->type != nir_instr_type_alu)
+      return false;
+   /* Use the checked cast helper like the rest of this file */
+   return alu_instr_is_bool(nir_instr_as_alu(src.ssa->parent_instr));
+}
+
+/**
+ * Returns true when an ALU instruction produces a boolean: iand/ior/ixor
+ * and inot count when all of their inspected sources are boolean, every
+ * other opcode counts when its declared output type is nir_type_bool.
+ */
+static bool
+alu_instr_is_bool(nir_alu_instr *instr)
+{
+   switch (instr->op) {
+   case nir_op_iand:
+   case nir_op_ior:
+   case nir_op_ixor:
+      return src_is_bool(instr->src[0].src) && src_is_bool(instr->src[1].src);
+   case nir_op_inot:
+      return src_is_bool(instr->src[0].src);
+   default:
+      return nir_op_infos[instr->op].output_type == nir_type_bool;
+   }
+}
+
+/**
+ * Matches one search value against source number `src` of `instr`.
+ *
+ * \param value           the search value (expression, variable or constant)
+ * \param instr           the ALU instruction whose source is examined
+ * \param src             index of the source within instr
+ * \param num_components  number of components that take part in the match
+ * \param swizzle         swizzle applied on top of the source's own swizzle
+ * \param state           variable bindings; updated when a variable is bound
+ *
+ * \return true when the source matches the value.
+ */
+static bool
+match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
+            unsigned num_components, const uint8_t *swizzle,
+            struct match_state *state)
+{
+   uint8_t new_swizzle[4];
+
+   assert(num_components <= 4);
+
+   /* Compose the incoming swizzle with the swizzle of the source itself */
+   for (unsigned i = 0; i < num_components; ++i)
+      new_swizzle[i] = instr->src[src].swizzle[swizzle[i]];
+
+   switch (value->type) {
+   case nir_search_value_expression:
+      if (!instr->src[src].src.is_ssa)
+         return false;
+
+      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
+         return false;
+
+      return match_expression(nir_search_value_as_expression(value),
+                              nir_instr_as_alu(instr->src[src].src.ssa->parent_instr),
+                              num_components, new_swizzle, state);
+
+   case nir_search_value_variable: {
+      nir_search_variable *var = nir_search_value_as_variable(value);
+
+      assert(var->variable < NIR_SEARCH_MAX_VARIABLES);
+
+      if (state->variables_seen & (1u << var->variable)) {
+         /* Already bound: the source and swizzle must agree with the
+          * earlier binding.
+          */
+         if (!nir_srcs_equal(state->variables[var->variable].src,
+                             instr->src[src].src))
+            return false;
+
+         assert(!instr->src[src].abs && !instr->src[src].negate);
+
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (state->variables[var->variable].swizzle[i] != new_swizzle[i])
+               return false;
+         }
+
+         return true;
+      } else {
+         /* Both constraints below look at the parent instruction of the SSA
+          * definition, so a source that is not SSA can never satisfy them.
+          * Reject it before touching src.ssa.
+          */
+         if ((var->is_constant || var->type != nir_type_invalid) &&
+             !instr->src[src].src.is_ssa)
+            return false;
+
+         if (var->is_constant &&
+             instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
+            return false;
+
+         if (var->type != nir_type_invalid) {
+            if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
+               return false;
+
+            nir_alu_instr *src_alu =
+               nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
+
+            if (nir_op_infos[src_alu->op].output_type != var->type &&
+                !(var->type == nir_type_bool && alu_instr_is_bool(src_alu)))
+               return false;
+         }
+
+         state->variables_seen |= (1u << var->variable);
+         state->variables[var->variable].src = instr->src[src].src;
+         state->variables[var->variable].abs = false;
+         state->variables[var->variable].negate = false;
+
+         /* Components beyond num_components are padded with swizzle 0 */
+         for (unsigned i = 0; i < 4; ++i) {
+            if (i < num_components)
+               state->variables[var->variable].swizzle[i] = new_swizzle[i];
+            else
+               state->variables[var->variable].swizzle[i] = 0;
+         }
+
+         return true;
+      }
+   }
+
+   case nir_search_value_constant: {
+      nir_search_constant *const_val = nir_search_value_as_constant(value);
+
+      if (!instr->src[src].src.is_ssa)
+         return false;
+
+      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
+         return false;
+
+      nir_load_const_instr *load =
+         nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
+
+      /* Compare as float or as integer depending on how the instruction
+       * interprets this input.
+       */
+      switch (nir_op_infos[instr->op].input_types[src]) {
+      case nir_type_float:
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (load->value.f[new_swizzle[i]] != const_val->data.f)
+               return false;
+         }
+         return true;
+      case nir_type_int:
+      case nir_type_unsigned:
+      case nir_type_bool:
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (load->value.i[new_swizzle[i]] != const_val->data.i)
+               return false;
+         }
+         return true;
+      default:
+         unreachable("Invalid alu source type");
+      }
+   }
+
+   default:
+      unreachable("Invalid search value type");
+   }
+}
+
+/**
+ * Matches a search expression against an ALU instruction and, through
+ * match_value(), against the instructions that feed it.
+ *
+ * For two-input opcodes flagged NIR_OP_IS_COMMUTATIVE a failed match is
+ * retried with the two sources swapped.
+ *
+ * \return true when the instruction matches; state then holds the variable
+ *         bindings.  On a false return the contents of state are unspecified.
+ */
+static bool
+match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
+                 unsigned num_components, const uint8_t *swizzle,
+                 struct match_state *state)
+{
+   if (instr->op != expr->opcode)
+      return false;
+
+   assert(!instr->dest.saturate);
+   assert(nir_op_infos[instr->op].num_inputs > 0);
+
+   /* If we have an explicitly sized destination, we can only handle the
+    * identity swizzle.  While dot(vec3(a, b, c).zxy) is a valid
+    * expression, we don't have the information right now to propagate that
+    * swizzle through.  We can only properly propagate swizzles if the
+    * instruction is vectorized.
+    */
+   if (nir_op_infos[instr->op].output_size != 0) {
+      for (unsigned i = 0; i < num_components; i++) {
+         if (swizzle[i] != i)
+            return false;
+      }
+   }
+
+   /* Remember which variables were bound on entry so that bindings made by
+    * a failed first attempt can be discarded before the commutative retry.
+    */
+   const unsigned variables_seen_stash = state->variables_seen;
+
+   bool matched = true;
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      /* If the source is an explicitly sized source, then we need to reset
+       * both the number of components and the swizzle.
+       */
+      if (nir_op_infos[instr->op].input_sizes[i] != 0) {
+         num_components = nir_op_infos[instr->op].input_sizes[i];
+         swizzle = identity_swizzle;
+      }
+
+      if (!match_value(expr->srcs[i], instr, i, num_components,
+                       swizzle, state)) {
+         matched = false;
+         break;
+      }
+   }
+
+   if (matched)
+      return true;
+
+   if (nir_op_infos[instr->op].num_inputs == 2 &&
+       (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE)) {
+      /* Without this, a variable bound during the failed attempt above
+       * would still count as seen and could make the swapped attempt fail
+       * against a stale binding.
+       */
+      state->variables_seen = variables_seen_stash;
+
+      if (!match_value(expr->srcs[0], instr, 1, num_components,
+                       swizzle, state))
+         return false;
+
+      return match_value(expr->srcs[1], instr, 0, num_components,
+                         swizzle, state);
+   } else {
+      return false;
+   }
+}
+
+/**
+ * Builds the instructions needed to compute a replacement value and inserts
+ * them before `instr`.
+ *
+ * \param value           the replacement value to build
+ * \param type            ALU type used to fill in a constant
+ * \param num_components  number of components of the value
+ * \param state           variable bindings produced by a successful match
+ * \param instr           instruction before which new instructions are placed
+ * \param mem_ctx         allocation context for the new instructions
+ *
+ * \return an ALU source that refers to the built value.
+ */
+static nir_alu_src
+construct_value(const nir_search_value *value, nir_alu_type type,
+                unsigned num_components, struct match_state *state,
+                nir_instr *instr, void *mem_ctx)
+{
+   switch (value->type) {
+   case nir_search_value_expression: {
+      const nir_search_expression *expr = nir_search_value_as_expression(value);
+
+      if (nir_op_infos[expr->opcode].output_size != 0)
+         num_components = nir_op_infos[expr->opcode].output_size;
+
+      nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
+      nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
+      alu->dest.write_mask = (1 << num_components) - 1;
+      alu->dest.saturate = false;
+
+      for (unsigned i = 0; i < nir_op_infos[expr->opcode].num_inputs; i++) {
+         /* If the source is an explicitly sized source, then we need to reset
+          * the number of components to match.
+          */
+         if (nir_op_infos[alu->op].input_sizes[i] != 0)
+            num_components = nir_op_infos[alu->op].input_sizes[i];
+
+         alu->src[i] = construct_value(expr->srcs[i],
+                                       nir_op_infos[alu->op].input_types[i],
+                                       num_components,
+                                       state, instr, mem_ctx);
+      }
+
+      nir_instr_insert_before(instr, &alu->instr);
+
+      nir_alu_src val;
+      val.src = nir_src_for_ssa(&alu->dest.dest.ssa);
+      val.negate = false;
+      val.abs = false;
+      memcpy(val.swizzle, identity_swizzle, sizeof val.swizzle);
+
+      return val;
+   }
+
+   case nir_search_value_variable: {
+      const nir_search_variable *var = nir_search_value_as_variable(value);
+      assert(state->variables_seen & (1 << var->variable));
+
+      nir_alu_src val;
+      nir_alu_src_copy(&val, &state->variables[var->variable], mem_ctx);
+
+      assert(!var->is_constant);
+
+      return val;
+   }
+
+   case nir_search_value_constant: {
+      const nir_search_constant *c = nir_search_value_as_constant(value);
+      nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
+
+      switch (type) {
+      case nir_type_float:
+         load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
+         load->value.f[0] = c->data.f;
+         break;
+      case nir_type_int:
+         load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
+         load->value.i[0] = c->data.i;
+         break;
+      case nir_type_unsigned:
+      case nir_type_bool:
+         load->value.u[0] = c->data.u;
+         break;
+      default:
+         unreachable("Invalid alu source type");
+      }
+
+      nir_instr_insert_before(instr, &load->instr);
+
+      /* The constant has a single component; an all-zero swizzle makes
+       * every channel of the user read that component.
+       */
+      nir_alu_src val;
+      val.src = nir_src_for_ssa(&load->def);
+      val.negate = false;
+      val.abs = false;
+      memset(val.swizzle, 0, sizeof val.swizzle);
+
+      return val;
+   }
+
+   default:
+      unreachable("Invalid search value type");
+   }
+}
+
+/**
+ * Tries to match `search` against `instr` and, on success, replaces the
+ * instruction with the value described by `replace`.
+ *
+ * The destination of instr must be SSA.  On a match the replacement is built
+ * before instr, a mov of the result is inserted, all uses of instr's
+ * destination are rewritten to the mov, and instr is removed.
+ *
+ * \return the new mov instruction, or NULL when instr does not match.
+ */
+nir_alu_instr *
+nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
+                  const nir_search_value *replace, void *mem_ctx)
+{
+   uint8_t swizzle[4] = { 0, 0, 0, 0 };
+
+   /* Check the precondition before reading dest.dest.ssa below */
+   assert(instr->dest.dest.is_ssa);
+   assert(instr->dest.dest.ssa.num_components <= 4);
+
+   for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; ++i)
+      swizzle[i] = i;
+
+   struct match_state state;
+   state.variables_seen = 0;
+
+   if (!match_expression(search, instr, instr->dest.dest.ssa.num_components,
+                         swizzle, &state))
+      return NULL;
+
+   /* Inserting a mov may be unnecessary.  However, it's much easier to
+    * simply let copy propagation clean this up than to try to go through
+    * and rewrite swizzles ourselves.
+    */
+   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   mov->dest.write_mask = instr->dest.write_mask;
+   nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                     instr->dest.dest.ssa.num_components, NULL);
+
+   mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type,
+                                 instr->dest.dest.ssa.num_components, &state,
+                                 &instr->instr, mem_ctx);
+   nir_instr_insert_before(&instr->instr, &mov->instr);
+
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&mov->dest.dest.ssa), mem_ctx);
+
+   /* We know this one has no more uses because we just rewrote them all,
+    * so we can remove it.  The rest of the matched expression, however, we
+    * don't know so much about.  We'll just let dead code clean them up.
+    */
+   nir_instr_remove(&instr->instr);
+
+   return mov;
+}
diff --git a/mesalib/src/glsl/nir/nir_search.h b/mesalib/src/glsl/nir/nir_search.h
new file mode 100644
index 000000000..7d4779294
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_search.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#ifndef _NIR_SEARCH_
+#define _NIR_SEARCH_
+
+#include "nir.h"
+/* Upper bound (exclusive) on nir_search_variable::variable. */
+#define NIR_SEARCH_MAX_VARIABLES 16
+/** Discriminates the three kinds of search value declared below. */
+typedef enum {
+   nir_search_value_expression,
+   nir_search_value_variable,
+   nir_search_value_constant,
+} nir_search_value_type;
+/**
+ * Common header of every search value.  Each concrete kind embeds it as a
+ * member named "value", which is the member the NIR_DEFINE_CAST helpers at
+ * the end of this file are given.
+ */
+typedef struct {
+   nir_search_value_type type;
+} nir_search_value;
+/** A named placeholder in a search or replacement expression. */
+typedef struct {
+   nir_search_value value;
+
+   /** The variable index;  Must be less than NIR_SEARCH_MAX_VARIABLES */
+   unsigned variable;
+
+   /** Indicates that the given variable must be a constant
+    *
+    * This is only allowed in search expressions and indicates that the
+    * given variable is only allowed to match constant values.
+    */
+   bool is_constant;
+
+   /** Indicates that the given variable must have a certain type
+    *
+    * This is only allowed in search expressions and indicates that the
+    * given variable is only allowed to match values that come from an ALU
+    * instruction with the given output type.  A type of nir_type_invalid
+    * means it can match any type.
+    *
+    * Note: A variable that is both constant and has a type other than
+    * nir_type_invalid will never match anything.
+    */
+   nir_alu_type type;
+} nir_search_variable;
+/** A literal constant; which union member is used depends on the ALU type
+ * of the operand it is matched against or built for.
+ */
+typedef struct {
+   nir_search_value value;
+
+   union {
+      uint32_t u;
+      int32_t i;
+      float f;
+   } data;
+} nir_search_constant;
+/** An ALU opcode applied to up to four operand search values. */
+typedef struct {
+   nir_search_value value;
+
+   nir_op opcode;
+   const nir_search_value *srcs[4];
+} nir_search_expression;
+
+NIR_DEFINE_CAST(nir_search_value_as_variable, nir_search_value,
+                nir_search_variable, value)
+NIR_DEFINE_CAST(nir_search_value_as_constant, nir_search_value,
+                nir_search_constant, value)
+NIR_DEFINE_CAST(nir_search_value_as_expression, nir_search_value,
+                nir_search_expression, value)
+/**
+ * Matches `search` against `instr` and, on success, replaces the instruction
+ * with the value described by `replace`, allocating from mem_ctx.
+ */
+nir_alu_instr *
+nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
+                  const nir_search_value *replace, void *mem_ctx);
+
+#endif /* _NIR_SEARCH_ */
diff --git a/mesalib/src/glsl/nir/nir_split_var_copies.c b/mesalib/src/glsl/nir/nir_split_var_copies.c
new file mode 100644
index 000000000..4d663b51b
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_split_var_copies.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements "copy splitting" which is similar to structure splitting only + * it works on copy operations rather than the datatypes themselves. The + * GLSL language allows you to copy an entire structure (which may contain + * arrays or other structures) from one variable to another at a time. + * Normally, in a language such as C this would be handled by a "structure + * splitting" pass that breaks up the structures. Unfortunately for us, + * structures used in inputs or outputs can't be split. Therefore, + * regardless of what we do, we have to be able to copy to/from + * structures. + * + * The primary purpose of structure splitting is to allow you to better + * optimize variable access and lower things to registers where you can. + * The primary issue here is that, if you lower the copy to a bunch of + * loads and stores, you lose a lot of information about the copy + * operation that you would like to keep around. To solve this problem, we + * have a "copy splitting" pass that, instead of splitting the structures + * or lowering the copy into loads and stores, splits the copy operation + * into a bunch of copy operations, one for each leaf of the structure tree. + * If an intermediate array is encountered, it is referenced with a + * wildcard reference to indicate that the entire array is to be copied. + * + * As things become direct, array copies may be able to be losslessly + * lowered to having fewer and fewer wildcards. However, until that + * happens we want to keep the information about the arrays intact. 
+ * + * Prior to the copy splitting pass, there are no wildcard references but + * there may be incomplete references where the tail of the deref chain is + * an array or a structure and not a specific element. After the copy + * splitting pass has completed, every variable deref will be a full-length + * dereference pointing to a single leaf in the structure type tree with + * possibly a few wildcard array dereferences. + */ +/* Allocation contexts shared by the pass. + * + * mem_ctx receives the new copy instructions and their deref chains; + * dead_ctx receives the temporary derefs and the removed copy instructions + * and is freed once the function implementation has been processed. + */ +struct split_var_copies_state { + void *mem_ctx; + void *dead_ctx; +}; +/* Returns the last link of a deref chain (the one whose child is NULL). */ +static nir_deref * +get_deref_tail(nir_deref *deref) +{ + while (deref->child != NULL) + deref = deref->child; + return deref; +} + +/* Recursively constructs deref chains to split a copy instruction into + * multiple (if needed) copy instructions with full-length deref chains. + * External callers of this function should pass the tail and head of the + * deref chains found as the source and destination of the copy instruction + * into this function. + * + * \param old_copy The copy instruction we are splitting + * \param dest_head The head of the destination deref chain we are building + * \param src_head The head of the source deref chain we are building + * \param dest_tail The tail of the destination deref chain we are building + * \param src_tail The tail of the source deref chain we are building + * \param state The current split_var_copies_state object + */ +static void +split_var_copy_instr(nir_intrinsic_instr *old_copy, + nir_deref *dest_head, nir_deref *src_head, + nir_deref *dest_tail, nir_deref *src_tail, + struct split_var_copies_state *state) +{ + assert(src_tail->type == dest_tail->type); + + /* Make sure these really are the tails of the deref chains */ + assert(dest_tail->child == NULL); + assert(src_tail->child == NULL); + + switch (glsl_get_base_type(src_tail->type)) { + case GLSL_TYPE_ARRAY: { + /* Make a wildcard dereference */ + nir_deref_array *deref = nir_deref_array_create(state->dead_ctx); + deref->deref.type = 
glsl_get_array_element(src_tail->type); + deref->deref_array_type = nir_deref_array_type_wildcard; + + /* Set the tail of both as the newly created wildcard deref. It is + * safe to use the same wildcard in both places because a) we will be + * copying it before we put it in an actual instruction and b) + * everything that will potentially add another link in the deref + * chain will also add the same thing to both chains. + */ + src_tail->child = &deref->deref; + dest_tail->child = &deref->deref; + + split_var_copy_instr(old_copy, dest_head, src_head, + dest_tail->child, src_tail->child, state); + + /* Set it back to the way we found it */ + src_tail->child = NULL; + dest_tail->child = NULL; + break; + } + + case GLSL_TYPE_STRUCT: + /* This is the only part that actually does any interesting + * splitting. For array types, we just use wildcards and resolve + * them later. For structure types, we need to emit one copy + * instruction for every structure element. Because we may have + * structs inside structs, we just recurse and let the next level + * take care of any additional structures. + */ + for (unsigned i = 0; i < glsl_get_length(src_tail->type); i++) { + nir_deref_struct *deref = nir_deref_struct_create(state->dead_ctx, i); + deref->deref.type = glsl_get_struct_field(src_tail->type, i); + + /* Set the tail of both as the newly created structure deref. It + * is safe to use the same struct deref in both places because a) we + * will be copying it before we put it in an actual instruction + * and b) everything that will potentially add another link in the + * deref chain will also add the same thing to both chains. 
+ */ + src_tail->child = &deref->deref; + dest_tail->child = &deref->deref; + + split_var_copy_instr(old_copy, dest_head, src_head, + dest_tail->child, src_tail->child, state); + } + /* Set it back to the way we found it */ + src_tail->child = NULL; + dest_tail->child = NULL; + break; + + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(src_tail->type)) { /* a matrix is split like an array of its column type */ + nir_deref_array *deref = nir_deref_array_create(state->dead_ctx); + deref->deref.type = glsl_get_column_type(src_tail->type); + deref->deref_array_type = nir_deref_array_type_wildcard; + + /* Set the tail of both as the newly created wildcard deref. It + * is safe to use the same wildcard in both places because a) we + * will be copying it before we put it in an actual instruction + * and b) everything that will potentially add another link in the + * deref chain will also add the same thing to both chains. + */ + src_tail->child = &deref->deref; + dest_tail->child = &deref->deref; + + split_var_copy_instr(old_copy, dest_head, src_head, + dest_tail->child, src_tail->child, state); + + /* Set it back to the way we found it */ + src_tail->child = NULL; + dest_tail->child = NULL; + } else { + /* At this point, we have fully built our deref chains and can + * actually add the new copy instruction. + */ + nir_intrinsic_instr *new_copy = + nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_copy_var); + + /* We need to make copies because a) this deref chain actually + * belongs to the copy instruction and b) the deref chains may + * have some of the same links due to the way we constructed them + */ + nir_deref *src = nir_copy_deref(state->mem_ctx, src_head); + nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head); + + new_copy->variables[0] = nir_deref_as_var(dest); + new_copy->variables[1] = nir_deref_as_var(src); + + /* Emit the copy instruction after the old instruction. We'll + * remove the old one later. 
+ */ + nir_instr_insert_after(&old_copy->instr, &new_copy->instr); + } + break; + + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_INTERFACE: + default: + unreachable("Cannot copy these types"); + } +} +/* Per-block callback passed to nir_foreach_block. + * + * For every copy_var intrinsic whose source tail is an array, a struct or a + * matrix, emits the split copies via split_var_copy_instr, removes the + * original instruction and hands it to dead_ctx. Copies whose tail is a + * non-matrix float/int/uint/bool type are left untouched. Always returns + * true. + */ +static bool +split_var_copies_block(nir_block *block, void *void_state) +{ + struct split_var_copies_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); + if (intrinsic->intrinsic != nir_intrinsic_copy_var) + continue; + + nir_deref *dest_head = &intrinsic->variables[0]->deref; + nir_deref *src_head = &intrinsic->variables[1]->deref; + nir_deref *dest_tail = get_deref_tail(dest_head); + nir_deref *src_tail = get_deref_tail(src_head); + + switch (glsl_get_base_type(src_tail->type)) { + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: + split_var_copy_instr(intrinsic, dest_head, src_head, + dest_tail, src_tail, state); + nir_instr_remove(&intrinsic->instr); + ralloc_steal(state->dead_ctx, instr); + break; + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(src_tail->type)) { + split_var_copy_instr(intrinsic, dest_head, src_head, + dest_tail, src_tail, state); + nir_instr_remove(&intrinsic->instr); + ralloc_steal(state->dead_ctx, instr); + } + break; + default: + unreachable("Invalid type"); + break; + } + } + + return true; +} +/* Runs copy splitting over one function implementation. 
+ * + * New instructions are allocated from the ralloc parent of impl; dead_ctx is + * created here and freed before returning. + */ +static void +split_var_copies_impl(nir_function_impl *impl) +{ + struct split_var_copies_state state; + + state.mem_ctx = ralloc_parent(impl); + state.dead_ctx = ralloc_context(NULL); + + nir_foreach_block(impl, split_var_copies_block, &state); + + ralloc_free(state.dead_ctx); +} +/* Entry point: runs copy splitting on every overload that has an impl. */ +void +nir_split_var_copies(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + split_var_copies_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c 
b/mesalib/src/glsl/nir/nir_to_ssa.c new file mode 100644 index 000000000..47cf45393 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_to_ssa.c @@ -0,0 +1,535 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include <stdlib.h> +#include <unistd.h> + +/* + * Implements the classic to-SSA algorithm described by Cytron et. al. in + * "Efficiently Computing Static Single Assignment Form and the Control + * Dependence Graph." + */ + +/* inserts a phi node of the form reg = phi(reg, reg, reg, ...) 
*/ + +static void +insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx) +{ + nir_phi_instr *instr = nir_phi_instr_create(mem_ctx); + + instr->dest.reg.reg = reg; + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + nir_phi_src *src = ralloc(mem_ctx, nir_phi_src); + src->pred = pred; + src->src.is_ssa = false; + src->src.reg.base_offset = 0; + src->src.reg.indirect = NULL; + src->src.reg.reg = reg; + exec_list_push_tail(&instr->srcs, &src->node); + } + + nir_instr_insert_before_block(block, &instr->instr); +} + +static void +insert_phi_nodes(nir_function_impl *impl) +{ + void *mem_ctx = ralloc_parent(impl); + + unsigned *work = calloc(impl->num_blocks, sizeof(unsigned)); + unsigned *has_already = calloc(impl->num_blocks, sizeof(unsigned)); + + /* + * Since the work flags already prevent us from inserting a node that has + * ever been inserted into W, we don't need to use a set to represent W. + * Also, since no block can ever be inserted into W more than once, we know + * that the maximum size of W is the number of basic blocks in the + * function. So all we need to handle W is an array and a pointer to the + * next element to be inserted and the next element to be removed. 
+ */ + nir_block **W = malloc(impl->num_blocks * sizeof(nir_block *)); + unsigned w_start, w_end; + + unsigned iter_count = 0; + + nir_index_blocks(impl); + + foreach_list_typed(nir_register, reg, node, &impl->registers) { + if (reg->num_array_elems != 0) + continue; + + w_start = w_end = 0; + iter_count++; + + struct set_entry *entry; + set_foreach(reg->defs, entry) { + nir_instr *def = (nir_instr *) entry->key; + if (work[def->block->index] < iter_count) + W[w_end++] = def->block; + work[def->block->index] = iter_count; + } + + while (w_start != w_end) { + nir_block *cur = W[w_start++]; + set_foreach(cur->dom_frontier, entry) { + nir_block *next = (nir_block *) entry->key; + + /* + * If there's more than one return statement, then the end block + * can be a join point for some definitions. However, there are + * no instructions in the end block, so nothing would use those + * phi nodes. Of course, we couldn't place those phi nodes + * anyways due to the restriction of having no instructions in the + * end block... 
+ */ + if (next == impl->end_block) + continue; + + if (has_already[next->index] < iter_count) { + insert_trivial_phi(reg, next, mem_ctx); + has_already[next->index] = iter_count; + if (work[next->index] < iter_count) { + work[next->index] = iter_count; + W[w_end++] = next; + } + } + } + } + } + + free(work); + free(has_already); + free(W); +} + +typedef struct { + nir_ssa_def **stack; + int index; + unsigned num_defs; /** < used to add indices to debug names */ +#ifndef NDEBUG + unsigned stack_size; +#endif +} reg_state; + +typedef struct { + reg_state *states; + void *mem_ctx; + nir_instr *parent_instr; + nir_if *parent_if; + nir_function_impl *impl; + + /* map from SSA value -> original register */ + struct hash_table *ssa_map; +} rewrite_state; + +static nir_ssa_def *get_ssa_src(nir_register *reg, rewrite_state *state) +{ + unsigned index = reg->index; + + if (state->states[index].index == -1) { + /* + * We're using an undefined register, create a new undefined SSA value + * to preserve the information that this source is undefined + */ + nir_ssa_undef_instr *instr = + nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components); + + /* + * We could just insert the undefined instruction before the instruction + * we're rewriting, but we could be rewriting a phi source in which case + * we can't do that, so do the next easiest thing - insert it at the + * beginning of the program. In the end, it doesn't really matter where + * the undefined instructions are because they're going to be ignored + * in the backend. 
+ */ + nir_instr_insert_before_cf_list(&state->impl->body, &instr->instr); + return &instr->def; + } + + return state->states[index].stack[state->states[index].index]; +} + +static bool +rewrite_use(nir_src *src, void *_state) +{ + rewrite_state *state = (rewrite_state *) _state; + + if (src->is_ssa) + return true; + + unsigned index = src->reg.reg->index; + + if (state->states[index].stack == NULL) + return true; + + src->is_ssa = true; + src->ssa = get_ssa_src(src->reg.reg, state); + + if (state->parent_instr) + _mesa_set_add(src->ssa->uses, state->parent_instr); + else + _mesa_set_add(src->ssa->if_uses, state->parent_if); + return true; +} + +static bool +rewrite_def_forwards(nir_dest *dest, void *_state) +{ + rewrite_state *state = (rewrite_state *) _state; + + if (dest->is_ssa) + return true; + + nir_register *reg = dest->reg.reg; + unsigned index = reg->index; + + if (state->states[index].stack == NULL) + return true; + + char *name = NULL; + if (dest->reg.reg->name) + name = ralloc_asprintf(state->mem_ctx, "%s_%u", dest->reg.reg->name, + state->states[index].num_defs); + + nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name); + + /* push our SSA destination on the stack */ + state->states[index].index++; + assert(state->states[index].index < state->states[index].stack_size); + state->states[index].stack[state->states[index].index] = &dest->ssa; + state->states[index].num_defs++; + + _mesa_hash_table_insert(state->ssa_map, &dest->ssa, reg); + + return true; +} + +static void +rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state) +{ + state->parent_instr = &instr->instr; + + nir_foreach_src(&instr->instr, rewrite_use, state); + + if (instr->dest.dest.is_ssa) + return; + + nir_register *reg = instr->dest.dest.reg.reg; + unsigned index = reg->index; + + if (state->states[index].stack == NULL) + return; + + unsigned write_mask = instr->dest.write_mask; + if (write_mask != (1 << instr->dest.dest.reg.reg->num_components) - 1) { + 
/* + * Calculate the number of components the final instruction, which for + * per-component things is the number of output components of the + * instruction and non-per-component things is the number of enabled + * channels in the write mask. + */ + unsigned num_components; + if (nir_op_infos[instr->op].output_size == 0) { + unsigned temp = (write_mask & 0x5) + ((write_mask >> 1) & 0x5); + num_components = (temp & 0x3) + ((temp >> 2) & 0x3); + } else { + num_components = nir_op_infos[instr->op].output_size; + } + + char *name = NULL; + if (instr->dest.dest.reg.reg->name) + name = ralloc_asprintf(state->mem_ctx, "%s_%u", + reg->name, state->states[index].num_defs); + + instr->dest.write_mask = (1 << num_components) - 1; + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name); + + if (nir_op_infos[instr->op].output_size == 0) { + /* + * When we change the output writemask, we need to change the + * swizzles for per-component inputs too + */ + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + if (nir_op_infos[instr->op].input_sizes[i] != 0) + continue; + + unsigned new_swizzle[4] = {0, 0, 0, 0}; + + /* + * We keep two indices: + * 1. The index of the original (non-SSA) component + * 2. The index of the post-SSA, compacted, component + * + * We need to map the swizzle component at index 1 to the swizzle + * component at index 2. 
+ */ + + unsigned ssa_index = 0; + for (unsigned index = 0; index < 4; index++) { + if (!((write_mask >> index) & 1)) + continue; + + new_swizzle[ssa_index] = instr->src[i].swizzle[index]; + ssa_index++; + } + + for (unsigned j = 0; j < 4; j++) + instr->src[i].swizzle[j] = new_swizzle[j]; + } + } + + nir_op op; + switch (reg->num_components) { + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("not reached"); + } + + nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, op); + + vec->dest.dest.reg.reg = reg; + vec->dest.write_mask = (1 << reg->num_components) - 1; + + nir_ssa_def *old_src = get_ssa_src(reg, state); + nir_ssa_def *new_src = &instr->dest.dest.ssa; + + unsigned ssa_index = 0; + for (unsigned i = 0; i < reg->num_components; i++) { + vec->src[i].src.is_ssa = true; + if ((write_mask >> i) & 1) { + vec->src[i].src.ssa = new_src; + if (nir_op_infos[instr->op].output_size == 0) + vec->src[i].swizzle[0] = ssa_index; + else + vec->src[i].swizzle[0] = i; + ssa_index++; + } else { + vec->src[i].src.ssa = old_src; + vec->src[i].swizzle[0] = i; + } + } + + nir_instr_insert_after(&instr->instr, &vec->instr); + + state->parent_instr = &vec->instr; + rewrite_def_forwards(&vec->dest.dest, state); + } else { + rewrite_def_forwards(&instr->dest.dest, state); + } +} + +static void +rewrite_phi_instr(nir_phi_instr *instr, rewrite_state *state) +{ + state->parent_instr = &instr->instr; + rewrite_def_forwards(&instr->dest, state); +} + +static void +rewrite_instr_forward(nir_instr *instr, rewrite_state *state) +{ + if (instr->type == nir_instr_type_alu) { + rewrite_alu_instr_forward(nir_instr_as_alu(instr), state); + return; + } + + if (instr->type == nir_instr_type_phi) { + rewrite_phi_instr(nir_instr_as_phi(instr), state); + return; + } + + state->parent_instr = instr; + + nir_foreach_src(instr, rewrite_use, state); + nir_foreach_dest(instr, rewrite_def_forwards, state); +} + +static void 
+rewrite_phi_sources(nir_block *block, nir_block *pred, rewrite_state *state) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi_instr = nir_instr_as_phi(instr); + + state->parent_instr = instr; + + nir_foreach_phi_src(phi_instr, src) { + if (src->pred == pred) { + rewrite_use(&src->src, state); + break; + } + } + } +} + +static bool +rewrite_def_backwards(nir_dest *dest, void *_state) +{ + rewrite_state *state = (rewrite_state *) _state; + + if (!dest->is_ssa) + return true; + + struct hash_entry *entry = + _mesa_hash_table_search(state->ssa_map, &dest->ssa); + + if (!entry) + return true; + + nir_register *reg = (nir_register *) entry->data; + unsigned index = reg->index; + + state->states[index].index--; + assert(state->states[index].index >= -1); + + return true; +} + +static void +rewrite_instr_backwards(nir_instr *instr, rewrite_state *state) +{ + nir_foreach_dest(instr, rewrite_def_backwards, state); +} + +static void +rewrite_block(nir_block *block, rewrite_state *state) +{ + /* This will skip over any instructions after the current one, which is + * what we want because those instructions (vector gather, conditional + * select) will already be in SSA form. 
+ */ + nir_foreach_instr_safe(block, instr) { + rewrite_instr_forward(instr, state); + } + + if (block != state->impl->end_block && + !nir_cf_node_is_last(&block->cf_node) && + nir_cf_node_next(&block->cf_node)->type == nir_cf_node_if) { + nir_if *if_stmt = nir_cf_node_as_if(nir_cf_node_next(&block->cf_node)); + state->parent_instr = NULL; + state->parent_if = if_stmt; + rewrite_use(&if_stmt->condition, state); + } + + if (block->successors[0]) + rewrite_phi_sources(block->successors[0], block, state); + if (block->successors[1]) + rewrite_phi_sources(block->successors[1], block, state); + + for (unsigned i = 0; i < block->num_dom_children; i++) + rewrite_block(block->dom_children[i], state); + + nir_foreach_instr_reverse(block, instr) { + rewrite_instr_backwards(instr, state); + } +} +/* Unlinks from impl->registers every register that has an SSA stack + * (stack != NULL). init_rewrite_state leaves stack NULL for array + * registers, so those stay in the list. The _safe iterator is used because + * the current node is removed while the list is being walked. + */ +static void +remove_unused_regs(nir_function_impl *impl, rewrite_state *state) +{ + foreach_list_typed_safe(nir_register, reg, node, &impl->registers) { + if (state->states[reg->index].stack != NULL) + exec_node_remove(&reg->node); + } +} + +static void +init_rewrite_state(nir_function_impl *impl, rewrite_state *state) +{ + state->impl = impl; + state->mem_ctx = ralloc_parent(impl); + state->ssa_map = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + state->states = ralloc_array(NULL, reg_state, impl->reg_alloc); + + foreach_list_typed(nir_register, reg, node, &impl->registers) { + assert(reg->index < impl->reg_alloc); + if (reg->num_array_elems > 0) { + state->states[reg->index].stack = NULL; + } else { + /* + * Calculate a conservative estimate of the stack size based on the + * number of definitions there are. Note that this function *must* be + * called after phi nodes are inserted so we can count phi node + * definitions too. 
+ */ + unsigned stack_size = reg->defs->entries; + + state->states[reg->index].stack = ralloc_array(state->states, + nir_ssa_def *, + stack_size); +#ifndef NDEBUG + state->states[reg->index].stack_size = stack_size; +#endif + state->states[reg->index].index = -1; + state->states[reg->index].num_defs = 0; + } + } +} + +static void +destroy_rewrite_state(rewrite_state *state) +{ + _mesa_hash_table_destroy(state->ssa_map, NULL); + ralloc_free(state->states); +} + +void +nir_convert_to_ssa_impl(nir_function_impl *impl) +{ + nir_metadata_require(impl, nir_metadata_dominance); + + insert_phi_nodes(impl); + + rewrite_state state; + init_rewrite_state(impl, &state); + + rewrite_block(impl->start_block, &state); + + remove_unused_regs(impl, &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + destroy_rewrite_state(&state); +} + +void +nir_convert_to_ssa(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_convert_to_ssa_impl(overload->impl); + } +} diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp new file mode 100644 index 000000000..a13c3e12a --- /dev/null +++ b/mesalib/src/glsl/nir/nir_types.cpp @@ -0,0 +1,155 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir_types.h" +#include "ir.h" + +void +glsl_print_type(const glsl_type *type, FILE *fp) +{ + if (type->base_type == GLSL_TYPE_ARRAY) { + glsl_print_type(type->fields.array, fp); + fprintf(fp, "[%u]", type->length); + } else if ((type->base_type == GLSL_TYPE_STRUCT) + && !is_gl_identifier(type->name)) { + fprintf(fp, "%s@%p", type->name, (void *) type); + } else { + fprintf(fp, "%s", type->name); + } +} + +void +glsl_print_struct(const glsl_type *type, FILE *fp) +{ + assert(type->base_type == GLSL_TYPE_STRUCT); + + fprintf(fp, "struct {\n"); + for (unsigned i = 0; i < type->length; i++) { + fprintf(fp, "\t"); + glsl_print_type(type->fields.structure[i].type, fp); + fprintf(fp, " %s;\n", type->fields.structure[i].name); + } + fprintf(fp, "}\n"); +} + +const glsl_type * +glsl_get_array_element(const glsl_type* type) +{ + if (type->is_matrix()) + return type->column_type(); + return type->fields.array; +} + +const glsl_type * +glsl_get_struct_field(const glsl_type *type, unsigned index) +{ + return type->fields.structure[index].type; +} + +const struct glsl_type * +glsl_get_column_type(const struct glsl_type *type) +{ + return type->column_type(); +} + +enum glsl_base_type +glsl_get_base_type(const struct glsl_type *type) +{ + return type->base_type; +} + +unsigned +glsl_get_vector_elements(const struct glsl_type *type) +{ + return type->vector_elements; +} + +unsigned +glsl_get_components(const struct 
glsl_type *type) +{ + return type->components(); +} + +unsigned +glsl_get_matrix_columns(const struct glsl_type *type) +{ + return type->matrix_columns; +} + +unsigned +glsl_get_length(const struct glsl_type *type) +{ + return type->length; +} + +const char * +glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index) +{ + return type->fields.structure[index].name; +} + +bool +glsl_type_is_void(const glsl_type *type) +{ + return type->is_void(); +} + +bool +glsl_type_is_vector(const struct glsl_type *type) +{ + return type->is_vector(); +} + +bool +glsl_type_is_scalar(const struct glsl_type *type) +{ + return type->is_scalar(); +} + +bool +glsl_type_is_matrix(const struct glsl_type *type) +{ + return type->is_matrix(); +} + +const glsl_type * +glsl_void_type(void) +{ + return glsl_type::void_type; +} + +const glsl_type * +glsl_vec4_type(void) +{ + return glsl_type::vec4_type; +} + +const glsl_type * +glsl_array_type(const glsl_type *base, unsigned elements) +{ + return glsl_type::get_array_instance(base, elements); +} diff --git a/mesalib/src/glsl/nir/nir_types.h b/mesalib/src/glsl/nir/nir_types.h new file mode 100644 index 000000000..494051a67 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_types.h @@ -0,0 +1,78 @@ +/* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#pragma once + +/* C wrapper around glsl_types.h */ + +#include "../glsl_types.h" + +#ifdef __cplusplus +extern "C" { +#else +struct glsl_type; +#endif + +#include <stdio.h> + +void glsl_print_type(const struct glsl_type *type, FILE *fp); +void glsl_print_struct(const struct glsl_type *type, FILE *fp); + +const struct glsl_type *glsl_get_struct_field(const struct glsl_type *type, + unsigned index); + +const struct glsl_type *glsl_get_array_element(const struct glsl_type *type); + +const struct glsl_type *glsl_get_column_type(const struct glsl_type *type); + +enum glsl_base_type glsl_get_base_type(const struct glsl_type *type); + +unsigned glsl_get_vector_elements(const struct glsl_type *type); + +unsigned glsl_get_components(const struct glsl_type *type); + +unsigned glsl_get_matrix_columns(const struct glsl_type *type); + +unsigned glsl_get_length(const struct glsl_type *type); + +const char *glsl_get_struct_elem_name(const struct glsl_type *type, + unsigned index); + + +bool glsl_type_is_void(const struct glsl_type *type); +bool glsl_type_is_vector(const struct glsl_type *type); +bool glsl_type_is_scalar(const struct glsl_type *type); +bool glsl_type_is_matrix(const struct glsl_type *type); + +const struct glsl_type *glsl_void_type(void); +const struct glsl_type *glsl_vec4_type(void); +const struct glsl_type *glsl_array_type(const struct glsl_type *base, + unsigned elements); + +#ifdef __cplusplus +} +#endif 
diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c new file mode 100644 index 000000000..a3fe9d620 --- /dev/null +++ b/mesalib/src/glsl/nir/nir_validate.c @@ -0,0 +1,979 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include <assert.h> + +/* + * This file checks for invalid IR indicating a bug somewhere in the compiler. + */ + +/* Since this file is just a pile of asserts, don't bother compiling it if + * we're not building a debug build. + */ +#ifdef DEBUG + +/* + * Per-register validation state. + */ + +typedef struct { + /* + * equivalent to the uses and defs in nir_register, but built up by the + * validator. At the end, we verify that the sets have the same entries. 
+ */ + struct set *uses, *if_uses, *defs; + nir_function_impl *where_defined; /* NULL for global registers */ +} reg_validate_state; + +typedef struct { + /* + * equivalent to the uses in nir_ssa_def, but built up by the validator. + * At the end, we verify that the sets have the same entries. + */ + struct set *uses, *if_uses; + nir_function_impl *where_defined; +} ssa_def_validate_state; + +typedef struct { + /* map of register -> validation state (struct above) */ + struct hash_table *regs; + + /* the current shader being validated */ + nir_shader *shader; + + /* the current instruction being validated */ + nir_instr *instr; + + /* the current basic block being validated */ + nir_block *block; + + /* the current if statement being validated */ + nir_if *if_stmt; + + /* the parent of the current cf node being visited */ + nir_cf_node *parent_node; + + /* the current function implementation being validated */ + nir_function_impl *impl; + + /* map of SSA value -> function implementation where it is defined */ + struct hash_table *ssa_defs; + + /* bitset of ssa definitions we have found; used to check uniqueness */ + BITSET_WORD *ssa_defs_found; + + /* bitset of registers we have currently found; used to check uniqueness */ + BITSET_WORD *regs_found; + + /* map of local variable -> function implementation where it is defined */ + struct hash_table *var_defs; +} validate_state; + +static void validate_src(nir_src *src, validate_state *state); + +static void +validate_reg_src(nir_reg_src *src, validate_state *state) +{ + assert(src->reg != NULL); + + struct hash_entry *entry; + entry = _mesa_hash_table_search(state->regs, src->reg); + assert(entry); + + reg_validate_state *reg_state = (reg_validate_state *) entry->data; + + if (state->instr) { + _mesa_set_add(reg_state->uses, state->instr); + + assert(_mesa_set_search(src->reg->uses, state->instr)); + } else { + assert(state->if_stmt); + _mesa_set_add(reg_state->if_uses, state->if_stmt); + + 
assert(_mesa_set_search(src->reg->if_uses, state->if_stmt)); + } + + if (!src->reg->is_global) { + assert(reg_state->where_defined == state->impl && + "using a register declared in a different function"); + } + + assert((src->reg->num_array_elems == 0 || + src->base_offset < src->reg->num_array_elems) && + "definitely out-of-bounds array access"); + + if (src->indirect) { + assert(src->reg->num_array_elems != 0); + assert((src->indirect->is_ssa || src->indirect->reg.indirect == NULL) && + "only one level of indirection allowed"); + validate_src(src->indirect, state); + } +} + +static void +validate_ssa_src(nir_ssa_def *def, validate_state *state) +{ + assert(def != NULL); + + struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def); + + assert(entry); + + ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; + + assert(def_state->where_defined == state->impl && + "using an SSA value defined in a different function"); + + if (state->instr) { + _mesa_set_add(def_state->uses, state->instr); + + assert(_mesa_set_search(def->uses, state->instr)); + } else { + assert(state->if_stmt); + _mesa_set_add(def_state->if_uses, state->if_stmt); + + assert(_mesa_set_search(def->if_uses, state->if_stmt)); + } + + /* TODO validate that the use is dominated by the definition */ +} + +static void +validate_src(nir_src *src, validate_state *state) +{ + if (src->is_ssa) + validate_ssa_src(src->ssa, state); + else + validate_reg_src(&src->reg, state); +} + +static void +validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state) +{ + nir_alu_src *src = &instr->src[index]; + + unsigned num_components; + if (src->src.is_ssa) + num_components = src->src.ssa->num_components; + else { + if (src->src.reg.reg->is_packed) + num_components = 4; /* can't check anything */ + else + num_components = src->src.reg.reg->num_components; + } + for (unsigned i = 0; i < 4; i++) { + assert(src->swizzle[i] < 4); + + if (nir_alu_instr_channel_used(instr, 
index, i))
+         assert(src->swizzle[i] < num_components);
+   }
+
+   validate_src(&src->src, state);
+}
+
+/* Validate a register destination.
+ *
+ * The current instruction must be listed in the register's defs set, the
+ * register must have been pre-declared (present in state->regs), and a
+ * non-global register must belong to the function being validated.  The
+ * definition is recorded in the validator's own defs set.  The base offset
+ * and (single-level) indirect are checked as for register sources.
+ */
+static void
+validate_reg_dest(nir_reg_dest *dest, validate_state *state)
+{
+   assert(dest->reg != NULL);
+
+   struct set_entry *entry = _mesa_set_search(dest->reg->defs, state->instr);
+   assert(entry && "definition not in nir_register.defs");
+
+   struct hash_entry *entry2;
+   entry2 = _mesa_hash_table_search(state->regs, dest->reg);
+
+   assert(entry2);
+
+   reg_validate_state *reg_state = (reg_validate_state *) entry2->data;
+   _mesa_set_add(reg_state->defs, state->instr);
+
+   if (!dest->reg->is_global) {
+      assert(reg_state->where_defined == state->impl &&
+             "writing to a register declared in a different function");
+   }
+
+   assert((dest->reg->num_array_elems == 0 ||
+          dest->base_offset < dest->reg->num_array_elems) &&
+          "definitely out-of-bounds array access");
+
+   if (dest->indirect) {
+      assert(dest->reg->num_array_elems != 0);
+      assert((dest->indirect->is_ssa || dest->indirect->reg.indirect == NULL) &&
+             "only one level of indirection allowed");
+      validate_src(dest->indirect, state);
+   }
+}
+
+/* Validate an SSA definition and register it with the validator.
+ *
+ * The index must be below the impl's ssa_alloc and must not have been seen
+ * before in this impl; at most 4 components are allowed.  A fresh tracking
+ * record (with empty uses/if_uses sets) is inserted into state->ssa_defs.
+ */
+static void
+validate_ssa_def(nir_ssa_def *def, validate_state *state)
+{
+   assert(def->index < state->impl->ssa_alloc);
+   assert(!BITSET_TEST(state->ssa_defs_found, def->index));
+   BITSET_SET(state->ssa_defs_found, def->index);
+
+   assert(def->num_components <= 4);
+
+   ssa_def_validate_state *def_state = ralloc(state->ssa_defs,
+                                              ssa_def_validate_state);
+   def_state->where_defined = state->impl;
+   def_state->uses = _mesa_set_create(def_state, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+   def_state->if_uses = _mesa_set_create(def_state, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
+   _mesa_hash_table_insert(state->ssa_defs, def, def_state);
+}
+
+/* Validate a destination by dispatching on SSA vs. register. */
+static void
+validate_dest(nir_dest *dest, validate_state *state)
+{
+   if (dest->is_ssa)
+      validate_ssa_def(&dest->ssa, state);
+   else
+      validate_reg_dest(&dest->reg, state);
+}
+
+/* Validate the destination of the ALU instruction in state->instr: checks
+ * the write mask against the destination size and the use of saturate.
+ */
+static void
+validate_alu_dest(nir_alu_dest
*dest, validate_state *state)
+{
+   unsigned dest_size =
+      dest->dest.is_ssa ? dest->dest.ssa.num_components
+                        : dest->dest.reg.reg->num_components;
+   bool is_packed = !dest->dest.is_ssa && dest->dest.reg.reg->is_packed;
+   /*
+    * validate that the instruction doesn't write to components not in the
+    * register/SSA value
+    */
+   assert(is_packed || !(dest->write_mask & ~((1 << dest_size) - 1)));
+
+   /* validate that saturate is only ever used on instructions with
+    * destinations of type float
+    */
+   nir_alu_instr *alu = nir_instr_as_alu(state->instr);
+   assert(nir_op_infos[alu->op].output_type == nir_type_float ||
+          !dest->saturate);
+
+   validate_dest(&dest->dest, state);
+}
+
+/* Validate an ALU instruction: the opcode must be in range, then the
+ * destination and each of the opcode's num_inputs sources are validated.
+ */
+static void
+validate_alu_instr(nir_alu_instr *instr, validate_state *state)
+{
+   assert(instr->op < nir_num_opcodes);
+
+   validate_alu_dest(&instr->dest, state);
+
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      validate_alu_src(instr, i, state);
+   }
+}
+
+/* Walk a deref chain from `deref` through the ->child links and check that
+ * each array/struct link has the GLSL type derived from its parent's type.
+ * Indirect array derefs also have their index source validated.
+ *
+ * NOTE(review): `parent` is NULL for the first link, so the array and
+ * struct cases presumably rely on every chain starting with a var deref
+ * (the only caller visible here passes &nir_deref_var::deref) -- confirm.
+ */
+static void
+validate_deref_chain(nir_deref *deref, validate_state *state)
+{
+   nir_deref *parent = NULL;
+   while (deref != NULL) {
+      switch (deref->deref_type) {
+      case nir_deref_type_array:
+         assert(deref->type == glsl_get_array_element(parent->type));
+         if (nir_deref_as_array(deref)->deref_array_type ==
+             nir_deref_array_type_indirect)
+            validate_src(&nir_deref_as_array(deref)->indirect, state);
+         break;
+
+      case nir_deref_type_struct:
+         assert(deref->type ==
+                glsl_get_struct_field(parent->type,
+                                      nir_deref_as_struct(deref)->index));
+         break;
+
+      case nir_deref_type_var:
+         break;
+
+      default:
+         assert(!"Invalid deref type");
+         break;
+      }
+
+      parent = deref;
+      deref = deref->child;
+   }
+}
+
+/* A local variable may only be used inside the function impl that declared
+ * it (as recorded in state->var_defs).  Other modes are not checked here.
+ */
+static void
+validate_var_use(nir_variable *var, validate_state *state)
+{
+   if (var->data.mode == nir_var_local) {
+      struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var);
+
+      assert(entry);
+      assert((nir_function_impl *) entry->data == state->impl);
+   }
+}
+
+/* Validate a variable dereference; `deref` must not be NULL. */
+static void
+validate_deref_var(nir_deref_var
*deref, validate_state *state)
+{
+   assert(deref != NULL);
+   assert(deref->deref.type == deref->var->type);
+
+   validate_var_use(deref->var, state);
+
+   validate_deref_chain(&deref->deref, state);
+}
+
+/* Validate an intrinsic instruction against its nir_intrinsic_infos entry.
+ *
+ * For every source (and the destination, if the intrinsic has one) the
+ * number of components read/written must be non-zero and must fit in the
+ * SSA value or non-packed register; a table value of 0 means "use
+ * instr->num_components".  All variable derefs are validated, and the
+ * load/store/copy_var intrinsics get extra variable-mode checks.
+ */
+static void
+validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
+{
+   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   for (unsigned i = 0; i < num_srcs; i++) {
+      unsigned components_read =
+         nir_intrinsic_infos[instr->intrinsic].src_components[i];
+      if (components_read == 0)
+         components_read = instr->num_components;
+
+      assert(components_read > 0);
+
+      if (instr->src[i].is_ssa) {
+         assert(components_read <= instr->src[i].ssa->num_components);
+      } else if (!instr->src[i].reg.reg->is_packed) {
+         assert(components_read <= instr->src[i].reg.reg->num_components);
+      }
+
+      validate_src(&instr->src[i], state);
+   }
+
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+      unsigned components_written =
+         nir_intrinsic_infos[instr->intrinsic].dest_components;
+      if (components_written == 0)
+         components_written = instr->num_components;
+
+      assert(components_written > 0);
+
+      if (instr->dest.is_ssa) {
+         assert(components_written <= instr->dest.ssa.num_components);
+      } else if (!instr->dest.reg.reg->is_packed) {
+         assert(components_written <= instr->dest.reg.reg->num_components);
+      }
+
+      validate_dest(&instr->dest, state);
+   }
+
+   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+   for (unsigned i = 0; i < num_vars; i++) {
+      validate_deref_var(instr->variables[i], state);
+   }
+
+   /* Shader outputs may not be read; inputs and uniforms may not be written. */
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_var:
+      assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
+      break;
+   case nir_intrinsic_store_var:
+      assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
+             instr->variables[0]->var->data.mode != nir_var_uniform);
+      break;
+   case nir_intrinsic_copy_var:
+      assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
+
instr->variables[0]->var->data.mode != nir_var_uniform);
+      assert(instr->variables[1]->var->data.mode != nir_var_shader_out);
+      break;
+   default:
+      break;
+   }
+}
+
+/* Validate a texture instruction: the destination, each source (every
+ * source type may appear at most once) and the optional sampler deref.
+ */
+static void
+validate_tex_instr(nir_tex_instr *instr, validate_state *state)
+{
+   validate_dest(&instr->dest, state);
+
+   bool src_type_seen[nir_num_tex_src_types];
+   for (unsigned i = 0; i < nir_num_tex_src_types; i++)
+      src_type_seen[i] = false;
+
+   for (unsigned i = 0; i < instr->num_srcs; i++) {
+      assert(!src_type_seen[instr->src[i].src_type]);
+      src_type_seen[instr->src[i].src_type] = true;
+      validate_src(&instr->src[i].src, state);
+   }
+
+   if (instr->sampler != NULL)
+      validate_deref_var(instr->sampler, state);
+}
+
+/* Validate a call instruction against its callee's signature.
+ *
+ * A call without a return deref is only legal for a void callee; otherwise
+ * the return deref's type must equal the callee's return type.  The number
+ * and types of the parameters must match, and every deref is validated.
+ */
+static void
+validate_call_instr(nir_call_instr *instr, validate_state *state)
+{
+   if (instr->return_deref == NULL)
+      assert(glsl_type_is_void(instr->callee->return_type));
+   else
+      assert(instr->return_deref->deref.type == instr->callee->return_type);
+
+   assert(instr->num_params == instr->callee->num_params);
+
+   for (unsigned i = 0; i < instr->num_params; i++) {
+      assert(instr->callee->params[i].type == instr->params[i]->deref.type);
+      validate_deref_var(instr->params[i], state);
+   }
+
+   /* validate_deref_var() asserts on NULL, and a NULL return deref is the
+    * valid representation of a call to a void function, so only validate
+    * the return deref when there is one.
+    */
+   if (instr->return_deref != NULL)
+      validate_deref_var(instr->return_deref, state);
+}
+
+/* A load_const instruction only defines an SSA value. */
+static void
+validate_load_const_instr(nir_load_const_instr *instr, validate_state *state)
+{
+   validate_ssa_def(&instr->def, state);
+}
+
+/* An ssa_undef instruction only defines an SSA value. */
+static void
+validate_ssa_undef_instr(nir_ssa_undef_instr *instr, validate_state *state)
+{
+   validate_ssa_def(&instr->def, state);
+}
+
+/* Validate the destination and source count of a phi instruction. */
+static void
+validate_phi_instr(nir_phi_instr *instr, validate_state *state)
+{
+   /*
+    * don't validate the sources until we get to them from their predecessor
+    * basic blocks, to avoid validating an SSA use before its definition.
+    */
+
+   validate_dest(&instr->dest, state);
+
+   exec_list_validate(&instr->srcs);
+   assert(exec_list_length(&instr->srcs) ==
+          state->block->predecessors->entries);
+}
+
+/* Validate a single instruction of the current block.
+ *
+ * state->instr is set for the duration of the call so that source and
+ * destination validation can attribute uses and defs to this instruction,
+ * and is reset to NULL afterwards.
+ */
+static void
+validate_instr(nir_instr *instr, validate_state *state)
+{
+   assert(instr->block == state->block);
+
+   state->instr = instr;
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      validate_alu_instr(nir_instr_as_alu(instr), state);
+      break;
+
+   case nir_instr_type_call:
+      validate_call_instr(nir_instr_as_call(instr), state);
+      break;
+
+   case nir_instr_type_intrinsic:
+      validate_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
+      break;
+
+   case nir_instr_type_tex:
+      validate_tex_instr(nir_instr_as_tex(instr), state);
+      break;
+
+   case nir_instr_type_load_const:
+      validate_load_const_instr(nir_instr_as_load_const(instr), state);
+      break;
+
+   case nir_instr_type_phi:
+      validate_phi_instr(nir_instr_as_phi(instr), state);
+      break;
+
+   case nir_instr_type_ssa_undef:
+      validate_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
+      break;
+
+   case nir_instr_type_jump:
+      break;
+
+   default:
+      /* This switch dispatches on every instruction type, not only ALU. */
+      assert(!"Invalid instruction type");
+      break;
+   }
+
+   state->instr = NULL;
+}
+
+/* Validate the source of phi `instr` that comes from predecessor `pred`.
+ *
+ * The phi must have an SSA destination, and the matching source must be SSA
+ * with the same number of components.  Aborts if the phi has no source for
+ * `pred`.
+ */
+static void
+validate_phi_src(nir_phi_instr *instr, nir_block *pred, validate_state *state)
+{
+   state->instr = &instr->instr;
+
+   assert(instr->dest.is_ssa);
+
+   exec_list_validate(&instr->srcs);
+   nir_foreach_phi_src(instr, src) {
+      if (src->pred == pred) {
+         assert(src->src.is_ssa);
+         assert(src->src.ssa->num_components ==
+                instr->dest.ssa.num_components);
+
+         validate_src(&src->src, state);
+         state->instr = NULL;
+         return;
+      }
+   }
+
+   abort();
+}
+
+/* For each phi at the top of `succ`, validate the source coming from
+ * `block`.  Stops at the first non-phi instruction.
+ */
+static void
+validate_phi_srcs(nir_block *block, nir_block *succ, validate_state *state)
+{
+   nir_foreach_instr(succ, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      validate_phi_src(nir_instr_as_phi(instr), block, state);
+   }
+}
+
+static void validate_cf_node(nir_cf_node *node, validate_state *state);
+
+/* Validate a basic block: its instructions, successors and phi sources. */
+static void
+validate_block(nir_block
*block, validate_state *state)
+{
+   assert(block->cf_node.parent == state->parent_node);
+
+   state->block = block;
+
+   exec_list_validate(&block->instr_list);
+   nir_foreach_instr(block, instr) {
+      /* Phis must be grouped at the start of the block. */
+      if (instr->type == nir_instr_type_phi) {
+         assert(instr == nir_block_first_instr(block) ||
+                nir_instr_prev(instr)->type == nir_instr_type_phi);
+      }
+
+      /* A jump may only be the last instruction of the block. */
+      if (instr->type == nir_instr_type_jump) {
+         assert(instr == nir_block_last_instr(block));
+      }
+
+      validate_instr(instr, state);
+   }
+
+   assert(block->successors[0] != NULL);
+
+   /* Each successor must list this block as a predecessor; the phi sources
+    * flowing along that edge are validated here, after the block's own
+    * definitions have been seen.
+    */
+   for (unsigned i = 0; i < 2; i++) {
+      if (block->successors[i] != NULL) {
+         struct set_entry *entry =
+            _mesa_set_search(block->successors[i]->predecessors, block);
+         assert(entry);
+
+         validate_phi_srcs(block, block->successors[i], state);
+      }
+   }
+
+   /* A block ending in a jump has exactly one successor. */
+   if (!exec_list_is_empty(&block->instr_list) &&
+       nir_block_last_instr(block)->type == nir_instr_type_jump)
+      assert(block->successors[1] == NULL);
+}
+
+/* Validate an if statement.
+ *
+ * The if must sit between two blocks in its parent list; the preceding
+ * block's two successors must be the first nodes of the then and else
+ * lists.  The condition is validated as an if-use (state->if_stmt is set,
+ * state->instr is not), both branches must be non-empty, and their nodes
+ * are validated with this if as the parent node.
+ */
+static void
+validate_if(nir_if *if_stmt, validate_state *state)
+{
+   state->if_stmt = if_stmt;
+
+   assert(!exec_node_is_head_sentinel(if_stmt->cf_node.node.prev));
+   nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node);
+   assert(prev_node->type == nir_cf_node_block);
+
+   nir_block *prev_block = nir_cf_node_as_block(prev_node);
+   assert(&prev_block->successors[0]->cf_node ==
+          nir_if_first_then_node(if_stmt));
+   assert(&prev_block->successors[1]->cf_node ==
+          nir_if_first_else_node(if_stmt));
+
+   assert(!exec_node_is_tail_sentinel(if_stmt->cf_node.node.next));
+   nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node);
+   assert(next_node->type == nir_cf_node_block);
+
+   validate_src(&if_stmt->condition, state);
+
+   assert(!exec_list_is_empty(&if_stmt->then_list));
+   assert(!exec_list_is_empty(&if_stmt->else_list));
+
+   nir_cf_node *old_parent = state->parent_node;
+   state->parent_node = &if_stmt->cf_node;
+
+   exec_list_validate(&if_stmt->then_list);
+   foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->then_list) {
+
validate_cf_node(cf_node, state);
+   }
+
+   exec_list_validate(&if_stmt->else_list);
+   foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->else_list) {
+      validate_cf_node(cf_node, state);
+   }
+
+   state->parent_node = old_parent;
+   state->if_stmt = NULL;
+}
+
+/* Validate a loop.
+ *
+ * The loop must sit between two blocks in its parent list, and the
+ * preceding block must have the first node of the loop body as its only
+ * successor.  The body must be non-empty; its nodes are validated with this
+ * loop as the parent node.
+ */
+static void
+validate_loop(nir_loop *loop, validate_state *state)
+{
+   assert(!exec_node_is_head_sentinel(loop->cf_node.node.prev));
+   nir_cf_node *prev_node = nir_cf_node_prev(&loop->cf_node);
+   assert(prev_node->type == nir_cf_node_block);
+
+   nir_block *prev_block = nir_cf_node_as_block(prev_node);
+   assert(&prev_block->successors[0]->cf_node == nir_loop_first_cf_node(loop));
+   assert(prev_block->successors[1] == NULL);
+
+   assert(!exec_node_is_tail_sentinel(loop->cf_node.node.next));
+   nir_cf_node *next_node = nir_cf_node_next(&loop->cf_node);
+   assert(next_node->type == nir_cf_node_block);
+
+   assert(!exec_list_is_empty(&loop->body));
+
+   nir_cf_node *old_parent = state->parent_node;
+   state->parent_node = &loop->cf_node;
+
+   exec_list_validate(&loop->body);
+   foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+      validate_cf_node(cf_node, state);
+   }
+
+   state->parent_node = old_parent;
+}
+
+/* Validate a control-flow node (block, if or loop) whose parent must be the
+ * node currently recorded in state->parent_node.
+ */
+static void
+validate_cf_node(nir_cf_node *node, validate_state *state)
+{
+   assert(node->parent == state->parent_node);
+
+   switch (node->type) {
+   case nir_cf_node_block:
+      validate_block(nir_cf_node_as_block(node), state);
+      break;
+
+   case nir_cf_node_if:
+      validate_if(nir_cf_node_as_if(node), state);
+      break;
+
+   case nir_cf_node_loop:
+      validate_loop(nir_cf_node_as_loop(node), state);
+      break;
+
+   default:
+      /* This switch is over control-flow node types, not instructions. */
+      assert(!"Invalid CF node type");
+      break;
+   }
+}
+
+/* Check a register declaration before the code using it is walked, and
+ * create the validator's tracking record for it.  The index must be below
+ * the owning shader's or impl's reg_alloc and must not repeat.
+ */
+static void
+prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state)
+{
+   assert(reg->is_global == is_global);
+
+   if (is_global)
+      assert(reg->index < state->shader->reg_alloc);
+   else
+      assert(reg->index < state->impl->reg_alloc);
+   assert(!BITSET_TEST(state->regs_found, reg->index));
+
BITSET_SET(state->regs_found, reg->index);
+
+   /* The tracking record owns the three sets (they are allocated with it as
+    * their ralloc context).
+    */
+   reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state);
+   reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+   reg_state->if_uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
+   reg_state->defs = _mesa_set_create(reg_state, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+
+   reg_state->where_defined = is_global ? NULL : state->impl;
+
+   _mesa_hash_table_insert(state->regs, reg, reg_state);
+}
+
+/* After the code has been walked, compare the uses, if_uses and defs sets
+ * stored on the register with the ones the validator collected.  On a size
+ * mismatch, the entries that are on the register but were never seen are
+ * printed and the process aborts.
+ */
+static void
+postvalidate_reg_decl(nir_register *reg, validate_state *state)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(state->regs, reg);
+
+   reg_validate_state *reg_state = (reg_validate_state *) entry->data;
+
+   if (reg_state->uses->entries != reg->uses->entries) {
+      printf("extra entries in register uses:\n");
+      struct set_entry *entry;
+      set_foreach(reg->uses, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(reg_state->uses, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+
+   if (reg_state->if_uses->entries != reg->if_uses->entries) {
+      printf("extra entries in register if_uses:\n");
+      struct set_entry *entry;
+      set_foreach(reg->if_uses, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(reg_state->if_uses, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+
+   if (reg_state->defs->entries != reg->defs->entries) {
+      printf("extra entries in register defs:\n");
+      struct set_entry *entry;
+      set_foreach(reg->defs, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(reg_state->defs, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+}
+
+/* Validate a variable declaration: exactly the non-global declarations may
+ * have mode nir_var_local.  Local variables are recorded in state->var_defs
+ * together with the impl that declares them.
+ */
+static void
+validate_var_decl(nir_variable *var, bool is_global, validate_state *state)
+{
+   assert(is_global != (var->data.mode == nir_var_local));
+
+   /*
+    * TODO validate some things ir_validate.cpp does (requires more GLSL type
+    *
support)
+    */
+
+   if (!is_global) {
+      _mesa_hash_table_insert(state->var_defs, var, state->impl);
+   }
+}
+
+/* Callback for nir_foreach_ssa_def(): compare the uses and if_uses sets
+ * stored on an SSA def with the ones collected while walking the code.  On
+ * a size mismatch the entries that are on the def but were never seen are
+ * printed and the process aborts.  Always returns true so that iteration
+ * continues.
+ */
+static bool
+postvalidate_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   validate_state *state = void_state;
+
+   struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def);
+   ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
+
+   if (def_state->uses->entries != def->uses->entries) {
+      printf("extra entries in SSA def uses:\n");
+      struct set_entry *entry;
+      set_foreach(def->uses, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(def_state->uses, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+
+   if (def_state->if_uses->entries != def->if_uses->entries) {
+      /* Name the right set so the two failures can be told apart. */
+      printf("extra entries in SSA def if_uses:\n");
+      struct set_entry *entry;
+      set_foreach(def->if_uses, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(def_state->if_uses, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+
+   return true;
+}
+
+/* Callback for nir_foreach_block(): post-validate every SSA def in a block. */
+static bool
+postvalidate_ssa_defs_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr)
+      nir_foreach_ssa_def(instr, postvalidate_ssa_def, state);
+
+   return true;
+}
+
+/* Validate a function implementation: its signature against the overload,
+ * its (empty, successor-less) end block, then its locals, registers and
+ * control-flow body, followed by the post-validation passes.
+ */
+static void
+validate_function_impl(nir_function_impl *impl, validate_state *state)
+{
+   assert(impl->overload->impl == impl);
+   assert(impl->cf_node.parent == NULL);
+
+   assert(impl->num_params == impl->overload->num_params);
+   for (unsigned i = 0; i < impl->num_params; i++)
+      assert(impl->params[i]->type == impl->overload->params[i].type);
+
+   if (glsl_type_is_void(impl->overload->return_type))
+      assert(impl->return_var == NULL);
+   else
+      assert(impl->return_var->type == impl->overload->return_type);
+
+   assert(exec_list_is_empty(&impl->end_block->instr_list));
+   assert(impl->end_block->successors[0] == NULL);
+   assert(impl->end_block->successors[1] == NULL);
+
+   state->impl = impl;
+   state->parent_node = &impl->cf_node;
+
+
exec_list_validate(&impl->locals);
+   foreach_list_typed(nir_variable, var, node, &impl->locals) {
+      validate_var_decl(var, false, state);
+   }
+
+   /* Resize and clear the "register index seen" bitset for this impl.
+    * NOTE(review): the realloc() result is not checked, and the requested
+    * size is 0 when reg_alloc is 0 -- confirm this is acceptable for
+    * debug-only code.
+    */
+   state->regs_found = realloc(state->regs_found,
+                               BITSET_WORDS(impl->reg_alloc) *
+                               sizeof(BITSET_WORD));
+   memset(state->regs_found, 0, BITSET_WORDS(impl->reg_alloc) *
+                                sizeof(BITSET_WORD));
+   exec_list_validate(&impl->registers);
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      prevalidate_reg_decl(reg, false, state);
+   }
+
+   /* Same for the "SSA index seen" bitset. */
+   state->ssa_defs_found = realloc(state->ssa_defs_found,
+                                   BITSET_WORDS(impl->ssa_alloc) *
+                                   sizeof(BITSET_WORD));
+   memset(state->ssa_defs_found, 0, BITSET_WORDS(impl->ssa_alloc) *
+                                    sizeof(BITSET_WORD));
+   exec_list_validate(&impl->body);
+   foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+      validate_cf_node(node, state);
+   }
+
+   /* Now that every use and def has been seen, cross-check the use/def
+    * sets stored on the registers and SSA defs.
+    */
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      postvalidate_reg_decl(reg, state);
+   }
+
+   nir_foreach_block(impl, postvalidate_ssa_defs_block, state);
+}
+
+/* Validate the implementation of an overload, if it has one. */
+static void
+validate_function_overload(nir_function_overload *overload,
+                           validate_state *state)
+{
+   if (overload->impl != NULL)
+      validate_function_impl(overload->impl, state);
+}
+
+/* Validate every overload of a function; each must point back at `func`. */
+static void
+validate_function(nir_function *func, validate_state *state)
+{
+   exec_list_validate(&func->overload_list);
+   foreach_list_typed(nir_function_overload, overload, node, &func->overload_list) {
+      assert(overload->function == func);
+      validate_function_overload(overload, state);
+   }
+}
+
+/* Create the pointer-keyed hash tables used for tracking and clear the
+ * bitset pointers.  The remaining fields of `state` are not initialized
+ * here.
+ */
+static void
+init_validate_state(validate_state *state)
+{
+   state->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
+   state->ssa_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   state->ssa_defs_found = NULL;
+   state->regs_found = NULL;
+   state->var_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+}
+
+/* Release everything allocated by init_validate_state() and the bitsets. */
+static void
+destroy_validate_state(validate_state *state)
+{
+
_mesa_hash_table_destroy(state->regs, NULL);
+   _mesa_hash_table_destroy(state->ssa_defs, NULL);
+   free(state->ssa_defs_found);
+   free(state->regs_found);
+   _mesa_hash_table_destroy(state->var_defs, NULL);
+}
+
+/* Entry point: validate a whole shader.
+ *
+ * Uniforms, inputs, outputs, globals and system values are validated as
+ * global variable declarations; the shader-level registers are
+ * pre-validated, every function is walked, and the shader-level registers
+ * are then post-validated.  Failures are reported through assert()/abort().
+ */
+void
+nir_validate_shader(nir_shader *shader)
+{
+   validate_state state;
+   init_validate_state(&state);
+
+   state.shader = shader;
+
+   struct hash_entry *entry;
+   hash_table_foreach(shader->uniforms, entry) {
+      validate_var_decl((nir_variable *) entry->data, true, &state);
+   }
+
+   hash_table_foreach(shader->inputs, entry) {
+      validate_var_decl((nir_variable *) entry->data, true, &state);
+   }
+
+   hash_table_foreach(shader->outputs, entry) {
+      validate_var_decl((nir_variable *) entry->data, true, &state);
+   }
+
+   exec_list_validate(&shader->globals);
+   foreach_list_typed(nir_variable, var, node, &shader->globals) {
+      validate_var_decl(var, true, &state);
+   }
+
+   exec_list_validate(&shader->system_values);
+   foreach_list_typed(nir_variable, var, node, &shader->system_values) {
+      validate_var_decl(var, true, &state);
+   }
+
+   /* Size and clear the "register index seen" bitset for global registers. */
+   state.regs_found = realloc(state.regs_found,
+                              BITSET_WORDS(shader->reg_alloc) *
+                              sizeof(BITSET_WORD));
+   memset(state.regs_found, 0, BITSET_WORDS(shader->reg_alloc) *
+                               sizeof(BITSET_WORD));
+   exec_list_validate(&shader->registers);
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      prevalidate_reg_decl(reg, true, &state);
+   }
+
+   exec_list_validate(&shader->functions);
+   foreach_list_typed(nir_function, func, node, &shader->functions) {
+      validate_function(func, &state);
+   }
+
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      postvalidate_reg_decl(reg, &state);
+   }
+
+   destroy_validate_state(&state);
+}
+
+#endif /* NDEBUG */
diff --git a/mesalib/src/glsl/nir/nir_worklist.c b/mesalib/src/glsl/nir/nir_worklist.c
new file mode 100644
index 000000000..a8baae937
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_worklist.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is
hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir_worklist.h"
+
+/* Initialize an empty worklist with room for num_blocks blocks.  Both the
+ * presence bitset (zero-initialized) and the block array are allocated out
+ * of mem_ctx.
+ */
+void
+nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks,
+                        void *mem_ctx)
+{
+   w->size = num_blocks;
+   w->count = 0;
+   w->start = 0;
+
+   w->blocks_present = rzalloc_array(mem_ctx, BITSET_WORD,
+                                     BITSET_WORDS(num_blocks));
+   w->blocks = ralloc_array(mem_ctx, nir_block *, num_blocks);
+}
+
+/* Free the storage allocated by nir_block_worklist_init(). */
+void
+nir_block_worklist_fini(nir_block_worklist *w)
+{
+   ralloc_free(w->blocks_present);
+   ralloc_free(w->blocks);
+}
+
+/* nir_foreach_block() callback: append one block to the worklist `w`. */
+static bool
+worklist_add_block(nir_block *block, void *w)
+{
+   nir_block_worklist_push_tail(w, block);
+
+   return true;
+}
+
+/* Append every block of `impl` to the tail of the worklist. */
+void
+nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl)
+{
+   nir_foreach_block(impl, worklist_add_block, w);
+}
+
+/* Insert `block` at the head of the queue unless it is already queued. */
+void
+nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block)
+{
+   /* Pushing a block we already have is a no-op */
+   if (BITSET_TEST(w->blocks_present, block->index))
+      return;
+
+   assert(w->count < w->size);
+
+   /* Step the head back by one slot, wrapping around the ring buffer. */
+   if (w->start == 0)
+      w->start = w->size - 1;
+   else
+      w->start--;
+
+   w->count++;
+
+   w->blocks[w->start] = block;
+   BITSET_SET(w->blocks_present, block->index);
+}
+
+/* Return the block at the head of the queue without removing it. */
+nir_block *
+nir_block_worklist_peek_head(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   return w->blocks[w->start];
+}
+
+/* Remove and return the block at the head of the queue. */
+nir_block *
+nir_block_worklist_pop_head(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   unsigned head = w->start;
+
+   w->start = (w->start + 1) % w->size;
+   w->count--;
+
+   BITSET_CLEAR(w->blocks_present, w->blocks[head]->index);
+   return w->blocks[head];
+}
+
+/* Append `block` at the tail of the queue unless it is already queued. */
+void
+nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block)
+{
+   /* Pushing a block we already have is a no-op */
+   if (BITSET_TEST(w->blocks_present, block->index))
+      return;
+
+   assert(w->count < w->size);
+
+   w->count++;
+
+   /* The tail slot is derived from the head; the head itself (w->start)
+    * must not move when appending, or the blocks already queued are lost.
+    */
+   unsigned tail = (w->start + w->count - 1) % w->size;
+
+   w->blocks[tail] = block;
+   BITSET_SET(w->blocks_present, block->index);
+}
+
+nir_block *
+nir_block_worklist_peek_tail(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   /* Peeking must not modify the queue, so only compute the tail index;
+    * w->start keeps pointing at the head.
+    */
+   unsigned tail = (w->start + w->count - 1) % w->size;
+
+   return w->blocks[tail];
+}
+
+/* Remove and return the block at the tail of the queue. */
+nir_block *
+nir_block_worklist_pop_tail(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   /* Removing from the tail only shrinks the count; the head stays put. */
+   unsigned tail = (w->start + w->count - 1) % w->size;
+
+   w->count--;
+
+   BITSET_CLEAR(w->blocks_present, w->blocks[tail]->index);
+   return w->blocks[tail];
+}
diff --git a/mesalib/src/glsl/nir/nir_worklist.h b/mesalib/src/glsl/nir/nir_worklist.h
new file mode 100644
index 000000000..d5a8568e4
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_worklist.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#pragma once
+
+#ifndef _NIR_WORKLIST_
+#define _NIR_WORKLIST_
+
+#include "nir.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Represents a double-ended queue of unique blocks
+ *
+ * The worklist data structure guarantees that each block is in the queue at
+ * most once.  Pushing a block onto either end of the queue is a no-op if
+ * the block is already in the queue.  In order for this to work, the
+ * caller must ensure that the blocks are properly indexed.
+ */
+typedef struct {
+   /* The total size of the worklist */
+   unsigned size;
+
+   /* The number of blocks currently in the worklist */
+   unsigned count;
+
+   /* The offset in the array of blocks at which the list starts */
+   unsigned start;
+
+   /* A bitset of all of the blocks currently present in the worklist */
+   BITSET_WORD *blocks_present;
+
+   /* The actual worklist */
+   nir_block **blocks;
+} nir_block_worklist;
+
+void nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks,
+                             void *mem_ctx);
+void nir_block_worklist_fini(nir_block_worklist *w);
+
+void nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl);
+
+/* Returns true when no blocks are queued. */
+static inline bool
+nir_block_worklist_is_empty(const nir_block_worklist *w)
+{
+   return w->count == 0;
+}
+
+void nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block);
+
+nir_block *nir_block_worklist_peek_head(nir_block_worklist *w);
+
+nir_block *nir_block_worklist_pop_head(nir_block_worklist *w);
+
+void nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block);
+
+nir_block *nir_block_worklist_peek_tail(nir_block_worklist *w);
+
+nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _NIR_WORKLIST_ */
diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp
index c6f4a9c78..6784242ff 100644
--- a/mesalib/src/glsl/opt_algebraic.cpp
+++
b/mesalib/src/glsl/opt_algebraic.cpp @@ -119,6 +119,8 @@ is_valid_vec_const(ir_constant *ir) static inline bool is_less_than_one(ir_constant *ir) { + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + if (!is_valid_vec_const(ir)) return false; @@ -134,6 +136,8 @@ is_less_than_one(ir_constant *ir) static inline bool is_greater_than_zero(ir_constant *ir) { + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + if (!is_valid_vec_const(ir)) return false; @@ -376,6 +380,15 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) } break; + case ir_unop_f2i: + case ir_unop_f2u: + if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) { + return new(mem_ctx) ir_expression(ir->operation, + ir->type, + op_expr[0]->operands[0]); + } + break; + case ir_unop_logic_not: { enum ir_expression_operation new_op = ir_unop_logic_not; @@ -514,10 +527,45 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if (op_const[1] && !op_const[0]) reassociate_constant(ir, 1, op_const[1], op_expr[0]); + /* Optimizes + * + * (mul (floor (add (abs x) 0.5) (sign x))) + * + * into + * + * (trunc (add x (mul (sign x) 0.5))) + */ + for (int i = 0; i < 2; i++) { + ir_expression *sign_expr = ir->operands[i]->as_expression(); + ir_expression *floor_expr = ir->operands[1 - i]->as_expression(); + + if (!sign_expr || sign_expr->operation != ir_unop_sign || + !floor_expr || floor_expr->operation != ir_unop_floor) + continue; + + ir_expression *add_expr = floor_expr->operands[0]->as_expression(); + + for (int j = 0; j < 2; j++) { + ir_expression *abs_expr = add_expr->operands[j]->as_expression(); + if (!abs_expr || abs_expr->operation != ir_unop_abs) + continue; + + ir_constant *point_five = add_expr->operands[1 - j]->as_constant(); + if (!point_five->is_value(0.5, 0)) + continue; + + if (abs_expr->operands[0]->equals(sign_expr->operands[0])) { + return trunc(add(abs_expr->operands[0], + mul(sign_expr, point_five))); + } + } + } break; case ir_binop_div: - if (is_vec_one(op_const[0]) && 
ir->type->base_type == GLSL_TYPE_FLOAT) { + if (is_vec_one(op_const[0]) && ( + ir->type->base_type == GLSL_TYPE_FLOAT || + ir->type->base_type == GLSL_TYPE_DOUBLE)) { return new(mem_ctx) ir_expression(ir_unop_rcp, ir->operands[1]->type, ir->operands[1], @@ -538,7 +586,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) unsigned components[4] = { 0 }, count = 0; for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) { - if (op_const[i]->value.f[c] == 0.0) + if (op_const[i]->is_zero()) continue; components[count] = c; @@ -554,7 +602,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) /* Swizzle both operands to remove the channels that were zero. */ return new(mem_ctx) - ir_expression(op, glsl_type::float_type, + ir_expression(op, ir->type, new(mem_ctx) ir_swizzle(ir->operands[0], components, count), new(mem_ctx) ir_swizzle(ir->operands[1], @@ -747,6 +795,12 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp) return op_expr[0]->operands[0]; + if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 || + op_expr[0]->operation == ir_unop_exp)) { + return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type, + neg(op_expr[0]->operands[0])); + } + /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at * its IR level, so we can always apply this transformation. 
*/ @@ -785,7 +839,19 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) return mul(ir->operands[1], ir->operands[2]); } else if (is_vec_zero(op_const[1])) { unsigned op2_components = ir->operands[2]->type->vector_elements; - ir_constant *one = new(mem_ctx) ir_constant(1.0f, op2_components); + ir_constant *one; + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + one = new(mem_ctx) ir_constant(1.0f, op2_components); + break; + case GLSL_TYPE_DOUBLE: + one = new(mem_ctx) ir_constant(1.0, op2_components); + break; + default: + unreachable("unexpected type"); + } + return mul(ir->operands[0], add(one, neg(ir->operands[2]))); } break; diff --git a/mesalib/src/glsl/opt_constant_propagation.cpp b/mesalib/src/glsl/opt_constant_propagation.cpp index c334e1276..90cc0c89b 100644 --- a/mesalib/src/glsl/opt_constant_propagation.cpp +++ b/mesalib/src/glsl/opt_constant_propagation.cpp @@ -194,6 +194,9 @@ ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue) case GLSL_TYPE_FLOAT: data.f[i] = found->constant->value.f[rhs_channel]; break; + case GLSL_TYPE_DOUBLE: + data.d[i] = found->constant->value.d[rhs_channel]; + break; case GLSL_TYPE_INT: data.i[i] = found->constant->value.i[rhs_channel]; break; diff --git a/mesalib/src/glsl/opt_copy_propagation.cpp b/mesalib/src/glsl/opt_copy_propagation.cpp index 5c65af66b..806027b28 100644 --- a/mesalib/src/glsl/opt_copy_propagation.cpp +++ b/mesalib/src/glsl/opt_copy_propagation.cpp @@ -128,6 +128,9 @@ ir_copy_propagation_visitor::visit_enter(ir_function_signature *ir) visit_list_elements(this, &ir->body); + ralloc_free(this->acp); + ralloc_free(this->kills); + this->kills = orig_kills; this->acp = orig_acp; this->killed_all = orig_killed_all; @@ -215,7 +218,7 @@ ir_copy_propagation_visitor::handle_if_block(exec_list *instructions) /* Populate the initial acp with a copy of the original */ foreach_in_list(acp_entry, a, orig_acp) { - this->acp->push_tail(new(this->mem_ctx) acp_entry(a->lhs, a->rhs)); + 
this->acp->push_tail(new(this->acp) acp_entry(a->lhs, a->rhs)); } visit_list_elements(this, instructions); @@ -226,12 +229,15 @@ ir_copy_propagation_visitor::handle_if_block(exec_list *instructions) exec_list *new_kills = this->kills; this->kills = orig_kills; + ralloc_free(this->acp); this->acp = orig_acp; this->killed_all = this->killed_all || orig_killed_all; foreach_in_list(kill_entry, k, new_kills) { kill(k->var); } + + ralloc_free(new_kills); } ir_visitor_status @@ -269,6 +275,7 @@ ir_copy_propagation_visitor::visit_enter(ir_loop *ir) exec_list *new_kills = this->kills; this->kills = orig_kills; + ralloc_free(this->acp); this->acp = orig_acp; this->killed_all = this->killed_all || orig_killed_all; @@ -276,6 +283,8 @@ ir_copy_propagation_visitor::visit_enter(ir_loop *ir) kill(k->var); } + ralloc_free(new_kills); + /* already descended into the children. */ return visit_continue_with_parent; } @@ -294,7 +303,7 @@ ir_copy_propagation_visitor::kill(ir_variable *var) /* Add the LHS variable to the list of killed variables in this block. 
*/ - this->kills->push_tail(new(this->mem_ctx) kill_entry(var)); + this->kills->push_tail(new(this->kills) kill_entry(var)); } /** @@ -322,7 +331,7 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir) ir->condition = new(ralloc_parent(ir)) ir_constant(false); this->progress = true; } else { - entry = new(this->mem_ctx) acp_entry(lhs_var, rhs_var); + entry = new(this->acp) acp_entry(lhs_var, rhs_var); this->acp->push_tail(entry); } } diff --git a/mesalib/src/glsl/opt_copy_propagation_elements.cpp b/mesalib/src/glsl/opt_copy_propagation_elements.cpp index c3e55bcd1..353a5c668 100644 --- a/mesalib/src/glsl/opt_copy_propagation_elements.cpp +++ b/mesalib/src/glsl/opt_copy_propagation_elements.cpp @@ -156,6 +156,9 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir) visit_list_elements(this, &ir->body); + ralloc_free(this->acp); + ralloc_free(this->kills); + this->kills = orig_kills; this->acp = orig_acp; this->killed_all = orig_killed_all; @@ -173,9 +176,9 @@ ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir) kill_entry *k; if (lhs) - k = new(mem_ctx) kill_entry(var, ir->write_mask); + k = new(this->kills) kill_entry(var, ir->write_mask); else - k = new(mem_ctx) kill_entry(var, ~0); + k = new(this->kills) kill_entry(var, ~0); kill(k); } @@ -334,7 +337,7 @@ ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions) /* Populate the initial acp with a copy of the original */ foreach_in_list(acp_entry, a, orig_acp) { - this->acp->push_tail(new(this->mem_ctx) acp_entry(a)); + this->acp->push_tail(new(this->acp) acp_entry(a)); } visit_list_elements(this, instructions); @@ -345,6 +348,7 @@ ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions) exec_list *new_kills = this->kills; this->kills = orig_kills; + ralloc_free(this->acp); this->acp = orig_acp; this->killed_all = this->killed_all || orig_killed_all; @@ -354,6 +358,8 @@ 
ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions) foreach_in_list_safe(kill_entry, k, new_kills) { kill(k); } + + ralloc_free(new_kills); } ir_visitor_status @@ -391,6 +397,7 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir) exec_list *new_kills = this->kills; this->kills = orig_kills; + ralloc_free(this->acp); this->acp = orig_acp; this->killed_all = this->killed_all || orig_killed_all; @@ -398,6 +405,8 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir) kill(k); } + ralloc_free(new_kills); + /* already descended into the children. */ return visit_continue_with_parent; } @@ -423,6 +432,7 @@ ir_copy_propagation_elements_visitor::kill(kill_entry *k) if (k->next) k->remove(); + ralloc_steal(this->kills, k); this->kills->push_tail(k); } diff --git a/mesalib/src/glsl/opt_dead_builtin_variables.cpp b/mesalib/src/glsl/opt_dead_builtin_variables.cpp index 85c75d6f2..0d4e3a8f0 100644 --- a/mesalib/src/glsl/opt_dead_builtin_variables.cpp +++ b/mesalib/src/glsl/opt_dead_builtin_variables.cpp @@ -52,7 +52,7 @@ optimize_dead_builtin_variables(exec_list *instructions, && var->data.how_declared != ir_var_declared_implicitly) continue; - if (strncmp(var->name, "gl_", 3) != 0) + if (!is_gl_identifier(var->name)) continue; /* gl_ModelViewProjectionMatrix and gl_Vertex are special because they diff --git a/mesalib/src/glsl/opt_minmax.cpp b/mesalib/src/glsl/opt_minmax.cpp index 32fb2d7ea..23d0b109d 100644 --- a/mesalib/src/glsl/opt_minmax.cpp +++ b/mesalib/src/glsl/opt_minmax.cpp @@ -133,6 +133,14 @@ compare_components(ir_constant *a, ir_constant *b) else foundequal = true; break; + case GLSL_TYPE_DOUBLE: + if (a->value.d[c0] < b->value.d[c1]) + foundless = true; + else if (a->value.d[c0] > b->value.d[c1]) + foundgreater = true; + else + foundequal = true; + break; default: unreachable("not reached"); } @@ -178,6 +186,11 @@ combine_constant(bool ismin, ir_constant *a, ir_constant *b) (!ismin && b->value.f[i] > c->value.f[i])) 
c->value.f[i] = b->value.f[i]; break; + case GLSL_TYPE_DOUBLE: + if ((ismin && b->value.d[i] < c->value.d[i]) || + (!ismin && b->value.d[i] > c->value.d[i])) + c->value.d[i] = b->value.d[i]; + break; default: assert(!"not reached"); } diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp index 67b0d0c82..ad0d75bf8 100644 --- a/mesalib/src/glsl/standalone_scaffolding.cpp +++ b/mesalib/src/glsl/standalone_scaffolding.cpp @@ -127,6 +127,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) ctx->Extensions.ARB_fragment_coord_conventions = true; ctx->Extensions.ARB_fragment_layer_viewport = true; ctx->Extensions.ARB_gpu_shader5 = true; + ctx->Extensions.ARB_gpu_shader_fp64 = true; ctx->Extensions.ARB_sample_shading = true; ctx->Extensions.ARB_shader_bit_encoding = true; ctx->Extensions.ARB_shader_stencil_export = true; |