From 982ac918afe6a1c02d5cf735d7b6c56443a048cc Mon Sep 17 00:00:00 2001 From: marha Date: Fri, 7 Feb 2014 23:28:38 +0100 Subject: xkbcomp xkeyboard-config libxcb libxtrans fontconfig libX11 libxcb mesa xserver git update 7 Feb 2014 Update to openssl1.0.1f xserver commit 83e38eb73fd8c852513aac2da2975b4c01070ec2 libxcb commit d7eb0bdf3b5b11ee9f40ee5e73df8fc0bdfa59f3 xkeyboard-config commit 7596672b96315465df8d8d691e3a567a52f70743 libX11 commit aacf95dacc7c598e7297894580d4d655593813b2 xkbcomp commit 31b90ee4ffc774e0da540277907fc5540c0b012c libxtrans commit 3f0de269abe59353acbd7a5587d68ce0da91db67 fontconfig commit e310d2fac2d874d5aa76c609df70cc7b871c0b6d mesa commit dd2229d4c68ed78a50104637aef904f8ab6d7dd3 --- mesalib/src/glsl/ast.h | 37 +++++++- mesalib/src/glsl/ast_to_hir.cpp | 128 +++++++++++++++++++++++++--- mesalib/src/glsl/ast_type.cpp | 15 ++++ mesalib/src/glsl/builtin_variables.cpp | 68 +++++++++++++++ mesalib/src/glsl/glcpp/glcpp-lex.l | 2 - mesalib/src/glsl/glcpp/glcpp-parse.y | 49 +++++++---- mesalib/src/glsl/glcpp/glcpp.c | 3 +- mesalib/src/glsl/glcpp/glcpp.h | 5 +- mesalib/src/glsl/glcpp/pp.c | 4 +- mesalib/src/glsl/glsl_lexer.ll | 3 +- mesalib/src/glsl/glsl_parser.yy | 96 ++++++++++++++++----- mesalib/src/glsl/glsl_parser_extras.cpp | 58 ++++++++++--- mesalib/src/glsl/glsl_parser_extras.h | 21 +++++ mesalib/src/glsl/ir.h | 2 +- mesalib/src/glsl/ir_constant_expression.cpp | 17 ++++ mesalib/src/glsl/linker.cpp | 79 ++++++++++++++++- mesalib/src/glsl/lower_instructions.cpp | 14 ++- mesalib/src/glsl/main.cpp | 16 +++- mesalib/src/glsl/opt_algebraic.cpp | 67 +++++++++++++++ mesalib/src/glsl/opt_vectorize.cpp | 66 +++++++++++--- mesalib/src/glsl/standalone_scaffolding.cpp | 12 +++ mesalib/src/glsl/standalone_scaffolding.h | 2 + 22 files changed, 668 insertions(+), 96 deletions(-) (limited to 'mesalib/src/glsl') diff --git a/mesalib/src/glsl/ast.h b/mesalib/src/glsl/ast.h index 0bda28d20..61fd923bc 100644 --- a/mesalib/src/glsl/ast.h +++ b/mesalib/src/glsl/ast.h @@ -460,6 +460,12 @@ struct ast_type_qualifier { unsigned prim_type:1; unsigned max_vertices:1; /** \} */ + + /** + * local_size_{x,y,z} flags for compute shaders. Bit 0 represents + * local_size_x, and so on. + */ + unsigned local_size:3; } /** \brief Set of flags, accessed by name. */ q; @@ -509,6 +515,13 @@ struct ast_type_qualifier { */ int offset; + /** + * Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}" + * layout qualifier. Element i of this array is only valid if + * flags.q.local_size & (1 << i) is set. + */ + int local_size[3]; + /** * Return true if and only if an interpolation qualifier is present. */ @@ -888,14 +901,13 @@ public: ast_node *body; -private: /** * Generate IR from the condition of a loop * * This is factored out of ::hir because some loops have the condition * test at the top (for and while), and others have it at the end (do-while). */ - void condition_to_hir(class ir_loop *, struct _mesa_glsl_parse_state *); + void condition_to_hir(exec_list *, struct _mesa_glsl_parse_state *); }; @@ -990,6 +1002,27 @@ private: const GLenum prim_type; }; + +/** + * AST node representing a decalaration of the input layout for compute + * shaders. + */ +class ast_cs_input_layout : public ast_node +{ +public: + ast_cs_input_layout(const struct YYLTYPE &locp, const unsigned *local_size) + { + memcpy(this->local_size, local_size, sizeof(this->local_size)); + set_location(locp); + } + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + +private: + unsigned local_size[3]; +}; + /*@}*/ extern void diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp index 1bfb4e531..c89a26bf9 100644 --- a/mesalib/src/glsl/ast_to_hir.cpp +++ b/mesalib/src/glsl/ast_to_hir.cpp @@ -77,6 +77,7 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) state->toplevel_ir = instructions; state->gs_input_prim_type_specified = false; + state->cs_input_local_size_specified = false; /* Section 4.2 of the GLSL 1.20 specification states: * "The built-in functions are scoped in a scope outside the global scope @@ -2155,6 +2156,12 @@ validate_explicit_location(const struct ast_type_qualifier *qual, fail = true; break; + + case MESA_SHADER_COMPUTE: + _mesa_glsl_error(loc, state, + "compute shader variables cannot be given " + "explicit locations"); + return; }; if (fail) { @@ -2275,6 +2282,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, var->data.mode = ir_var_uniform; if (!is_parameter && is_varying_var(var, state->stage)) { + /* User-defined ins/outs are not permitted in compute shaders. */ + if (state->stage == MESA_SHADER_COMPUTE) { + _mesa_glsl_error(loc, state, + "user-defined input and output variables are not " + "permitted in compute shaders"); + } + /* This variable is being used to link data between shader stages (in * pre-glsl-1.30 parlance, it's a "varying"). Check that it has a type * that is allowed for such purposes. @@ -2337,6 +2351,9 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, if (var->data.mode == ir_var_shader_in) var->data.invariant = true; break; + case MESA_SHADER_COMPUTE: + /* Invariance isn't meaningful in compute shaders. */ + break; } } @@ -4029,17 +4046,22 @@ ast_jump_statement::hir(exec_list *instructions, _mesa_glsl_error(& loc, state, "break may only appear in a loop or a switch"); } else { - /* For a loop, inline the for loop expression again, - * since we don't know where near the end of - * the loop body the normal copy of it - * is going to be placed. + /* For a loop, inline the for loop expression again, since we don't + * know where near the end of the loop body the normal copy of it is + * going to be placed. Same goes for the condition for a do-while + * loop. */ if (state->loop_nesting_ast != NULL && - mode == ast_continue && - state->loop_nesting_ast->rest_expression) { - state->loop_nesting_ast->rest_expression->hir(instructions, - state); - } + mode == ast_continue) { + if (state->loop_nesting_ast->rest_expression) { + state->loop_nesting_ast->rest_expression->hir(instructions, + state); + } + if (state->loop_nesting_ast->mode == + ast_iteration_statement::ast_do_while) { + state->loop_nesting_ast->condition_to_hir(instructions, state); + } + } if (state->switch_state.is_switch_innermost && mode == ast_break) { @@ -4369,14 +4391,14 @@ ast_case_label::hir(exec_list *instructions, } void -ast_iteration_statement::condition_to_hir(ir_loop *stmt, +ast_iteration_statement::condition_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) { void *ctx = state; if (condition != NULL) { ir_rvalue *const cond = - condition->hir(& stmt->body_instructions, state); + condition->hir(instructions, state); if ((cond == NULL) || !cond->type->is_boolean() || !cond->type->is_scalar()) { @@ -4397,7 +4419,7 @@ ast_iteration_statement::condition_to_hir(ir_loop *stmt, new(ctx) ir_loop_jump(ir_loop_jump::jump_break); if_stmt->then_instructions.push_tail(break_stmt); - stmt->body_instructions.push_tail(if_stmt); + instructions->push_tail(if_stmt); } } } @@ -4432,7 +4454,7 @@ ast_iteration_statement::hir(exec_list *instructions, state->switch_state.is_switch_innermost = false; if (mode != ast_do_while) - condition_to_hir(stmt, state); + condition_to_hir(&stmt->body_instructions, state); if (body != NULL) body->hir(& stmt->body_instructions, state); @@ -4441,7 +4463,7 @@ ast_iteration_statement::hir(exec_list *instructions, rest_expression->hir(& stmt->body_instructions, state); if (mode == ast_do_while) - condition_to_hir(stmt, state); + condition_to_hir(&stmt->body_instructions, state); if (mode != ast_do_while) state->symbols->pop_scope(); @@ -5289,6 +5311,84 @@ ast_gs_input_layout::hir(exec_list *instructions, } +ir_rvalue * +ast_cs_input_layout::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + YYLTYPE loc = this->get_location(); + + /* If any compute input layout declaration preceded this one, make sure it + * was consistent with this one. + */ + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) { + if (state->cs_input_local_size[i] != this->local_size[i]) { + _mesa_glsl_error(&loc, state, + "compute shader input layout does not match" + " previous declaration"); + return NULL; + } + } + } + + /* From the ARB_compute_shader specification: + * + * If the local size of the shader in any dimension is greater + * than the maximum size supported by the implementation for that + * dimension, a compile-time error results. + * + * It is not clear from the spec how the error should be reported if + * the total size of the work group exceeds + * MAX_COMPUTE_WORK_GROUP_INVOCATIONS, but it seems reasonable to + * report it at compile time as well. + */ + GLuint64 total_invocations = 1; + for (int i = 0; i < 3; i++) { + if (this->local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { + _mesa_glsl_error(&loc, state, + "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE" + " (%d)", 'x' + i, + state->ctx->Const.MaxComputeWorkGroupSize[i]); + break; + } + total_invocations *= this->local_size[i]; + if (total_invocations > + state->ctx->Const.MaxComputeWorkGroupInvocations) { + _mesa_glsl_error(&loc, state, + "product of local_sizes exceeds " + "MAX_COMPUTE_WORK_GROUP_INVOCATIONS (%d)", + state->ctx->Const.MaxComputeWorkGroupInvocations); + break; + } + } + + state->cs_input_local_size_specified = true; + for (int i = 0; i < 3; i++) + state->cs_input_local_size[i] = this->local_size[i]; + + /* We may now declare the built-in constant gl_WorkGroupSize (see + * builtin_variable_generator::generate_constants() for why we didn't + * declare it earlier). + */ + ir_variable *var = new(state->symbols) + ir_variable(glsl_type::ivec3_type, "gl_WorkGroupSize", ir_var_auto); + var->data.how_declared = ir_var_declared_implicitly; + var->data.read_only = true; + instructions->push_tail(var); + state->symbols->add_variable(var); + ir_constant_data data; + memset(&data, 0, sizeof(data)); + for (int i = 0; i < 3; i++) + data.i[i] = this->local_size[i]; + var->constant_value = new(var) ir_constant(glsl_type::ivec3_type, &data); + var->constant_initializer = + new(var) ir_constant(glsl_type::ivec3_type, &data); + var->data.has_initializer = true; + + return NULL; +} + + static void detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, exec_list *instructions) diff --git a/mesalib/src/glsl/ast_type.cpp b/mesalib/src/glsl/ast_type.cpp index 637da0dfb..bbc430808 100644 --- a/mesalib/src/glsl/ast_type.cpp +++ b/mesalib/src/glsl/ast_type.cpp @@ -118,6 +118,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, ubo_layout_mask.flags.q.shared = 1; ast_type_qualifier ubo_binding_mask; + ubo_binding_mask.flags.i = 0; ubo_binding_mask.flags.q.explicit_binding = 1; ubo_binding_mask.flags.q.explicit_offset = 1; @@ -158,6 +159,20 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, if ((q.flags.i & ubo_layout_mask.flags.i) != 0) this->flags.i &= ~ubo_layout_mask.flags.i; + for (int i = 0; i < 3; i++) { + if (q.flags.q.local_size & (1 << i)) { + if ((this->flags.q.local_size & (1 << i)) && + this->local_size[i] != q.local_size[i]) { + _mesa_glsl_error(loc, state, + "compute shader set conflicting values for " + "local_size_%c (%d and %d)", 'x' + i, + this->local_size[i], q.local_size[i]); + return false; + } + this->local_size[i] = q.local_size[i]; + } + } + this->flags.i |= q.flags.i; if (q.flags.q.explicit_location) diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp index d6bc3c073..cc423383d 100644 --- a/mesalib/src/glsl/builtin_variables.cpp +++ b/mesalib/src/glsl/builtin_variables.cpp @@ -356,6 +356,7 @@ public: void generate_vs_special_vars(); void generate_gs_special_vars(); void generate_fs_special_vars(); + void generate_cs_special_vars(); void generate_varyings(); private: @@ -389,6 +390,7 @@ private: enum ir_variable_mode mode, int slot); ir_variable *add_uniform(const glsl_type *type, const char *name); ir_variable *add_const(const char *name, int value); + ir_variable *add_const_ivec3(const char *name, int x, int y, int z); void add_varying(int slot, const glsl_type *type, const char *name, const char *name_as_gs_input); @@ -529,6 +531,25 @@ builtin_variable_generator::add_const(const char *name, int value) } +ir_variable * +builtin_variable_generator::add_const_ivec3(const char *name, int x, int y, + int z) +{ + ir_variable *const var = add_variable(name, glsl_type::ivec3_type, + ir_var_auto, -1); + ir_constant_data data; + memset(&data, 0, sizeof(data)); + data.i[0] = x; + data.i[1] = y; + data.i[2] = z; + var->constant_value = new(var) ir_constant(glsl_type::ivec3_type, &data); + var->constant_initializer = + new(var) ir_constant(glsl_type::ivec3_type, &data); + var->data.has_initializer = true; + return var; +} + + void builtin_variable_generator::generate_constants() { @@ -659,6 +680,37 @@ builtin_variable_generator::generate_constants() add_const("gl_MaxTessControlAtomicCounters", 0); add_const("gl_MaxTessEvaluationAtomicCounters", 0); } + + if (state->is_version(430, 0) || state->ARB_compute_shader_enable) { + add_const_ivec3("gl_MaxComputeWorkGroupCount", + state->Const.MaxComputeWorkGroupCount[0], + state->Const.MaxComputeWorkGroupCount[1], + state->Const.MaxComputeWorkGroupCount[2]); + add_const_ivec3("gl_MaxComputeWorkGroupSize", + state->Const.MaxComputeWorkGroupSize[0], + state->Const.MaxComputeWorkGroupSize[1], + state->Const.MaxComputeWorkGroupSize[2]); + + /* From the GLSL 4.40 spec, section 7.1 (Built-In Language Variables): + * + * The built-in constant gl_WorkGroupSize is a compute-shader + * constant containing the local work-group size of the shader. The + * size of the work group in the X, Y, and Z dimensions is stored in + * the x, y, and z components. The constants values in + * gl_WorkGroupSize will match those specified in the required + * local_size_x, local_size_y, and local_size_z layout qualifiers + * for the current shader. This is a constant so that it can be + * used to size arrays of memory that can be shared within the local + * work group. It is a compile-time error to use gl_WorkGroupSize + * in a shader that does not declare a fixed local group size, or + * before that shader has declared a fixed local group size, using + * local_size_x, local_size_y, and local_size_z. + * + * To prevent the shader from trying to refer to gl_WorkGroupSize before + * the layout declaration, we don't define it here. Intead we define it + * in ast_cs_input_layout::hir(). + */ + } } @@ -867,6 +919,16 @@ builtin_variable_generator::generate_fs_special_vars() } +/** + * Generate variables which only exist in compute shaders. + */ +void +builtin_variable_generator::generate_cs_special_vars() +{ + /* TODO: finish this. */ +} + + /** * Add a single "varying" variable. The variable's type and direction (input * or output) are adjusted as appropriate for the type of shader being @@ -888,6 +950,9 @@ builtin_variable_generator::add_varying(int slot, const glsl_type *type, case MESA_SHADER_FRAGMENT: add_input(slot, type, name); break; + case MESA_SHADER_COMPUTE: + /* Compute shaders don't have varyings. */ + break; } } @@ -975,5 +1040,8 @@ _mesa_glsl_initialize_variables(exec_list *instructions, case MESA_SHADER_FRAGMENT: gen.generate_fs_special_vars(); break; + case MESA_SHADER_COMPUTE: + gen.generate_cs_special_vars(); + break; } } diff --git a/mesalib/src/glsl/glcpp/glcpp-lex.l b/mesalib/src/glsl/glcpp/glcpp-lex.l index f1fa192c5..ea3b862e4 100644 --- a/mesalib/src/glsl/glcpp/glcpp-lex.l +++ b/mesalib/src/glsl/glcpp/glcpp-lex.l @@ -155,8 +155,6 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? /* Single-line comments */ "//"[^\n]* { - if (parser->commented_newlines) - BEGIN NEWLINE_CATCHUP; } /* Multi-line comments */ diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y index 184e5c237..eeafa4d39 100644 --- a/mesalib/src/glsl/glcpp/glcpp-parse.y +++ b/mesalib/src/glsl/glcpp/glcpp-parse.y @@ -30,6 +30,7 @@ #include "glcpp.h" #include "main/core.h" /* for struct gl_extensions */ +#include "main/mtypes.h" /* for gl_api enum */ static void yyerror (YYLTYPE *locp, glcpp_parser_t *parser, const char *error); @@ -194,7 +195,7 @@ line: ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n"); } | HASH_LINE { - glcpp_parser_resolve_version(parser); + glcpp_parser_resolve_implicit_version(parser); } pp_tokens NEWLINE { if (parser->skip_stack == NULL || @@ -254,10 +255,10 @@ define: control_line: HASH_DEFINE { - glcpp_parser_resolve_version(parser); + glcpp_parser_resolve_implicit_version(parser); } define | HASH_UNDEF { - glcpp_parser_resolve_version(parser); + glcpp_parser_resolve_implicit_version(parser); } IDENTIFIER NEWLINE { macro_t *macro = hash_table_find (parser->defines, $3); if (macro) { @@ -267,7 +268,7 @@ control_line: ralloc_free ($3); } | HASH_IF { - glcpp_parser_resolve_version(parser); + glcpp_parser_resolve_implicit_version(parser); } conditional_tokens NEWLINE { /* Be careful to only evaluate the 'if' expression if * we are not skipping. When we are skipping, we @@ -299,14 +300,14 @@ control_line: _glcpp_parser_skip_stack_push_if (parser, & @1, 0); } | HASH_IFDEF { - glcpp_parser_resolve_version(parser); + glcpp_parser_resolve_implicit_version(parser); } IDENTIFIER junk NEWLINE { macro_t *macro = hash_table_find (parser->defines, $3); ralloc_free ($3); _glcpp_parser_skip_stack_push_if (parser, & @1, macro != NULL); } | HASH_IFNDEF { - glcpp_parser_resolve_version(parser); + glcpp_parser_resolve_implicit_version(parser); } IDENTIFIER junk NEWLINE { macro_t *macro = hash_table_find (parser->defines, $3); ralloc_free ($3); @@ -374,13 +375,19 @@ control_line: _glcpp_parser_skip_stack_pop (parser, & @1); } NEWLINE | HASH_VERSION integer_constant NEWLINE { + if (parser->version_resolved) { + glcpp_error(& @1, parser, "#version must appear on the first line"); + } _glcpp_parser_handle_version_declaration(parser, $2, NULL, true); } | HASH_VERSION integer_constant IDENTIFIER NEWLINE { + if (parser->version_resolved) { + glcpp_error(& @1, parser, "#version must appear on the first line"); + } _glcpp_parser_handle_version_declaration(parser, $2, $3, true); } | HASH NEWLINE { - glcpp_parser_resolve_version(parser); + glcpp_parser_resolve_implicit_version(parser); } ; @@ -1186,7 +1193,7 @@ static void add_builtin_define(glcpp_parser_t *parser, } glcpp_parser_t * -glcpp_parser_create (const struct gl_extensions *extensions) +glcpp_parser_create (const struct gl_extensions *extensions, gl_api api) { glcpp_parser_t *parser; @@ -1215,6 +1222,7 @@ glcpp_parser_create (const struct gl_extensions *extensions) parser->error = 0; parser->extensions = extensions; + parser->api = api; parser->version_resolved = false; parser->has_new_line_number = 0; @@ -2024,6 +2032,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio { const struct gl_extensions *extensions = parser->extensions; + if (parser->version_resolved) + return; + parser->version_resolved = true; add_builtin_define (parser, "__VERSION__", version); @@ -2043,6 +2054,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio } else { add_builtin_define(parser, "GL_ARB_draw_buffers", 1); add_builtin_define(parser, "GL_ARB_texture_rectangle", 1); + add_builtin_define(parser, "GL_AMD_shader_trinary_minmax", 1); + if (extensions != NULL) { if (extensions->EXT_texture_array) @@ -2108,11 +2121,11 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions->ARB_shader_atomic_counters) add_builtin_define(parser, "GL_ARB_shader_atomic_counters", 1); - if (extensions->AMD_shader_trinary_minmax) - add_builtin_define(parser, "GL_AMD_shader_trinary_minmax", 1); - if (extensions->ARB_viewport_array) add_builtin_define(parser, "GL_ARB_viewport_array", 1); + + if (extensions->ARB_compute_shader) + add_builtin_define(parser, "GL_ARB_compute_shader", 1); } } @@ -2140,15 +2153,19 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio } } -/* GLSL version is no version is explicitly specified. */ +/* GLSL version if no version is explicitly specified. */ #define IMPLICIT_GLSL_VERSION 110 +/* GLSL ES version if no version is explicitly specified. */ +#define IMPLICIT_GLSL_ES_VERSION 100 + void -glcpp_parser_resolve_version(glcpp_parser_t *parser) +glcpp_parser_resolve_implicit_version(glcpp_parser_t *parser) { - if (parser->version_resolved) - return; + int language_version = parser->api == API_OPENGLES2 ? + IMPLICIT_GLSL_ES_VERSION : + IMPLICIT_GLSL_VERSION; - _glcpp_parser_handle_version_declaration(parser, IMPLICIT_GLSL_VERSION, + _glcpp_parser_handle_version_declaration(parser, language_version, NULL, false); } diff --git a/mesalib/src/glsl/glcpp/glcpp.c b/mesalib/src/glsl/glcpp/glcpp.c index c9c2ff29e..07b1500b6 100644 --- a/mesalib/src/glsl/glcpp/glcpp.c +++ b/mesalib/src/glsl/glcpp/glcpp.c @@ -30,7 +30,7 @@ #include "main/mtypes.h" #include "main/shaderobj.h" -extern int yydebug; +extern int glcpp_parser_debug; void _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, @@ -101,6 +101,7 @@ load_text_file(void *ctx, const char *filename) static void init_fake_gl_context (struct gl_context *gl_ctx) { + gl_ctx->API = API_OPENGL_COMPAT; gl_ctx->Const.DisableGLSLLineContinuations = false; } diff --git a/mesalib/src/glsl/glcpp/glcpp.h b/mesalib/src/glsl/glcpp/glcpp.h index 4aa200a63..79ccb234f 100644 --- a/mesalib/src/glsl/glcpp/glcpp.h +++ b/mesalib/src/glsl/glcpp/glcpp.h @@ -183,6 +183,7 @@ struct glcpp_parser { size_t info_log_length; int error; const struct gl_extensions *extensions; + gl_api api; bool version_resolved; bool has_new_line_number; int new_line_number; @@ -194,7 +195,7 @@ struct glcpp_parser { struct gl_extensions; glcpp_parser_t * -glcpp_parser_create (const struct gl_extensions *extensions); +glcpp_parser_create (const struct gl_extensions *extensions, gl_api api); int glcpp_parser_parse (glcpp_parser_t *parser); @@ -203,7 +204,7 @@ void glcpp_parser_destroy (glcpp_parser_t *parser); void -glcpp_parser_resolve_version(glcpp_parser_t *parser); +glcpp_parser_resolve_implicit_version(glcpp_parser_t *parser); int glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log, diff --git a/mesalib/src/glsl/glcpp/pp.c b/mesalib/src/glsl/glcpp/pp.c index 637a58f9c..4a623f81e 100644 --- a/mesalib/src/glsl/glcpp/pp.c +++ b/mesalib/src/glsl/glcpp/pp.c @@ -139,7 +139,7 @@ glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log, const struct gl_extensions *extensions, struct gl_context *gl_ctx) { int errors; - glcpp_parser_t *parser = glcpp_parser_create (extensions); + glcpp_parser_t *parser = glcpp_parser_create (extensions, gl_ctx->API); if (! gl_ctx->Const.DisableGLSLLineContinuations) *shader = remove_line_continuations(parser, *shader); @@ -151,7 +151,7 @@ glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log, if (parser->skip_stack) glcpp_error (&parser->skip_stack->loc, parser, "Unterminated #if\n"); - glcpp_parser_resolve_version(parser); + glcpp_parser_resolve_implicit_version(parser); ralloc_strcat(info_log, parser->info_log); diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll index 50875bf3b..3208b32da 100644 --- a/mesalib/src/glsl/glsl_lexer.ll +++ b/mesalib/src/glsl/glsl_lexer.ll @@ -349,7 +349,8 @@ layout { || yyextra->ARB_explicit_attrib_location_enable || yyextra->ARB_uniform_buffer_object_enable || yyextra->ARB_fragment_coord_conventions_enable - || yyextra->ARB_shading_language_420pack_enable) { + || yyextra->ARB_shading_language_420pack_enable + || yyextra->ARB_compute_shader_enable) { return LAYOUT_TOK; } else { yylval->identifier = strdup(yytext); diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy index 928c57e20..dc35c1a51 100644 --- a/mesalib/src/glsl/glsl_parser.yy +++ b/mesalib/src/glsl/glsl_parser.yy @@ -1291,6 +1291,34 @@ layout_qualifier_id: } } + static const char *local_size_qualifiers[3] = { + "local_size_x", + "local_size_y", + "local_size_z", + }; + for (int i = 0; i < 3; i++) { + if (match_layout_qualifier(local_size_qualifiers[i], $1, + state) == 0) { + if ($3 <= 0) { + _mesa_glsl_error(& @3, state, + "invalid %s of %d specified", + local_size_qualifiers[i], $3); + YYERROR; + } else if (!state->is_version(430, 0) && + !state->ARB_compute_shader_enable) { + _mesa_glsl_error(& @3, state, + "%s qualifier requires GLSL 4.30 or " + "ARB_compute_shader", + local_size_qualifiers[i]); + YYERROR; + } else { + $$.flags.q.local_size |= (1 << i); + $$.local_size[i] = $3; + } + break; + } + } + /* If the identifier didn't match any known layout identifiers, * emit an error. */ @@ -1466,7 +1494,7 @@ type_qualifier: "just before storage qualifiers"); } $$ = $1; - $$.flags.i |= $2.flags.i; + $$.merge_qualifier(&@1, state, $2); } | storage_qualifier type_qualifier { @@ -2334,29 +2362,53 @@ layout_defaults: { void *ctx = state; $$ = NULL; - if (state->stage != MESA_SHADER_GEOMETRY) { + switch (state->stage) { + case MESA_SHADER_GEOMETRY: { + if (!$1.flags.q.prim_type) { + _mesa_glsl_error(& @1, state, + "input layout qualifiers must specify a primitive" + " type"); + } else { + /* Make sure this is a valid input primitive type. */ + switch ($1.prim_type) { + case GL_POINTS: + case GL_LINES: + case GL_LINES_ADJACENCY: + case GL_TRIANGLES: + case GL_TRIANGLES_ADJACENCY: + $$ = new(ctx) ast_gs_input_layout(@1, $1.prim_type); + break; + default: + _mesa_glsl_error(&@1, state, + "invalid geometry shader input primitive type"); + break; + } + } + } + break; + case MESA_SHADER_COMPUTE: { + if ($1.flags.q.local_size == 0) { + _mesa_glsl_error(& @1, state, + "input layout qualifiers must specify a local " + "size"); + } else { + /* Infer a local_size of 1 for every unspecified dimension */ + unsigned local_size[3]; + for (int i = 0; i < 3; i++) { + if ($1.flags.q.local_size & (1 << i)) + local_size[i] = $1.local_size[i]; + else + local_size[i] = 1; + } + $$ = new(ctx) ast_cs_input_layout(@1, local_size); + } + } + break; + default: _mesa_glsl_error(& @1, state, "input layout qualifiers only valid in " - "geometry shaders"); - } else if (!$1.flags.q.prim_type) { - _mesa_glsl_error(& @1, state, - "input layout qualifiers must specify a primitive" - " type"); - } else { - /* Make sure this is a valid input primitive type. */ - switch ($1.prim_type) { - case GL_POINTS: - case GL_LINES: - case GL_LINES_ADJACENCY: - case GL_TRIANGLES: - case GL_TRIANGLES_ADJACENCY: - $$ = new(ctx) ast_gs_input_layout(@1, $1.prim_type); - break; - default: - _mesa_glsl_error(&@1, state, - "invalid geometry shader input primitive type"); - break; - } + "geometry and compute shaders"); + break; } } diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp index 87784ed69..b822d2292 100644 --- a/mesalib/src/glsl/glsl_parser_extras.cpp +++ b/mesalib/src/glsl/glsl_parser_extras.cpp @@ -56,7 +56,8 @@ static unsigned known_desktop_glsl_versions[] = _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, gl_shader_stage stage, void *mem_ctx) - : ctx(_ctx), switch_state() + : ctx(_ctx), cs_input_local_size_specified(false), cs_input_local_size(), + switch_state() { assert(stage < MESA_SHADER_STAGES); this->stage = stage; @@ -123,6 +124,12 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters; this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings; + /* Compute shader constants */ + for (unsigned i = 0; i < Elements(this->Const.MaxComputeWorkGroupCount); i++) + this->Const.MaxComputeWorkGroupCount[i] = ctx->Const.MaxComputeWorkGroupCount[i]; + for (unsigned i = 0; i < Elements(this->Const.MaxComputeWorkGroupSize); i++) + this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i]; + this->current_function = NULL; this->toplevel_ir = NULL; this->found_return = false; @@ -519,6 +526,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_sample_shading, true, false, ARB_sample_shading), EXT(AMD_shader_trinary_minmax, true, false, dummy_true), EXT(ARB_viewport_array, true, false, ARB_viewport_array), + EXT(ARB_compute_shader, true, false, ARB_compute_shader), }; #undef EXT @@ -1332,23 +1340,45 @@ set_shader_inout_layout(struct gl_shader *shader, /* Should have been prevented by the parser. */ assert(!state->gs_input_prim_type_specified); assert(!state->out_qualifier->flags.i); - return; } - shader->Geom.VerticesOut = 0; - if (state->out_qualifier->flags.q.max_vertices) - shader->Geom.VerticesOut = state->out_qualifier->max_vertices; - - if (state->gs_input_prim_type_specified) { - shader->Geom.InputType = state->gs_input_prim_type; - } else { - shader->Geom.InputType = PRIM_UNKNOWN; + if (shader->Stage != MESA_SHADER_COMPUTE) { + /* Should have been prevented by the parser. */ + assert(!state->cs_input_local_size_specified); } - if (state->out_qualifier->flags.q.prim_type) { - shader->Geom.OutputType = state->out_qualifier->prim_type; - } else { - shader->Geom.OutputType = PRIM_UNKNOWN; + switch (shader->Stage) { + case MESA_SHADER_GEOMETRY: + shader->Geom.VerticesOut = 0; + if (state->out_qualifier->flags.q.max_vertices) + shader->Geom.VerticesOut = state->out_qualifier->max_vertices; + + if (state->gs_input_prim_type_specified) { + shader->Geom.InputType = state->gs_input_prim_type; + } else { + shader->Geom.InputType = PRIM_UNKNOWN; + } + + if (state->out_qualifier->flags.q.prim_type) { + shader->Geom.OutputType = state->out_qualifier->prim_type; + } else { + shader->Geom.OutputType = PRIM_UNKNOWN; + } + break; + + case MESA_SHADER_COMPUTE: + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) + shader->Comp.LocalSize[i] = state->cs_input_local_size[i]; + } else { + for (int i = 0; i < 3; i++) + shader->Comp.LocalSize[i] = 0; + } + break; + + default: + /* Nothing to do. */ + break; } } diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h index 8a4cbf14c..7d661473d 100644 --- a/mesalib/src/glsl/glsl_parser_extras.h +++ b/mesalib/src/glsl/glsl_parser_extras.h @@ -196,6 +196,21 @@ struct _mesa_glsl_parse_state { */ GLenum gs_input_prim_type; + /** + * True if a compute shader input local size was specified using a layout + * directive. + * + * Note: this value is computed at ast_to_hir time rather than at parse + * time. + */ + bool cs_input_local_size_specified; + + /** + * If cs_input_local_size_specified is true, the local size that was + * specified. Otherwise ignored. + */ + unsigned cs_input_local_size[3]; + /** Output layout qualifiers from GLSL 1.50. (geometry shader controls)*/ struct ast_type_qualifier *out_qualifier; @@ -250,6 +265,10 @@ struct _mesa_glsl_parse_state { unsigned MaxFragmentAtomicCounters; unsigned MaxCombinedAtomicCounters; unsigned MaxAtomicBufferBindings; + + /* ARB_compute_shader */ + unsigned MaxComputeWorkGroupCount[3]; + unsigned MaxComputeWorkGroupSize[3]; } Const; /** @@ -356,6 +375,8 @@ struct _mesa_glsl_parse_state { bool AMD_shader_trinary_minmax_warn; bool ARB_viewport_array_enable; bool ARB_viewport_array_warn; + bool ARB_compute_shader_enable; + bool ARB_compute_shader_warn; /*@}*/ /** Extensions supported by the OpenGL implementation. */ diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h index 19e8383b2..e266328b2 100644 --- a/mesalib/src/glsl/ir.h +++ b/mesalib/src/glsl/ir.h @@ -471,7 +471,7 @@ public: void reinit_interface_type(const struct glsl_type *type) { if (this->max_ifc_array_access != NULL) { -#ifndef _NDEBUG +#ifndef NDEBUG /* Redeclaring gl_PerVertex is only allowed if none of the built-ins * it defines have been accessed yet; so it's safe to throw away the * old max_ifc_array_access pointer, since all of its values are diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp index f811fd138..7fa5a09d4 100644 --- a/mesalib/src/glsl/ir_constant_expression.cpp +++ b/mesalib/src/glsl/ir_constant_expression.cpp @@ -1397,6 +1397,23 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) break; } + case ir_binop_bfm: { + int bits = op[0]->value.i[0]; + int offset = op[1]->value.i[0]; + + for (unsigned c = 0; c < components; c++) { + if (bits == 0) + data.u[c] = op[0]->value.u[c]; + else if (offset < 0 || bits < 0) + data.u[c] = 0; /* Undefined for bitfieldInsert, per spec. */ + else if (offset + bits > 32) + data.u[c] = 0; /* Undefined for bitfieldInsert, per spec. */ + else + data.u[c] = ((1 << bits) - 1) << offset; + } + break; + } + case ir_binop_ldexp: for (unsigned c = 0; c < components; c++) { data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]); diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp index 93b475497..bcd739476 100644 --- a/mesalib/src/glsl/linker.cpp +++ b/mesalib/src/glsl/linker.cpp @@ -1287,6 +1287,69 @@ link_gs_inout_layout_qualifiers(struct gl_shader_program *prog, prog->Geom.VerticesOut = linked_shader->Geom.VerticesOut; } + +/** + * Perform cross-validation of compute shader local_size_{x,y,z} layout + * qualifiers for the attached compute shaders, and propagate them to the + * linked CS and linked shader program. + */ +static void +link_cs_input_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + for (int i = 0; i < 3; i++) + linked_shader->Comp.LocalSize[i] = 0; + + /* This function is called for all shader stages, but it only has an effect + * for compute shaders. + */ + if (linked_shader->Stage != MESA_SHADER_COMPUTE) + return; + + /* From the ARB_compute_shader spec, in the section describing local size + * declarations: + * + * If multiple compute shaders attached to a single program object + * declare local work-group size, the declarations must be identical; + * otherwise a link-time error results. Furthermore, if a program + * object contains any compute shaders, at least one must contain an + * input layout qualifier specifying the local work sizes of the + * program, or a link-time error will occur. + */ + for (unsigned sh = 0; sh < num_shaders; sh++) { + struct gl_shader *shader = shader_list[sh]; + + if (shader->Comp.LocalSize[0] != 0) { + if (linked_shader->Comp.LocalSize[0] != 0) { + for (int i = 0; i < 3; i++) { + if (linked_shader->Comp.LocalSize[i] != + shader->Comp.LocalSize[i]) { + linker_error(prog, "compute shader defined with conflicting " + "local sizes\n"); + return; + } + } + } + for (int i = 0; i < 3; i++) + linked_shader->Comp.LocalSize[i] = shader->Comp.LocalSize[i]; + } + } + + /* Just do the intrastage -> interstage propagation right now, + * since we already know we're in the right type of shader program + * for doing it. + */ + if (linked_shader->Comp.LocalSize[0] == 0) { + linker_error(prog, "compute shader didn't declare local size\n"); + return; + } + for (int i = 0; i < 3; i++) + prog->Comp.LocalSize[i] = linked_shader->Comp.LocalSize[i]; +} + + /** * Combine a group of shaders for a single stage to generate a linked shader * @@ -1391,6 +1454,7 @@ link_intrastage_shaders(void *mem_ctx, ralloc_steal(linked, linked->UniformBlocks); link_gs_inout_layout_qualifiers(prog, linked, shader_list, num_shaders); + link_cs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); populate_symbol_table(linked); @@ -2045,6 +2109,13 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) goto done; } + /* Compute shaders have additional restrictions. */ + if (num_shaders[MESA_SHADER_COMPUTE] > 0 && + num_shaders[MESA_SHADER_COMPUTE] != prog->NumShaders) { + linker_error(prog, "Compute shaders may not be linked with any other " + "type of shader\n"); + } + for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i] != NULL) ctx->Driver.DeleteShader(ctx, prog->_LinkedShaders[i]); @@ -2098,7 +2169,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) unsigned prev; - for (prev = 0; prev < MESA_SHADER_STAGES; prev++) { + for (prev = 0; prev <= MESA_SHADER_FRAGMENT; prev++) { if (prog->_LinkedShaders[prev] != NULL) break; } @@ -2106,7 +2177,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) /* Validate the inputs of each stage with the output of the preceding * stage. */ - for (unsigned i = prev + 1; i < MESA_SHADER_STAGES; i++) { + for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) { if (prog->_LinkedShaders[i] == NULL) continue; @@ -2201,7 +2272,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) } unsigned first; - for (first = 0; first < MESA_SHADER_STAGES; first++) { + for (first = 0; first <= MESA_SHADER_FRAGMENT; first++) { if (prog->_LinkedShaders[first] != NULL) break; } @@ -2233,7 +2304,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) * eliminated if they are (transitively) not used in a later stage. */ int last, next; - for (last = MESA_SHADER_STAGES-1; last >= 0; last--) { + for (last = MESA_SHADER_FRAGMENT; last >= 0; last--) { if (prog->_LinkedShaders[last] != NULL) break; } diff --git a/mesalib/src/glsl/lower_instructions.cpp b/mesalib/src/glsl/lower_instructions.cpp index 8f8d448ea..44a6e8021 100644 --- a/mesalib/src/glsl/lower_instructions.cpp +++ b/mesalib/src/glsl/lower_instructions.cpp @@ -384,10 +384,10 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir) /* Constants */ ir_constant *zeroi = ir_constant::zero(ir, ivec); - ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fffffu, vec_elem); ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem); ir_constant *exp_shift = new(ir) ir_constant(23u, vec_elem); + ir_constant *exp_width = new(ir) ir_constant(8u, vec_elem); /* Temporary variables */ ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary); @@ -449,11 +449,17 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir) */ ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL); - ir->operation = ir_unop_bitcast_u2f; - ir->operands[0] = bit_or(bit_and(bitcast_f2u(x), sign_mantissa_mask), - lshift(i2u(resulting_biased_exp), exp_shift_clone)); + ir->operation = ir_unop_bitcast_i2f; + ir->operands[0] = bitfield_insert(bitcast_f2i(x), resulting_biased_exp, + exp_shift_clone, exp_width); ir->operands[1] = NULL; + /* Don't generate new IR that would need to be lowered in an additional + * pass. + */ + if (lowering(BITFIELD_INSERT_TO_BFM_BFI)) + bitfield_insert_to_bfm_bfi(ir->operands[0]->as_expression()); + this->progress = true; } diff --git a/mesalib/src/glsl/main.cpp b/mesalib/src/glsl/main.cpp index 03b7c786b..3a0f812f2 100644 --- a/mesalib/src/glsl/main.cpp +++ b/mesalib/src/glsl/main.cpp @@ -50,6 +50,17 @@ initialize_context(struct gl_context *ctx, gl_api api) */ ctx->Const.GLSLVersion = glsl_version; ctx->Extensions.ARB_ES3_compatibility = true; + ctx->Const.MaxComputeWorkGroupCount[0] = 65535; + ctx->Const.MaxComputeWorkGroupCount[1] = 65535; + ctx->Const.MaxComputeWorkGroupCount[2] = 65535; + ctx->Const.MaxComputeWorkGroupSize[0] = 1024; + ctx->Const.MaxComputeWorkGroupSize[1] = 1024; + ctx->Const.MaxComputeWorkGroupSize[2] = 64; + ctx->Const.MaxComputeWorkGroupInvocations = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */ switch (ctx->Const.GLSLVersion) { case 100: @@ -221,7 +232,7 @@ load_text_file(void *ctx, const char *file_name) if (bytes < size - total_read) { free(text); text = NULL; - break; + goto error; } if (bytes == 0) { @@ -232,6 +243,7 @@ load_text_file(void *ctx, const char *file_name) } while (total_read < size); text[total_read] = '\0'; +error:; } fclose(fp); @@ -360,6 +372,8 @@ main(int argc, char **argv) shader->Type = GL_GEOMETRY_SHADER; else if (strncmp(".frag", ext, 5) == 0) shader->Type = GL_FRAGMENT_SHADER; + else if (strncmp(".comp", ext, 5) == 0) + shader->Type = GL_COMPUTE_SHADER; else usage_fail(argv[0]); shader->Stage = _mesa_shader_enum_to_shader_stage(shader->Type); diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp index d1f6435f4..1b4d31936 100644 --- a/mesalib/src/glsl/opt_algebraic.cpp +++ b/mesalib/src/glsl/opt_algebraic.cpp @@ -218,6 +218,11 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) this->mem_ctx = ralloc_parent(ir); switch (ir->operation) { + case ir_unop_bit_not: + if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not) + return op_expr[0]->operands[0]; + break; + case ir_unop_abs: if (op_expr[0] == NULL) break; @@ -240,6 +245,42 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) } break; + case ir_unop_exp: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_log) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_log: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_exp) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_exp2: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_log2) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_log2: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_exp2) { + return op_expr[0]->operands[0]; + } + break; + case ir_unop_logic_not: { enum ir_expression_operation new_op = ir_unop_logic_not; @@ -479,6 +520,10 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if (is_vec_one(op_const[0])) return op_const[0]; + /* x^1 == x */ + if (is_vec_one(op_const[1])) + return ir->operands[0]; + /* pow(2,x) == exp2(x) */ if (is_vec_two(op_const[0])) return expr(ir_unop_exp2, ir->operands[1]); @@ -502,15 +547,37 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) break; + case ir_triop_fma: + /* Operands are op0 * op1 + op2. */ + if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { + return ir->operands[2]; + } else if (is_vec_zero(op_const[2])) { + return mul(ir->operands[0], ir->operands[1]); + } else if (is_vec_one(op_const[0])) { + return add(ir->operands[1], ir->operands[2]); + } else if (is_vec_one(op_const[1])) { + return add(ir->operands[0], ir->operands[2]); + } + break; + case ir_triop_lrp: /* Operands are (x, y, a). */ if (is_vec_zero(op_const[2])) { return ir->operands[0]; } else if (is_vec_one(op_const[2])) { return ir->operands[1]; + } else if (ir->operands[0]->equals(ir->operands[1])) { + return ir->operands[0]; } break; + case ir_triop_csel: + if (is_vec_one(op_const[0])) + return ir->operands[1]; + if (is_vec_zero(op_const[0])) + return ir->operands[2]; + break; + default: break; } diff --git a/mesalib/src/glsl/opt_vectorize.cpp b/mesalib/src/glsl/opt_vectorize.cpp index 9ca811a86..8ee81f1a3 100644 --- a/mesalib/src/glsl/opt_vectorize.cpp +++ b/mesalib/src/glsl/opt_vectorize.cpp @@ -82,6 +82,8 @@ public: virtual ir_visitor_status visit_enter(ir_assignment *); virtual ir_visitor_status visit_enter(ir_swizzle *); + virtual ir_visitor_status visit_enter(ir_if *); + virtual ir_visitor_status visit_enter(ir_loop *); virtual ir_visitor_status visit_leave(ir_assignment *); @@ -104,9 +106,10 @@ public: * the nodes of the tree (expression float log2 (swiz z (var_ref v0))), * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))). * - * The function modifies only ir_expressions and ir_swizzles. For expressions - * it sets a new type and swizzles any scalar dereferences into appropriately - * sized vector arguments. For example, if combining + * The function operates on ir_expressions (and its operands) and ir_swizzles. + * For expressions it sets a new type and swizzles any non-expression and non- + * swizzle scalar operands into appropriately sized vector arguments. For + * example, if combining * * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1)))) * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1)))) @@ -144,9 +147,10 @@ rewrite_swizzle(ir_instruction *ir, void *data) mask->num_components, 1); for (unsigned i = 0; i < 4; i++) { if (expr->operands[i]) { - ir_dereference *deref = expr->operands[i]->as_dereference(); - if (deref && deref->type->is_scalar()) { - expr->operands[i] = new(ir) ir_swizzle(deref, 0, 0, 0, 0, + ir_rvalue *rval = expr->operands[i]->as_rvalue(); + if (rval && rval->type->is_scalar() && + !rval->as_expression() && !rval->as_swizzle()) { + expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0, mask->num_components); } } @@ -170,22 +174,31 @@ void ir_vectorize_visitor::try_vectorize() { if (this->last_assignment && this->channels > 1) { - ir_swizzle_mask mask = {0, 1, 2, 3, channels, 0}; - - visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); + ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0}; this->last_assignment->write_mask = 0; - for (unsigned i = 0; i < 4; i++) { + for (unsigned i = 0, j = 0; i < 4; i++) { if (this->assignment[i]) { this->last_assignment->write_mask |= 1 << i; if (this->assignment[i] != this->last_assignment) { this->assignment[i]->remove(); } + + switch (j) { + case 0: mask.x = i; break; + case 1: mask.y = i; break; + case 2: mask.z = i; break; + case 3: mask.w = i; break; + } + + j++; } } + visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); + this->progress = true; } clear(); @@ -276,6 +289,39 @@ ir_vectorize_visitor::visit_enter(ir_swizzle *ir) return visit_continue; } +/* Since there is no statement to visit between the "then" and "else" + * instructions try to vectorize before, in between, and after them to avoid + * combining statements from different basic blocks. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_if *ir) +{ + try_vectorize(); + + visit_list_elements(this, &ir->then_instructions); + try_vectorize(); + + visit_list_elements(this, &ir->else_instructions); + try_vectorize(); + + return visit_continue_with_parent; +} + +/* Since there is no statement to visit between the instructions in the body of + * the loop and the instructions after it try to vectorize before and after the + * body to avoid combining statements from different basic blocks. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_loop *ir) +{ + try_vectorize(); + + visit_list_elements(this, &ir->body_instructions); + try_vectorize(); + + return visit_continue_with_parent; +} + /** * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if * the swizzle mask(s) found were appropriate. Also save a pointer in diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp index 91794719b..6c25010b7 100644 --- a/mesalib/src/glsl/standalone_scaffolding.cpp +++ b/mesalib/src/glsl/standalone_scaffolding.cpp @@ -91,6 +91,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) ctx->Extensions.dummy_false = false; ctx->Extensions.dummy_true = true; + ctx->Extensions.ARB_compute_shader = true; ctx->Extensions.ARB_conservative_depth = true; ctx->Extensions.ARB_draw_instanced = true; ctx->Extensions.ARB_ES2_compatibility = true; @@ -140,6 +141,17 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 32; ctx->Const.MaxDrawBuffers = 1; + ctx->Const.MaxComputeWorkGroupCount[0] = 65535; + ctx->Const.MaxComputeWorkGroupCount[1] = 65535; + ctx->Const.MaxComputeWorkGroupCount[2] = 65535; + ctx->Const.MaxComputeWorkGroupSize[0] = 1024; + ctx->Const.MaxComputeWorkGroupSize[1] = 1024; + ctx->Const.MaxComputeWorkGroupSize[2] = 64; + ctx->Const.MaxComputeWorkGroupInvocations = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */ /* Set up default shader compiler options. */ struct gl_shader_compiler_options options; diff --git a/mesalib/src/glsl/standalone_scaffolding.h b/mesalib/src/glsl/standalone_scaffolding.h index 327fef2df..df783afdb 100644 --- a/mesalib/src/glsl/standalone_scaffolding.h +++ b/mesalib/src/glsl/standalone_scaffolding.h @@ -58,6 +58,8 @@ _mesa_shader_enum_to_shader_stage(GLenum v) return MESA_SHADER_FRAGMENT; case GL_GEOMETRY_SHADER: return MESA_SHADER_GEOMETRY; + case GL_COMPUTE_SHADER: + return MESA_SHADER_COMPUTE; default: assert(!"bad value in _mesa_shader_enum_to_shader_stage()"); return MESA_SHADER_VERTEX; -- cgit v1.2.3