diff options
Diffstat (limited to 'mesalib/src/mesa/state_tracker')
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_cb_blit.c | 21 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_cb_fbo.c | 24 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_cb_fbo.h | 5 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_cb_readpixels.c | 188 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_cb_texture.c | 20 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_cb_texture.h | 6 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_context.c | 5 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_context.h | 3 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_format.c | 6 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_format.h | 4 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 142 | ||||
-rw-r--r-- | mesalib/src/mesa/state_tracker/st_program.c | 45 |
12 files changed, 323 insertions, 146 deletions
diff --git a/mesalib/src/mesa/state_tracker/st_cb_blit.c b/mesalib/src/mesa/state_tracker/st_cb_blit.c index c463e3b04..50cab4294 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_blit.c +++ b/mesalib/src/mesa/state_tracker/st_cb_blit.c @@ -239,31 +239,22 @@ st_BlitFramebuffer(struct gl_context *ctx, /* depth and/or stencil blit */ /* get src/dst depth surfaces */ - struct gl_renderbuffer_attachment *srcDepth = - &readFB->Attachment[BUFFER_DEPTH]; - struct gl_renderbuffer_attachment *dstDepth = - &drawFB->Attachment[BUFFER_DEPTH]; - struct gl_renderbuffer_attachment *srcStencil = - &readFB->Attachment[BUFFER_STENCIL]; - struct gl_renderbuffer_attachment *dstStencil = - &drawFB->Attachment[BUFFER_STENCIL]; - struct st_renderbuffer *srcDepthRb = - st_renderbuffer(srcDepth->Renderbuffer); + st_renderbuffer(readFB->Attachment[BUFFER_DEPTH].Renderbuffer); struct st_renderbuffer *dstDepthRb = - st_renderbuffer(dstDepth->Renderbuffer); + st_renderbuffer(drawFB->Attachment[BUFFER_DEPTH].Renderbuffer); struct pipe_surface *dstDepthSurf = dstDepthRb ? dstDepthRb->surface : NULL; struct st_renderbuffer *srcStencilRb = - st_renderbuffer(srcStencil->Renderbuffer); + st_renderbuffer(readFB->Attachment[BUFFER_STENCIL].Renderbuffer); struct st_renderbuffer *dstStencilRb = - st_renderbuffer(dstStencil->Renderbuffer); + st_renderbuffer(drawFB->Attachment[BUFFER_STENCIL].Renderbuffer); struct pipe_surface *dstStencilSurf = dstStencilRb ? dstStencilRb->surface : NULL; - if (st_is_depth_stencil_combined(srcDepth, srcStencil) && - st_is_depth_stencil_combined(dstDepth, dstStencil)) { + if (_mesa_has_depthstencil_combined(readFB) && + _mesa_has_depthstencil_combined(drawFB)) { blit.mask = 0; if (mask & GL_DEPTH_BUFFER_BIT) blit.mask |= PIPE_MASK_Z; diff --git a/mesalib/src/mesa/state_tracker/st_cb_fbo.c b/mesalib/src/mesa/state_tracker/st_cb_fbo.c index 87c5b048c..4452e523b 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_fbo.c +++ b/mesalib/src/mesa/state_tracker/st_cb_fbo.c @@ -547,30 +547,6 @@ st_validate_attachment(struct gl_context *ctx, return valid; } - - -/** - * Check if two renderbuffer attachments name a combined depth/stencil - * renderbuffer. - */ -GLboolean -st_is_depth_stencil_combined(const struct gl_renderbuffer_attachment *depth, - const struct gl_renderbuffer_attachment *stencil) -{ - assert(depth && stencil); - - if (depth->Type == stencil->Type) { - if (depth->Type == GL_RENDERBUFFER_EXT && - depth->Renderbuffer == stencil->Renderbuffer) - return GL_TRUE; - - if (depth->Type == GL_TEXTURE && - depth->Texture == stencil->Texture) - return GL_TRUE; - } - - return GL_FALSE; -} /** diff --git a/mesalib/src/mesa/state_tracker/st_cb_fbo.h b/mesalib/src/mesa/state_tracker/st_cb_fbo.h index 506fd06d6..461dbe985 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_fbo.h +++ b/mesalib/src/mesa/state_tracker/st_cb_fbo.h @@ -76,9 +76,4 @@ st_new_renderbuffer_fb(enum pipe_format format, int samples, boolean sw); extern void st_init_fbo_functions(struct dd_function_table *functions); -extern GLboolean -st_is_depth_stencil_combined(const struct gl_renderbuffer_attachment *depth, - const struct gl_renderbuffer_attachment *stencil); - - #endif /* ST_CB_FBO_H */ diff --git a/mesalib/src/mesa/state_tracker/st_cb_readpixels.c b/mesalib/src/mesa/state_tracker/st_cb_readpixels.c index 6b824b161..bfed98870 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_readpixels.c +++ b/mesalib/src/mesa/state_tracker/st_cb_readpixels.c @@ -25,35 +25,209 @@ * **************************************************************************/ - +#include "main/image.h" +#include "main/pbo.h" #include "main/imports.h" #include "main/readpix.h" +#include "main/enums.h" +#include "main/framebuffer.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "st_cb_fbo.h" #include "st_atom.h" #include "st_context.h" #include "st_cb_bitmap.h" #include "st_cb_readpixels.h" +#include "state_tracker/st_cb_texture.h" +#include "state_tracker/st_format.h" +#include "state_tracker/st_texture.h" /** - * The only special thing we need to do for the state tracker's - * glReadPixels is to validate state (to be sure we have up-to-date - * framebuffer surfaces) and flush the bitmap cache prior to reading. + * This uses a blit to copy the read buffer to a texture format which matches + * the format and type combo and then a fast read-back is done using memcpy. + * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is + * a format which matches the swizzling. + * + * If such a format isn't available, we fall back to _mesa_readpixels. + * + * NOTE: Some drivers use a blit to convert between tiled and linear + * texture layouts during texture uploads/downloads, so the blit + * we do here should be free in such cases. */ static void st_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *pack, - GLvoid *dest) + GLvoid *pixels) { struct st_context *st = st_context(ctx); + struct gl_renderbuffer *rb = + _mesa_get_read_renderbuffer_for_format(ctx, format); + struct st_renderbuffer *strb = st_renderbuffer(rb); + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_resource *src; + struct pipe_resource *dst = NULL; + struct pipe_resource dst_templ; + enum pipe_format dst_format, src_format; + struct pipe_blit_info blit; + unsigned bind = PIPE_BIND_TRANSFER_READ; + struct pipe_transfer *tex_xfer; + ubyte *map = NULL; + /* Validate state (to be sure we have up-to-date framebuffer surfaces) + * and flush the bitmap cache prior to reading. */ st_validate_state(st); st_flush_bitmap_cache(st); - _mesa_readpixels(ctx, x, y, width, height, format, type, pack, dest); -} + if (!st->prefer_blit_based_texture_transfer) { + goto fallback; + } + + /* This must be done after state validation. */ + src = strb->texture; + + /* XXX Fallback for depth-stencil formats due to an incomplete + * stencil blit implementation in some drivers. */ + if (format == GL_DEPTH_STENCIL) { + goto fallback; + } + + /* We are creating a texture of the size of the region being read back. + * Need to check for NPOT texture support. */ + if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) && + (!util_is_power_of_two(width) || + !util_is_power_of_two(height))) { + goto fallback; + } + + /* If the base internal format and the texture format don't match, we have + * to use the slow path. */ + if (rb->_BaseFormat != + _mesa_get_format_base_format(rb->Format)) { + goto fallback; + } + + /* See if the texture format already matches the format and type, + * in which case the memcpy-based fast path will likely be used and + * we don't have to blit. */ + if (_mesa_format_matches_format_and_type(rb->Format, format, + type, pack->SwapBytes)) { + goto fallback; + } + + if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) { + goto fallback; + } + + /* Convert the source format to what is expected by ReadPixels + * and see if it's supported. */ + src_format = util_format_linear(src->format); + src_format = util_format_luminance_to_red(src_format); + src_format = util_format_intensity_to_red(src_format); + + if (!src_format || + !screen->is_format_supported(screen, src_format, src->target, + src->nr_samples, + PIPE_BIND_SAMPLER_VIEW)) { + printf("fallback: src format unsupported %s\n", util_format_short_name(src_format)); + goto fallback; + } + + if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL) + bind |= PIPE_BIND_DEPTH_STENCIL; + else + bind |= PIPE_BIND_RENDER_TARGET; + + /* Choose the destination format by finding the best match + * for the format+type combo. */ + dst_format = st_choose_matching_format(screen, bind, format, type, + pack->SwapBytes); + if (dst_format == PIPE_FORMAT_NONE) { + printf("fallback: no matching format for %s, %s\n", + _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type)); + goto fallback; + } + + /* create the destination texture */ + memset(&dst_templ, 0, sizeof(dst_templ)); + dst_templ.target = PIPE_TEXTURE_2D; + dst_templ.format = dst_format; + dst_templ.bind = bind; + dst_templ.usage = PIPE_USAGE_STAGING; + + st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1, + &dst_templ.width0, &dst_templ.height0, + &dst_templ.depth0, &dst_templ.array_size); + + dst = screen->resource_create(screen, &dst_templ); + if (!dst) { + goto fallback; + } + + blit.src.resource = src; + blit.src.level = strb->rtt_level; + blit.src.format = src_format; + blit.dst.resource = dst; + blit.dst.level = 0; + blit.dst.format = dst->format; + blit.src.box.x = x; + blit.dst.box.x = 0; + blit.src.box.y = y; + blit.dst.box.y = 0; + blit.src.box.z = strb->rtt_face + strb->rtt_slice; + blit.dst.box.z = 0; + blit.src.box.width = blit.dst.box.width = width; + blit.src.box.height = blit.dst.box.height = height; + blit.src.box.depth = blit.dst.box.depth = 1; + blit.mask = st_get_blit_mask(rb->_BaseFormat, format); + blit.filter = PIPE_TEX_FILTER_NEAREST; + blit.scissor_enable = FALSE; + + if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { + blit.src.box.y = rb->Height - blit.src.box.y; + blit.src.box.height = -blit.src.box.height; + } + + /* blit */ + st->pipe->blit(st->pipe, &blit); + + /* map resources */ + pixels = _mesa_map_pbo_dest(ctx, pack, pixels); + + map = pipe_transfer_map_3d(pipe, dst, 0, PIPE_TRANSFER_READ, + 0, 0, 0, width, height, 1, &tex_xfer); + if (!map) { + _mesa_unmap_pbo_dest(ctx, pack); + pipe_resource_reference(&dst, NULL); + goto fallback; + } + + /* memcpy data into a user buffer */ + { + const uint bytesPerRow = width * util_format_get_blocksize(dst_format); + GLuint row; + + for (row = 0; row < height; row++) { + GLvoid *dest = _mesa_image_address3d(pack, pixels, + width, height, format, + type, 0, row, 0); + memcpy(dest, map, bytesPerRow); + map += tex_xfer->stride; + } + } + + pipe_transfer_unmap(pipe, tex_xfer); + _mesa_unmap_pbo_dest(ctx, pack); + pipe_resource_reference(&dst, NULL); + return; + +fallback: + _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); +} void st_init_readpixels_functions(struct dd_function_table *functions) { diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.c b/mesalib/src/mesa/state_tracker/st_cb_texture.c index c922a3164..94fbbf7be 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_texture.c +++ b/mesalib/src/mesa/state_tracker/st_cb_texture.c @@ -68,7 +68,7 @@ #define DBG if (0) printf -static enum pipe_texture_target +enum pipe_texture_target gl_target_to_pipe(GLenum target) { switch (target) { @@ -542,8 +542,8 @@ prep_teximage(struct gl_context *ctx, struct gl_texture_image *texImage, * Return a writemask for the gallium blit. The parameters can be base * formats or "format" from glDrawPixels/glTexImage/glGetTexImage. */ -static unsigned -get_blit_mask(GLenum srcFormat, GLenum dstFormat) +unsigned +st_get_blit_mask(GLenum srcFormat, GLenum dstFormat) { switch (dstFormat) { case GL_DEPTH_STENCIL: @@ -608,6 +608,10 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, unsigned bind; GLubyte *map; + if (!st->prefer_blit_based_texture_transfer) { + goto fallback; + } + if (!dst) { goto fallback; } @@ -769,7 +773,7 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, blit.src.box.width = blit.dst.box.width = width; blit.src.box.height = blit.dst.box.height = height; blit.src.box.depth = blit.dst.box.depth = depth; - blit.mask = get_blit_mask(format, texImage->_BaseFormat); + blit.mask = st_get_blit_mask(format, texImage->_BaseFormat); blit.filter = PIPE_TEX_FILTER_NEAREST; blit.scissor_enable = FALSE; @@ -860,6 +864,10 @@ st_GetTexImage(struct gl_context * ctx, ubyte *map = NULL; boolean done = FALSE; + if (!st->prefer_blit_based_texture_transfer) { + goto fallback; + } + if (!stImage->pt) { goto fallback; } @@ -996,7 +1004,7 @@ st_GetTexImage(struct gl_context * ctx, blit.src.box.width = blit.dst.box.width = width; blit.src.box.height = blit.dst.box.height = height; blit.src.box.depth = blit.dst.box.depth = depth; - blit.mask = get_blit_mask(texImage->_BaseFormat, format); + blit.mask = st_get_blit_mask(texImage->_BaseFormat, format); blit.filter = PIPE_TEX_FILTER_NEAREST; blit.scissor_enable = FALSE; @@ -1370,7 +1378,7 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint dims, blit.dst.box.width = width; blit.dst.box.height = height; blit.dst.box.depth = 1; - blit.mask = get_blit_mask(rb->_BaseFormat, texImage->_BaseFormat); + blit.mask = st_get_blit_mask(rb->_BaseFormat, texImage->_BaseFormat); blit.filter = PIPE_TEX_FILTER_NEAREST; /* 1D array textures need special treatment. diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.h b/mesalib/src/mesa/state_tracker/st_cb_texture.h index 27956bcc2..7f70d0b25 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_texture.h +++ b/mesalib/src/mesa/state_tracker/st_cb_texture.h @@ -38,6 +38,12 @@ struct gl_texture_object; struct pipe_context; struct st_context; +extern enum pipe_texture_target +gl_target_to_pipe(GLenum target); + +unsigned +st_get_blit_mask(GLenum srcFormat, GLenum dstFormat); + extern GLboolean st_finalize_texture(struct gl_context *ctx, struct pipe_context *pipe, diff --git a/mesalib/src/mesa/state_tracker/st_context.c b/mesalib/src/mesa/state_tracker/st_context.c index f9a584ba0..cc87f2bb3 100644 --- a/mesalib/src/mesa/state_tracker/st_context.c +++ b/mesalib/src/mesa/state_tracker/st_context.c @@ -182,6 +182,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, st->has_stencil_export = screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT); st->has_shader_model3 = screen->get_param(screen, PIPE_CAP_SM3); + st->prefer_blit_based_texture_transfer = screen->get_param(screen, + PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER); + + st->needs_texcoord_semantic = + screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD); /* GL limits and extensions */ st_init_limits(st); diff --git a/mesalib/src/mesa/state_tracker/st_context.h b/mesalib/src/mesa/state_tracker/st_context.h index b9a98cd05..8786a036f 100644 --- a/mesalib/src/mesa/state_tracker/st_context.h +++ b/mesalib/src/mesa/state_tracker/st_context.h @@ -84,6 +84,9 @@ struct st_context boolean has_stencil_export; /**< can do shader stencil export? */ boolean has_time_elapsed; boolean has_shader_model3; + boolean prefer_blit_based_texture_transfer; + + boolean needs_texcoord_semantic; /* On old libGL's for linux we need to invalidate the drawables * on glViewpport calls, this is set via a option. diff --git a/mesalib/src/mesa/state_tracker/st_format.c b/mesalib/src/mesa/state_tracker/st_format.c index 5fd44e76d..a15706a03 100644 --- a/mesalib/src/mesa/state_tracker/st_format.c +++ b/mesalib/src/mesa/state_tracker/st_format.c @@ -1769,13 +1769,15 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target, * Called via ctx->Driver.ChooseTextureFormat(). */ size_t -st_QuerySamplesForFormat(struct gl_context *ctx, GLenum internalFormat, - int samples[16]) +st_QuerySamplesForFormat(struct gl_context *ctx, GLenum target, + GLenum internalFormat, int samples[16]) { struct st_context *st = st_context(ctx); enum pipe_format format; unsigned i, bind, num_sample_counts = 0; + (void) target; + if (_mesa_is_depth_or_stencil_format(internalFormat)) bind = PIPE_BIND_DEPTH_STENCIL; else diff --git a/mesalib/src/mesa/state_tracker/st_format.h b/mesalib/src/mesa/state_tracker/st_format.h index 3db409b74..0a1c18d92 100644 --- a/mesalib/src/mesa/state_tracker/st_format.h +++ b/mesalib/src/mesa/state_tracker/st_format.h @@ -67,8 +67,8 @@ st_ChooseTextureFormat(struct gl_context * ctx, GLenum target, GLenum format, GLenum type); size_t -st_QuerySamplesForFormat(struct gl_context *ctx, GLenum internalFormat, - int samples[16]); +st_QuerySamplesForFormat(struct gl_context *ctx, GLenum target, + GLenum internalFormat, int samples[16]); /* can we use a sampler view to translate these formats only used to make TFP so far */ diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 0cef092d3..e3718eeda 100644 --- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -85,6 +85,11 @@ extern "C" { */ #define MAX_TEMPS 4096 +/** + * Maximum number of arrays + */ +#define MAX_ARRAYS 256 + /* will be 4 for GLSL 4.00 */ #define MAX_GLSL_TEXTURE_OFFSET 1 @@ -315,9 +320,11 @@ public: int next_temp; + unsigned array_sizes[MAX_ARRAYS]; + unsigned next_array; + int num_address_regs; int samplers_used; - bool indirect_addr_temps; bool indirect_addr_consts; int glsl_version; @@ -549,9 +556,6 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, /* Update indirect addressing status used by TGSI */ if (dst.reladdr) { switch(dst.file) { - case PROGRAM_TEMPORARY: - this->indirect_addr_temps = true; - break; case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: @@ -570,9 +574,6 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op, for (i=0; i<3; i++) { if(inst->src[i].reladdr) { switch(inst->src[i].file) { - case PROGRAM_TEMPORARY: - this->indirect_addr_temps = true; - break; case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: @@ -1005,17 +1006,26 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type) st_src_reg src; src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; - src.file = PROGRAM_TEMPORARY; - src.index = next_temp; src.reladdr = NULL; - next_temp += type_size(type); + src.negate = 0; + + if (type->is_array() || type->is_matrix()) { + src.file = PROGRAM_ARRAY; + src.index = next_array << 16 | 0x8000; + array_sizes[next_array] = type_size(type); + ++next_array; + + } else { + src.file = PROGRAM_TEMPORARY; + src.index = next_temp; + next_temp += type_size(type); + } if (type->is_array() || type->is_record()) { src.swizzle = SWIZZLE_NOOP; } else { src.swizzle = swizzle_for_size(type->vector_elements); } - src.negate = 0; return src; } @@ -1078,13 +1088,11 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) */ assert((int) ir->num_state_slots == type_size(ir->type)); - storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, - this->next_temp); - this->variables.push_tail(storage); - this->next_temp += type_size(ir->type); + dst = st_dst_reg(get_temp(ir->type)); + + storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index); - dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, - native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); + this->variables.push_tail(storage); } @@ -2052,11 +2060,11 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) break; case ir_var_auto: case ir_var_temporary: - entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, - this->next_temp); + st_src_reg src = get_temp(var->type); + + entry = new(mem_ctx) variable_storage(var, src.file, src.index); this->variables.push_tail(entry); - next_temp += type_size(var->type); break; } @@ -2574,11 +2582,10 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) storage = find_variable_storage(param); assert(!storage); - storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, - this->next_temp); - this->variables.push_tail(storage); + st_src_reg src = get_temp(param->type); - this->next_temp += type_size(param->type); + storage = new(mem_ctx) variable_storage(param, src.file, src.index); + this->variables.push_tail(storage); } if (!sig->return_type->is_void()) { @@ -2978,12 +2985,12 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() { result.file = PROGRAM_UNDEFINED; next_temp = 1; + next_array = 0; next_signature_id = 1; num_immediates = 0; current_function = NULL; num_address_regs = 0; samplers_used = 0; - indirect_addr_temps = false; indirect_addr_consts = false; glsl_version = 0; native_integers = false; @@ -3183,7 +3190,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void) assert(inst->dst.index < MAX_TEMPS); prevWriteMask = tempWrites[inst->dst.index]; tempWrites[inst->dst.index] |= inst->dst.writemask; - } + } else + break; /* For a CMP to be considered a conditional write, the destination * register and source register two must be the same. */ @@ -3821,7 +3829,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->next_temp = original->next_temp; v->num_address_regs = original->num_address_regs; v->samplers_used = prog->SamplersUsed = original->samplers_used; - v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); v->num_immediates = original->num_immediates; @@ -3952,7 +3959,6 @@ get_bitmap_visitor(struct st_fragment_program *fp, v->next_temp = original->next_temp; v->num_address_regs = original->num_address_regs; v->samplers_used = prog->SamplersUsed = original->samplers_used; - v->indirect_addr_temps = original->indirect_addr_temps; v->indirect_addr_consts = original->indirect_addr_consts; memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); v->num_immediates = original->num_immediates; @@ -4014,6 +4020,7 @@ struct st_translate { struct ureg_program *ureg; struct ureg_dst temps[MAX_TEMPS]; + struct ureg_dst arrays[MAX_ARRAYS]; struct ureg_src *constants; struct ureg_src *immediates; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; @@ -4022,6 +4029,8 @@ struct st_translate { struct ureg_src samplers[PIPE_MAX_SAMPLERS]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; + unsigned array_sizes[MAX_ARRAYS]; + const GLuint *inputMapping; const GLuint *outputMapping; @@ -4132,16 +4141,34 @@ dst_register(struct st_translate *t, gl_register_file file, GLuint index) { + unsigned array; + switch(file) { case PROGRAM_UNDEFINED: return ureg_dst_undef(); case PROGRAM_TEMPORARY: + assert(index >= 0); + assert(index < (int) Elements(t->temps)); + if (ureg_dst_is_undef(t->temps[index])) t->temps[index] = ureg_DECL_local_temporary(t->ureg); return t->temps[index]; + case PROGRAM_ARRAY: + array = index >> 16; + + assert(array >= 0); + assert(array < (int) Elements(t->arrays)); + + if (ureg_dst_is_undef(t->arrays[array])) + t->arrays[array] = ureg_DECL_array_temporary( + t->ureg, t->array_sizes[array], TRUE); + + return ureg_dst_array_offset(t->arrays[array], + (int)(index & 0xFFFF) - 0x8000); + case PROGRAM_OUTPUT: if (t->procType == TGSI_PROCESSOR_VERTEX) assert(index < VARYING_SLOT_MAX); @@ -4176,11 +4203,8 @@ src_register(struct st_translate *t, return ureg_src_undef(); case PROGRAM_TEMPORARY: - assert(index >= 0); - assert(index < (int) Elements(t->temps)); - if (ureg_dst_is_undef(t->temps[index])) - t->temps[index] = ureg_DECL_local_temporary(t->ureg); - return ureg_src(t->temps[index]); + case PROGRAM_ARRAY: + return ureg_src(dst_register(t, file, index)); case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: @@ -4262,8 +4286,10 @@ translate_dst(struct st_translate *t, } } - if (dst_reg->reladdr != NULL) + if (dst_reg->reladdr != NULL) { + assert(dst_reg->file != PROGRAM_TEMPORARY); dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); + } return dst; } @@ -4286,26 +4312,8 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg) src = ureg_negate(src); if (src_reg->reladdr != NULL) { - /* Normally ureg_src_indirect() would be used here, but a stupid compiler - * bug in g++ makes ureg_src_indirect (an inline C function) erroneously - * set the bit for src.Negate. So we have to do the operation manually - * here to work around the compiler's problems. */ - /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/ - struct ureg_src addr = ureg_src(t->address[0]); - src.Indirect = 1; - src.IndirectFile = addr.File; - src.IndirectIndex = addr.Index; - src.IndirectSwizzle = addr.SwizzleX; - - if (src_reg->file != PROGRAM_INPUT && - src_reg->file != PROGRAM_OUTPUT) { - /* If src_reg->index was negative, it was set to zero in - * src_register(). Reassign it now. But don't do this - * for input/output regs since they get remapped while - * const buffers don't. - */ - src.Index = src_reg->index; - } + assert(src_reg->file != PROGRAM_TEMPORARY); + src = ureg_src_indirect(src, ureg_src(t->address[0])); } return src; @@ -4820,16 +4828,9 @@ st_translate_program( } } - if (program->indirect_addr_temps) { - /* If temps are accessed with indirect addressing, declare temporaries - * in sequential order. Else, we declare them on demand elsewhere. - * (Note: the number of temporaries is equal to program->next_temp) - */ - for (i = 0; i < (unsigned)program->next_temp; i++) { - /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ - t->temps[i] = ureg_DECL_local_temporary(t->ureg); - } - } + /* Copy over array sizes + */ + memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array); /* Emit constants and uniforms. TGSI uses a single index space for these, * so we put all the translated regs in t->constants. @@ -5064,16 +5065,9 @@ get_mesa_program(struct gl_context *ctx, v->copy_propagate(); while (v->eliminate_dead_code_advanced()); - /* FIXME: These passes to optimize temporary registers don't work when there - * is indirect addressing of the temporary register space. We need proper - * array support so that we don't have to give up these passes in every - * shader that uses arrays. - */ - if (!v->indirect_addr_temps) { - v->eliminate_dead_code(); - v->merge_registers(); - v->renumber_registers(); - } + v->eliminate_dead_code(); + v->merge_registers(); + v->renumber_registers(); /* Write the END instruction. */ v->emit(NULL, TGSI_OPCODE_END); diff --git a/mesalib/src/mesa/state_tracker/st_program.c b/mesalib/src/mesa/state_tracker/st_program.c index 6af8df316..7a38da84f 100644 --- a/mesalib/src/mesa/state_tracker/st_program.c +++ b/mesalib/src/mesa/state_tracker/st_program.c @@ -177,6 +177,7 @@ void st_prepare_vertex_program(struct gl_context *ctx, struct st_vertex_program *stvp) { + struct st_context *st = st_context(ctx); GLuint attr; stvp->num_inputs = 0; @@ -267,7 +268,8 @@ st_prepare_vertex_program(struct gl_context *ctx, case VARYING_SLOT_TEX5: case VARYING_SLOT_TEX6: case VARYING_SLOT_TEX7: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + stvp->output_semantic_name[slot] = st->needs_texcoord_semantic ? + TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; stvp->output_semantic_index[slot] = attr - VARYING_SLOT_TEX0; break; @@ -275,10 +277,8 @@ st_prepare_vertex_program(struct gl_context *ctx, default: assert(attr < VARYING_SLOT_MAX); stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - stvp->output_semantic_index[slot] = (VARYING_SLOT_VAR0 - - VARYING_SLOT_TEX0 + - attr - - VARYING_SLOT_VAR0); + stvp->output_semantic_index[slot] = st->needs_texcoord_semantic ? + (attr - VARYING_SLOT_VAR0) : (attr - VARYING_SLOT_TEX0); break; } } @@ -585,11 +585,18 @@ st_translate_fragment_program(struct st_context *st, * fragment shader plus fixed-function hardware (such as * BFC). * - * There is no requirement that semantic indexes start at - * zero or be restricted to a particular range -- nobody - * should be building tables based on semantic index. + * However, some drivers may need us to identify the PNTC and TEXi + * varyings if, for example, their capability to replace them with + * sprite coordinates is limited. */ case VARYING_SLOT_PNTC: + if (st->needs_texcoord_semantic) { + input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD; + input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + } + /* fall through */ case VARYING_SLOT_TEX0: case VARYING_SLOT_TEX1: case VARYING_SLOT_TEX2: @@ -598,13 +605,29 @@ st_translate_fragment_program(struct st_context *st, case VARYING_SLOT_TEX5: case VARYING_SLOT_TEX6: case VARYING_SLOT_TEX7: + if (st->needs_texcoord_semantic) { + input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD; + input_semantic_index[slot] = attr - VARYING_SLOT_TEX0; + interpMode[slot] = + st_translate_interp(stfp->Base.InterpQualifier[attr], FALSE); + break; + } + /* fall through */ case VARYING_SLOT_VAR0: default: - /* Actually, let's try and zero-base this just for - * readability of the generated TGSI. + /* Semantic indices should be zero-based because drivers may choose + * to assign a fixed slot determined by that index. + * This is useful because ARB_separate_shader_objects uses location + * qualifiers for linkage, and if the semantic index corresponds to + * these locations, linkage passes in the driver become unecessary. + * + * If needs_texcoord_semantic is true, no semantic indices will be + * consumed for the TEXi varyings, and we can base the locations of + * the user varyings on VAR0. Otherwise, we use TEX0 as base index. */ assert(attr >= VARYING_SLOT_TEX0); - input_semantic_index[slot] = (attr - VARYING_SLOT_TEX0); + input_semantic_index[slot] = st->needs_texcoord_semantic ? + (attr - VARYING_SLOT_VAR0) : (attr - VARYING_SLOT_TEX0); input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; if (attr == VARYING_SLOT_PNTC) interpMode[slot] = TGSI_INTERPOLATE_LINEAR; |