aboutsummaryrefslogtreecommitdiff
path: root/mesalib/src/mesa/state_tracker
diff options
context:
space:
mode:
Diffstat (limited to 'mesalib/src/mesa/state_tracker')
-rw-r--r-- mesalib/src/mesa/state_tracker/st_cb_blit.c 21
-rw-r--r-- mesalib/src/mesa/state_tracker/st_cb_fbo.c 24
-rw-r--r-- mesalib/src/mesa/state_tracker/st_cb_fbo.h 5
-rw-r--r-- mesalib/src/mesa/state_tracker/st_cb_readpixels.c 188
-rw-r--r-- mesalib/src/mesa/state_tracker/st_cb_texture.c 20
-rw-r--r-- mesalib/src/mesa/state_tracker/st_cb_texture.h 6
-rw-r--r-- mesalib/src/mesa/state_tracker/st_context.c 5
-rw-r--r-- mesalib/src/mesa/state_tracker/st_context.h 3
-rw-r--r-- mesalib/src/mesa/state_tracker/st_format.c 6
-rw-r--r-- mesalib/src/mesa/state_tracker/st_format.h 4
-rw-r--r-- mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 142
-rw-r--r-- mesalib/src/mesa/state_tracker/st_program.c 45
12 files changed, 323 insertions, 146 deletions
diff --git a/mesalib/src/mesa/state_tracker/st_cb_blit.c b/mesalib/src/mesa/state_tracker/st_cb_blit.c
index c463e3b04..50cab4294 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_blit.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_blit.c
@@ -239,31 +239,22 @@ st_BlitFramebuffer(struct gl_context *ctx,
/* depth and/or stencil blit */
/* get src/dst depth surfaces */
- struct gl_renderbuffer_attachment *srcDepth =
- &readFB->Attachment[BUFFER_DEPTH];
- struct gl_renderbuffer_attachment *dstDepth =
- &drawFB->Attachment[BUFFER_DEPTH];
- struct gl_renderbuffer_attachment *srcStencil =
- &readFB->Attachment[BUFFER_STENCIL];
- struct gl_renderbuffer_attachment *dstStencil =
- &drawFB->Attachment[BUFFER_STENCIL];
-
struct st_renderbuffer *srcDepthRb =
- st_renderbuffer(srcDepth->Renderbuffer);
+ st_renderbuffer(readFB->Attachment[BUFFER_DEPTH].Renderbuffer);
struct st_renderbuffer *dstDepthRb =
- st_renderbuffer(dstDepth->Renderbuffer);
+ st_renderbuffer(drawFB->Attachment[BUFFER_DEPTH].Renderbuffer);
struct pipe_surface *dstDepthSurf =
dstDepthRb ? dstDepthRb->surface : NULL;
struct st_renderbuffer *srcStencilRb =
- st_renderbuffer(srcStencil->Renderbuffer);
+ st_renderbuffer(readFB->Attachment[BUFFER_STENCIL].Renderbuffer);
struct st_renderbuffer *dstStencilRb =
- st_renderbuffer(dstStencil->Renderbuffer);
+ st_renderbuffer(drawFB->Attachment[BUFFER_STENCIL].Renderbuffer);
struct pipe_surface *dstStencilSurf =
dstStencilRb ? dstStencilRb->surface : NULL;
- if (st_is_depth_stencil_combined(srcDepth, srcStencil) &&
- st_is_depth_stencil_combined(dstDepth, dstStencil)) {
+ if (_mesa_has_depthstencil_combined(readFB) &&
+ _mesa_has_depthstencil_combined(drawFB)) {
blit.mask = 0;
if (mask & GL_DEPTH_BUFFER_BIT)
blit.mask |= PIPE_MASK_Z;
diff --git a/mesalib/src/mesa/state_tracker/st_cb_fbo.c b/mesalib/src/mesa/state_tracker/st_cb_fbo.c
index 87c5b048c..4452e523b 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_fbo.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_fbo.c
@@ -547,30 +547,6 @@ st_validate_attachment(struct gl_context *ctx,
return valid;
}
-
-
-/**
- * Check if two renderbuffer attachments name a combined depth/stencil
- * renderbuffer.
- */
-GLboolean
-st_is_depth_stencil_combined(const struct gl_renderbuffer_attachment *depth,
- const struct gl_renderbuffer_attachment *stencil)
-{
- assert(depth && stencil);
-
- if (depth->Type == stencil->Type) {
- if (depth->Type == GL_RENDERBUFFER_EXT &&
- depth->Renderbuffer == stencil->Renderbuffer)
- return GL_TRUE;
-
- if (depth->Type == GL_TEXTURE &&
- depth->Texture == stencil->Texture)
- return GL_TRUE;
- }
-
- return GL_FALSE;
-}
/**
diff --git a/mesalib/src/mesa/state_tracker/st_cb_fbo.h b/mesalib/src/mesa/state_tracker/st_cb_fbo.h
index 506fd06d6..461dbe985 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_fbo.h
+++ b/mesalib/src/mesa/state_tracker/st_cb_fbo.h
@@ -76,9 +76,4 @@ st_new_renderbuffer_fb(enum pipe_format format, int samples, boolean sw);
extern void
st_init_fbo_functions(struct dd_function_table *functions);
-extern GLboolean
-st_is_depth_stencil_combined(const struct gl_renderbuffer_attachment *depth,
- const struct gl_renderbuffer_attachment *stencil);
-
-
#endif /* ST_CB_FBO_H */
diff --git a/mesalib/src/mesa/state_tracker/st_cb_readpixels.c b/mesalib/src/mesa/state_tracker/st_cb_readpixels.c
index 6b824b161..bfed98870 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_readpixels.c
@@ -25,35 +25,209 @@
*
**************************************************************************/
-
+#include "main/image.h"
+#include "main/pbo.h"
#include "main/imports.h"
#include "main/readpix.h"
+#include "main/enums.h"
+#include "main/framebuffer.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "st_cb_fbo.h"
#include "st_atom.h"
#include "st_context.h"
#include "st_cb_bitmap.h"
#include "st_cb_readpixels.h"
+#include "state_tracker/st_cb_texture.h"
+#include "state_tracker/st_format.h"
+#include "state_tracker/st_texture.h"
/**
- * The only special thing we need to do for the state tracker's
- * glReadPixels is to validate state (to be sure we have up-to-date
- * framebuffer surfaces) and flush the bitmap cache prior to reading.
+ * This uses a blit to copy the read buffer to a texture format which matches
+ * the format and type combo and then a fast read-back is done using memcpy.
+ * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is
+ * a format which matches the swizzling.
+ *
+ * If such a format isn't available, we fall back to _mesa_readpixels.
+ *
+ * NOTE: Some drivers use a blit to convert between tiled and linear
+ * texture layouts during texture uploads/downloads, so the blit
+ * we do here should be free in such cases.
*/
static void
st_readpixels(struct gl_context *ctx, GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack,
- GLvoid *dest)
+ GLvoid *pixels)
{
struct st_context *st = st_context(ctx);
+ struct gl_renderbuffer *rb =
+ _mesa_get_read_renderbuffer_for_format(ctx, format);
+ struct st_renderbuffer *strb = st_renderbuffer(rb);
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_resource *src;
+ struct pipe_resource *dst = NULL;
+ struct pipe_resource dst_templ;
+ enum pipe_format dst_format, src_format;
+ struct pipe_blit_info blit;
+ unsigned bind = PIPE_BIND_TRANSFER_READ;
+ struct pipe_transfer *tex_xfer;
+ ubyte *map = NULL;
+ /* Validate state (to be sure we have up-to-date framebuffer surfaces)
+ * and flush the bitmap cache prior to reading. */
st_validate_state(st);
st_flush_bitmap_cache(st);
- _mesa_readpixels(ctx, x, y, width, height, format, type, pack, dest);
-}
+ if (!st->prefer_blit_based_texture_transfer) {
+ goto fallback;
+ }
+
+ /* This must be done after state validation. */
+ src = strb->texture;
+
+ /* XXX Fallback for depth-stencil formats due to an incomplete
+ * stencil blit implementation in some drivers. */
+ if (format == GL_DEPTH_STENCIL) {
+ goto fallback;
+ }
+
+ /* We are creating a texture of the size of the region being read back.
+ * Need to check for NPOT texture support. */
+ if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
+ (!util_is_power_of_two(width) ||
+ !util_is_power_of_two(height))) {
+ goto fallback;
+ }
+
+ /* If the base internal format and the texture format don't match, we have
+ * to use the slow path. */
+ if (rb->_BaseFormat !=
+ _mesa_get_format_base_format(rb->Format)) {
+ goto fallback;
+ }
+
+ /* See if the texture format already matches the format and type,
+ * in which case the memcpy-based fast path will likely be used and
+ * we don't have to blit. */
+ if (_mesa_format_matches_format_and_type(rb->Format, format,
+ type, pack->SwapBytes)) {
+ goto fallback;
+ }
+
+ if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) {
+ goto fallback;
+ }
+
+ /* Convert the source format to what is expected by ReadPixels
+ * and see if it's supported. */
+ src_format = util_format_linear(src->format);
+ src_format = util_format_luminance_to_red(src_format);
+ src_format = util_format_intensity_to_red(src_format);
+
+ if (!src_format ||
+ !screen->is_format_supported(screen, src_format, src->target,
+ src->nr_samples,
+ PIPE_BIND_SAMPLER_VIEW)) {
+ printf("fallback: src format unsupported %s\n", util_format_short_name(src_format));
+ goto fallback;
+ }
+
+ if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
+ bind |= PIPE_BIND_DEPTH_STENCIL;
+ else
+ bind |= PIPE_BIND_RENDER_TARGET;
+
+ /* Choose the destination format by finding the best match
+ * for the format+type combo. */
+ dst_format = st_choose_matching_format(screen, bind, format, type,
+ pack->SwapBytes);
+ if (dst_format == PIPE_FORMAT_NONE) {
+ printf("fallback: no matching format for %s, %s\n",
+ _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type));
+ goto fallback;
+ }
+
+ /* create the destination texture */
+ memset(&dst_templ, 0, sizeof(dst_templ));
+ dst_templ.target = PIPE_TEXTURE_2D;
+ dst_templ.format = dst_format;
+ dst_templ.bind = bind;
+ dst_templ.usage = PIPE_USAGE_STAGING;
+
+ st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1,
+ &dst_templ.width0, &dst_templ.height0,
+ &dst_templ.depth0, &dst_templ.array_size);
+
+ dst = screen->resource_create(screen, &dst_templ);
+ if (!dst) {
+ goto fallback;
+ }
+
+ blit.src.resource = src;
+ blit.src.level = strb->rtt_level;
+ blit.src.format = src_format;
+ blit.dst.resource = dst;
+ blit.dst.level = 0;
+ blit.dst.format = dst->format;
+ blit.src.box.x = x;
+ blit.dst.box.x = 0;
+ blit.src.box.y = y;
+ blit.dst.box.y = 0;
+ blit.src.box.z = strb->rtt_face + strb->rtt_slice;
+ blit.dst.box.z = 0;
+ blit.src.box.width = blit.dst.box.width = width;
+ blit.src.box.height = blit.dst.box.height = height;
+ blit.src.box.depth = blit.dst.box.depth = 1;
+ blit.mask = st_get_blit_mask(rb->_BaseFormat, format);
+ blit.filter = PIPE_TEX_FILTER_NEAREST;
+ blit.scissor_enable = FALSE;
+
+ if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
+ blit.src.box.y = rb->Height - blit.src.box.y;
+ blit.src.box.height = -blit.src.box.height;
+ }
+
+ /* blit */
+ st->pipe->blit(st->pipe, &blit);
+
+ /* map resources */
+ pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
+
+ map = pipe_transfer_map_3d(pipe, dst, 0, PIPE_TRANSFER_READ,
+ 0, 0, 0, width, height, 1, &tex_xfer);
+ if (!map) {
+ _mesa_unmap_pbo_dest(ctx, pack);
+ pipe_resource_reference(&dst, NULL);
+ goto fallback;
+ }
+
+ /* memcpy data into a user buffer */
+ {
+ const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
+ GLuint row;
+
+ for (row = 0; row < height; row++) {
+ GLvoid *dest = _mesa_image_address3d(pack, pixels,
+ width, height, format,
+ type, 0, row, 0);
+ memcpy(dest, map, bytesPerRow);
+ map += tex_xfer->stride;
+ }
+ }
+
+ pipe_transfer_unmap(pipe, tex_xfer);
+ _mesa_unmap_pbo_dest(ctx, pack);
+ pipe_resource_reference(&dst, NULL);
+ return;
+
+fallback:
+ _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
+}
void st_init_readpixels_functions(struct dd_function_table *functions)
{
diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.c b/mesalib/src/mesa/state_tracker/st_cb_texture.c
index c922a3164..94fbbf7be 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_texture.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_texture.c
@@ -68,7 +68,7 @@
#define DBG if (0) printf
-static enum pipe_texture_target
+enum pipe_texture_target
gl_target_to_pipe(GLenum target)
{
switch (target) {
@@ -542,8 +542,8 @@ prep_teximage(struct gl_context *ctx, struct gl_texture_image *texImage,
* Return a writemask for the gallium blit. The parameters can be base
* formats or "format" from glDrawPixels/glTexImage/glGetTexImage.
*/
-static unsigned
-get_blit_mask(GLenum srcFormat, GLenum dstFormat)
+unsigned
+st_get_blit_mask(GLenum srcFormat, GLenum dstFormat)
{
switch (dstFormat) {
case GL_DEPTH_STENCIL:
@@ -608,6 +608,10 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
unsigned bind;
GLubyte *map;
+ if (!st->prefer_blit_based_texture_transfer) {
+ goto fallback;
+ }
+
if (!dst) {
goto fallback;
}
@@ -769,7 +773,7 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
blit.src.box.width = blit.dst.box.width = width;
blit.src.box.height = blit.dst.box.height = height;
blit.src.box.depth = blit.dst.box.depth = depth;
- blit.mask = get_blit_mask(format, texImage->_BaseFormat);
+ blit.mask = st_get_blit_mask(format, texImage->_BaseFormat);
blit.filter = PIPE_TEX_FILTER_NEAREST;
blit.scissor_enable = FALSE;
@@ -860,6 +864,10 @@ st_GetTexImage(struct gl_context * ctx,
ubyte *map = NULL;
boolean done = FALSE;
+ if (!st->prefer_blit_based_texture_transfer) {
+ goto fallback;
+ }
+
if (!stImage->pt) {
goto fallback;
}
@@ -996,7 +1004,7 @@ st_GetTexImage(struct gl_context * ctx,
blit.src.box.width = blit.dst.box.width = width;
blit.src.box.height = blit.dst.box.height = height;
blit.src.box.depth = blit.dst.box.depth = depth;
- blit.mask = get_blit_mask(texImage->_BaseFormat, format);
+ blit.mask = st_get_blit_mask(texImage->_BaseFormat, format);
blit.filter = PIPE_TEX_FILTER_NEAREST;
blit.scissor_enable = FALSE;
@@ -1370,7 +1378,7 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint dims,
blit.dst.box.width = width;
blit.dst.box.height = height;
blit.dst.box.depth = 1;
- blit.mask = get_blit_mask(rb->_BaseFormat, texImage->_BaseFormat);
+ blit.mask = st_get_blit_mask(rb->_BaseFormat, texImage->_BaseFormat);
blit.filter = PIPE_TEX_FILTER_NEAREST;
/* 1D array textures need special treatment.
diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.h b/mesalib/src/mesa/state_tracker/st_cb_texture.h
index 27956bcc2..7f70d0b25 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_texture.h
+++ b/mesalib/src/mesa/state_tracker/st_cb_texture.h
@@ -38,6 +38,12 @@ struct gl_texture_object;
struct pipe_context;
struct st_context;
+extern enum pipe_texture_target
+gl_target_to_pipe(GLenum target);
+
+unsigned
+st_get_blit_mask(GLenum srcFormat, GLenum dstFormat);
+
extern GLboolean
st_finalize_texture(struct gl_context *ctx,
struct pipe_context *pipe,
diff --git a/mesalib/src/mesa/state_tracker/st_context.c b/mesalib/src/mesa/state_tracker/st_context.c
index f9a584ba0..cc87f2bb3 100644
--- a/mesalib/src/mesa/state_tracker/st_context.c
+++ b/mesalib/src/mesa/state_tracker/st_context.c
@@ -182,6 +182,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
st->has_stencil_export =
screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT);
st->has_shader_model3 = screen->get_param(screen, PIPE_CAP_SM3);
+ st->prefer_blit_based_texture_transfer = screen->get_param(screen,
+ PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER);
+
+ st->needs_texcoord_semantic =
+ screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD);
/* GL limits and extensions */
st_init_limits(st);
diff --git a/mesalib/src/mesa/state_tracker/st_context.h b/mesalib/src/mesa/state_tracker/st_context.h
index b9a98cd05..8786a036f 100644
--- a/mesalib/src/mesa/state_tracker/st_context.h
+++ b/mesalib/src/mesa/state_tracker/st_context.h
@@ -84,6 +84,9 @@ struct st_context
boolean has_stencil_export; /**< can do shader stencil export? */
boolean has_time_elapsed;
boolean has_shader_model3;
+ boolean prefer_blit_based_texture_transfer;
+
+ boolean needs_texcoord_semantic;
/* On old libGL's for linux we need to invalidate the drawables
* on glViewpport calls, this is set via a option.
diff --git a/mesalib/src/mesa/state_tracker/st_format.c b/mesalib/src/mesa/state_tracker/st_format.c
index 5fd44e76d..a15706a03 100644
--- a/mesalib/src/mesa/state_tracker/st_format.c
+++ b/mesalib/src/mesa/state_tracker/st_format.c
@@ -1769,13 +1769,15 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
* Called via ctx->Driver.ChooseTextureFormat().
*/
size_t
-st_QuerySamplesForFormat(struct gl_context *ctx, GLenum internalFormat,
- int samples[16])
+st_QuerySamplesForFormat(struct gl_context *ctx, GLenum target,
+ GLenum internalFormat, int samples[16])
{
struct st_context *st = st_context(ctx);
enum pipe_format format;
unsigned i, bind, num_sample_counts = 0;
+ (void) target;
+
if (_mesa_is_depth_or_stencil_format(internalFormat))
bind = PIPE_BIND_DEPTH_STENCIL;
else
diff --git a/mesalib/src/mesa/state_tracker/st_format.h b/mesalib/src/mesa/state_tracker/st_format.h
index 3db409b74..0a1c18d92 100644
--- a/mesalib/src/mesa/state_tracker/st_format.h
+++ b/mesalib/src/mesa/state_tracker/st_format.h
@@ -67,8 +67,8 @@ st_ChooseTextureFormat(struct gl_context * ctx, GLenum target,
GLenum format, GLenum type);
size_t
-st_QuerySamplesForFormat(struct gl_context *ctx, GLenum internalFormat,
- int samples[16]);
+st_QuerySamplesForFormat(struct gl_context *ctx, GLenum target,
+ GLenum internalFormat, int samples[16]);
/* can we use a sampler view to translate these formats
only used to make TFP so far */
diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 0cef092d3..e3718eeda 100644
--- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -85,6 +85,11 @@ extern "C" {
*/
#define MAX_TEMPS 4096
+/**
+ * Maximum number of arrays
+ */
+#define MAX_ARRAYS 256
+
/* will be 4 for GLSL 4.00 */
#define MAX_GLSL_TEXTURE_OFFSET 1
@@ -315,9 +320,11 @@ public:
int next_temp;
+ unsigned array_sizes[MAX_ARRAYS];
+ unsigned next_array;
+
int num_address_regs;
int samplers_used;
- bool indirect_addr_temps;
bool indirect_addr_consts;
int glsl_version;
@@ -549,9 +556,6 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
/* Update indirect addressing status used by TGSI */
if (dst.reladdr) {
switch(dst.file) {
- case PROGRAM_TEMPORARY:
- this->indirect_addr_temps = true;
- break;
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_STATE_VAR:
@@ -570,9 +574,6 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
for (i=0; i<3; i++) {
if(inst->src[i].reladdr) {
switch(inst->src[i].file) {
- case PROGRAM_TEMPORARY:
- this->indirect_addr_temps = true;
- break;
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_STATE_VAR:
@@ -1005,17 +1006,26 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
st_src_reg src;
src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
- src.file = PROGRAM_TEMPORARY;
- src.index = next_temp;
src.reladdr = NULL;
- next_temp += type_size(type);
+ src.negate = 0;
+
+ if (type->is_array() || type->is_matrix()) {
+ src.file = PROGRAM_ARRAY;
+ src.index = next_array << 16 | 0x8000;
+ array_sizes[next_array] = type_size(type);
+ ++next_array;
+
+ } else {
+ src.file = PROGRAM_TEMPORARY;
+ src.index = next_temp;
+ next_temp += type_size(type);
+ }
if (type->is_array() || type->is_record()) {
src.swizzle = SWIZZLE_NOOP;
} else {
src.swizzle = swizzle_for_size(type->vector_elements);
}
- src.negate = 0;
return src;
}
@@ -1078,13 +1088,11 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
*/
assert((int) ir->num_state_slots == type_size(ir->type));
- storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
- this->next_temp);
- this->variables.push_tail(storage);
- this->next_temp += type_size(ir->type);
+ dst = st_dst_reg(get_temp(ir->type));
+
+ storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);
- dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
- native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
+ this->variables.push_tail(storage);
}
@@ -2052,11 +2060,11 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
break;
case ir_var_auto:
case ir_var_temporary:
- entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
- this->next_temp);
+ st_src_reg src = get_temp(var->type);
+
+ entry = new(mem_ctx) variable_storage(var, src.file, src.index);
this->variables.push_tail(entry);
- next_temp += type_size(var->type);
break;
}
@@ -2574,11 +2582,10 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
storage = find_variable_storage(param);
assert(!storage);
- storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
- this->next_temp);
- this->variables.push_tail(storage);
+ st_src_reg src = get_temp(param->type);
- this->next_temp += type_size(param->type);
+ storage = new(mem_ctx) variable_storage(param, src.file, src.index);
+ this->variables.push_tail(storage);
}
if (!sig->return_type->is_void()) {
@@ -2978,12 +2985,12 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
{
result.file = PROGRAM_UNDEFINED;
next_temp = 1;
+ next_array = 0;
next_signature_id = 1;
num_immediates = 0;
current_function = NULL;
num_address_regs = 0;
samplers_used = 0;
- indirect_addr_temps = false;
indirect_addr_consts = false;
glsl_version = 0;
native_integers = false;
@@ -3183,7 +3190,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
assert(inst->dst.index < MAX_TEMPS);
prevWriteMask = tempWrites[inst->dst.index];
tempWrites[inst->dst.index] |= inst->dst.writemask;
- }
+ } else
+ break;
/* For a CMP to be considered a conditional write, the destination
* register and source register two must be the same. */
@@ -3821,7 +3829,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
v->next_temp = original->next_temp;
v->num_address_regs = original->num_address_regs;
v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_temps = original->indirect_addr_temps;
v->indirect_addr_consts = original->indirect_addr_consts;
memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
v->num_immediates = original->num_immediates;
@@ -3952,7 +3959,6 @@ get_bitmap_visitor(struct st_fragment_program *fp,
v->next_temp = original->next_temp;
v->num_address_regs = original->num_address_regs;
v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_temps = original->indirect_addr_temps;
v->indirect_addr_consts = original->indirect_addr_consts;
memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
v->num_immediates = original->num_immediates;
@@ -4014,6 +4020,7 @@ struct st_translate {
struct ureg_program *ureg;
struct ureg_dst temps[MAX_TEMPS];
+ struct ureg_dst arrays[MAX_ARRAYS];
struct ureg_src *constants;
struct ureg_src *immediates;
struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
@@ -4022,6 +4029,8 @@ struct st_translate {
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+ unsigned array_sizes[MAX_ARRAYS];
+
const GLuint *inputMapping;
const GLuint *outputMapping;
@@ -4132,16 +4141,34 @@ dst_register(struct st_translate *t,
gl_register_file file,
GLuint index)
{
+ unsigned array;
+
switch(file) {
case PROGRAM_UNDEFINED:
return ureg_dst_undef();
case PROGRAM_TEMPORARY:
+ assert(index >= 0);
+ assert(index < (int) Elements(t->temps));
+
if (ureg_dst_is_undef(t->temps[index]))
t->temps[index] = ureg_DECL_local_temporary(t->ureg);
return t->temps[index];
+ case PROGRAM_ARRAY:
+ array = index >> 16;
+
+ assert(array >= 0);
+ assert(array < (int) Elements(t->arrays));
+
+ if (ureg_dst_is_undef(t->arrays[array]))
+ t->arrays[array] = ureg_DECL_array_temporary(
+ t->ureg, t->array_sizes[array], TRUE);
+
+ return ureg_dst_array_offset(t->arrays[array],
+ (int)(index & 0xFFFF) - 0x8000);
+
case PROGRAM_OUTPUT:
if (t->procType == TGSI_PROCESSOR_VERTEX)
assert(index < VARYING_SLOT_MAX);
@@ -4176,11 +4203,8 @@ src_register(struct st_translate *t,
return ureg_src_undef();
case PROGRAM_TEMPORARY:
- assert(index >= 0);
- assert(index < (int) Elements(t->temps));
- if (ureg_dst_is_undef(t->temps[index]))
- t->temps[index] = ureg_DECL_local_temporary(t->ureg);
- return ureg_src(t->temps[index]);
+ case PROGRAM_ARRAY:
+ return ureg_src(dst_register(t, file, index));
case PROGRAM_ENV_PARAM:
case PROGRAM_LOCAL_PARAM:
@@ -4262,8 +4286,10 @@ translate_dst(struct st_translate *t,
}
}
- if (dst_reg->reladdr != NULL)
+ if (dst_reg->reladdr != NULL) {
+ assert(dst_reg->file != PROGRAM_TEMPORARY);
dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
+ }
return dst;
}
@@ -4286,26 +4312,8 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg)
src = ureg_negate(src);
if (src_reg->reladdr != NULL) {
- /* Normally ureg_src_indirect() would be used here, but a stupid compiler
- * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
- * set the bit for src.Negate. So we have to do the operation manually
- * here to work around the compiler's problems. */
- /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
- struct ureg_src addr = ureg_src(t->address[0]);
- src.Indirect = 1;
- src.IndirectFile = addr.File;
- src.IndirectIndex = addr.Index;
- src.IndirectSwizzle = addr.SwizzleX;
-
- if (src_reg->file != PROGRAM_INPUT &&
- src_reg->file != PROGRAM_OUTPUT) {
- /* If src_reg->index was negative, it was set to zero in
- * src_register(). Reassign it now. But don't do this
- * for input/output regs since they get remapped while
- * const buffers don't.
- */
- src.Index = src_reg->index;
- }
+ assert(src_reg->file != PROGRAM_TEMPORARY);
+ src = ureg_src_indirect(src, ureg_src(t->address[0]));
}
return src;
@@ -4820,16 +4828,9 @@ st_translate_program(
}
}
- if (program->indirect_addr_temps) {
- /* If temps are accessed with indirect addressing, declare temporaries
- * in sequential order. Else, we declare them on demand elsewhere.
- * (Note: the number of temporaries is equal to program->next_temp)
- */
- for (i = 0; i < (unsigned)program->next_temp; i++) {
- /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
- t->temps[i] = ureg_DECL_local_temporary(t->ureg);
- }
- }
+ /* Copy over array sizes
+ */
+ memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
/* Emit constants and uniforms. TGSI uses a single index space for these,
* so we put all the translated regs in t->constants.
@@ -5064,16 +5065,9 @@ get_mesa_program(struct gl_context *ctx,
v->copy_propagate();
while (v->eliminate_dead_code_advanced());
- /* FIXME: These passes to optimize temporary registers don't work when there
- * is indirect addressing of the temporary register space. We need proper
- * array support so that we don't have to give up these passes in every
- * shader that uses arrays.
- */
- if (!v->indirect_addr_temps) {
- v->eliminate_dead_code();
- v->merge_registers();
- v->renumber_registers();
- }
+ v->eliminate_dead_code();
+ v->merge_registers();
+ v->renumber_registers();
/* Write the END instruction. */
v->emit(NULL, TGSI_OPCODE_END);
diff --git a/mesalib/src/mesa/state_tracker/st_program.c b/mesalib/src/mesa/state_tracker/st_program.c
index 6af8df316..7a38da84f 100644
--- a/mesalib/src/mesa/state_tracker/st_program.c
+++ b/mesalib/src/mesa/state_tracker/st_program.c
@@ -177,6 +177,7 @@ void
st_prepare_vertex_program(struct gl_context *ctx,
struct st_vertex_program *stvp)
{
+ struct st_context *st = st_context(ctx);
GLuint attr;
stvp->num_inputs = 0;
@@ -267,7 +268,8 @@ st_prepare_vertex_program(struct gl_context *ctx,
case VARYING_SLOT_TEX5:
case VARYING_SLOT_TEX6:
case VARYING_SLOT_TEX7:
- stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+ stvp->output_semantic_name[slot] = st->needs_texcoord_semantic ?
+ TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
stvp->output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
break;
@@ -275,10 +277,8 @@ st_prepare_vertex_program(struct gl_context *ctx,
default:
assert(attr < VARYING_SLOT_MAX);
stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
- stvp->output_semantic_index[slot] = (VARYING_SLOT_VAR0 -
- VARYING_SLOT_TEX0 +
- attr -
- VARYING_SLOT_VAR0);
+ stvp->output_semantic_index[slot] = st->needs_texcoord_semantic ?
+ (attr - VARYING_SLOT_VAR0) : (attr - VARYING_SLOT_TEX0);
break;
}
}
@@ -585,11 +585,18 @@ st_translate_fragment_program(struct st_context *st,
* fragment shader plus fixed-function hardware (such as
* BFC).
*
- * There is no requirement that semantic indexes start at
- * zero or be restricted to a particular range -- nobody
- * should be building tables based on semantic index.
+ * However, some drivers may need us to identify the PNTC and TEXi
+ * varyings if, for example, their capability to replace them with
+ * sprite coordinates is limited.
*/
case VARYING_SLOT_PNTC:
+ if (st->needs_texcoord_semantic) {
+ input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
+ input_semantic_index[slot] = 0;
+ interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+ break;
+ }
+ /* fall through */
case VARYING_SLOT_TEX0:
case VARYING_SLOT_TEX1:
case VARYING_SLOT_TEX2:
@@ -598,13 +605,29 @@ st_translate_fragment_program(struct st_context *st,
case VARYING_SLOT_TEX5:
case VARYING_SLOT_TEX6:
case VARYING_SLOT_TEX7:
+ if (st->needs_texcoord_semantic) {
+ input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
+ input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
+ interpMode[slot] =
+ st_translate_interp(stfp->Base.InterpQualifier[attr], FALSE);
+ break;
+ }
+ /* fall through */
case VARYING_SLOT_VAR0:
default:
- /* Actually, let's try and zero-base this just for
- * readability of the generated TGSI.
+ /* Semantic indices should be zero-based because drivers may choose
+ * to assign a fixed slot determined by that index.
+ * This is useful because ARB_separate_shader_objects uses location
+ * qualifiers for linkage, and if the semantic index corresponds to
+ * these locations, linkage passes in the driver become unnecessary.
+ *
+ * If needs_texcoord_semantic is true, no semantic indices will be
+ * consumed for the TEXi varyings, and we can base the locations of
+ * the user varyings on VAR0. Otherwise, we use TEX0 as base index.
*/
assert(attr >= VARYING_SLOT_TEX0);
- input_semantic_index[slot] = (attr - VARYING_SLOT_TEX0);
+ input_semantic_index[slot] = st->needs_texcoord_semantic ?
+ (attr - VARYING_SLOT_VAR0) : (attr - VARYING_SLOT_TEX0);
input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
if (attr == VARYING_SLOT_PNTC)
interpMode[slot] = TGSI_INTERPOLATE_LINEAR;