aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mesalib/configure.ac61
-rw-r--r--mesalib/include/GLES3/gl3.h4
-rw-r--r--mesalib/src/gallium/auxiliary/Makefile.am6
-rw-r--r--mesalib/src/gallium/auxiliary/util/u_debug.c2
-rw-r--r--mesalib/src/gallium/auxiliary/util/u_tile.c4
-rw-r--r--mesalib/src/gallium/auxiliary/util/u_tile.h8
-rw-r--r--mesalib/src/gallium/auxiliary/util/u_upload_mgr.c9
-rw-r--r--mesalib/src/gallium/auxiliary/util/u_vbuf.c76
-rw-r--r--mesalib/src/glsl/Makefile.am2
-rw-r--r--mesalib/src/glsl/Makefile.sources3
-rw-r--r--mesalib/src/glsl/SConscript2
-rw-r--r--mesalib/src/glsl/ast.h29
-rw-r--r--mesalib/src/glsl/ast_function.cpp15
-rw-r--r--mesalib/src/glsl/ast_to_hir.cpp333
-rw-r--r--mesalib/src/glsl/builtin_compiler/Makefile.am1
-rw-r--r--mesalib/src/glsl/builtin_types.h74
-rw-r--r--mesalib/src/glsl/builtin_variables.cpp94
-rw-r--r--mesalib/src/glsl/glcpp/glcpp-parse.y3
-rw-r--r--mesalib/src/glsl/glsl_lexer.ll10
-rw-r--r--mesalib/src/glsl/glsl_parser.yy82
-rw-r--r--mesalib/src/glsl/glsl_parser_extras.cpp1
-rw-r--r--mesalib/src/glsl/glsl_parser_extras.h2
-rw-r--r--mesalib/src/glsl/glsl_symbol_table.cpp14
-rw-r--r--mesalib/src/glsl/glsl_symbol_table.h1
-rw-r--r--mesalib/src/glsl/glsl_types.cpp94
-rw-r--r--mesalib/src/glsl/glsl_types.h43
-rw-r--r--mesalib/src/glsl/hir_field_selection.cpp3
-rw-r--r--mesalib/src/glsl/ir.cpp52
-rw-r--r--mesalib/src/glsl/ir.h100
-rw-r--r--mesalib/src/glsl/ir_builder.cpp159
-rw-r--r--mesalib/src/glsl/ir_builder.h72
-rw-r--r--mesalib/src/glsl/ir_clone.cpp12
-rw-r--r--mesalib/src/glsl/ir_constant_expression.cpp376
-rw-r--r--mesalib/src/glsl/ir_function.cpp6
-rw-r--r--mesalib/src/glsl/ir_optimization.h26
-rw-r--r--mesalib/src/glsl/ir_print_visitor.cpp3
-rw-r--r--mesalib/src/glsl/ir_reader.cpp10
-rw-r--r--mesalib/src/glsl/ir_set_program_inouts.cpp6
-rw-r--r--mesalib/src/glsl/ir_validate.cpp42
-rw-r--r--mesalib/src/glsl/link_uniform_block_active_visitor.cpp162
-rw-r--r--mesalib/src/glsl/link_uniform_block_active_visitor.h62
-rw-r--r--mesalib/src/glsl/link_uniform_blocks.cpp313
-rw-r--r--mesalib/src/glsl/link_uniform_initializers.cpp6
-rw-r--r--mesalib/src/glsl/link_uniforms.cpp255
-rw-r--r--mesalib/src/glsl/link_varyings.cpp42
-rw-r--r--mesalib/src/glsl/linker.cpp58
-rw-r--r--mesalib/src/glsl/linker.h51
-rw-r--r--mesalib/src/glsl/lower_clip_distance.cpp8
-rw-r--r--mesalib/src/glsl/lower_output_reads.cpp4
-rw-r--r--mesalib/src/glsl/lower_packed_varyings.cpp28
-rw-r--r--mesalib/src/glsl/lower_packing_builtins.cpp1314
-rw-r--r--mesalib/src/glsl/lower_ubo_reference.cpp104
-rw-r--r--mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp14
-rw-r--r--mesalib/src/glsl/opt_constant_folding.cpp3
-rw-r--r--mesalib/src/glsl/opt_constant_propagation.cpp3
-rw-r--r--mesalib/src/glsl/opt_constant_variable.cpp4
-rw-r--r--mesalib/src/glsl/opt_copy_propagation.cpp3
-rw-r--r--mesalib/src/glsl/opt_copy_propagation_elements.cpp3
-rw-r--r--mesalib/src/glsl/opt_dead_code.cpp14
-rw-r--r--mesalib/src/glsl/opt_function_inlining.cpp8
-rw-r--r--mesalib/src/glsl/opt_structure_splitting.cpp3
-rw-r--r--mesalib/src/glsl/opt_tree_grafting.cpp8
-rw-r--r--mesalib/src/glsl/s_expression.cpp6
-rw-r--r--mesalib/src/glsl/standalone_scaffolding.cpp19
-rw-r--r--mesalib/src/glsl/standalone_scaffolding.h3
-rw-r--r--mesalib/src/glsl/strtod.c22
-rw-r--r--mesalib/src/glsl/strtod.h3
-rw-r--r--mesalib/src/mesa/Android.libmesa_glsl_utils.mk10
-rw-r--r--mesalib/src/mesa/main/extensions.c1
-rw-r--r--mesalib/src/mesa/main/getstring.c4
-rw-r--r--mesalib/src/mesa/main/imports.c86
-rw-r--r--mesalib/src/mesa/main/imports.h3
-rw-r--r--mesalib/src/mesa/main/mtypes.h28
-rw-r--r--mesalib/src/mesa/main/remap.c6
-rw-r--r--mesalib/src/mesa/main/shader_query.cpp12
-rw-r--r--mesalib/src/mesa/main/texparam.c8
-rw-r--r--mesalib/src/mesa/main/uniforms.c2
-rw-r--r--mesalib/src/mesa/main/version.c25
-rw-r--r--mesalib/src/mesa/program/ir_to_mesa.cpp49
-rw-r--r--mesalib/src/mesa/program/program.c32
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_bitmap.c5
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_clear.c5
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_drawpixels.c9
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_drawtex.c7
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_texture.c1
-rw-r--r--mesalib/src/mesa/state_tracker/st_draw.c21
-rw-r--r--mesalib/src/mesa/state_tracker/st_extensions.c4
-rw-r--r--mesalib/src/mesa/state_tracker/st_format.c34
-rw-r--r--mesalib/src/mesa/state_tracker/st_format.h3
-rw-r--r--mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp43
-rw-r--r--mesalib/src/mesa/swrast/s_texfilter.c38
-rw-r--r--mesalib/src/mesa/vbo/vbo_exec_api.c5
-rw-r--r--mkfontscale/configure.ac1
-rw-r--r--mkfontscale/hash.c27
-rw-r--r--mkfontscale/ident.c3
-rw-r--r--mkfontscale/mkfontscale.c5
-rw-r--r--pixman/configure.ac16
-rw-r--r--pixman/demos/scale.c7
-rw-r--r--pixman/demos/scale.ui30
-rw-r--r--pixman/pixman/pixman-fast-path.c446
-rw-r--r--pixman/pixman/pixman-general.c3
-rw-r--r--pixman/pixman/pixman-glyph.c8
-rw-r--r--pixman/pixman/pixman-implementation.c28
-rw-r--r--pixman/pixman/pixman-inlines.h37
-rw-r--r--pixman/pixman/pixman-matrix.c408
-rw-r--r--pixman/pixman/pixman-private.h25
-rw-r--r--pixman/pixman/pixman-sse2.c284
-rw-r--r--pixman/pixman/pixman.c87
-rw-r--r--pixman/test/Makefile.sources1
-rw-r--r--pixman/test/affine-test.c6
-rw-r--r--pixman/test/lowlevel-blt-bench.c4
-rw-r--r--pixman/test/matrix-test.c186
112 files changed, 5425 insertions, 991 deletions
diff --git a/mesalib/configure.ac b/mesalib/configure.ac
index e769edadb..9cc5c4ae5 100644
--- a/mesalib/configure.ac
+++ b/mesalib/configure.ac
@@ -608,8 +608,10 @@ AC_ARG_ENABLE([vdpau],
[enable_vdpau=auto])
AC_ARG_ENABLE([opencl],
[AS_HELP_STRING([--enable-opencl],
- [enable OpenCL library @<:@default=no@:>@])],
- [enable_opencl="$enableval"],
+ [enable OpenCL library NOTE: Enabling this option will also enable
+ --with-llvm-shared-libs
+ @<:@default=no@:>@])],
+ [enable_opencl="$enableval" with_llvm_shared_libs="$enableval"],
[enable_opencl=no])
AC_ARG_ENABLE([xlib_glx],
[AS_HELP_STRING([--enable-xlib-glx],
@@ -1660,10 +1662,7 @@ if test "x$enable_gallium_llvm" = xyes; then
if test "x$LLVM_CONFIG" != xno; then
LLVM_VERSION=`$LLVM_CONFIG --version | sed 's/svn.*//g'`
LLVM_VERSION_INT=`echo $LLVM_VERSION | sed -e 's/\([[0-9]]\)\.\([[0-9]]\)/\10\2/g'`
- if test "x$with_llvm_shared_libs" = xyes; then
- dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
- LLVM_LIBS="-lLLVM-`$LLVM_CONFIG --version`"
- else
+ if test "x$with_llvm_shared_libs" != xyes; then
LLVM_COMPONENTS="engine bitwriter"
if $LLVM_CONFIG --components | grep -q '\<mcjit\>'; then
LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit"
@@ -1672,7 +1671,6 @@ if test "x$enable_gallium_llvm" = xyes; then
if test "x$enable_opencl" = xyes; then
LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation"
fi
- LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
fi
LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags`
LLVM_BINDIR=`$LLVM_CONFIG --bindir`
@@ -1797,7 +1795,7 @@ radeon_llvm_check() {
configure flag])
fi
AC_MSG_WARN([Please ensure you use the latest llvm tree from git://people.freedesktop.org/~tstellar/llvm master before submitting a bug])
- LLVM_LIBS="$LLVM_LIBS `$LLVM_CONFIG --libs r600`"
+ LLVM_COMPONENTS="${LLVM_COMPONENTS} r600"
}
dnl Gallium drivers
@@ -1836,12 +1834,13 @@ if test "x$with_gallium_drivers" != x; then
if test "x$enable_r600_llvm" = xyes -o "x$enable_opencl" = xyes; then
radeon_llvm_check
NEED_RADEON_GALLIUM=yes;
+ LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo"
fi
if test "x$enable_r600_llvm" = xyes; then
USE_R600_LLVM_COMPILER=yes;
fi
if test "x$enable_opencl" = xyes -a "x$with_llvm_shared_libs" = xno; then
- LLVM_LIBS="${LLVM_LIBS} `$LLVM_CONFIG --libs bitreader asmparser`"
+ LLVM_COMPONENTS="${LLVM_COMPONENTS} bitreader asmparser"
fi
gallium_check_st "radeon/drm" "dri-r600" "xorg-r600" "" "xvmc-r600" "vdpau-r600"
;;
@@ -1891,6 +1890,50 @@ if test "x$with_gallium_drivers" != x; then
esac
done
fi
+
+dnl Set LLVM_LIBS - This is done after the driver configuration so
+dnl that drivers can add additional components to LLVM_COMPONENTS.
+dnl Previously, gallium drivers were updating LLVM_LIBS directly
+dnl by calling llvm-config --libs ${DRIVER_LLVM_COMPONENTS}, but
+dnl this was causing the same libraries to appear multiple times
+dnl in LLVM_LIBS.
+
+if test "x$MESA_LLVM" != x0; then
+
+ LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
+
+ if test "x$with_llvm_shared_libs" = xyes; then
+ dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
+ LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
+ AC_CHECK_FILE("$LLVM_LIBDIR/lib$LLVM_SO_NAME.so", llvm_have_one_so=yes,)
+
+ if test "x$llvm_have_one_so" = xyes; then
+ dnl LLVM was built using auto*, so there is only one shared object.
+ LLVM_LIBS="-l$LLVM_SO_NAME"
+ else
+ dnl If LLVM was built with CMake, there will be one shared object per
+ dnl component.
+ AC_CHECK_FILE("$LLVM_LIBDIR/libLLVMTarget.so",,
+ AC_MSG_ERROR([Could not find llvm shared libraries:
+ Please make sure you have built llvm with the --enable-shared option
+ and that your llvm libraries are installed in $LLVM_LIBDIR
+ If you have installed your llvm libraries to a different directory you
+ can use the --with-llvm-prefix= configure flag to specify this directory.
+ NOTE: Mesa is attempting to use llvm shared libraries because you have
+ passed one of the following options to configure:
+ --with-llvm-shared-libs
+ --enable-opencl
+ If you do not want to build with llvm shared libraries and instead want to
+ use llvm static libraries then remove these options from your configure
+ invocation and reconfigure.]))
+
+ dnl We don't need to update LLVM_LIBS in this case because the LLVM
+ dnl install uses a shared object for each component and we have
+ dnl already added all of these objects to LLVM_LIBS.
+ fi
+ fi
+fi
+
AM_CONDITIONAL(HAVE_GALLIUM_SVGA, test "x$HAVE_GALLIUM_SVGA" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_I915, test "x$HAVE_GALLIUM_I915" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes)
diff --git a/mesalib/include/GLES3/gl3.h b/mesalib/include/GLES3/gl3.h
index b9399e994..09f2b5333 100644
--- a/mesalib/include/GLES3/gl3.h
+++ b/mesalib/include/GLES3/gl3.h
@@ -2,7 +2,7 @@
#define __gl3_h_
/*
- * gl3.h last updated on $Date: 2012-09-12 10:13:02 -0700 (Wed, 12 Sep 2012) $
+ * gl3.h last updated on $Date: 2012-10-03 07:52:40 -0700 (Wed, 03 Oct 2012) $
*/
#include <GLES3/gl3platform.h>
@@ -796,7 +796,7 @@ typedef struct __GLsync *GLsync;
#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
#define GL_MAX_ELEMENT_INDEX 0x8D6B
#define GL_NUM_SAMPLE_COUNTS 0x9380
-#define GL_TEXTURE_IMMUTABLE_LEVELS 0x8D63
+#define GL_TEXTURE_IMMUTABLE_LEVELS 0x82DF
/*-------------------------------------------------------------------------
* Entrypoint definitions
diff --git a/mesalib/src/gallium/auxiliary/Makefile.am b/mesalib/src/gallium/auxiliary/Makefile.am
index 49792930a..a4eee4773 100644
--- a/mesalib/src/gallium/auxiliary/Makefile.am
+++ b/mesalib/src/gallium/auxiliary/Makefile.am
@@ -45,9 +45,3 @@ util/u_format_srgb.c: $(srcdir)/util/u_format_srgb.py
util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
$(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
-
-# XXX: As a work around for https://bugs.freedesktop.org/show_bug.cgi?id=59334
-# clover needs to link against libgallium.a. Delete this once we have a real
-# fix for this bug.
-all-local: libgallium.la
- ln -f $(builddir)/.libs/libgallium.a $(builddir)/libgallium.a
diff --git a/mesalib/src/gallium/auxiliary/util/u_debug.c b/mesalib/src/gallium/auxiliary/util/u_debug.c
index 6e8c5b993..f4670f28c 100644
--- a/mesalib/src/gallium/auxiliary/util/u_debug.c
+++ b/mesalib/src/gallium/auxiliary/util/u_debug.c
@@ -232,7 +232,7 @@ debug_get_flags_option(const char *name,
unsigned long result;
const char *str;
const struct debug_named_value *orig = flags;
- int namealign = 0;
+ unsigned namealign = 0;
str = os_get_option(name);
if(!str)
diff --git a/mesalib/src/gallium/auxiliary/util/u_tile.c b/mesalib/src/gallium/auxiliary/util/u_tile.c
index 6c618a674..62298cdab 100644
--- a/mesalib/src/gallium/auxiliary/util/u_tile.c
+++ b/mesalib/src/gallium/auxiliary/util/u_tile.c
@@ -806,7 +806,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
for (j = 0; j < w; j++) {
/* convert 32-bit integer Z to float Z */
const double scale = 1.0 / 0xffffffffU;
- pDest[j] = ptrc[j] * scale;
+ pDest[j] = (float) (ptrc[j] * scale);
}
pDest += pt->stride/4;
ptrc += srcStride;
@@ -820,7 +820,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
for (j = 0; j < w; j++) {
/* convert 32-bit integer Z to float Z */
const double scale = 1.0 / 0xffffffffU;
- pDest[j*2] = ptrc[j] * scale;
+ pDest[j*2] = (float) (ptrc[j] * scale);
}
pDest += pt->stride/4;
ptrc += srcStride;
diff --git a/mesalib/src/gallium/auxiliary/util/u_tile.h b/mesalib/src/gallium/auxiliary/util/u_tile.h
index abcd402c8..9e8194459 100644
--- a/mesalib/src/gallium/auxiliary/util/u_tile.h
+++ b/mesalib/src/gallium/auxiliary/util/u_tile.h
@@ -45,13 +45,13 @@ struct pipe_transfer;
static INLINE boolean
u_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_box *box)
{
- if (x >= box->width)
+ if ((int) x >= box->width)
return TRUE;
- if (y >= box->height)
+ if ((int) y >= box->height)
return TRUE;
- if (x + *w > box->width)
+ if ((int) (x + *w) > box->width)
*w = box->width - x;
- if (y + *h > box->height)
+ if ((int) (y + *h) > box->height)
*h = box->height - y;
return FALSE;
}
diff --git a/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c b/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c
index ee1c6881e..6859751c5 100644
--- a/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -163,6 +163,13 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
unsigned alloc_offset = align(min_out_offset, upload->alignment);
unsigned offset;
+ /* Init these return values here in case we fail below to make
+ * sure the caller doesn't get garbage values.
+ */
+ *out_offset = ~0;
+ pipe_resource_reference(outbuf, NULL);
+ *ptr = NULL;
+
/* Make sure we have enough space in the upload buffer
* for the sub-allocation. */
if (MAX2(upload->offset, alloc_offset) + alloc_size > upload->size) {
@@ -182,8 +189,6 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
PIPE_TRANSFER_UNSYNCHRONIZED,
&upload->transfer);
if (!upload->map) {
- pipe_resource_reference(outbuf, NULL);
- *ptr = NULL;
upload->transfer = NULL;
return PIPE_ERROR_OUT_OF_MEMORY;
}
diff --git a/mesalib/src/gallium/auxiliary/util/u_vbuf.c b/mesalib/src/gallium/auxiliary/util/u_vbuf.c
index b712b52de..244b04d2a 100644
--- a/mesalib/src/gallium/auxiliary/util/u_vbuf.c
+++ b/mesalib/src/gallium/auxiliary/util/u_vbuf.c
@@ -323,7 +323,7 @@ void u_vbuf_destroy(struct u_vbuf *mgr)
FREE(mgr);
}
-static void
+static enum pipe_error
u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
unsigned vb_mask, unsigned out_vb,
int start_vertex, unsigned num_vertices,
@@ -335,6 +335,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
struct pipe_resource *out_buffer = NULL;
uint8_t *out_map;
unsigned out_offset, mask;
+ enum pipe_error err;
/* Get a translate object. */
tr = translate_cache_find(mgr->translate_cache, key);
@@ -381,6 +382,14 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
assert((ib->buffer || ib->user_buffer) && ib->index_size);
+ /* Create and map the output buffer. */
+ err = u_upload_alloc(mgr->uploader, 0,
+ key->output_stride * num_indices,
+ &out_offset, &out_buffer,
+ (void**)&out_map);
+ if (err != PIPE_OK)
+ return err;
+
if (ib->user_buffer) {
map = (uint8_t*)ib->user_buffer + offset;
} else {
@@ -389,12 +398,6 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
PIPE_TRANSFER_READ, &transfer);
}
- /* Create and map the output buffer. */
- u_upload_alloc(mgr->uploader, 0,
- key->output_stride * num_indices,
- &out_offset, &out_buffer,
- (void**)&out_map);
-
switch (ib->index_size) {
case 4:
tr->run_elts(tr, (unsigned*)map, num_indices, 0, out_map);
@@ -412,11 +415,13 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
}
} else {
/* Create and map the output buffer. */
- u_upload_alloc(mgr->uploader,
- key->output_stride * start_vertex,
- key->output_stride * num_vertices,
- &out_offset, &out_buffer,
- (void**)&out_map);
+ err = u_upload_alloc(mgr->uploader,
+ key->output_stride * start_vertex,
+ key->output_stride * num_vertices,
+ &out_offset, &out_buffer,
+ (void**)&out_map);
+ if (err != PIPE_OK)
+ return err;
out_offset -= key->output_stride * start_vertex;
@@ -441,6 +446,8 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
pipe_resource_reference(
&mgr->real_vertex_buffer[out_vb].buffer, NULL);
mgr->real_vertex_buffer[out_vb].buffer = out_buffer;
+
+ return PIPE_OK;
}
static boolean
@@ -588,11 +595,14 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
/* Translate buffers. */
for (type = 0; type < VB_NUM; type++) {
if (key[type].nr_elements) {
- u_vbuf_translate_buffers(mgr, &key[type], mask[type],
- mgr->fallback_vbs[type],
- start[type], num[type],
- start_index, num_indices, min_index,
- unroll_indices && type == VB_VERTEX);
+ enum pipe_error err;
+ err = u_vbuf_translate_buffers(mgr, &key[type], mask[type],
+ mgr->fallback_vbs[type],
+ start[type], num[type],
+ start_index, num_indices, min_index,
+ unroll_indices && type == VB_VERTEX);
+ if (err != PIPE_OK)
+ return FALSE;
/* Fixup the stride for constant attribs. */
if (type == VB_CONST) {
@@ -884,7 +894,7 @@ void u_vbuf_set_index_buffer(struct u_vbuf *mgr,
pipe->set_index_buffer(pipe, ib);
}
-static void
+static enum pipe_error
u_vbuf_upload_buffers(struct u_vbuf *mgr,
int start_vertex, unsigned num_vertices,
int start_instance, unsigned num_instances)
@@ -953,6 +963,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
unsigned start, end;
struct pipe_vertex_buffer *real_vb;
const uint8_t *ptr;
+ enum pipe_error err;
i = u_bit_scan(&buffer_mask);
@@ -963,11 +974,15 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
real_vb = &mgr->real_vertex_buffer[i];
ptr = mgr->vertex_buffer[i].user_buffer;
- u_upload_data(mgr->uploader, start, end - start, ptr + start,
- &real_vb->buffer_offset, &real_vb->buffer);
+ err = u_upload_data(mgr->uploader, start, end - start, ptr + start,
+ &real_vb->buffer_offset, &real_vb->buffer);
+ if (err != PIPE_OK)
+ return err;
real_vb->buffer_offset -= start;
}
+
+ return PIPE_OK;
}
static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr)
@@ -1176,11 +1191,13 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
if (unroll_indices ||
incompatible_vb_mask ||
mgr->ve->incompatible_elem_mask) {
- /* XXX check the return value */
- u_vbuf_translate_begin(mgr, start_vertex, num_vertices,
- info->start_instance, info->instance_count,
- info->start, info->count, min_index,
- unroll_indices);
+ if (!u_vbuf_translate_begin(mgr, start_vertex, num_vertices,
+ info->start_instance, info->instance_count,
+ info->start, info->count, min_index,
+ unroll_indices)) {
+ debug_warn_once("u_vbuf_translate_begin() failed");
+ return;
+ }
user_vb_mask &= ~(incompatible_vb_mask |
mgr->ve->incompatible_vb_mask_all);
@@ -1188,8 +1205,13 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
/* Upload user buffers. */
if (user_vb_mask) {
- u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
- info->start_instance, info->instance_count);
+ if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
+ info->start_instance,
+ info->instance_count) != PIPE_OK) {
+ debug_warn_once("u_vbuf_upload_buffers() failed");
+ return;
+ }
+
mgr->dirty_real_vb_mask |= user_vb_mask;
}
diff --git a/mesalib/src/glsl/Makefile.am b/mesalib/src/glsl/Makefile.am
index 058d8aed3..d0e5cd1d0 100644
--- a/mesalib/src/glsl/Makefile.am
+++ b/mesalib/src/glsl/Makefile.am
@@ -52,6 +52,7 @@ check_PROGRAMS = \
tests_uniform_initializer_test_SOURCES = \
$(top_srcdir)/src/mesa/main/hash_table.c \
+ $(top_srcdir)/src/mesa/main/imports.c \
$(top_srcdir)/src/mesa/program/prog_hash_table.c\
$(top_srcdir)/src/mesa/program/symbol_table.c \
tests/copy_constant_to_storage_tests.cpp \
@@ -100,6 +101,7 @@ endif
glsl_test_SOURCES = \
$(top_srcdir)/src/mesa/main/hash_table.c \
+ $(top_srcdir)/src/mesa/main/imports.c \
$(top_srcdir)/src/mesa/program/prog_hash_table.c \
$(top_srcdir)/src/mesa/program/symbol_table.c \
$(GLSL_SRCDIR)/standalone_scaffolding.cpp \
diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources
index de63c3246..c294aa429 100644
--- a/mesalib/src/glsl/Makefile.sources
+++ b/mesalib/src/glsl/Makefile.sources
@@ -47,6 +47,8 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/link_functions.cpp \
$(GLSL_SRCDIR)/link_uniforms.cpp \
$(GLSL_SRCDIR)/link_uniform_initializers.cpp \
+ $(GLSL_SRCDIR)/link_uniform_block_active_visitor.cpp \
+ $(GLSL_SRCDIR)/link_uniform_blocks.cpp \
$(GLSL_SRCDIR)/link_varyings.cpp \
$(GLSL_SRCDIR)/loop_analysis.cpp \
$(GLSL_SRCDIR)/loop_controls.cpp \
@@ -60,6 +62,7 @@ LIBGLSL_FILES = \
$(GLSL_SRCDIR)/lower_mat_op_to_vec.cpp \
$(GLSL_SRCDIR)/lower_noise.cpp \
$(GLSL_SRCDIR)/lower_packed_varyings.cpp \
+ $(GLSL_SRCDIR)/lower_packing_builtins.cpp \
$(GLSL_SRCDIR)/lower_texture_projection.cpp \
$(GLSL_SRCDIR)/lower_variable_index_to_cond_assign.cpp \
$(GLSL_SRCDIR)/lower_vec_index_to_cond_assign.cpp \
diff --git a/mesalib/src/glsl/SConscript b/mesalib/src/glsl/SConscript
index 6981f041b..c4ab97c1e 100644
--- a/mesalib/src/glsl/SConscript
+++ b/mesalib/src/glsl/SConscript
@@ -59,6 +59,7 @@ else:
# Copy these files to avoid generation object files into src/mesa/program
env.Prepend(CPPPATH = ['#src/mesa/main'])
env.Command('hash_table.c', '#src/mesa/main/hash_table.c', Copy('$TARGET', '$SOURCE'))
+ env.Command('imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', '$SOURCE'))
# Copy these files to avoid generation object files into src/mesa/program
env.Prepend(CPPPATH = ['#src/mesa/program'])
env.Command('prog_hash_table.c', '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE'))
@@ -68,6 +69,7 @@ else:
mesa_objs = env.StaticObject([
'hash_table.c',
+ 'imports.c',
'prog_hash_table.c',
'symbol_table.c',
])
diff --git a/mesalib/src/glsl/ast.h b/mesalib/src/glsl/ast.h
index 50747822d..1a28963c4 100644
--- a/mesalib/src/glsl/ast.h
+++ b/mesalib/src/glsl/ast.h
@@ -804,11 +804,12 @@ public:
class ast_uniform_block : public ast_node {
public:
ast_uniform_block(ast_type_qualifier layout,
- const char *block_name,
- ast_declarator_list *member_list)
- : layout(layout), block_name(block_name)
+ const char *instance_name,
+ ast_expression *array_size)
+ : layout(layout), block_name(NULL), instance_name(instance_name),
+ array_size(array_size)
{
- declarations.push_degenerate_list_at_head(&member_list->link);
+ /* empty */
}
virtual ir_rvalue *hir(exec_list *instructions,
@@ -816,8 +817,28 @@ public:
ast_type_qualifier layout;
const char *block_name;
+
+ /**
+ * Declared name of the block instance, if specified.
+ *
+ * If the block does not have an instance name, this field will be
+ * \c NULL.
+ */
+ const char *instance_name;
+
/** List of ast_declarator_list * */
exec_list declarations;
+
+ /**
+ * Declared array size of the block instance
+ *
+ * If the block is not declared as an array, this field will be \c NULL.
+ *
+ * \note
+ * A block can only be an array if it also has an instance name. If this
+ * field is not \c NULL, ::instance_name must also not be \c NULL.
+ */
+ ast_expression *array_size;
};
/*@}*/
diff --git a/mesalib/src/glsl/ast_function.cpp b/mesalib/src/glsl/ast_function.cpp
index dc7a58bf2..26f72cf8e 100644
--- a/mesalib/src/glsl/ast_function.cpp
+++ b/mesalib/src/glsl/ast_function.cpp
@@ -132,12 +132,13 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
}
/* Verify that 'out' and 'inout' actual parameters are lvalues. */
- if (formal->mode == ir_var_out || formal->mode == ir_var_inout) {
+ if (formal->mode == ir_var_function_out
+ || formal->mode == ir_var_function_inout) {
const char *mode = NULL;
switch (formal->mode) {
- case ir_var_out: mode = "out"; break;
- case ir_var_inout: mode = "inout"; break;
- default: assert(false); break;
+ case ir_var_function_out: mode = "out"; break;
+ case ir_var_function_inout: mode = "inout"; break;
+ default: assert(false); break;
}
/* This AST-based check catches errors like f(i++). The IR-based
@@ -210,13 +211,13 @@ generate_call(exec_list *instructions, ir_function_signature *sig,
if (formal->type->is_numeric() || formal->type->is_boolean()) {
switch (formal->mode) {
case ir_var_const_in:
- case ir_var_in: {
+ case ir_var_function_in: {
ir_rvalue *converted
= convert_component(actual, formal->type);
actual->replace_with(converted);
break;
}
- case ir_var_out:
+ case ir_var_function_out:
if (actual->type != formal->type) {
/* To convert an out parameter, we need to create a
* temporary variable to hold the value before conversion,
@@ -254,7 +255,7 @@ generate_call(exec_list *instructions, ir_function_signature *sig,
actual->replace_with(deref_tmp_2);
}
break;
- case ir_var_inout:
+ case ir_var_function_inout:
/* Inout parameters should never require conversion, since that
* would require an implicit conversion to exist both to and
* from the formal parameter type, and there are no
diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp
index de3ce902e..49093d88f 100644
--- a/mesalib/src/glsl/ast_to_hir.cpp
+++ b/mesalib/src/glsl/ast_to_hir.cpp
@@ -857,14 +857,11 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
case GLSL_TYPE_ERROR:
case GLSL_TYPE_VOID:
case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_INTERFACE:
/* I assume a comparison of a struct containing a sampler just
* ignores the sampler present in the type.
*/
break;
-
- default:
- assert(!"Should not get here.");
- break;
}
if (cmp == NULL)
@@ -1625,6 +1622,15 @@ ast_expression::hir(exec_list *instructions,
}
} else if (array->type->array_size() == 0) {
_mesa_glsl_error(&loc, state, "unsized array index must be constant");
+ } else if (array->type->is_array()
+ && array->type->fields.array->is_interface()) {
+ /* Page 46 in section 4.3.7 of the OpenGL ES 3.00 spec says:
+ *
+ * "All indexes used to index a uniform block array must be
+ * constant integral expressions."
+ */
+ _mesa_glsl_error(&loc, state,
+ "uniform block array index must be constant");
} else {
if (array->type->is_array()) {
/* whole_variable_referenced can return NULL if the array is a
@@ -1924,11 +1930,11 @@ is_varying_var(ir_variable *var, _mesa_glsl_parser_targets target)
{
switch (target) {
case vertex_shader:
- return var->mode == ir_var_out;
+ return var->mode == ir_var_shader_out;
case fragment_shader:
- return var->mode == ir_var_in;
+ return var->mode == ir_var_shader_in;
default:
- return var->mode == ir_var_out || var->mode == ir_var_in;
+ return var->mode == ir_var_shader_out || var->mode == ir_var_shader_in;
}
}
@@ -1997,13 +2003,16 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
* the setting alone.
*/
if (qual->flags.q.in && qual->flags.q.out)
- var->mode = ir_var_inout;
- else if (qual->flags.q.attribute || qual->flags.q.in
+ var->mode = ir_var_function_inout;
+ else if (qual->flags.q.in)
+ var->mode = is_parameter ? ir_var_function_in : ir_var_shader_in;
+ else if (qual->flags.q.attribute
|| (qual->flags.q.varying && (state->target == fragment_shader)))
- var->mode = ir_var_in;
- else if (qual->flags.q.out
- || (qual->flags.q.varying && (state->target == vertex_shader)))
- var->mode = ir_var_out;
+ var->mode = ir_var_shader_in;
+ else if (qual->flags.q.out)
+ var->mode = is_parameter ? ir_var_function_out : ir_var_shader_out;
+ else if (qual->flags.q.varying && (state->target == vertex_shader))
+ var->mode = ir_var_shader_out;
else if (qual->flags.q.uniform)
var->mode = ir_var_uniform;
@@ -2028,10 +2037,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
* Similar text exists in the section on vertex shader outputs.
*
* Similar text exists in the GLSL ES 3.00 spec, except that the GLSL ES
- * 3.00 spec claims to allow structs as well. However, this is likely
- * an error, since section 11 of the spec ("Counting of Inputs and
- * Outputs") enumerates all possible types of interstage linkage
- * variables, and it does not mention structs.
+ * 3.00 spec allows structs as well. Varying structs are also allowed
+ * in GLSL 1.50.
*/
switch (var->type->get_scalar_type()->base_type) {
case GLSL_TYPE_FLOAT:
@@ -2046,6 +2053,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
state->get_version_string());
break;
case GLSL_TYPE_STRUCT:
+ if (state->is_version(150, 300))
+ break;
_mesa_glsl_error(loc, state,
"varying variables may not be of type struct");
break;
@@ -2058,15 +2067,16 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
if (state->all_invariant && (state->current_function == NULL)) {
switch (state->target) {
case vertex_shader:
- if (var->mode == ir_var_out)
+ if (var->mode == ir_var_shader_out)
var->invariant = true;
break;
case geometry_shader:
- if ((var->mode == ir_var_in) || (var->mode == ir_var_out))
+ if ((var->mode == ir_var_shader_in)
+ || (var->mode == ir_var_shader_out))
var->invariant = true;
break;
case fragment_shader:
- if (var->mode == ir_var_in)
+ if (var->mode == ir_var_shader_in)
var->invariant = true;
break;
}
@@ -2082,8 +2092,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
var->interpolation = INTERP_QUALIFIER_NONE;
if (var->interpolation != INTERP_QUALIFIER_NONE &&
- !(state->target == vertex_shader && var->mode == ir_var_out) &&
- !(state->target == fragment_shader && var->mode == ir_var_in)) {
+ !(state->target == vertex_shader && var->mode == ir_var_shader_out) &&
+ !(state->target == fragment_shader && var->mode == ir_var_shader_in)) {
_mesa_glsl_error(loc, state,
"interpolation qualifier `%s' can only be applied to "
"vertex shader outputs and fragment shader inputs.",
@@ -2116,7 +2126,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
*/
switch (state->target) {
case vertex_shader:
- if (!global_scope || (var->mode != ir_var_in)) {
+ if (!global_scope || (var->mode != ir_var_shader_in)) {
fail = true;
string = "input";
}
@@ -2129,7 +2139,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
break;
case fragment_shader:
- if (!global_scope || (var->mode != ir_var_out)) {
+ if (!global_scope || (var->mode != ir_var_shader_out)) {
fail = true;
string = "output";
}
@@ -2440,7 +2450,7 @@ process_initializer(ir_variable *var, ast_declaration *decl,
"cannot initialize samplers");
}
- if ((var->mode == ir_var_in) && (state->current_function == NULL)) {
+ if ((var->mode == ir_var_shader_in) && (state->current_function == NULL)) {
_mesa_glsl_error(& initializer_loc, state,
"cannot initialize %s shader input / %s",
_mesa_glsl_shader_target_name(state->target),
@@ -2579,12 +2589,12 @@ ast_declarator_list::hir(exec_list *instructions,
"Undeclared variable `%s' cannot be marked "
"invariant\n", decl->identifier);
} else if ((state->target == vertex_shader)
- && (earlier->mode != ir_var_out)) {
+ && (earlier->mode != ir_var_shader_out)) {
_mesa_glsl_error(& loc, state,
"`%s' cannot be marked invariant, vertex shader "
"outputs only\n", decl->identifier);
} else if ((state->target == fragment_shader)
- && (earlier->mode != ir_var_in)) {
+ && (earlier->mode != ir_var_shader_in)) {
_mesa_glsl_error(& loc, state,
"`%s' cannot be marked invariant, fragment shader "
"inputs only\n", decl->identifier);
@@ -2707,16 +2717,13 @@ ast_declarator_list::hir(exec_list *instructions,
& loc, this->ubo_qualifiers_valid, false);
if (this->type->qualifier.flags.q.invariant) {
- if ((state->target == vertex_shader) && !(var->mode == ir_var_out ||
- var->mode == ir_var_inout)) {
- /* FINISHME: Note that this doesn't work for invariant on
- * a function signature outval
- */
+ if ((state->target == vertex_shader) &&
+ var->mode != ir_var_shader_out) {
_mesa_glsl_error(& loc, state,
"`%s' cannot be marked invariant, vertex shader "
"outputs only\n", var->name);
} else if ((state->target == fragment_shader) &&
- !(var->mode == ir_var_in || var->mode == ir_var_inout)) {
+ var->mode != ir_var_shader_in) {
/* FINISHME: Note that this doesn't work for invariant on
* a function signature inval
*/
@@ -2753,7 +2760,7 @@ ast_declarator_list::hir(exec_list *instructions,
"global scope%s",
mode, var->name, extra);
}
- } else if (var->mode == ir_var_in) {
+ } else if (var->mode == ir_var_shader_in) {
var->read_only = true;
if (state->target == vertex_shader) {
@@ -2833,7 +2840,7 @@ ast_declarator_list::hir(exec_list *instructions,
&& state->target == vertex_shader
&& state->current_function == NULL
&& var->type->is_integer()
- && var->mode == ir_var_out
+ && var->mode == ir_var_shader_out
&& var->interpolation != INTERP_QUALIFIER_FLAT) {
_mesa_glsl_error(&loc, state, "If a vertex output is an integer, "
@@ -3137,7 +3144,8 @@ ast_parameter_declarator::hir(exec_list *instructions,
}
is_void = false;
- ir_variable *var = new(ctx) ir_variable(type, this->identifier, ir_var_in);
+ ir_variable *var = new(ctx)
+ ir_variable(type, this->identifier, ir_var_function_in);
/* Apply any specified qualifiers to the parameter declaration. Note that
* for function parameters the default mode is 'in'.
@@ -3151,7 +3159,7 @@ ast_parameter_declarator::hir(exec_list *instructions,
* as out or inout function parameters, nor can they be assigned
* into."
*/
- if ((var->mode == ir_var_inout || var->mode == ir_var_out)
+ if ((var->mode == ir_var_function_inout || var->mode == ir_var_function_out)
&& type->contains_sampler()) {
_mesa_glsl_error(&loc, state, "out and inout parameters cannot contain samplers");
type = glsl_type::error_type;
@@ -3171,7 +3179,7 @@ ast_parameter_declarator::hir(exec_list *instructions,
* So for GLSL 1.10, passing an array as an out or inout parameter is not
* allowed. This restriction is removed in GLSL 1.20, and in GLSL ES.
*/
- if ((var->mode == ir_var_inout || var->mode == ir_var_out)
+ if ((var->mode == ir_var_function_inout || var->mode == ir_var_function_out)
&& type->is_array()
&& !state->check_version(120, 100, &loc,
"Arrays cannot be out or inout parameters")) {
@@ -4018,35 +4026,50 @@ ast_type_specifier::hir(exec_list *instructions,
}
-ir_rvalue *
-ast_struct_specifier::hir(exec_list *instructions,
- struct _mesa_glsl_parse_state *state)
+/**
+ * Process a structure or interface block tree into an array of structure fields
+ *
+ * After parsing, where there are some syntax differences, structures and
+ * interface blocks are almost identical. They are similar enough that the
+ * AST for each can be processed the same way into a set of
+ * \c glsl_struct_field to describe the members.
+ *
+ * \return
+ * The number of fields processed. A pointer to the array of structure fields is
+ * stored in \c *fields_ret.
+ */
+unsigned
+ast_process_structure_or_interface_block(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state,
+ exec_list *declarations,
+ YYLTYPE &loc,
+ glsl_struct_field **fields_ret,
+ bool is_interface,
+ bool block_row_major)
{
unsigned decl_count = 0;
- /* Make an initial pass over the list of structure fields to determine how
+ /* Make an initial pass over the list of fields to determine how
* many there are. Each element in this list is an ast_declarator_list.
* This means that we actually need to count the number of elements in the
* 'declarations' list in each of the elements.
*/
- foreach_list_typed (ast_declarator_list, decl_list, link,
- &this->declarations) {
+ foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
foreach_list_const (decl_ptr, & decl_list->declarations) {
decl_count++;
}
}
- /* Allocate storage for the structure fields and process the field
+ /* Allocate storage for the fields and process the field
* declarations. As the declarations are processed, try to also convert
* the types to HIR. This ensures that structure definitions embedded in
- * other structure definitions are processed.
+ * other structure definitions or in interface blocks are processed.
*/
glsl_struct_field *const fields = ralloc_array(state, glsl_struct_field,
decl_count);
unsigned i = 0;
- foreach_list_typed (ast_declarator_list, decl_list, link,
- &this->declarations) {
+ foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
const char *type_name;
decl_list->type->specifier->hir(instructions, state);
@@ -4055,7 +4078,6 @@ ast_struct_specifier::hir(exec_list *instructions,
* embedded structure definitions have been removed from the language.
*/
if (state->es_shader && decl_list->type->specifier->structure != NULL) {
- YYLTYPE loc = this->get_location();
_mesa_glsl_error(&loc, state, "Embedded structure definitions are "
"not allowed in GLSL ES 1.00.");
}
@@ -4065,25 +4087,88 @@ ast_struct_specifier::hir(exec_list *instructions,
foreach_list_typed (ast_declaration, decl, link,
&decl_list->declarations) {
- const struct glsl_type *field_type = decl_type;
+ /* From the GL_ARB_uniform_buffer_object spec:
+ *
+ * "Sampler types are not allowed inside of uniform
+ * blocks. All other types, arrays, and structures
+ * allowed for uniforms are allowed within a uniform
+ * block."
+ */
+ const struct glsl_type *field_type = decl_type;
+
+ if (is_interface && field_type->contains_sampler()) {
+ YYLTYPE loc = decl_list->get_location();
+ _mesa_glsl_error(&loc, state,
+ "Uniform in non-default uniform block contains sampler\n");
+ }
+
+ const struct ast_type_qualifier *const qual =
+ & decl_list->type->qualifier;
+ if (qual->flags.q.std140 ||
+ qual->flags.q.packed ||
+ qual->flags.q.shared) {
+ _mesa_glsl_error(&loc, state,
+ "uniform block layout qualifiers std140, packed, and "
+ "shared can only be applied to uniform blocks, not "
+ "members");
+ }
+
if (decl->is_array) {
- YYLTYPE loc = decl->get_location();
field_type = process_array_type(&loc, decl_type, decl->array_size,
state);
}
fields[i].type = (field_type != NULL)
? field_type : glsl_type::error_type;
fields[i].name = decl->identifier;
+
+ if (qual->flags.q.row_major || qual->flags.q.column_major) {
+ if (!field_type->is_matrix() && !field_type->is_record()) {
+ _mesa_glsl_error(&loc, state,
+ "uniform block layout qualifiers row_major and "
+ "column_major can only be applied to matrix and "
+ "structure types");
+ } else
+ validate_matrix_layout_for_type(state, &loc, field_type);
+ }
+
+ if (field_type->is_matrix() ||
+ (field_type->is_array() && field_type->fields.array->is_matrix())) {
+ fields[i].row_major = block_row_major;
+ if (qual->flags.q.row_major)
+ fields[i].row_major = true;
+ else if (qual->flags.q.column_major)
+ fields[i].row_major = false;
+ }
+
i++;
}
}
assert(i == decl_count);
+ *fields_ret = fields;
+ return decl_count;
+}
+
+
+ir_rvalue *
+ast_struct_specifier::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ YYLTYPE loc = this->get_location();
+ glsl_struct_field *fields;
+ unsigned decl_count =
+ ast_process_structure_or_interface_block(instructions,
+ state,
+ &this->declarations,
+ loc,
+ &fields,
+ false,
+ false);
+
const glsl_type *t =
glsl_type::get_record_instance(fields, decl_count, this->name);
- YYLTYPE loc = this->get_location();
if (!state->symbols->add_type(name, t)) {
_mesa_glsl_error(& loc, state, "struct `%s' previously defined", name);
} else {
@@ -4102,96 +4187,98 @@ ast_struct_specifier::hir(exec_list *instructions,
return NULL;
}
-static struct gl_uniform_block *
-get_next_uniform_block(struct _mesa_glsl_parse_state *state)
-{
- if (state->num_uniform_blocks >= state->uniform_block_array_size) {
- state->uniform_block_array_size *= 2;
- if (state->uniform_block_array_size <= 4)
- state->uniform_block_array_size = 4;
-
- state->uniform_blocks = reralloc(state,
- state->uniform_blocks,
- struct gl_uniform_block,
- state->uniform_block_array_size);
- }
-
- memset(&state->uniform_blocks[state->num_uniform_blocks],
- 0, sizeof(*state->uniform_blocks));
- return &state->uniform_blocks[state->num_uniform_blocks++];
-}
-
ir_rvalue *
ast_uniform_block::hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state)
{
+ YYLTYPE loc = this->get_location();
+
/* The ast_uniform_block has a list of ast_declarator_lists. We
* need to turn those into ir_variables with an association
* with this uniform block.
*/
- struct gl_uniform_block *ubo = get_next_uniform_block(state);
- ubo->Name = ralloc_strdup(state->uniform_blocks, this->block_name);
+ enum glsl_interface_packing packing;
+ if (this->layout.flags.q.shared) {
+ packing = GLSL_INTERFACE_PACKING_SHARED;
+ } else if (this->layout.flags.q.packed) {
+ packing = GLSL_INTERFACE_PACKING_PACKED;
+ } else {
+ /* The default layout is std140.
+ */
+ packing = GLSL_INTERFACE_PACKING_STD140;
+ }
- if (!state->symbols->add_uniform_block(ubo)) {
+ bool block_row_major = this->layout.flags.q.row_major;
+ exec_list declared_variables;
+ glsl_struct_field *fields;
+ unsigned int num_variables =
+ ast_process_structure_or_interface_block(&declared_variables,
+ state,
+ &this->declarations,
+ loc,
+ &fields,
+ true,
+ block_row_major);
+
+ const glsl_type *block_type =
+ glsl_type::get_interface_instance(fields,
+ num_variables,
+ packing,
+ this->block_name);
+
+ if (!state->symbols->add_type(block_type->name, block_type)) {
YYLTYPE loc = this->get_location();
_mesa_glsl_error(&loc, state, "Uniform block name `%s' already taken in "
- "the current scope.\n", ubo->Name);
+ "the current scope.\n", this->block_name);
}
- unsigned int num_variables = 0;
- foreach_list_typed(ast_declarator_list, decl_list, link, &declarations) {
- foreach_list_const(node, &decl_list->declarations) {
- num_variables++;
- }
- }
-
- bool block_row_major = this->layout.flags.q.row_major;
-
- ubo->Uniforms = rzalloc_array(state->uniform_blocks,
- struct gl_uniform_buffer_variable,
- num_variables);
-
- foreach_list_typed(ast_declarator_list, decl_list, link, &declarations) {
- exec_list declared_variables;
-
- decl_list->hir(&declared_variables, state);
+ /* Since interface blocks cannot contain statements, it should be
+ * impossible for the block to generate any instructions.
+ */
+ assert(declared_variables.is_empty());
- foreach_list_const(node, &declared_variables) {
- ir_variable *var = (ir_variable *)node;
+ /* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec
+ * says:
+ *
+ * "If an instance name (instance-name) is used, then it puts all the
+ * members inside a scope within its own name space, accessed with the
+ * field selector ( . ) operator (analogously to structures)."
+ */
+ if (this->instance_name) {
+ ir_variable *var;
- struct gl_uniform_buffer_variable *ubo_var =
- &ubo->Uniforms[ubo->NumUniforms++];
+ if (this->array_size != NULL) {
+ const glsl_type *block_array_type =
+ process_array_type(&loc, block_type, this->array_size, state);
- var->uniform_block = ubo - state->uniform_blocks;
+ var = new(state) ir_variable(block_array_type,
+ this->instance_name,
+ ir_var_uniform);
+ } else {
+ var = new(state) ir_variable(block_type,
+ this->instance_name,
+ ir_var_uniform);
+ }
- ubo_var->Name = ralloc_strdup(state->uniform_blocks, var->name);
- ubo_var->Type = var->type;
- ubo_var->Offset = 0; /* Assigned at link time. */
+ var->interface_type = block_type;
+ state->symbols->add_variable(var);
+ instructions->push_tail(var);
+ } else {
+ /* In order to have an array size, the block must also be declared with
+ * an instance name.
+ */
+ assert(this->array_size == NULL);
- if (var->type->is_matrix() ||
- (var->type->is_array() && var->type->fields.array->is_matrix())) {
- ubo_var->RowMajor = block_row_major;
- if (decl_list->type->qualifier.flags.q.row_major)
- ubo_var->RowMajor = true;
- else if (decl_list->type->qualifier.flags.q.column_major)
- ubo_var->RowMajor = false;
- }
+ for (unsigned i = 0; i < num_variables; i++) {
+ ir_variable *var =
+ new(state) ir_variable(fields[i].type,
+ ralloc_strdup(state, fields[i].name),
+ ir_var_uniform);
+ var->interface_type = block_type;
- /* From the GL_ARB_uniform_buffer_object spec:
- *
- * "Sampler types are not allowed inside of uniform
- * blocks. All other types, arrays, and structures
- * allowed for uniforms are allowed within a uniform
- * block."
- */
- if (var->type->contains_sampler()) {
- YYLTYPE loc = decl_list->get_location();
- _mesa_glsl_error(&loc, state,
- "Uniform in non-default uniform block contains sampler\n");
- }
+ state->symbols->add_variable(var);
+ instructions->push_tail(var);
}
-
- instructions->append_list(&declared_variables);
}
return NULL;
@@ -4222,7 +4309,7 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
gl_FragData_assigned = true;
else if (strncmp(var->name, "gl_", 3) != 0) {
if (state->target == fragment_shader &&
- (var->mode == ir_var_out || var->mode == ir_var_inout)) {
+ var->mode == ir_var_shader_out) {
user_defined_fs_output_assigned = true;
user_defined_fs_output = var;
}
diff --git a/mesalib/src/glsl/builtin_compiler/Makefile.am b/mesalib/src/glsl/builtin_compiler/Makefile.am
index 1a863b228..976640822 100644
--- a/mesalib/src/glsl/builtin_compiler/Makefile.am
+++ b/mesalib/src/glsl/builtin_compiler/Makefile.am
@@ -55,6 +55,7 @@ libglslcore_la_SOURCES = \
builtin_compiler_SOURCES = \
$(top_srcdir)/src/mesa/main/hash_table.c \
+ $(top_srcdir)/src/mesa/main/imports.c \
$(top_srcdir)/src/mesa/program/prog_hash_table.c\
$(top_srcdir)/src/mesa/program/symbol_table.c \
$(BUILTIN_COMPILER_CXX_FILES) \
diff --git a/mesalib/src/glsl/builtin_types.h b/mesalib/src/glsl/builtin_types.h
index a4c995fd1..c78c2d270 100644
--- a/mesalib/src/glsl/builtin_types.h
+++ b/mesalib/src/glsl/builtin_types.h
@@ -89,9 +89,9 @@ const glsl_type *const glsl_type::mat4_type = & builtin_core_types[14];
/*@{*/
static const struct glsl_struct_field gl_DepthRangeParameters_fields[] = {
- { glsl_type::float_type, "near" },
- { glsl_type::float_type, "far" },
- { glsl_type::float_type, "diff" },
+ { glsl_type::float_type, "near", false },
+ { glsl_type::float_type, "far", false },
+ { glsl_type::float_type, "diff", false },
};
const glsl_type glsl_type::builtin_structure_types[] = {
@@ -106,58 +106,58 @@ const glsl_type glsl_type::builtin_structure_types[] = {
/*@{*/
static const struct glsl_struct_field gl_PointParameters_fields[] = {
- { glsl_type::float_type, "size" },
- { glsl_type::float_type, "sizeMin" },
- { glsl_type::float_type, "sizeMax" },
- { glsl_type::float_type, "fadeThresholdSize" },
- { glsl_type::float_type, "distanceConstantAttenuation" },
- { glsl_type::float_type, "distanceLinearAttenuation" },
- { glsl_type::float_type, "distanceQuadraticAttenuation" },
+ { glsl_type::float_type, "size", false },
+ { glsl_type::float_type, "sizeMin", false },
+ { glsl_type::float_type, "sizeMax", false },
+ { glsl_type::float_type, "fadeThresholdSize", false },
+ { glsl_type::float_type, "distanceConstantAttenuation", false },
+ { glsl_type::float_type, "distanceLinearAttenuation", false },
+ { glsl_type::float_type, "distanceQuadraticAttenuation", false },
};
static const struct glsl_struct_field gl_MaterialParameters_fields[] = {
- { glsl_type::vec4_type, "emission" },
- { glsl_type::vec4_type, "ambient" },
- { glsl_type::vec4_type, "diffuse" },
- { glsl_type::vec4_type, "specular" },
- { glsl_type::float_type, "shininess" },
+ { glsl_type::vec4_type, "emission", false },
+ { glsl_type::vec4_type, "ambient", false },
+ { glsl_type::vec4_type, "diffuse", false },
+ { glsl_type::vec4_type, "specular", false },
+ { glsl_type::float_type, "shininess", false },
};
static const struct glsl_struct_field gl_LightSourceParameters_fields[] = {
- { glsl_type::vec4_type, "ambient" },
- { glsl_type::vec4_type, "diffuse" },
- { glsl_type::vec4_type, "specular" },
- { glsl_type::vec4_type, "position" },
- { glsl_type::vec4_type, "halfVector" },
- { glsl_type::vec3_type, "spotDirection" },
- { glsl_type::float_type, "spotExponent" },
- { glsl_type::float_type, "spotCutoff" },
- { glsl_type::float_type, "spotCosCutoff" },
- { glsl_type::float_type, "constantAttenuation" },
- { glsl_type::float_type, "linearAttenuation" },
- { glsl_type::float_type, "quadraticAttenuation" },
+ { glsl_type::vec4_type, "ambient", false },
+ { glsl_type::vec4_type, "diffuse", false },
+ { glsl_type::vec4_type, "specular", false },
+ { glsl_type::vec4_type, "position", false },
+ { glsl_type::vec4_type, "halfVector", false },
+ { glsl_type::vec3_type, "spotDirection", false },
+ { glsl_type::float_type, "spotExponent", false },
+ { glsl_type::float_type, "spotCutoff", false },
+ { glsl_type::float_type, "spotCosCutoff", false },
+ { glsl_type::float_type, "constantAttenuation", false },
+ { glsl_type::float_type, "linearAttenuation", false },
+ { glsl_type::float_type, "quadraticAttenuation", false },
};
static const struct glsl_struct_field gl_LightModelParameters_fields[] = {
- { glsl_type::vec4_type, "ambient" },
+ { glsl_type::vec4_type, "ambient", false },
};
static const struct glsl_struct_field gl_LightModelProducts_fields[] = {
- { glsl_type::vec4_type, "sceneColor" },
+ { glsl_type::vec4_type, "sceneColor", false },
};
static const struct glsl_struct_field gl_LightProducts_fields[] = {
- { glsl_type::vec4_type, "ambient" },
- { glsl_type::vec4_type, "diffuse" },
- { glsl_type::vec4_type, "specular" },
+ { glsl_type::vec4_type, "ambient", false },
+ { glsl_type::vec4_type, "diffuse", false },
+ { glsl_type::vec4_type, "specular", false },
};
static const struct glsl_struct_field gl_FogParameters_fields[] = {
- { glsl_type::vec4_type, "color" },
- { glsl_type::float_type, "density" },
- { glsl_type::float_type, "start" },
- { glsl_type::float_type, "end" },
- { glsl_type::float_type, "scale" },
+ { glsl_type::vec4_type, "color", false },
+ { glsl_type::float_type, "density", false },
+ { glsl_type::float_type, "start", false },
+ { glsl_type::float_type, "end", false },
+ { glsl_type::float_type, "scale", false },
};
const glsl_type glsl_type::builtin_110_deprecated_structure_types[] = {
diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp
index e7769419f..ccee7746e 100644
--- a/mesalib/src/glsl/builtin_variables.cpp
+++ b/mesalib/src/glsl/builtin_variables.cpp
@@ -47,18 +47,18 @@ struct builtin_variable {
};
static const builtin_variable builtin_core_vs_variables[] = {
- { ir_var_out, VERT_RESULT_HPOS, "vec4", "gl_Position" },
- { ir_var_out, VERT_RESULT_PSIZ, "float", "gl_PointSize" },
+ { ir_var_shader_out, VERT_RESULT_HPOS, "vec4", "gl_Position" },
+ { ir_var_shader_out, VERT_RESULT_PSIZ, "float", "gl_PointSize" },
};
static const builtin_variable builtin_core_fs_variables[] = {
- { ir_var_in, FRAG_ATTRIB_WPOS, "vec4", "gl_FragCoord" },
- { ir_var_in, FRAG_ATTRIB_FACE, "bool", "gl_FrontFacing" },
- { ir_var_out, FRAG_RESULT_COLOR, "vec4", "gl_FragColor" },
+ { ir_var_shader_in, FRAG_ATTRIB_WPOS, "vec4", "gl_FragCoord" },
+ { ir_var_shader_in, FRAG_ATTRIB_FACE, "bool", "gl_FrontFacing" },
+ { ir_var_shader_out, FRAG_RESULT_COLOR, "vec4", "gl_FragColor" },
};
static const builtin_variable builtin_100ES_fs_variables[] = {
- { ir_var_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" },
+ { ir_var_shader_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" },
};
static const builtin_variable builtin_300ES_vs_variables[] = {
@@ -66,46 +66,46 @@ static const builtin_variable builtin_300ES_vs_variables[] = {
};
static const builtin_variable builtin_300ES_fs_variables[] = {
- { ir_var_in, FRAG_ATTRIB_WPOS, "vec4", "gl_FragCoord" },
- { ir_var_in, FRAG_ATTRIB_FACE, "bool", "gl_FrontFacing" },
- { ir_var_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" },
- { ir_var_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" },
+ { ir_var_shader_in, FRAG_ATTRIB_WPOS, "vec4", "gl_FragCoord" },
+ { ir_var_shader_in, FRAG_ATTRIB_FACE, "bool", "gl_FrontFacing" },
+ { ir_var_shader_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" },
+ { ir_var_shader_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" },
};
static const builtin_variable builtin_110_fs_variables[] = {
- { ir_var_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" },
+ { ir_var_shader_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" },
};
static const builtin_variable builtin_110_deprecated_fs_variables[] = {
- { ir_var_in, FRAG_ATTRIB_COL0, "vec4", "gl_Color" },
- { ir_var_in, FRAG_ATTRIB_COL1, "vec4", "gl_SecondaryColor" },
- { ir_var_in, FRAG_ATTRIB_FOGC, "float", "gl_FogFragCoord" },
+ { ir_var_shader_in, FRAG_ATTRIB_COL0, "vec4", "gl_Color" },
+ { ir_var_shader_in, FRAG_ATTRIB_COL1, "vec4", "gl_SecondaryColor" },
+ { ir_var_shader_in, FRAG_ATTRIB_FOGC, "float", "gl_FogFragCoord" },
};
static const builtin_variable builtin_110_deprecated_vs_variables[] = {
- { ir_var_in, VERT_ATTRIB_POS, "vec4", "gl_Vertex" },
- { ir_var_in, VERT_ATTRIB_NORMAL, "vec3", "gl_Normal" },
- { ir_var_in, VERT_ATTRIB_COLOR0, "vec4", "gl_Color" },
- { ir_var_in, VERT_ATTRIB_COLOR1, "vec4", "gl_SecondaryColor" },
- { ir_var_in, VERT_ATTRIB_TEX0, "vec4", "gl_MultiTexCoord0" },
- { ir_var_in, VERT_ATTRIB_TEX1, "vec4", "gl_MultiTexCoord1" },
- { ir_var_in, VERT_ATTRIB_TEX2, "vec4", "gl_MultiTexCoord2" },
- { ir_var_in, VERT_ATTRIB_TEX3, "vec4", "gl_MultiTexCoord3" },
- { ir_var_in, VERT_ATTRIB_TEX4, "vec4", "gl_MultiTexCoord4" },
- { ir_var_in, VERT_ATTRIB_TEX5, "vec4", "gl_MultiTexCoord5" },
- { ir_var_in, VERT_ATTRIB_TEX6, "vec4", "gl_MultiTexCoord6" },
- { ir_var_in, VERT_ATTRIB_TEX7, "vec4", "gl_MultiTexCoord7" },
- { ir_var_in, VERT_ATTRIB_FOG, "float", "gl_FogCoord" },
- { ir_var_out, VERT_RESULT_CLIP_VERTEX, "vec4", "gl_ClipVertex" },
- { ir_var_out, VERT_RESULT_COL0, "vec4", "gl_FrontColor" },
- { ir_var_out, VERT_RESULT_BFC0, "vec4", "gl_BackColor" },
- { ir_var_out, VERT_RESULT_COL1, "vec4", "gl_FrontSecondaryColor" },
- { ir_var_out, VERT_RESULT_BFC1, "vec4", "gl_BackSecondaryColor" },
- { ir_var_out, VERT_RESULT_FOGC, "float", "gl_FogFragCoord" },
+ { ir_var_shader_in, VERT_ATTRIB_POS, "vec4", "gl_Vertex" },
+ { ir_var_shader_in, VERT_ATTRIB_NORMAL, "vec3", "gl_Normal" },
+ { ir_var_shader_in, VERT_ATTRIB_COLOR0, "vec4", "gl_Color" },
+ { ir_var_shader_in, VERT_ATTRIB_COLOR1, "vec4", "gl_SecondaryColor" },
+ { ir_var_shader_in, VERT_ATTRIB_TEX0, "vec4", "gl_MultiTexCoord0" },
+ { ir_var_shader_in, VERT_ATTRIB_TEX1, "vec4", "gl_MultiTexCoord1" },
+ { ir_var_shader_in, VERT_ATTRIB_TEX2, "vec4", "gl_MultiTexCoord2" },
+ { ir_var_shader_in, VERT_ATTRIB_TEX3, "vec4", "gl_MultiTexCoord3" },
+ { ir_var_shader_in, VERT_ATTRIB_TEX4, "vec4", "gl_MultiTexCoord4" },
+ { ir_var_shader_in, VERT_ATTRIB_TEX5, "vec4", "gl_MultiTexCoord5" },
+ { ir_var_shader_in, VERT_ATTRIB_TEX6, "vec4", "gl_MultiTexCoord6" },
+ { ir_var_shader_in, VERT_ATTRIB_TEX7, "vec4", "gl_MultiTexCoord7" },
+ { ir_var_shader_in, VERT_ATTRIB_FOG, "float", "gl_FogCoord" },
+ { ir_var_shader_out, VERT_RESULT_CLIP_VERTEX, "vec4", "gl_ClipVertex" },
+ { ir_var_shader_out, VERT_RESULT_COL0, "vec4", "gl_FrontColor" },
+ { ir_var_shader_out, VERT_RESULT_BFC0, "vec4", "gl_BackColor" },
+ { ir_var_shader_out, VERT_RESULT_COL1, "vec4", "gl_FrontSecondaryColor" },
+ { ir_var_shader_out, VERT_RESULT_BFC1, "vec4", "gl_BackSecondaryColor" },
+ { ir_var_shader_out, VERT_RESULT_FOGC, "float", "gl_FogFragCoord" },
};
static const builtin_variable builtin_120_fs_variables[] = {
- { ir_var_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" },
+ { ir_var_shader_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" },
};
static const builtin_variable builtin_130_vs_variables[] = {
@@ -403,16 +403,18 @@ add_variable(exec_list *instructions, glsl_symbol_table *symtab,
switch (var->mode) {
case ir_var_auto:
- case ir_var_in:
- case ir_var_const_in:
+ case ir_var_shader_in:
case ir_var_uniform:
case ir_var_system_value:
var->read_only = true;
break;
- case ir_var_inout:
- case ir_var_out:
+ case ir_var_shader_out:
break;
default:
+ /* The only variables that are added using this function should be
+ * uniforms, shader inputs, shader outputs, constants (which use
+ * ir_var_auto), and system values.
+ */
assert(0);
break;
}
@@ -752,7 +754,8 @@ generate_110_vs_variables(exec_list *instructions,
glsl_type::get_array_instance(glsl_type::vec4_type, 0);
add_variable(instructions, state->symbols,
- "gl_TexCoord", vec4_array_type, ir_var_out, VERT_RESULT_TEX0);
+ "gl_TexCoord", vec4_array_type, ir_var_shader_out,
+ VERT_RESULT_TEX0);
generate_ARB_draw_buffers_variables(instructions, state, false,
vertex_shader);
@@ -812,7 +815,7 @@ generate_130_vs_variables(exec_list *instructions,
glsl_type::get_array_instance(glsl_type::float_type, 0);
add_variable(instructions, state->symbols,
- "gl_ClipDistance", clip_distance_array_type, ir_var_out,
+ "gl_ClipDistance", clip_distance_array_type, ir_var_shader_out,
VERT_RESULT_CLIP_DIST0);
}
@@ -937,7 +940,8 @@ generate_110_fs_variables(exec_list *instructions,
glsl_type::get_array_instance(glsl_type::vec4_type, 0);
add_variable(instructions, state->symbols,
- "gl_TexCoord", vec4_array_type, ir_var_in, FRAG_ATTRIB_TEX0);
+ "gl_TexCoord", vec4_array_type, ir_var_shader_in,
+ FRAG_ATTRIB_TEX0);
generate_ARB_draw_buffers_variables(instructions, state, false,
fragment_shader);
@@ -969,7 +973,7 @@ generate_ARB_draw_buffers_variables(exec_list *instructions,
ir_variable *const fd =
add_variable(instructions, state->symbols,
"gl_FragData", vec4_array_type,
- ir_var_out, FRAG_RESULT_DATA0);
+ ir_var_shader_out, FRAG_RESULT_DATA0);
if (warn)
fd->warn_extension = "GL_ARB_draw_buffers";
@@ -1026,7 +1030,7 @@ generate_ARB_shader_stencil_export_variables(exec_list *instructions,
ir_variable *const fd =
add_variable(instructions, state->symbols,
"gl_FragStencilRefARB", glsl_type::int_type,
- ir_var_out, FRAG_RESULT_STENCIL);
+ ir_var_shader_out, FRAG_RESULT_STENCIL);
if (warn)
fd->warn_extension = "GL_ARB_shader_stencil_export";
@@ -1042,7 +1046,7 @@ generate_AMD_shader_stencil_export_variables(exec_list *instructions,
ir_variable *const fd =
add_variable(instructions, state->symbols,
"gl_FragStencilRefAMD", glsl_type::int_type,
- ir_var_out, FRAG_RESULT_STENCIL);
+ ir_var_shader_out, FRAG_RESULT_STENCIL);
if (warn)
fd->warn_extension = "GL_AMD_shader_stencil_export";
@@ -1083,7 +1087,7 @@ generate_fs_clipdistance(exec_list *instructions,
glsl_type::get_array_instance(glsl_type::float_type, 0);
add_variable(instructions, state->symbols,
- "gl_ClipDistance", clip_distance_array_type, ir_var_in,
+ "gl_ClipDistance", clip_distance_array_type, ir_var_shader_in,
FRAG_ATTRIB_CLIP_DIST0);
}
diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y
index 8fba923a2..e927c7cb7 100644
--- a/mesalib/src/glsl/glcpp/glcpp-parse.y
+++ b/mesalib/src/glsl/glcpp/glcpp-parse.y
@@ -1227,6 +1227,9 @@ glcpp_parser_create (const struct gl_extensions *extensions, int api)
if (extensions->ARB_texture_cube_map_array)
add_builtin_define(parser, "GL_ARB_texture_cube_map_array", 1);
+
+ if (extensions->ARB_shading_language_packing)
+ add_builtin_define(parser, "GL_ARB_shading_language_packing", 1);
}
}
diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll
index 2f66c5828..ddc9f8073 100644
--- a/mesalib/src/glsl/glsl_lexer.ll
+++ b/mesalib/src/glsl/glsl_lexer.ll
@@ -399,23 +399,23 @@ layout {
}
[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?[fF]? {
- yylval->real = glsl_strtod(yytext, NULL);
+ yylval->real = glsl_strtof(yytext, NULL);
return FLOATCONSTANT;
}
\.[0-9]+([eE][+-]?[0-9]+)?[fF]? {
- yylval->real = glsl_strtod(yytext, NULL);
+ yylval->real = glsl_strtof(yytext, NULL);
return FLOATCONSTANT;
}
[0-9]+\.([eE][+-]?[0-9]+)?[fF]? {
- yylval->real = glsl_strtod(yytext, NULL);
+ yylval->real = glsl_strtof(yytext, NULL);
return FLOATCONSTANT;
}
[0-9]+[eE][+-]?[0-9]+[fF]? {
- yylval->real = glsl_strtod(yytext, NULL);
+ yylval->real = glsl_strtof(yytext, NULL);
return FLOATCONSTANT;
}
[0-9]+[fF] {
- yylval->real = glsl_strtod(yytext, NULL);
+ yylval->real = glsl_strtof(yytext, NULL);
return FLOATCONSTANT;
}
diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy
index 88aae64d4..154ce2d09 100644
--- a/mesalib/src/glsl/glsl_parser.yy
+++ b/mesalib/src/glsl/glsl_parser.yy
@@ -79,6 +79,7 @@ static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg)
ast_case_label_list *case_label_list;
ast_case_statement *case_statement;
ast_case_statement_list *case_statement_list;
+ ast_uniform_block *uniform_block;
struct {
ast_node *cond;
@@ -112,6 +113,7 @@ static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg)
%token STRUCT VOID_TOK WHILE
%token <identifier> IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER
%type <identifier> any_identifier
+%type <uniform_block> instance_name_opt
%token <real> FLOATCONSTANT
%token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT
%token <identifier> FIELD_SELECTION
@@ -221,6 +223,7 @@ static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg)
%type <node> declaration_statement
%type <node> jump_statement
%type <node> uniform_block
+%type <uniform_block> basic_uniform_block
%type <struct_specifier> struct_specifier
%type <declarator_list> struct_declaration_list
%type <declarator_list> struct_declaration
@@ -1884,31 +1887,27 @@ function_definition:
/* layout_qualifieropt is packed into this rule */
uniform_block:
- UNIFORM NEW_IDENTIFIER '{' member_list '}' ';'
+ basic_uniform_block
{
- void *ctx = state;
- $$ = new(ctx) ast_uniform_block(*state->default_uniform_qualifier,
- $2, $4);
-
- if (!state->ARB_uniform_buffer_object_enable) {
- _mesa_glsl_error(& @1, state,
- "#version 140 / GL_ARB_uniform_buffer_object "
- "required for defining uniform blocks\n");
- } else if (state->ARB_uniform_buffer_object_warn) {
- _mesa_glsl_warning(& @1, state,
- "#version 140 / GL_ARB_uniform_buffer_object "
- "required for defining uniform blocks\n");
- }
+ $$ = $1;
}
- | layout_qualifier UNIFORM NEW_IDENTIFIER '{' member_list '}' ';'
+ | layout_qualifier basic_uniform_block
{
- void *ctx = state;
-
- ast_type_qualifier qual = *state->default_uniform_qualifier;
- if (!qual.merge_qualifier(& @1, state, $1)) {
+ ast_uniform_block *block = $2;
+ if (!block->layout.merge_qualifier(& @1, state, $1)) {
YYERROR;
}
- $$ = new(ctx) ast_uniform_block(qual, $3, $5);
+ $$ = block;
+ }
+ ;
+
+basic_uniform_block:
+ UNIFORM NEW_IDENTIFIER '{' member_list '}' instance_name_opt ';'
+ {
+ ast_uniform_block *const block = $6;
+
+ block->block_name = $2;
+ block->declarations.push_degenerate_list_at_head(& $4->link);
if (!state->ARB_uniform_buffer_object_enable) {
_mesa_glsl_error(& @1, state,
@@ -1919,6 +1918,49 @@ uniform_block:
"#version 140 / GL_ARB_uniform_buffer_object "
"required for defining uniform blocks\n");
}
+
+ /* Since block arrays require names, and both features are added in
+ * the same language versions, we don't have to explicitly
+ * version-check both things.
+ */
+ if (block->instance_name != NULL
+ && !(state->language_version == 300 && state->es_shader)) {
+ _mesa_glsl_error(& @1, state,
+ "#version 300 es required for using uniform "
+ "blocks with an instance name\n");
+ }
+
+ $$ = block;
+ }
+ ;
+
+instance_name_opt:
+ /* empty */
+ {
+ $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier,
+ NULL,
+ NULL);
+ }
+ | NEW_IDENTIFIER
+ {
+ $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier,
+ $1,
+ NULL);
+ }
+ | NEW_IDENTIFIER '[' constant_expression ']'
+ {
+ $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier,
+ $1,
+ $3);
+ }
+ | NEW_IDENTIFIER '[' ']'
+ {
+ _mesa_glsl_error(& @1, state,
+ "instance block arrays must be explicitly sized\n");
+
+ $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier,
+ $1,
+ NULL);
}
;
diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp
index b460c8619..c8dbc89ff 100644
--- a/mesalib/src/glsl/glsl_parser_extras.cpp
+++ b/mesalib/src/glsl/glsl_parser_extras.cpp
@@ -462,6 +462,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(ARB_uniform_buffer_object, true, false, true, true, false, ARB_uniform_buffer_object),
EXT(OES_standard_derivatives, false, false, true, false, true, OES_standard_derivatives),
EXT(ARB_texture_cube_map_array, true, false, true, true, false, ARB_texture_cube_map_array),
+ EXT(ARB_shading_language_packing, true, false, true, true, false, ARB_shading_language_packing),
};
#undef EXT
diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h
index 2e6bb0b0a..53df149d8 100644
--- a/mesalib/src/glsl/glsl_parser_extras.h
+++ b/mesalib/src/glsl/glsl_parser_extras.h
@@ -272,6 +272,8 @@ struct _mesa_glsl_parse_state {
bool OES_standard_derivatives_warn;
bool ARB_texture_cube_map_array_enable;
bool ARB_texture_cube_map_array_warn;
+ bool ARB_shading_language_packing_enable;
+ bool ARB_shading_language_packing_warn;
/*@}*/
/** Extensions supported by the OpenGL implementation. */
diff --git a/mesalib/src/glsl/glsl_symbol_table.cpp b/mesalib/src/glsl/glsl_symbol_table.cpp
index eb275b12e..8d34547c6 100644
--- a/mesalib/src/glsl/glsl_symbol_table.cpp
+++ b/mesalib/src/glsl/glsl_symbol_table.cpp
@@ -41,15 +41,13 @@ public:
ralloc_free(entry);
}
- symbol_table_entry(ir_variable *v) : v(v), f(0), t(0), u(0) {}
- symbol_table_entry(ir_function *f) : v(0), f(f), t(0), u(0) {}
- symbol_table_entry(const glsl_type *t) : v(0), f(0), t(t), u(0) {}
- symbol_table_entry(struct gl_uniform_block *u) : v(0), f(0), t(0), u(u) {}
+ symbol_table_entry(ir_variable *v) : v(v), f(0), t(0) {}
+ symbol_table_entry(ir_function *f) : v(0), f(f), t(0) {}
+ symbol_table_entry(const glsl_type *t) : v(0), f(0), t(t) {}
ir_variable *v;
ir_function *f;
const glsl_type *t;
- struct gl_uniform_block *u;
};
glsl_symbol_table::glsl_symbol_table()
@@ -134,12 +132,6 @@ bool glsl_symbol_table::add_function(ir_function *f)
return _mesa_symbol_table_add_symbol(table, -1, f->name, entry) == 0;
}
-bool glsl_symbol_table::add_uniform_block(struct gl_uniform_block *u)
-{
- symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(u);
- return _mesa_symbol_table_add_symbol(table, -1, u->Name, entry) == 0;
-}
-
void glsl_symbol_table::add_global_function(ir_function *f)
{
symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f);
diff --git a/mesalib/src/glsl/glsl_symbol_table.h b/mesalib/src/glsl/glsl_symbol_table.h
index f95fb8a01..9f5602787 100644
--- a/mesalib/src/glsl/glsl_symbol_table.h
+++ b/mesalib/src/glsl/glsl_symbol_table.h
@@ -99,7 +99,6 @@ public:
bool add_variable(ir_variable *v);
bool add_type(const char *name, const glsl_type *t);
bool add_function(ir_function *f);
- bool add_uniform_block(struct gl_uniform_block *u);
/*@}*/
/**
diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp
index 71b185027..4a2c87907 100644
--- a/mesalib/src/glsl/glsl_types.cpp
+++ b/mesalib/src/glsl/glsl_types.cpp
@@ -34,6 +34,7 @@ extern "C" {
hash_table *glsl_type::array_types = NULL;
hash_table *glsl_type::record_types = NULL;
+hash_table *glsl_type::interface_types = NULL;
void *glsl_type::mem_ctx = NULL;
void
@@ -51,7 +52,7 @@ glsl_type::glsl_type(GLenum gl_type,
gl_type(gl_type),
base_type(base_type),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
- sampler_type(0),
+ sampler_type(0), interface_packing(0),
vector_elements(vector_elements), matrix_columns(matrix_columns),
length(0)
{
@@ -69,7 +70,7 @@ glsl_type::glsl_type(GLenum gl_type,
gl_type(gl_type),
base_type(GLSL_TYPE_SAMPLER),
sampler_dimensionality(dim), sampler_shadow(shadow),
- sampler_array(array), sampler_type(type),
+ sampler_array(array), sampler_type(type), interface_packing(0),
vector_elements(0), matrix_columns(0),
length(0)
{
@@ -82,7 +83,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
const char *name) :
base_type(GLSL_TYPE_STRUCT),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
- sampler_type(0),
+ sampler_type(0), interface_packing(0),
vector_elements(0), matrix_columns(0),
length(num_fields)
{
@@ -96,6 +97,29 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
this->fields.structure[i].type = fields[i].type;
this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
fields[i].name);
+ this->fields.structure[i].row_major = fields[i].row_major;
+ }
+}
+
+glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+ enum glsl_interface_packing packing, const char *name) :
+ base_type(GLSL_TYPE_INTERFACE),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing((unsigned) packing),
+ vector_elements(0), matrix_columns(0),
+ length(num_fields)
+{
+ unsigned int i;
+
+ init_ralloc_type_ctx();
+ this->name = ralloc_strdup(this->mem_ctx, name);
+ this->fields.structure = ralloc_array(this->mem_ctx,
+ glsl_struct_field, length);
+ for (i = 0; i < length; i++) {
+ this->fields.structure[i].type = fields[i].type;
+ this->fields.structure[i].name = ralloc_strdup(this->fields.structure,
+ fields[i].name);
+ this->fields.structure[i].row_major = fields[i].row_major;
}
}
@@ -429,7 +453,7 @@ _mesa_glsl_release_types(void)
glsl_type::glsl_type(const glsl_type *array, unsigned length) :
base_type(GLSL_TYPE_ARRAY),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
- sampler_type(0),
+ sampler_type(0), interface_packing(0),
vector_elements(0), matrix_columns(0),
name(NULL), length(length)
{
@@ -561,12 +585,18 @@ glsl_type::record_key_compare(const void *a, const void *b)
if (key1->length != key2->length)
return 1;
+ if (key1->interface_packing != key2->interface_packing)
+ return 1;
+
for (unsigned i = 0; i < key1->length; i++) {
if (key1->fields.structure[i].type != key2->fields.structure[i].type)
return 1;
if (strcmp(key1->fields.structure[i].name,
key2->fields.structure[i].name) != 0)
return 1;
+ if (key1->fields.structure[i].row_major
+ != key2->fields.structure[i].row_major)
+ return 1;
}
return 0;
@@ -621,9 +651,37 @@ glsl_type::get_record_instance(const glsl_struct_field *fields,
const glsl_type *
+glsl_type::get_interface_instance(const glsl_struct_field *fields,
+ unsigned num_fields,
+ enum glsl_interface_packing packing,
+ const char *name)
+{
+ const glsl_type key(fields, num_fields, packing, name);
+
+ if (interface_types == NULL) {
+ interface_types = hash_table_ctor(64, record_key_hash, record_key_compare);
+ }
+
+ const glsl_type *t = (glsl_type *) hash_table_find(interface_types, & key);
+ if (t == NULL) {
+ t = new glsl_type(fields, num_fields, packing, name);
+
+ hash_table_insert(interface_types, (void *) t, t);
+ }
+
+ assert(t->base_type == GLSL_TYPE_INTERFACE);
+ assert(t->length == num_fields);
+ assert(strcmp(t->name, name) == 0);
+
+ return t;
+}
+
+
+const glsl_type *
glsl_type::field_type(const char *name) const
{
- if (this->base_type != GLSL_TYPE_STRUCT)
+ if (this->base_type != GLSL_TYPE_STRUCT
+ && this->base_type != GLSL_TYPE_INTERFACE)
return error_type;
for (unsigned i = 0; i < this->length; i++) {
@@ -638,7 +696,8 @@ glsl_type::field_type(const char *name) const
int
glsl_type::field_index(const char *name) const
{
- if (this->base_type != GLSL_TYPE_STRUCT)
+ if (this->base_type != GLSL_TYPE_STRUCT
+ && this->base_type != GLSL_TYPE_INTERFACE)
return -1;
for (unsigned i = 0; i < this->length; i++) {
@@ -660,7 +719,8 @@ glsl_type::component_slots() const
case GLSL_TYPE_BOOL:
return this->components();
- case GLSL_TYPE_STRUCT: {
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE: {
unsigned size = 0;
for (unsigned i = 0; i < this->length; i++)
@@ -672,9 +732,13 @@ glsl_type::component_slots() const
case GLSL_TYPE_ARRAY:
return this->length * this->fields.array->component_slots();
- default:
- return 0;
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ break;
}
+
+ return 0;
}
bool
@@ -799,12 +863,6 @@ glsl_type::std140_base_alignment(bool row_major) const
return -1;
}
-static unsigned
-align(unsigned val, unsigned align)
-{
- return (val + align - 1) / align * align;
-}
-
unsigned
glsl_type::std140_size(bool row_major) const
{
@@ -906,11 +964,11 @@ glsl_type::std140_size(bool row_major) const
for (unsigned i = 0; i < this->length; i++) {
const struct glsl_type *field_type = this->fields.structure[i].type;
unsigned align = field_type->std140_base_alignment(row_major);
- size = (size + align - 1) / align * align;
+ size = glsl_align(size, align);
size += field_type->std140_size(row_major);
}
- size = align(size,
- this->fields.structure[0].type->std140_base_alignment(row_major));
+ size = glsl_align(size,
+ this->fields.structure[0].type->std140_base_alignment(row_major));
return size;
}
diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h
index d6f5c105e..b0db2bf11 100644
--- a/mesalib/src/glsl/glsl_types.h
+++ b/mesalib/src/glsl/glsl_types.h
@@ -54,6 +54,7 @@ enum glsl_base_type {
GLSL_TYPE_BOOL,
GLSL_TYPE_SAMPLER,
GLSL_TYPE_STRUCT,
+ GLSL_TYPE_INTERFACE,
GLSL_TYPE_ARRAY,
GLSL_TYPE_VOID,
GLSL_TYPE_ERROR
@@ -69,6 +70,12 @@ enum glsl_sampler_dim {
GLSL_SAMPLER_DIM_EXTERNAL
};
+enum glsl_interface_packing {
+ GLSL_INTERFACE_PACKING_STD140,
+ GLSL_INTERFACE_PACKING_SHARED,
+ GLSL_INTERFACE_PACKING_PACKED
+};
+
#ifdef __cplusplus
#include "GL/gl.h"
#include "ralloc.h"
@@ -84,6 +91,7 @@ struct glsl_type {
* only \c GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT,
* and \c GLSL_TYPE_UINT are valid.
*/
+ unsigned interface_packing:2;
/* Callers of this ralloc-based new need not call delete. It's
* easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */
@@ -130,8 +138,9 @@ struct glsl_type {
/**
* For \c GLSL_TYPE_ARRAY, this is the length of the array. For
- * \c GLSL_TYPE_STRUCT, it is the number of elements in the structure and
- * the number of values pointed to by \c fields.structure (below).
+ * \c GLSL_TYPE_STRUCT or \c GLSL_TYPE_INTERFACE, it is the number of
+ * elements in the structure and the number of values pointed to by
+ * \c fields.structure (below).
*/
unsigned length;
@@ -232,6 +241,14 @@ struct glsl_type {
const char *name);
/**
+ * Get the instance of an interface block type
+ */
+ static const glsl_type *get_interface_instance(const glsl_struct_field *fields,
+ unsigned num_fields,
+ enum glsl_interface_packing packing,
+ const char *name);
+
+ /**
* Query the total number of scalars that make up a scalar, vector or matrix
*/
unsigned components() const
@@ -394,6 +411,14 @@ struct glsl_type {
}
/**
+ * Query whether or not a type is an interface
+ */
+ bool is_interface() const
+ {
+ return base_type == GLSL_TYPE_INTERFACE;
+ }
+
+ /**
* Query whether or not a type is the void type singleton.
*/
bool is_void() const
@@ -491,6 +516,10 @@ private:
glsl_type(const glsl_struct_field *fields, unsigned num_fields,
const char *name);
+ /** Constructor for interface types */
+ glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+ enum glsl_interface_packing packing, const char *name);
+
/** Constructor for array types */
glsl_type(const glsl_type *array, unsigned length);
@@ -500,6 +529,9 @@ private:
/** Hash table containing the known record types. */
static struct hash_table *record_types;
+ /** Hash table containing the known interface types. */
+ static struct hash_table *interface_types;
+
static int record_key_compare(const void *a, const void *b);
static unsigned record_key_hash(const void *key);
@@ -566,8 +598,15 @@ private:
struct glsl_struct_field {
const struct glsl_type *type;
const char *name;
+ bool row_major;
};
+static inline unsigned int
+glsl_align(unsigned int a, unsigned int align)
+{
+ return (a + align - 1) / align * align;
+}
+
#endif /* __cplusplus */
#endif /* GLSL_TYPES_H */
diff --git a/mesalib/src/glsl/hir_field_selection.cpp b/mesalib/src/glsl/hir_field_selection.cpp
index ac416d5da..0035a5f81 100644
--- a/mesalib/src/glsl/hir_field_selection.cpp
+++ b/mesalib/src/glsl/hir_field_selection.cpp
@@ -61,7 +61,8 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr,
_mesa_glsl_error(& loc, state, "Invalid swizzle / mask `%s'",
expr->primary_expression.identifier);
}
- } else if (op->type->base_type == GLSL_TYPE_STRUCT) {
+ } else if (op->type->base_type == GLSL_TYPE_STRUCT
+ || op->type->base_type == GLSL_TYPE_INTERFACE) {
result = new(ctx) ir_dereference_record(op,
expr->primary_expression.identifier);
diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp
index 703f5ec58..954995db3 100644
--- a/mesalib/src/glsl/ir.cpp
+++ b/mesalib/src/glsl/ir.cpp
@@ -306,6 +306,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
break;
case ir_unop_noise:
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
this->type = glsl_type::float_type;
break;
@@ -313,6 +315,25 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
this->type = glsl_type::bool_type;
break;
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_snorm_4x8:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_unorm_4x8:
+ case ir_unop_pack_half_2x16:
+ this->type = glsl_type::uint_type;
+ break;
+
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
+ this->type = glsl_type::vec2_type;
+ break;
+
+ case ir_unop_unpack_snorm_4x8:
+ case ir_unop_unpack_unorm_4x8:
+ this->type = glsl_type::vec4_type;
+ break;
+
default:
assert(!"not reached: missing automatic type setup for ir_expression");
this->type = op0->type;
@@ -364,10 +385,15 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)
case ir_binop_bit_and:
case ir_binop_bit_xor:
case ir_binop_bit_or:
+ assert(!op0->type->is_matrix());
+ assert(!op1->type->is_matrix());
if (op0->type->is_scalar()) {
- this->type = op1->type;
+ this->type = op1->type;
} else if (op1->type->is_scalar()) {
- this->type = op0->type;
+ this->type = op0->type;
+ } else {
+ assert(op0->type->vector_elements == op1->type->vector_elements);
+ this->type = op0->type;
}
break;
@@ -386,6 +412,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)
this->type = glsl_type::float_type;
break;
+ case ir_binop_pack_half_2x16_split:
+ this->type = glsl_type::uint_type;
+ break;
+
case ir_binop_lshift:
case ir_binop_rshift:
this->type = op0->type;
@@ -454,6 +484,18 @@ static const char *const operator_strs[] = {
"cos_reduced",
"dFdx",
"dFdy",
+ "packSnorm2x16",
+ "packSnorm4x8",
+ "packUnorm2x16",
+ "packUnorm4x8",
+ "packHalf2x16",
+ "unpackSnorm2x16",
+ "unpackSnorm4x8",
+ "unpackUnorm2x16",
+ "unpackUnorm4x8",
+ "unpackHalf2x16",
+ "unpackHalf2x16_split_x",
+ "unpackHalf2x16_split_y",
"noise",
"+",
"-",
@@ -480,6 +522,7 @@ static const char *const operator_strs[] = {
"min",
"max",
"pow",
+ "packHalf2x16_split",
"ubo_load",
"vector",
};
@@ -1493,7 +1536,6 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
this->has_initializer = false;
this->location = -1;
this->location_frac = 0;
- this->uniform_block = -1;
this->warn_extension = NULL;
this->constant_value = NULL;
this->constant_initializer = NULL;
@@ -1553,8 +1595,8 @@ modes_match(unsigned a, unsigned b)
return true;
/* Accept "in" vs. "const in" */
- if ((a == ir_var_const_in && b == ir_var_in) ||
- (b == ir_var_const_in && a == ir_var_in))
+ if ((a == ir_var_const_in && b == ir_var_function_in) ||
+ (b == ir_var_const_in && a == ir_var_function_in))
return true;
return false;
diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h
index 85fc5ce95..efd80dad8 100644
--- a/mesalib/src/glsl/ir.h
+++ b/mesalib/src/glsl/ir.h
@@ -265,9 +265,11 @@ protected:
enum ir_variable_mode {
ir_var_auto = 0, /**< Function local variables and globals. */
ir_var_uniform, /**< Variable declared as a uniform. */
- ir_var_in,
- ir_var_out,
- ir_var_inout,
+ ir_var_shader_in,
+ ir_var_shader_out,
+ ir_var_function_in,
+ ir_var_function_out,
+ ir_var_function_inout,
ir_var_const_in, /**< "in" param that must be a constant expression */
ir_var_system_value, /**< Ex: front-face, instance-id, etc. */
ir_var_temporary /**< Temporary variable generated during compilation. */
@@ -348,6 +350,41 @@ public:
glsl_interp_qualifier determine_interpolation_mode(bool flat_shade);
/**
+ * Determine whether or not a variable is part of a uniform block.
+ */
+ inline bool is_in_uniform_block() const
+ {
+ return this->mode == ir_var_uniform && this->interface_type != NULL;
+ }
+
+ /**
+ * Determine whether or not a variable is the declaration of an interface
+ * block
+ *
+ * For the first declaration below, there will be an \c ir_variable named
+ * "instance" whose type and whose interface_type will be the same
+ * \c glsl_type. For the second declaration, there will be an \c ir_variable
+ * named "f" whose type is float and whose interface_type is B2.
+ *
+ * "instance" is an interface instance variable, but "f" is not.
+ *
+ * uniform B1 {
+ * float f;
+ * } instance;
+ *
+ * uniform B2 {
+ * float f;
+ * };
+ */
+ inline bool is_interface_instance() const
+ {
+ const glsl_type *const t = this->type;
+
+ return (t == this->interface_type)
+ || (t->is_array() && t->fields.array == this->interface_type);
+ }
+
+ /**
* Declared type of the variable
*/
const struct glsl_type *type;
@@ -401,7 +438,7 @@ public:
*
* \sa ir_variable_mode
*/
- unsigned mode:3;
+ unsigned mode:4;
/**
* Interpolation mode for shader inputs / outputs
@@ -481,16 +518,6 @@ public:
int location;
/**
- * Uniform block number for uniforms.
- *
- * This index is into the shader's list of uniform blocks, not the
- * linked program's merged list.
- *
- * If the variable is not in a uniform block, the value will be -1.
- */
- int uniform_block;
-
- /**
* output index for dual source blending.
*/
int index;
@@ -530,6 +557,14 @@ public:
* objects.
*/
ir_constant *constant_initializer;
+
+ /**
+ * For variables that are in an interface block or are an instance of an
+ * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
+ *
+ * \sa ir_variable::location
+ */
+ const glsl_type *interface_type;
};
@@ -908,7 +943,7 @@ public:
unsigned write_mask:4;
};
-/* Update ir_expression::num_operands() and operator_strs when
+/* Update ir_expression::get_num_operands() and operator_strs when
* updating this list.
*/
enum ir_expression_operation {
@@ -969,6 +1004,32 @@ enum ir_expression_operation {
ir_unop_dFdy,
/*@}*/
+ /**
+ * \name Floating point pack and unpack operations.
+ */
+ /*@{*/
+ ir_unop_pack_snorm_2x16,
+ ir_unop_pack_snorm_4x8,
+ ir_unop_pack_unorm_2x16,
+ ir_unop_pack_unorm_4x8,
+ ir_unop_pack_half_2x16,
+ ir_unop_unpack_snorm_2x16,
+ ir_unop_unpack_snorm_4x8,
+ ir_unop_unpack_unorm_2x16,
+ ir_unop_unpack_unorm_4x8,
+ ir_unop_unpack_half_2x16,
+ /*@}*/
+
+ /**
+ * \name Lowered floating point unpacking operations.
+ *
+ * \see lower_packing_builtins_visitor::split_unpack_half_2x16
+ */
+ /*@{*/
+ ir_unop_unpack_half_2x16_split_x,
+ ir_unop_unpack_half_2x16_split_y,
+ /*@}*/
+
ir_unop_noise,
/**
@@ -1036,6 +1097,15 @@ enum ir_expression_operation {
ir_binop_pow,
/**
+ * \name Lowered floating point packing operations.
+ *
+ * \see lower_packing_builtins_visitor::split_pack_half_2x16
+ */
+ /*@{*/
+ ir_binop_pack_half_2x16_split,
+ /*@}*/
+
+ /**
* Load a value the size of a given GLSL type from a uniform block.
*
* operand0 is the ir_constant uniform block index in the linked shader.
diff --git a/mesalib/src/glsl/ir_builder.cpp b/mesalib/src/glsl/ir_builder.cpp
index c62f0b115..8fb30a02a 100644
--- a/mesalib/src/glsl/ir_builder.cpp
+++ b/mesalib/src/glsl/ir_builder.cpp
@@ -188,11 +188,27 @@ ir_expression *mul(operand a, operand b)
return expr(ir_binop_mul, a, b);
}
+ir_expression *div(operand a, operand b)
+{
+ return expr(ir_binop_div, a, b);
+}
+
+ir_expression *round_even(operand a)
+{
+ return expr(ir_unop_round_even, a);
+}
+
ir_expression *dot(operand a, operand b)
{
return expr(ir_binop_dot, a, b);
}
+ir_expression*
+clamp(operand a, operand b, operand c)
+{
+ return expr(ir_binop_min, expr(ir_binop_max, a, b), c);
+}
+
ir_expression *
saturate(operand a)
{
@@ -203,4 +219,147 @@ saturate(operand a)
new(mem_ctx) ir_constant(0.0f));
}
+ir_expression*
+equal(operand a, operand b)
+{
+ return expr(ir_binop_equal, a, b);
+}
+
+ir_expression*
+less(operand a, operand b)
+{
+ return expr(ir_binop_less, a, b);
+}
+
+ir_expression*
+greater(operand a, operand b)
+{
+ return expr(ir_binop_greater, a, b);
+}
+
+ir_expression*
+lequal(operand a, operand b)
+{
+ return expr(ir_binop_lequal, a, b);
+}
+
+ir_expression*
+gequal(operand a, operand b)
+{
+ return expr(ir_binop_gequal, a, b);
+}
+
+ir_expression*
+logic_not(operand a)
+{
+ return expr(ir_unop_logic_not, a);
+}
+
+ir_expression*
+logic_and(operand a, operand b)
+{
+ return expr(ir_binop_logic_and, a, b);
+}
+
+ir_expression*
+logic_or(operand a, operand b)
+{
+ return expr(ir_binop_logic_or, a, b);
+}
+
+ir_expression*
+bit_not(operand a)
+{
+ return expr(ir_unop_bit_not, a);
+}
+
+ir_expression*
+bit_and(operand a, operand b)
+{
+ return expr(ir_binop_bit_and, a, b);
+}
+
+ir_expression*
+bit_or(operand a, operand b)
+{
+ return expr(ir_binop_bit_or, a, b);
+}
+
+ir_expression*
+lshift(operand a, operand b)
+{
+ return expr(ir_binop_lshift, a, b);
+}
+
+ir_expression*
+rshift(operand a, operand b)
+{
+ return expr(ir_binop_rshift, a, b);
+}
+
+ir_expression*
+f2i(operand a)
+{
+ return expr(ir_unop_f2i, a);
+}
+
+ir_expression*
+i2f(operand a)
+{
+ return expr(ir_unop_i2f, a);
+}
+
+ir_expression*
+i2u(operand a)
+{
+ return expr(ir_unop_i2u, a);
+}
+
+ir_expression*
+u2i(operand a)
+{
+ return expr(ir_unop_u2i, a);
+}
+
+ir_expression*
+f2u(operand a)
+{
+ return expr(ir_unop_f2u, a);
+}
+
+ir_expression*
+u2f(operand a)
+{
+ return expr(ir_unop_u2f, a);
+}
+
+ir_if*
+if_tree(operand condition,
+ ir_instruction *then_branch)
+{
+ assert(then_branch != NULL);
+
+ void *mem_ctx = ralloc_parent(condition.val);
+
+ ir_if *result = new(mem_ctx) ir_if(condition.val);
+ result->then_instructions.push_tail(then_branch);
+ return result;
+}
+
+ir_if*
+if_tree(operand condition,
+ ir_instruction *then_branch,
+ ir_instruction *else_branch)
+{
+ assert(then_branch != NULL);
+ assert(else_branch != NULL);
+
+ void *mem_ctx = ralloc_parent(condition.val);
+
+ ir_if *result = new(mem_ctx) ir_if(condition.val);
+ result->then_instructions.push_tail(then_branch);
+ result->else_instructions.push_tail(else_branch);
+ return result;
+}
+
} /* namespace ir_builder */
diff --git a/mesalib/src/glsl/ir_builder.h b/mesalib/src/glsl/ir_builder.h
index 067858df4..690ac74eb 100644
--- a/mesalib/src/glsl/ir_builder.h
+++ b/mesalib/src/glsl/ir_builder.h
@@ -25,6 +25,15 @@
namespace ir_builder {
+#ifndef WRITEMASK_X
+enum writemask {
+ WRITEMASK_X = 0x1,
+ WRITEMASK_Y = 0x2,
+ WRITEMASK_Z = 0x4,
+ WRITEMASK_W = 0x8,
+};
+#endif
+
/**
* This little class exists to let the helper expression generators
* take either an ir_rvalue * or an ir_variable * to be automatically
@@ -73,9 +82,40 @@ public:
class ir_factory {
public:
+ ir_factory()
+ : instructions(NULL),
+ mem_ctx(NULL)
+ {
+ return;
+ }
+
void emit(ir_instruction *ir);
ir_variable *make_temp(const glsl_type *type, const char *name);
+ ir_constant*
+ constant(float f)
+ {
+ return new(mem_ctx) ir_constant(f);
+ }
+
+ ir_constant*
+ constant(int i)
+ {
+ return new(mem_ctx) ir_constant(i);
+ }
+
+ ir_constant*
+ constant(unsigned u)
+ {
+ return new(mem_ctx) ir_constant(u);
+ }
+
+ ir_constant*
+ constant(bool b)
+ {
+ return new(mem_ctx) ir_constant(b);
+ }
+
exec_list *instructions;
void *mem_ctx;
};
@@ -88,9 +128,35 @@ ir_expression *expr(ir_expression_operation op, operand a, operand b);
ir_expression *add(operand a, operand b);
ir_expression *sub(operand a, operand b);
ir_expression *mul(operand a, operand b);
+ir_expression *div(operand a, operand b);
+ir_expression *round_even(operand a);
ir_expression *dot(operand a, operand b);
+ir_expression *clamp(operand a, operand b, operand c);
ir_expression *saturate(operand a);
+ir_expression *equal(operand a, operand b);
+ir_expression *less(operand a, operand b);
+ir_expression *greater(operand a, operand b);
+ir_expression *lequal(operand a, operand b);
+ir_expression *gequal(operand a, operand b);
+
+ir_expression *logic_not(operand a);
+ir_expression *logic_and(operand a, operand b);
+ir_expression *logic_or(operand a, operand b);
+
+ir_expression *bit_not(operand a);
+ir_expression *bit_or(operand a, operand b);
+ir_expression *bit_and(operand a, operand b);
+ir_expression *lshift(operand a, operand b);
+ir_expression *rshift(operand a, operand b);
+
+ir_expression *f2i(operand a);
+ir_expression *i2f(operand a);
+ir_expression *f2u(operand a);
+ir_expression *u2f(operand a);
+ir_expression *i2u(operand a);
+ir_expression *u2i(operand a);
+
/**
* Swizzle away later components, but preserve the ordering.
*/
@@ -108,4 +174,10 @@ ir_swizzle *swizzle_xy(operand a);
ir_swizzle *swizzle_xyz(operand a);
ir_swizzle *swizzle_xyzw(operand a);
+ir_if *if_tree(operand condition,
+ ir_instruction *then_branch);
+ir_if *if_tree(operand condition,
+ ir_instruction *then_branch,
+ ir_instruction *else_branch);
+
} /* namespace ir_builder */
diff --git a/mesalib/src/glsl/ir_clone.cpp b/mesalib/src/glsl/ir_clone.cpp
index c62c1fc20..b94ff05df 100644
--- a/mesalib/src/glsl/ir_clone.cpp
+++ b/mesalib/src/glsl/ir_clone.cpp
@@ -50,7 +50,6 @@ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const
var->interpolation = this->interpolation;
var->location = this->location;
var->index = this->index;
- var->uniform_block = this->uniform_block;
var->warn_extension = this->warn_extension;
var->origin_upper_left = this->origin_upper_left;
var->pixel_center_integer = this->pixel_center_integer;
@@ -77,6 +76,8 @@ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const
var->constant_initializer =
this->constant_initializer->clone(mem_ctx, ht);
+ var->interface_type = this->interface_type;
+
if (ht) {
hash_table_insert(ht, var, (void *)const_cast<ir_variable *>(this));
}
@@ -375,10 +376,15 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
return c;
}
- default:
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_INTERFACE:
assert(!"Should not get here.");
- return NULL;
+ break;
}
+
+ return NULL;
}
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index 17b54b923..86b863f31 100644
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -40,25 +40,6 @@
#include "glsl_types.h"
#include "program/hash_table.h"
-/* Using C99 rounding functions for roundToEven() implementation is
- * difficult, because round(), rint, and nearbyint() are affected by
- * fesetenv(), which the application may have done for its own
- * purposes. Mesa's IROUND macro is close to what we want, but it
- * rounds away from 0 on n + 0.5.
- */
-static int
-round_to_even(float val)
-{
- int rounded = IROUND(val);
-
- if (val - floor(val) == 0.5) {
- if (rounded % 2 != 0)
- rounded += val > 0 ? -1 : 1;
- }
-
- return rounded;
-}
-
static float
dot(ir_constant *op0, ir_constant *op1)
{
@@ -94,6 +75,297 @@ bitcast_f2u(float f)
return u;
}
+/**
+ * Evaluate one component of a floating-point 4x8 packing function.
+ */
+typedef uint8_t
+(*pack_1x8_func_t)(float);
+
+/**
+ * Evaluate one component of a floating-point 2x16 packing function.
+ */
+typedef uint16_t
+(*pack_1x16_func_t)(float);
+
+/**
+ * Evaluate one component of a floating-point 4x8 unpacking function.
+ */
+typedef float
+(*unpack_1x8_func_t)(uint8_t);
+
+/**
+ * Evaluate one component of a floating-point 2x16 unpacking function.
+ */
+typedef float
+(*unpack_1x16_func_t)(uint16_t);
+
+/**
+ * Evaluate a 2x16 floating-point packing function.
+ */
+static uint32_t
+pack_2x16(pack_1x16_func_t pack_1x16,
+ float x, float y)
+{
+ /* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ * packSnorm2x16
+ * -------------
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * The specifications for the other packing functions contain similar
+ * language.
+ */
+ uint32_t u = 0;
+ u |= ((uint32_t) pack_1x16(x) << 0);
+ u |= ((uint32_t) pack_1x16(y) << 16);
+ return u;
+}
+
+/**
+ * Evaluate a 4x8 floating-point packing function.
+ */
+static uint32_t
+pack_4x8(pack_1x8_func_t pack_1x8,
+ float x, float y, float z, float w)
+{
+ /* From section 8.4 of the GLSL 4.30 spec:
+ *
+ * packSnorm4x8
+ * ------------
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * The specifications for the other packing functions contain similar
+ * language.
+ */
+ uint32_t u = 0;
+ u |= ((uint32_t) pack_1x8(x) << 0);
+ u |= ((uint32_t) pack_1x8(y) << 8);
+ u |= ((uint32_t) pack_1x8(z) << 16);
+ u |= ((uint32_t) pack_1x8(w) << 24);
+ return u;
+}
+
+/**
+ * Evaluate a 2x16 floating-point unpacking function.
+ */
+static void
+unpack_2x16(unpack_1x16_func_t unpack_1x16,
+ uint32_t u,
+ float *x, float *y)
+{
+ /* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ * unpackSnorm2x16
+ * ---------------
+ * The first component of the returned vector will be extracted from
+ * the least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * The specifications for the other unpacking functions contain similar
+ * language.
+ */
+ *x = unpack_1x16((uint16_t) (u & 0xffff));
+ *y = unpack_1x16((uint16_t) (u >> 16));
+}
+
+/**
+ * Evaluate a 4x8 floating-point unpacking function.
+ */
+static void
+unpack_4x8(unpack_1x8_func_t unpack_1x8, uint32_t u,
+ float *x, float *y, float *z, float *w)
+{
+ /* From section 8.4 of the GLSL 4.30 spec:
+ *
+ * unpackSnorm4x8
+ * --------------
+ * The first component of the returned vector will be extracted from
+ * the least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * The specifications for the other unpacking functions contain similar
+ * language.
+ */
+ *x = unpack_1x8((uint8_t) (u & 0xff));
+ *y = unpack_1x8((uint8_t) (u >> 8));
+ *z = unpack_1x8((uint8_t) (u >> 16));
+ *w = unpack_1x8((uint8_t) (u >> 24));
+}
+
+/**
+ * Evaluate one component of packSnorm4x8.
+ */
+static uint8_t
+pack_snorm_1x8(float x)
+{
+ /* From section 8.4 of the GLSL 4.30 spec:
+ *
+ * packSnorm4x8
+ * ------------
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
+ *
+ * We must first cast the float to an int, because casting a negative
+ * float to a uint is undefined.
+ */
+ return (uint8_t) (int8_t)
+ _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 127.0f);
+}
+
+/**
+ * Evaluate one component of packSnorm2x16.
+ */
+static uint16_t
+pack_snorm_1x16(float x)
+{
+ /* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ * packSnorm2x16
+ * -------------
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+ *
+ * We must first cast the float to an int, because casting a negative
+ * float to a uint is undefined.
+ */
+ return (uint16_t) (int16_t)
+ _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm4x8.
+ */
+static float
+unpack_snorm_1x8(uint8_t u)
+{
+ /* From section 8.4 of the GLSL 4.30 spec:
+ *
+ * unpackSnorm4x8
+ * --------------
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackSnorm4x8: clamp(f / 127.0, -1, +1)
+ */
+ return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm2x16.
+ */
+static float
+unpack_snorm_1x16(uint16_t u)
+{
+ /* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ * unpackSnorm2x16
+ * ---------------
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
+ */
+ return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of packUnorm4x8.
+ */
+static uint8_t
+pack_unorm_1x8(float x)
+{
+ /* From section 8.4 of the GLSL 4.30 spec:
+ *
+ * packUnorm4x8
+ * ------------
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
+ */
+ return (uint8_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 255.0f);
+}
+
+/**
+ * Evaluate one component of packUnorm2x16.
+ */
+static uint16_t
+pack_unorm_1x16(float x)
+{
+ /* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ * packUnorm2x16
+ * -------------
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+ */
+ return (uint16_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
+}
+
+/**
+ * Evaluate one component of unpackUnorm4x8.
+ */
+static float
+unpack_unorm_1x8(uint8_t u)
+{
+ /* From section 8.4 of the GLSL 4.30 spec:
+ *
+ * unpackUnorm4x8
+ * --------------
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackUnorm4x8: f / 255.0
+ */
+ return (float) u / 255.0f;
+}
+
+/**
+ * Evaluate one component of unpackUnorm2x16.
+ */
+static float
+unpack_unorm_1x16(uint16_t u)
+{
+ /* From section 8.4 of the GLSL ES 3.00 spec:
+ *
+ * unpackUnorm2x16
+ * ---------------
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackUnorm2x16: f / 65535.0
+ */
+ return (float) u / 65535.0f;
+}
+
+/**
+ * Evaluate one component of packHalf2x16.
+ */
+static uint16_t
+pack_half_1x16(float x)
+{
+ return _mesa_float_to_half(x);
+}
+
+/**
+ * Evaluate one component of unpackHalf2x16.
+ */
+static float
+unpack_half_1x16(uint16_t u)
+{
+ return _mesa_half_to_float(u);
+}
+
ir_constant *
ir_rvalue::constant_expression_value(struct hash_table *variable_context)
{
@@ -279,7 +551,7 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
case ir_unop_round_even:
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
for (unsigned c = 0; c < op[0]->type->components(); c++) {
- data.f[c] = round_to_even(op[0]->value.f[c]);
+ data.f[c] = _mesa_round_to_even(op[0]->value.f[c]);
}
break;
@@ -459,6 +731,70 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
}
break;
+ case ir_unop_pack_snorm_2x16:
+ assert(op[0]->type == glsl_type::vec2_type);
+ data.u[0] = pack_2x16(pack_snorm_1x16,
+ op[0]->value.f[0],
+ op[0]->value.f[1]);
+ break;
+ case ir_unop_pack_snorm_4x8:
+ assert(op[0]->type == glsl_type::vec4_type);
+ data.u[0] = pack_4x8(pack_snorm_1x8,
+ op[0]->value.f[0],
+ op[0]->value.f[1],
+ op[0]->value.f[2],
+ op[0]->value.f[3]);
+ break;
+ case ir_unop_unpack_snorm_2x16:
+ assert(op[0]->type == glsl_type::uint_type);
+ unpack_2x16(unpack_snorm_1x16,
+ op[0]->value.u[0],
+ &data.f[0], &data.f[1]);
+ break;
+ case ir_unop_unpack_snorm_4x8:
+ assert(op[0]->type == glsl_type::uint_type);
+ unpack_4x8(unpack_snorm_1x8,
+ op[0]->value.u[0],
+ &data.f[0], &data.f[1], &data.f[2], &data.f[3]);
+ break;
+ case ir_unop_pack_unorm_2x16:
+ assert(op[0]->type == glsl_type::vec2_type);
+ data.u[0] = pack_2x16(pack_unorm_1x16,
+ op[0]->value.f[0],
+ op[0]->value.f[1]);
+ break;
+ case ir_unop_pack_unorm_4x8:
+ assert(op[0]->type == glsl_type::vec4_type);
+ data.u[0] = pack_4x8(pack_unorm_1x8,
+ op[0]->value.f[0],
+ op[0]->value.f[1],
+ op[0]->value.f[2],
+ op[0]->value.f[3]);
+ break;
+ case ir_unop_unpack_unorm_2x16:
+ assert(op[0]->type == glsl_type::uint_type);
+ unpack_2x16(unpack_unorm_1x16,
+ op[0]->value.u[0],
+ &data.f[0], &data.f[1]);
+ break;
+ case ir_unop_unpack_unorm_4x8:
+ assert(op[0]->type == glsl_type::uint_type);
+ unpack_4x8(unpack_unorm_1x8,
+ op[0]->value.u[0],
+ &data.f[0], &data.f[1], &data.f[2], &data.f[3]);
+ break;
+ case ir_unop_pack_half_2x16:
+ assert(op[0]->type == glsl_type::vec2_type);
+ data.u[0] = pack_2x16(pack_half_1x16,
+ op[0]->value.f[0],
+ op[0]->value.f[1]);
+ break;
+ case ir_unop_unpack_half_2x16:
+ assert(op[0]->type == glsl_type::uint_type);
+ unpack_2x16(unpack_half_1x16,
+ op[0]->value.u[0],
+ &data.f[0], &data.f[1]);
+ break;
case ir_binop_pow:
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
for (unsigned c = 0; c < op[0]->type->components(); c++) {
diff --git a/mesalib/src/glsl/ir_function.cpp b/mesalib/src/glsl/ir_function.cpp
index a525693ed..fe4209c77 100644
--- a/mesalib/src/glsl/ir_function.cpp
+++ b/mesalib/src/glsl/ir_function.cpp
@@ -78,17 +78,17 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
return PARAMETER_LIST_NO_MATCH;
case ir_var_const_in:
- case ir_var_in:
+ case ir_var_function_in:
if (!actual->type->can_implicitly_convert_to(param->type))
return PARAMETER_LIST_NO_MATCH;
break;
- case ir_var_out:
+ case ir_var_function_out:
if (!param->type->can_implicitly_convert_to(actual->type))
return PARAMETER_LIST_NO_MATCH;
break;
- case ir_var_inout:
+ case ir_var_function_inout:
/* Since there are no bi-directional automatic conversions (e.g.,
* there is int -> float but no float -> int), inout parameters must
* be exact matches.
diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h
index 6b9519174..8f3301840 100644
--- a/mesalib/src/glsl/ir_optimization.h
+++ b/mesalib/src/glsl/ir_optimization.h
@@ -37,6 +37,31 @@
#define MOD_TO_FRACT 0x20
#define INT_DIV_TO_MUL_RCP 0x40
+/**
+ * \see class lower_packing_builtins_visitor
+ */
+enum lower_packing_builtins_op {
+ LOWER_PACK_UNPACK_NONE = 0x0000,
+
+ LOWER_PACK_SNORM_2x16 = 0x0001,
+ LOWER_UNPACK_SNORM_2x16 = 0x0002,
+
+ LOWER_PACK_UNORM_2x16 = 0x0004,
+ LOWER_UNPACK_UNORM_2x16 = 0x0008,
+
+ LOWER_PACK_HALF_2x16 = 0x0010,
+ LOWER_UNPACK_HALF_2x16 = 0x0020,
+
+ LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040,
+ LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080,
+
+ LOWER_PACK_SNORM_4x8 = 0x0100,
+ LOWER_UNPACK_SNORM_4x8 = 0x0200,
+
+ LOWER_PACK_UNORM_4x8 = 0x0400,
+ LOWER_UNPACK_UNORM_4x8 = 0x0800,
+};
+
bool do_common_optimization(exec_list *ir, bool linked,
bool uniform_locations_assigned,
unsigned max_unroll_iterations);
@@ -74,6 +99,7 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions,
bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
bool lower_clip_distance(gl_shader *shader);
void lower_output_reads(exec_list *instructions);
+bool lower_packing_builtins(exec_list *instructions, int op_mask);
void lower_ubo_reference(struct gl_shader *shader, exec_list *instructions);
void lower_packed_varyings(void *mem_ctx, unsigned location_base,
unsigned locations_used, ir_variable_mode mode,
diff --git a/mesalib/src/glsl/ir_print_visitor.cpp b/mesalib/src/glsl/ir_print_visitor.cpp
index 8aa26e5d0..acc92dbf1 100644
--- a/mesalib/src/glsl/ir_print_visitor.cpp
+++ b/mesalib/src/glsl/ir_print_visitor.cpp
@@ -146,7 +146,8 @@ void ir_print_visitor::visit(ir_variable *ir)
const char *const cent = (ir->centroid) ? "centroid " : "";
const char *const inv = (ir->invariant) ? "invariant " : "";
- const char *const mode[] = { "", "uniform ", "in ", "out ", "inout ",
+ const char *const mode[] = { "", "uniform ", "shader_in ", "shader_out ",
+ "in ", "out ", "inout ",
"const_in ", "sys ", "temporary " };
const char *const interp[] = { "", "flat", "noperspective" };
diff --git a/mesalib/src/glsl/ir_reader.cpp b/mesalib/src/glsl/ir_reader.cpp
index 03dbb67c3..405e75b64 100644
--- a/mesalib/src/glsl/ir_reader.cpp
+++ b/mesalib/src/glsl/ir_reader.cpp
@@ -400,13 +400,17 @@ ir_reader::read_declaration(s_expression *expr)
} else if (strcmp(qualifier->value(), "auto") == 0) {
var->mode = ir_var_auto;
} else if (strcmp(qualifier->value(), "in") == 0) {
- var->mode = ir_var_in;
+ var->mode = ir_var_function_in;
+ } else if (strcmp(qualifier->value(), "shader_in") == 0) {
+ var->mode = ir_var_shader_in;
} else if (strcmp(qualifier->value(), "const_in") == 0) {
var->mode = ir_var_const_in;
} else if (strcmp(qualifier->value(), "out") == 0) {
- var->mode = ir_var_out;
+ var->mode = ir_var_function_out;
+ } else if (strcmp(qualifier->value(), "shader_out") == 0) {
+ var->mode = ir_var_shader_out;
} else if (strcmp(qualifier->value(), "inout") == 0) {
- var->mode = ir_var_inout;
+ var->mode = ir_var_function_inout;
} else if (strcmp(qualifier->value(), "temporary") == 0) {
var->mode = ir_var_temporary;
} else if (strcmp(qualifier->value(), "smooth") == 0) {
diff --git a/mesalib/src/glsl/ir_set_program_inouts.cpp b/mesalib/src/glsl/ir_set_program_inouts.cpp
index e5de07e01..1e102bfbb 100644
--- a/mesalib/src/glsl/ir_set_program_inouts.cpp
+++ b/mesalib/src/glsl/ir_set_program_inouts.cpp
@@ -85,7 +85,7 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len,
for (int i = 0; i < len; i++) {
GLbitfield64 bitfield = BITFIELD64_BIT(var->location + var->index + offset + i);
- if (var->mode == ir_var_in) {
+ if (var->mode == ir_var_shader_in) {
prog->InputsRead |= bitfield;
if (is_fragment_shader) {
gl_fragment_program *fprog = (gl_fragment_program *) prog;
@@ -152,8 +152,8 @@ ir_set_program_inouts_visitor::visit_enter(ir_dereference_array *ir)
ir_visitor_status
ir_set_program_inouts_visitor::visit(ir_variable *ir)
{
- if (ir->mode == ir_var_in ||
- ir->mode == ir_var_out ||
+ if (ir->mode == ir_var_shader_in ||
+ ir->mode == ir_var_shader_out ||
ir->mode == ir_var_system_value) {
hash_table_insert(this->ht, ir, ir);
}
diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp
index ad57a3149..d8cafd55f 100644
--- a/mesalib/src/glsl/ir_validate.cpp
+++ b/mesalib/src/glsl/ir_validate.cpp
@@ -329,6 +329,38 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type == ir->type);
break;
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_half_2x16:
+ assert(ir->type == glsl_type::uint_type);
+ assert(ir->operands[0]->type == glsl_type::vec2_type);
+ break;
+
+ case ir_unop_pack_snorm_4x8:
+ case ir_unop_pack_unorm_4x8:
+ assert(ir->type == glsl_type::uint_type);
+ assert(ir->operands[0]->type == glsl_type::vec4_type);
+ break;
+
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
+ assert(ir->type == glsl_type::vec2_type);
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+ break;
+
+ case ir_unop_unpack_snorm_4x8:
+ case ir_unop_unpack_unorm_4x8:
+ assert(ir->type == glsl_type::vec4_type);
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+ break;
+
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
+ assert(ir->type == glsl_type::float_type);
+ assert(ir->operands[0]->type == glsl_type::uint_type);
+ break;
+
case ir_unop_noise:
/* XXX what can we assert here? */
break;
@@ -423,6 +455,12 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type == ir->operands[1]->type);
break;
+ case ir_binop_pack_half_2x16_split:
+ assert(ir->type == glsl_type::uint_type);
+ assert(ir->operands[0]->type == glsl_type::float_type);
+ assert(ir->operands[1]->type == glsl_type::float_type);
+ break;
+
case ir_binop_ubo_load:
assert(ir->operands[0]->as_constant());
assert(ir->operands[0]->type == glsl_type::uint_type);
@@ -605,8 +643,8 @@ ir_validate::visit_enter(ir_call *ir)
printf("ir_call parameter type mismatch:\n");
goto dump_ir;
}
- if (formal_param->mode == ir_var_out
- || formal_param->mode == ir_var_inout) {
+ if (formal_param->mode == ir_var_function_out
+ || formal_param->mode == ir_var_function_inout) {
if (!actual_param->is_lvalue()) {
printf("ir_call out/inout parameters must be lvalues:\n");
goto dump_ir;
diff --git a/mesalib/src/glsl/link_uniform_block_active_visitor.cpp b/mesalib/src/glsl/link_uniform_block_active_visitor.cpp
new file mode 100644
index 000000000..56a8384e9
--- /dev/null
+++ b/mesalib/src/glsl/link_uniform_block_active_visitor.cpp
@@ -0,0 +1,162 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "link_uniform_block_active_visitor.h"
+#include "program.h"
+
+link_uniform_block_active *
+process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var)
+{
+ const uint32_t h = _mesa_hash_string(var->interface_type->name);
+ const hash_entry *const existing_block =
+ _mesa_hash_table_search(ht, h, var->interface_type->name);
+
+ const glsl_type *const block_type = var->is_interface_instance()
+ ? var->type : var->interface_type;
+
+
+ /* If a block with this block-name has not previously been seen, add it.
+ * If a block with this block-name has been seen, it must be identical to
+ * the block currently being examined.
+ */
+ if (existing_block == NULL) {
+ link_uniform_block_active *const b =
+ rzalloc(mem_ctx, struct link_uniform_block_active);
+
+ b->type = block_type;
+ b->has_instance_name = var->is_interface_instance();
+
+ _mesa_hash_table_insert(ht, h, var->interface_type->name,
+ (void *) b);
+ return b;
+ } else {
+ link_uniform_block_active *const b =
+ (link_uniform_block_active *) existing_block->data;
+
+ if (b->type != block_type
+ || b->has_instance_name != var->is_interface_instance())
+ return NULL;
+ else
+ return b;
+ }
+
+ assert(!"Should not get here.");
+ return NULL;
+}
+
+ir_visitor_status
+link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir)
+{
+ ir_dereference_variable *const d = ir->array->as_dereference_variable();
+ ir_variable *const var = (d == NULL) ? NULL : d->var;
+
+ /* If the r-value being dereferenced is not a variable (e.g., a field of a
+ * structure) or is not a uniform block instance, continue.
+ *
+ * WARNING: It is not enough for the variable to be part of a uniform block.
+ * It must represent the entire block. Arrays (or matrices) inside blocks
+ * that lack an instance name are handled by the ir_dereference_variable
+ * function.
+ */
+ if (var == NULL
+ || !var->is_in_uniform_block()
+ || !var->is_interface_instance())
+ return visit_continue;
+
+ /* Process the block. Bail if there was an error.
+ */
+ link_uniform_block_active *const b =
+ process_block(this->mem_ctx, this->ht, var);
+ if (b == NULL) {
+ linker_error(prog,
+ "uniform block `%s' has mismatching definitions",
+ var->interface_type->name);
+ this->success = false;
+ return visit_stop;
+ }
+
+ /* Block arrays must be declared with an instance name.
+ */
+ assert(b->has_instance_name);
+ assert((b->num_array_elements == 0) == (b->array_elements == NULL));
+ assert(b->type != NULL);
+
+ /* Determine whether or not this array index has already been added to the
+ * list of active array indices. At this point all constant folding must
+ * have occurred, and the array index must be a constant.
+ */
+ ir_constant *c = ir->array_index->as_constant();
+ assert(c != NULL);
+
+ const unsigned idx = c->get_uint_component(0);
+
+ unsigned i;
+ for (i = 0; i < b->num_array_elements; i++) {
+ if (b->array_elements[i] == idx)
+ break;
+ }
+
+ assert(i <= b->num_array_elements);
+
+ if (i == b->num_array_elements) {
+ b->array_elements = reralloc(this->mem_ctx,
+ b->array_elements,
+ unsigned,
+ b->num_array_elements + 1);
+
+ b->array_elements[b->num_array_elements] = idx;
+
+ b->num_array_elements++;
+ }
+
+ return visit_continue_with_parent;
+}
+
+ir_visitor_status
+link_uniform_block_active_visitor::visit(ir_dereference_variable *ir)
+{
+ ir_variable *var = ir->var;
+
+ if (!var->is_in_uniform_block())
+ return visit_continue;
+
+ assert(!var->is_interface_instance() || !var->type->is_array());
+
+ /* Process the block. Bail if there was an error.
+ */
+ link_uniform_block_active *const b =
+ process_block(this->mem_ctx, this->ht, var);
+ if (b == NULL) {
+ linker_error(this->prog,
+ "uniform block `%s' has mismatching definitions",
+ var->interface_type->name);
+ this->success = false;
+ return visit_stop;
+ }
+
+ assert(b->num_array_elements == 0);
+ assert(b->array_elements == NULL);
+ assert(b->type != NULL);
+
+ return visit_continue;
+}
diff --git a/mesalib/src/glsl/link_uniform_block_active_visitor.h b/mesalib/src/glsl/link_uniform_block_active_visitor.h
new file mode 100644
index 000000000..fba628a8f
--- /dev/null
+++ b/mesalib/src/glsl/link_uniform_block_active_visitor.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H
+#define LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "glsl_types.h"
+#include "main/hash_table.h"
+
+struct link_uniform_block_active {
+ const glsl_type *type;
+
+ unsigned *array_elements;
+ unsigned num_array_elements;
+
+ bool has_instance_name;
+};
+
+class link_uniform_block_active_visitor : public ir_hierarchical_visitor {
+public:
+ link_uniform_block_active_visitor(void *mem_ctx, struct hash_table *ht,
+ struct gl_shader_program *prog)
+ : success(true), prog(prog), ht(ht), mem_ctx(mem_ctx)
+ {
+ /* empty */
+ }
+
+ virtual ir_visitor_status visit_enter(ir_dereference_array *);
+ virtual ir_visitor_status visit(ir_dereference_variable *);
+
+ bool success;
+
+private:
+ struct gl_shader_program *prog;
+ struct hash_table *ht;
+ void *mem_ctx;
+};
+
+#endif /* LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H */
diff --git a/mesalib/src/glsl/link_uniform_blocks.cpp b/mesalib/src/glsl/link_uniform_blocks.cpp
new file mode 100644
index 000000000..74fe1e29f
--- /dev/null
+++ b/mesalib/src/glsl/link_uniform_blocks.cpp
@@ -0,0 +1,313 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/core.h"
+#include "ir.h"
+#include "linker.h"
+#include "ir_uniform.h"
+#include "link_uniform_block_active_visitor.h"
+#include "main/hash_table.h"
+#include "program.h"
+
+class ubo_visitor : public uniform_field_visitor {
+public:
+ ubo_visitor(void *mem_ctx, gl_uniform_buffer_variable *variables,
+ unsigned num_variables)
+ : index(0), offset(0), buffer_size(0), variables(variables),
+ num_variables(num_variables), mem_ctx(mem_ctx), is_array_instance(false)
+ {
+ /* empty */
+ }
+
+ void process(const glsl_type *type, const char *name)
+ {
+ this->offset = 0;
+ this->buffer_size = 0;
+ this->is_array_instance = strchr(name, ']') != NULL;
+ this->uniform_field_visitor::process(type, name);
+ }
+
+ unsigned index;
+ unsigned offset;
+ unsigned buffer_size;
+ gl_uniform_buffer_variable *variables;
+ unsigned num_variables;
+ void *mem_ctx;
+ bool is_array_instance;
+
+private:
+ virtual void visit_field(const glsl_type *type, const char *name,
+ bool row_major)
+ {
+ assert(this->index < this->num_variables);
+
+ gl_uniform_buffer_variable *v = &this->variables[this->index++];
+
+ v->Name = ralloc_strdup(mem_ctx, name);
+ v->Type = type;
+ v->RowMajor = row_major;
+
+ if (this->is_array_instance) {
+ v->IndexName = ralloc_strdup(mem_ctx, name);
+
+ char *open_bracket = strchr(v->IndexName, '[');
+ assert(open_bracket != NULL);
+
+ char *close_bracket = strchr(open_bracket, ']');
+ assert(close_bracket != NULL);
+
+ /* Length of the tail without the ']' but with the NUL.
+ */
+ unsigned len = strlen(close_bracket + 1) + 1;
+
+ memmove(open_bracket, close_bracket + 1, len);
+ } else {
+ v->IndexName = v->Name;
+ }
+
+ unsigned alignment = type->std140_base_alignment(v->RowMajor);
+ unsigned size = type->std140_size(v->RowMajor);
+
+ this->offset = glsl_align(this->offset, alignment);
+ v->Offset = this->offset;
+ this->offset += size;
+
+ /* From the GL_ARB_uniform_buffer_object spec:
+ *
+ * "For uniform blocks laid out according to [std140] rules, the
+ * minimum buffer object size returned by the
+ * UNIFORM_BLOCK_DATA_SIZE query is derived by taking the offset of
+ * the last basic machine unit consumed by the last uniform of the
+ * uniform block (including any end-of-array or end-of-structure
+ * padding), adding one, and rounding up to the next multiple of
+ * the base alignment required for a vec4."
+ */
+ this->buffer_size = glsl_align(this->offset, 16);
+ }
+
+ virtual void visit_field(const glsl_struct_field *field)
+ {
+ this->offset = glsl_align(this->offset,
+ field->type->std140_base_alignment(false));
+ }
+};
+
+class count_block_size : public uniform_field_visitor {
+public:
+ count_block_size() : num_active_uniforms(0)
+ {
+ /* empty */
+ }
+
+ unsigned num_active_uniforms;
+
+private:
+ virtual void visit_field(const glsl_type *type, const char *name,
+ bool row_major)
+ {
+ (void) type;
+ (void) name;
+ (void) row_major;
+ this->num_active_uniforms++;
+ }
+};
+
+struct block {
+ const glsl_type *type;
+ bool has_instance_name;
+};
+
+int
+link_uniform_blocks(void *mem_ctx,
+ struct gl_shader_program *prog,
+ struct gl_shader **shader_list,
+ unsigned num_shaders,
+ struct gl_uniform_block **blocks_ret)
+{
+ /* This hash table will track all of the uniform blocks that have been
+ * encountered. Since blocks with the same block-name must be the same,
+ * the hash is organized by block-name.
+ */
+ struct hash_table *block_hash =
+ _mesa_hash_table_create(mem_ctx, _mesa_key_string_equal);
+
+ /* Determine which uniform blocks are active.
+ */
+ link_uniform_block_active_visitor v(mem_ctx, block_hash, prog);
+ for (unsigned i = 0; i < num_shaders; i++) {
+ visit_list_elements(&v, shader_list[i]->ir);
+ }
+
+ /* Count the number of active uniform blocks. Count the total number of
+ * active slots in those uniform blocks.
+ */
+ unsigned num_blocks = 0;
+ unsigned num_variables = 0;
+ count_block_size block_size;
+ struct hash_entry *entry;
+
+ hash_table_foreach (block_hash, entry) {
+ const struct link_uniform_block_active *const b =
+ (const struct link_uniform_block_active *) entry->data;
+
+ const glsl_type *const block_type =
+ b->type->is_array() ? b->type->fields.array : b->type;
+
+ assert((b->num_array_elements > 0) == b->type->is_array());
+
+ block_size.num_active_uniforms = 0;
+ block_size.process(block_type, "");
+
+ if (b->num_array_elements > 0) {
+ num_blocks += b->num_array_elements;
+ num_variables += b->num_array_elements
+ * block_size.num_active_uniforms;
+ } else {
+ num_blocks++;
+ num_variables += block_size.num_active_uniforms;
+ }
+
+ }
+
+ if (num_blocks == 0) {
+ assert(num_variables == 0);
+ _mesa_hash_table_destroy(block_hash, NULL);
+ return 0;
+ }
+
+ assert(num_variables != 0);
+
+ /* Allocate storage to hold all of the information related to uniform
+ * blocks that can be queried through the API.
+ */
+ gl_uniform_block *blocks =
+ ralloc_array(mem_ctx, gl_uniform_block, num_blocks);
+ gl_uniform_buffer_variable *variables =
+ ralloc_array(blocks, gl_uniform_buffer_variable, num_variables);
+
+ /* Add each variable from each uniform block to the API tracking
+ * structures.
+ */
+ unsigned i = 0;
+ ubo_visitor parcel(blocks, variables, num_variables);
+
+ STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140)
+ == unsigned(ubo_packing_std140));
+ STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED)
+ == unsigned(ubo_packing_shared));
+ STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED)
+ == unsigned(ubo_packing_packed));
+
+
+ hash_table_foreach (block_hash, entry) {
+ const struct link_uniform_block_active *const b =
+ (const struct link_uniform_block_active *) entry->data;
+ const glsl_type *block_type = b->type;
+
+ if (b->num_array_elements > 0) {
+ const char *const name = block_type->fields.array->name;
+
+ assert(b->has_instance_name);
+ for (unsigned j = 0; j < b->num_array_elements; j++) {
+ blocks[i].Name = ralloc_asprintf(blocks, "%s[%u]", name,
+ b->array_elements[j]);
+ blocks[i].Uniforms = &variables[parcel.index];
+ blocks[i].Binding = 0;
+ blocks[i].UniformBufferSize = 0;
+ blocks[i]._Packing =
+ gl_uniform_block_packing(block_type->interface_packing);
+
+ parcel.process(block_type->fields.array,
+ blocks[i].Name);
+
+ blocks[i].UniformBufferSize = parcel.buffer_size;
+
+ blocks[i].NumUniforms =
+ (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
+
+ i++;
+ }
+ } else {
+ blocks[i].Name = ralloc_strdup(blocks, block_type->name);
+ blocks[i].Uniforms = &variables[parcel.index];
+ blocks[i].Binding = 0;
+ blocks[i].UniformBufferSize = 0;
+ blocks[i]._Packing =
+ gl_uniform_block_packing(block_type->interface_packing);
+
+ parcel.process(block_type,
+ b->has_instance_name ? block_type->name : "");
+
+ blocks[i].UniformBufferSize = parcel.buffer_size;
+
+ blocks[i].NumUniforms =
+ (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
+
+ i++;
+ }
+ }
+
+ assert(parcel.index == num_variables);
+
+ _mesa_hash_table_destroy(block_hash, NULL);
+
+ *blocks_ret = blocks;
+ return num_blocks;
+}
+
+bool
+link_uniform_blocks_are_compatible(const gl_uniform_block *a,
+ const gl_uniform_block *b)
+{
+ assert(strcmp(a->Name, b->Name) == 0);
+
+ /* Page 35 (page 42 of the PDF) in section 4.3.7 of the GLSL 1.50 spec says:
+ *
+ * "Matched block names within an interface (as defined above) must
+ * match in terms of having the same number of declarations with the
+ * same sequence of types and the same sequence of member names, as
+ * well as having the same member-wise layout qualification....if a
+ * matching block is declared as an array, then the array sizes must
+ * also match... Any mismatch will generate a link error."
+ *
+ * Arrays are not yet supported, so there is no check for that.
+ */
+ if (a->NumUniforms != b->NumUniforms)
+ return false;
+
+ if (a->_Packing != b->_Packing)
+ return false;
+
+ for (unsigned i = 0; i < a->NumUniforms; i++) {
+ if (strcmp(a->Uniforms[i].Name, b->Uniforms[i].Name) != 0)
+ return false;
+
+ if (a->Uniforms[i].Type != b->Uniforms[i].Type)
+ return false;
+
+ if (a->Uniforms[i].RowMajor != b->Uniforms[i].RowMajor)
+ return false;
+ }
+
+ return true;
+}
diff --git a/mesalib/src/glsl/link_uniform_initializers.cpp b/mesalib/src/glsl/link_uniform_initializers.cpp
index 849e08097..836a360fa 100644
--- a/mesalib/src/glsl/link_uniform_initializers.cpp
+++ b/mesalib/src/glsl/link_uniform_initializers.cpp
@@ -67,7 +67,11 @@ copy_constant_to_storage(union gl_constant_value *storage,
case GLSL_TYPE_BOOL:
storage[i].b = int(val->value.b[i]);
break;
- default:
+ case GLSL_TYPE_ARRAY:
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
/* All other types should have already been filtered by other
* paths in the caller.
*/
diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp
index 07d9c18de..f1284adb2 100644
--- a/mesalib/src/glsl/link_uniforms.cpp
+++ b/mesalib/src/glsl/link_uniforms.cpp
@@ -29,12 +29,6 @@
#include "program/hash_table.h"
#include "program.h"
-static inline unsigned int
-align(unsigned int a, unsigned int align)
-{
- return (a + align - 1) / align * align;
-}
-
/**
* \file link_uniforms.cpp
* Assign locations for GLSL uniforms.
@@ -58,23 +52,49 @@ values_for_type(const glsl_type *type)
}
void
+uniform_field_visitor::process(const glsl_type *type, const char *name)
+{
+ assert(type->is_record()
+ || (type->is_array() && type->fields.array->is_record())
+ || type->is_interface()
+ || (type->is_array() && type->fields.array->is_interface()));
+
+ char *name_copy = ralloc_strdup(NULL, name);
+ recursion(type, &name_copy, strlen(name), false);
+ ralloc_free(name_copy);
+}
+
+void
uniform_field_visitor::process(ir_variable *var)
{
const glsl_type *t = var->type;
+ /* false is always passed for the row_major parameter to the other
+ * processing functions because no information is available to do
+ * otherwise. See the warning in linker.h.
+ */
+
/* Only strdup the name if we actually will need to modify it. */
if (t->is_record() || (t->is_array() && t->fields.array->is_record())) {
char *name = ralloc_strdup(NULL, var->name);
- recursion(var->type, &name, strlen(name));
+ recursion(var->type, &name, strlen(name), false);
+ ralloc_free(name);
+ } else if (t->is_interface()) {
+ char *name = ralloc_strdup(NULL, var->type->name);
+ recursion(var->type, &name, strlen(name), false);
+ ralloc_free(name);
+ } else if (t->is_array() && t->fields.array->is_interface()) {
+ char *name = ralloc_strdup(NULL, var->type->fields.array->name);
+ recursion(var->type, &name, strlen(name), false);
ralloc_free(name);
} else {
- this->visit_field(t, var->name);
+ this->visit_field(t, var->name, false);
}
}
void
uniform_field_visitor::recursion(const glsl_type *t, char **name,
- size_t name_length)
+ size_t name_length, bool row_major)
{
/* Records need to have each field processed individually.
*
@@ -82,30 +102,47 @@ uniform_field_visitor::recursion(const glsl_type *t, char **name,
* individually, then each field of the resulting array elements processed
* individually.
*/
- if (t->is_record()) {
+ if (t->is_record() || t->is_interface()) {
for (unsigned i = 0; i < t->length; i++) {
const char *field = t->fields.structure[i].name;
size_t new_length = name_length;
- /* Append '.field' to the current uniform name. */
- ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
+ if (t->fields.structure[i].type->is_record())
+ this->visit_field(&t->fields.structure[i]);
+
+ /* Append '.field' to the current uniform name. */
+ if (name_length == 0) {
+ ralloc_asprintf_rewrite_tail(name, &new_length, "%s", field);
+ } else {
+ ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
+ }
- recursion(t->fields.structure[i].type, name, new_length);
+ recursion(t->fields.structure[i].type, name, new_length,
+ t->fields.structure[i].row_major);
}
- } else if (t->is_array() && t->fields.array->is_record()) {
+ } else if (t->is_array() && (t->fields.array->is_record()
+ || t->fields.array->is_interface())) {
for (unsigned i = 0; i < t->length; i++) {
size_t new_length = name_length;
/* Append the subscript to the current uniform name */
ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
- recursion(t->fields.array, name, new_length);
+ recursion(t->fields.array, name, new_length,
+ row_major);
}
} else {
- this->visit_field(t, *name);
+ this->visit_field(t, *name, row_major);
}
}
+void
+uniform_field_visitor::visit_field(const glsl_struct_field *field)
+{
+ (void) field;
+ /* empty */
+}
+
/**
* Class to help calculate the storage requirements for a set of uniforms
*
@@ -131,6 +168,15 @@ public:
this->num_shader_uniform_components = 0;
}
+ void process(ir_variable *var)
+ {
+ if (var->is_interface_instance())
+ uniform_field_visitor::process(var->interface_type,
+ var->interface_type->name);
+ else
+ uniform_field_visitor::process(var);
+ }
+
/**
* Total number of active uniforms counted
*/
@@ -152,10 +198,15 @@ public:
unsigned num_shader_uniform_components;
private:
- virtual void visit_field(const glsl_type *type, const char *name)
+ virtual void visit_field(const glsl_type *type, const char *name,
+ bool row_major)
{
assert(!type->is_record());
assert(!(type->is_array() && type->fields.array->is_record()));
+ assert(!type->is_interface());
+ assert(!(type->is_array() && type->fields.array->is_interface()));
+
+ (void) row_major;
/* Count the number of samplers regardless of whether the uniform is
* already in the hash table. The hash table prevents adding the same
@@ -224,42 +275,77 @@ public:
}
void set_and_process(struct gl_shader_program *prog,
- struct gl_shader *shader,
ir_variable *var)
{
- ubo_var = NULL;
- if (var->uniform_block != -1) {
- struct gl_uniform_block *block =
- &shader->UniformBlocks[var->uniform_block];
-
- ubo_block_index = -1;
- for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
- if (!strcmp(prog->UniformBlocks[i].Name,
- shader->UniformBlocks[var->uniform_block].Name)) {
- ubo_block_index = i;
- break;
+ ubo_block_index = -1;
+ if (var->is_in_uniform_block()) {
+ if (var->is_interface_instance() && var->type->is_array()) {
+ unsigned l = strlen(var->interface_type->name);
+
+ for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
+ if (strncmp(var->interface_type->name,
+ prog->UniformBlocks[i].Name,
+ l) == 0
+ && prog->UniformBlocks[i].Name[l] == '[') {
+ ubo_block_index = i;
+ break;
+ }
+ }
+ } else {
+ for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
+ if (strcmp(var->interface_type->name,
+ prog->UniformBlocks[i].Name) == 0) {
+ ubo_block_index = i;
+ break;
+ }
}
}
assert(ubo_block_index != -1);
- ubo_var_index = var->location;
- ubo_var = &block->Uniforms[var->location];
- ubo_byte_offset = ubo_var->Offset;
- }
-
- process(var);
+ /* Uniform blocks that were specified with an instance name must be
+ * handled a little bit differently. The name of the variable is the
+ * name used to reference the uniform block instead of being the name
+ * of a variable within the block. Therefore, searching for the name
+ * within the block will fail.
+ */
+ if (var->is_interface_instance()) {
+ ubo_byte_offset = 0;
+ ubo_row_major = false;
+ } else {
+ const struct gl_uniform_block *const block =
+ &prog->UniformBlocks[ubo_block_index];
+
+ assert(var->location != -1);
+
+ const struct gl_uniform_buffer_variable *const ubo_var =
+ &block->Uniforms[var->location];
+
+ ubo_row_major = ubo_var->RowMajor;
+ ubo_byte_offset = ubo_var->Offset;
+ }
+
+ if (var->is_interface_instance())
+ process(var->interface_type, var->interface_type->name);
+ else
+ process(var);
+ } else
+ process(var);
}
- struct gl_uniform_buffer_variable *ubo_var;
int ubo_block_index;
- int ubo_var_index;
int ubo_byte_offset;
+ bool ubo_row_major;
private:
- virtual void visit_field(const glsl_type *type, const char *name)
+ virtual void visit_field(const glsl_type *type, const char *name,
+ bool row_major)
{
assert(!type->is_record());
assert(!(type->is_array() && type->fields.array->is_record()));
+ assert(!type->is_interface());
+ assert(!(type->is_array() && type->fields.array->is_interface()));
+
+ (void) row_major;
unsigned id;
bool found = this->map->get(id, name);
@@ -330,17 +416,17 @@ private:
this->uniforms[id].num_driver_storage = 0;
this->uniforms[id].driver_storage = NULL;
this->uniforms[id].storage = this->values;
- if (this->ubo_var) {
+ if (this->ubo_block_index != -1) {
this->uniforms[id].block_index = this->ubo_block_index;
- unsigned alignment = type->std140_base_alignment(ubo_var->RowMajor);
- this->ubo_byte_offset = align(this->ubo_byte_offset, alignment);
+ unsigned alignment = type->std140_base_alignment(ubo_row_major);
+ this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
this->uniforms[id].offset = this->ubo_byte_offset;
- this->ubo_byte_offset += type->std140_size(ubo_var->RowMajor);
+ this->ubo_byte_offset += type->std140_size(ubo_row_major);
if (type->is_array()) {
this->uniforms[id].array_stride =
- align(type->fields.array->std140_size(ubo_var->RowMajor), 16);
+ glsl_align(type->fields.array->std140_size(ubo_row_major), 16);
} else {
this->uniforms[id].array_stride = 0;
}
@@ -348,7 +434,7 @@ private:
if (type->is_matrix() ||
(type->is_array() && type->fields.array->is_matrix())) {
this->uniforms[id].matrix_stride = 16;
- this->uniforms[id].row_major = ubo_var->RowMajor;
+ this->uniforms[id].row_major = ubo_row_major;
} else {
this->uniforms[id].matrix_stride = 0;
this->uniforms[id].row_major = false;
@@ -399,26 +485,10 @@ link_cross_validate_uniform_block(void *mem_ctx,
{
for (unsigned int i = 0; i < *num_linked_blocks; i++) {
struct gl_uniform_block *old_block = &(*linked_blocks)[i];
- if (strcmp(old_block->Name, new_block->Name) == 0) {
- if (old_block->NumUniforms != new_block->NumUniforms) {
- return -1;
- }
- for (unsigned j = 0; j < old_block->NumUniforms; j++) {
- if (strcmp(old_block->Uniforms[j].Name,
- new_block->Uniforms[j].Name) != 0)
- return -1;
-
- if (old_block->Uniforms[j].Offset !=
- new_block->Uniforms[j].Offset)
- return -1;
-
- if (old_block->Uniforms[j].RowMajor !=
- new_block->Uniforms[j].RowMajor)
- return -1;
- }
- return i;
- }
+ if (strcmp(old_block->Name, new_block->Name) == 0)
+ return link_uniform_blocks_are_compatible(old_block, new_block)
+ ? i : -1;
}
*linked_blocks = reralloc(mem_ctx, *linked_blocks,
@@ -440,7 +510,13 @@ link_cross_validate_uniform_block(void *mem_ctx,
struct gl_uniform_buffer_variable *ubo_var =
&linked_block->Uniforms[i];
- ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name);
+ if (ubo_var->Name == ubo_var->IndexName) {
+ ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name);
+ ubo_var->IndexName = ubo_var->Name;
+ } else {
+ ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name);
+ ubo_var->IndexName = ralloc_strdup(*linked_blocks, ubo_var->IndexName);
+ }
}
return linked_block_index;
@@ -458,17 +534,47 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
foreach_list(node, shader->ir) {
ir_variable *const var = ((ir_instruction *) node)->as_variable();
- if ((var == NULL) || (var->uniform_block == -1))
+ if ((var == NULL) || !var->is_in_uniform_block())
continue;
assert(var->mode == ir_var_uniform);
+ if (var->is_interface_instance()) {
+ var->location = 0;
+ continue;
+ }
+
bool found = false;
+ char sentinel = '\0';
+
+ if (var->type->is_record()) {
+ sentinel = '.';
+ } else if (var->type->is_array()
+ && var->type->fields.array->is_record()) {
+ sentinel = '[';
+ }
+
+ const unsigned l = strlen(var->name);
for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
for (unsigned j = 0; j < shader->UniformBlocks[i].NumUniforms; j++) {
- if (!strcmp(var->name, shader->UniformBlocks[i].Uniforms[j].Name)) {
+ if (sentinel) {
+ const char *begin = shader->UniformBlocks[i].Uniforms[j].Name;
+ const char *end = strchr(begin, sentinel);
+
+ if (end == NULL)
+ continue;
+
+ if (l != (end - begin))
+ continue;
+
+ if (strncmp(var->name, begin, l) == 0) {
+ found = true;
+ var->location = j;
+ break;
+ }
+ } else if (!strcmp(var->name,
+ shader->UniformBlocks[i].Uniforms[j].Name)) {
found = true;
- var->uniform_block = i;
var->location = j;
break;
}
@@ -494,7 +600,7 @@ link_assign_uniform_block_offsets(struct gl_shader *shader)
unsigned alignment = type->std140_base_alignment(ubo_var->RowMajor);
unsigned size = type->std140_size(ubo_var->RowMajor);
- offset = align(offset, alignment);
+ offset = glsl_align(offset, alignment);
ubo_var->Offset = offset;
offset += size;
}
@@ -510,7 +616,7 @@ link_assign_uniform_block_offsets(struct gl_shader *shader)
* and rounding up to the next multiple of the base
* alignment required for a vec4."
*/
- block->UniformBufferSize = align(offset, 16);
+ block->UniformBufferSize = glsl_align(offset, 16);
}
}
@@ -538,13 +644,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog)
*/
memset(prog->SamplerUnits, 0, sizeof(prog->SamplerUnits));
- for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
- if (prog->_LinkedShaders[i] == NULL)
- continue;
-
- link_update_uniform_buffer_variables(prog->_LinkedShaders[i]);
- }
-
/* First pass: Count the uniform resources used by the user-defined
* uniforms. While this happens, each active uniform will have an index
* assigned to it.
@@ -557,6 +656,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog)
if (prog->_LinkedShaders[i] == NULL)
continue;
+ link_update_uniform_buffer_variables(prog->_LinkedShaders[i]);
+
/* Reset various per-shader target counts.
*/
uniform_size.start_shader();
@@ -620,7 +721,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog)
if (strncmp("gl_", var->name, 3) == 0)
continue;
- parcel.set_and_process(prog, prog->_LinkedShaders[i], var);
+ parcel.set_and_process(prog, var);
}
prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used;
diff --git a/mesalib/src/glsl/link_varyings.cpp b/mesalib/src/glsl/link_varyings.cpp
index 5c27f231e..25681d618 100644
--- a/mesalib/src/glsl/link_varyings.cpp
+++ b/mesalib/src/glsl/link_varyings.cpp
@@ -54,10 +54,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
foreach_list(node, producer->ir) {
ir_variable *const var = ((ir_instruction *) node)->as_variable();
- /* FINISHME: For geometry shaders, this should also look for inout
- * FINISHME: variables.
- */
- if ((var == NULL) || (var->mode != ir_var_out))
+ if ((var == NULL) || (var->mode != ir_var_shader_out))
continue;
parameters.add_variable(var);
@@ -71,10 +68,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
foreach_list(node, consumer->ir) {
ir_variable *const input = ((ir_instruction *) node)->as_variable();
- /* FINISHME: For geometry shaders, this should also look for inout
- * FINISHME: variables.
- */
- if ((input == NULL) || (input->mode != ir_var_in))
+ if ((input == NULL) || (input->mode != ir_var_shader_in))
continue;
ir_variable *const output = parameters.get_variable(input->name);
@@ -417,8 +411,17 @@ tfeedback_decl::find_output_var(gl_shader_program *prog,
const char *name = this->is_clip_distance_mesa
? "gl_ClipDistanceMESA" : this->var_name;
ir_variable *var = producer->symbols->get_variable(name);
- if (var && var->mode == ir_var_out)
+ if (var && var->mode == ir_var_shader_out) {
+ const glsl_type *type = var->type;
+ while (type->base_type == GLSL_TYPE_ARRAY)
+ type = type->fields.array;
+ if (type->base_type == GLSL_TYPE_STRUCT) {
+ linker_error(prog, "Transform feedback of varying structs not "
+ "implemented yet.");
+ return NULL;
+ }
return var;
+ }
/* From GL_EXT_transform_feedback:
* A program will fail to link if:
@@ -810,16 +813,15 @@ varying_matches::compute_packing_order(ir_variable *var)
{
const glsl_type *element_type = var->type;
- /* FINISHME: Support for "varying" records in GLSL 1.50. */
while (element_type->base_type == GLSL_TYPE_ARRAY) {
element_type = element_type->fields.array;
}
- switch (element_type->vector_elements) {
+ switch (element_type->component_slots() % 4) {
case 1: return PACKING_ORDER_SCALAR;
case 2: return PACKING_ORDER_VEC2;
case 3: return PACKING_ORDER_VEC3;
- case 4: return PACKING_ORDER_VEC4;
+ case 0: return PACKING_ORDER_VEC4;
default:
assert(!"Unexpected value of vector_elements");
return PACKING_ORDER_VEC4;
@@ -854,7 +856,7 @@ is_varying_var(GLenum shaderType, const ir_variable *var)
{
/* Only fragment shaders will take a varying variable as an input */
if (shaderType == GL_FRAGMENT_SHADER &&
- var->mode == ir_var_in) {
+ var->mode == ir_var_shader_in) {
switch (var->location) {
case FRAG_ATTRIB_WPOS:
case FRAG_ATTRIB_FACE:
@@ -915,13 +917,13 @@ assign_varying_locations(struct gl_context *ctx,
foreach_list(node, producer->ir) {
ir_variable *const output_var = ((ir_instruction *) node)->as_variable();
- if ((output_var == NULL) || (output_var->mode != ir_var_out))
+ if ((output_var == NULL) || (output_var->mode != ir_var_shader_out))
continue;
ir_variable *input_var =
consumer ? consumer->symbols->get_variable(output_var->name) : NULL;
- if (input_var && input_var->mode != ir_var_in)
+ if (input_var && input_var->mode != ir_var_shader_in)
input_var = NULL;
if (input_var) {
@@ -965,11 +967,11 @@ assign_varying_locations(struct gl_context *ctx,
*/
assert(!ctx->Extensions.EXT_transform_feedback);
} else {
- lower_packed_varyings(mem_ctx, producer_base, slots_used, ir_var_out,
- producer);
+ lower_packed_varyings(mem_ctx, producer_base, slots_used,
+ ir_var_shader_out, producer);
if (consumer) {
- lower_packed_varyings(mem_ctx, consumer_base, slots_used, ir_var_in,
- consumer);
+ lower_packed_varyings(mem_ctx, consumer_base, slots_used,
+ ir_var_shader_in, consumer);
}
}
@@ -979,7 +981,7 @@ assign_varying_locations(struct gl_context *ctx,
foreach_list(node, consumer->ir) {
ir_variable *const var = ((ir_instruction *) node)->as_variable();
- if ((var == NULL) || (var->mode != ir_var_in))
+ if ((var == NULL) || (var->mode != ir_var_shader_in))
continue;
if (var->is_unmatched_generic_inout) {
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index 63548e071..63ce178f4 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -107,8 +107,8 @@ public:
ir_rvalue *param_rval = (ir_rvalue *)iter.get();
ir_variable *sig_param = (ir_variable *)sig_iter.get();
- if (sig_param->mode == ir_var_out ||
- sig_param->mode == ir_var_inout) {
+ if (sig_param->mode == ir_var_function_out ||
+ sig_param->mode == ir_var_function_inout) {
ir_variable *var = param_rval->variable_referenced();
if (var && strcmp(name, var->name) == 0) {
found = true;
@@ -212,10 +212,10 @@ link_invalidate_variable_locations(gl_shader *sh, int input_base,
int base;
switch (var->mode) {
- case ir_var_in:
+ case ir_var_shader_in:
base = input_base;
break;
- case ir_var_out:
+ case ir_var_shader_out:
base = output_base;
break;
default:
@@ -393,10 +393,9 @@ mode_string(const ir_variable *var)
case ir_var_auto:
return (var->read_only) ? "global constant" : "global variable";
- case ir_var_uniform: return "uniform";
- case ir_var_in: return "shader input";
- case ir_var_out: return "shader output";
- case ir_var_inout: return "shader inout";
+ case ir_var_uniform: return "uniform";
+ case ir_var_shader_in: return "shader input";
+ case ir_var_shader_out: return "shader output";
case ir_var_const_in:
case ir_var_temporary:
@@ -874,7 +873,6 @@ link_intrastage_shaders(void *mem_ctx,
unsigned num_shaders)
{
struct gl_uniform_block *uniform_blocks = NULL;
- unsigned num_uniform_blocks = 0;
/* Check that global variables defined in multiple shaders are consistent.
*/
@@ -882,23 +880,11 @@ link_intrastage_shaders(void *mem_ctx,
return NULL;
/* Check that uniform blocks between shaders for a stage agree. */
- for (unsigned i = 0; i < num_shaders; i++) {
- struct gl_shader *sh = shader_list[i];
-
- for (unsigned j = 0; j < sh->NumUniformBlocks; j++) {
- link_assign_uniform_block_offsets(sh);
-
- int index = link_cross_validate_uniform_block(mem_ctx,
- &uniform_blocks,
- &num_uniform_blocks,
- &sh->UniformBlocks[j]);
- if (index == -1) {
- linker_error(prog, "uniform block `%s' has mismatching definitions",
- sh->UniformBlocks[j].Name);
- return NULL;
- }
- }
- }
+ const int num_uniform_blocks =
+ link_uniform_blocks(mem_ctx, prog, shader_list, num_shaders,
+ &uniform_blocks);
+ if (num_uniform_blocks < 0)
+ return NULL;
/* Check that there is only a single definition of each function signature
* across all shaders.
@@ -1069,8 +1055,8 @@ update_array_sizes(struct gl_shader_program *prog)
ir_variable *const var = ((ir_instruction *) node)->as_variable();
if ((var == NULL) || (var->mode != ir_var_uniform &&
- var->mode != ir_var_in &&
- var->mode != ir_var_out) ||
+ var->mode != ir_var_shader_in &&
+ var->mode != ir_var_shader_out) ||
!var->type->is_array())
continue;
@@ -1078,7 +1064,7 @@ update_array_sizes(struct gl_shader_program *prog)
* will not be eliminated. Since we always do std140, just
* don't resize arrays in UBOs.
*/
- if (var->uniform_block != -1)
+ if (var->is_in_uniform_block())
continue;
unsigned int size = var->max_array_access;
@@ -1206,7 +1192,8 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0;
const enum ir_variable_mode direction =
- (target_index == MESA_SHADER_VERTEX) ? ir_var_in : ir_var_out;
+ (target_index == MESA_SHADER_VERTEX)
+ ? ir_var_shader_in : ir_var_shader_out;
/* Temporary storage for the set of attributes that need locations assigned.
@@ -1428,7 +1415,7 @@ store_fragdepth_layout(struct gl_shader_program *prog)
foreach_list(node, ir) {
ir_variable *const var = ((ir_instruction *) node)->as_variable();
- if (var == NULL || var->mode != ir_var_out) {
+ if (var == NULL || var->mode != ir_var_shader_out) {
continue;
}
@@ -1809,7 +1796,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
if (prog->_LinkedShaders[MESA_SHADER_VERTEX] != NULL) {
demote_shader_inputs_and_outputs(prog->_LinkedShaders[MESA_SHADER_VERTEX],
- ir_var_out);
+ ir_var_shader_out);
/* Eliminate code that is now dead due to unused vertex outputs being
* demoted.
@@ -1821,9 +1808,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] != NULL) {
gl_shader *const sh = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
- demote_shader_inputs_and_outputs(sh, ir_var_in);
- demote_shader_inputs_and_outputs(sh, ir_var_inout);
- demote_shader_inputs_and_outputs(sh, ir_var_out);
+ demote_shader_inputs_and_outputs(sh, ir_var_shader_in);
+ demote_shader_inputs_and_outputs(sh, ir_var_shader_out);
/* Eliminate code that is now dead due to unused geometry outputs being
* demoted.
@@ -1835,7 +1821,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] != NULL) {
gl_shader *const sh = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
- demote_shader_inputs_and_outputs(sh, ir_var_in);
+ demote_shader_inputs_and_outputs(sh, ir_var_shader_in);
/* Eliminate code that is now dead due to unused fragment inputs being
* demoted. This shouldn't actually do anything other than remove
diff --git a/mesalib/src/glsl/linker.h b/mesalib/src/glsl/linker.h
index 67c7f3488..14eb9c1cd 100644
--- a/mesalib/src/glsl/linker.h
+++ b/mesalib/src/glsl/linker.h
@@ -49,6 +49,17 @@ link_cross_validate_uniform_block(void *mem_ctx,
void
link_assign_uniform_block_offsets(struct gl_shader *shader);
+extern bool
+link_uniform_blocks_are_compatible(const gl_uniform_block *a,
+ const gl_uniform_block *b);
+
+extern int
+link_uniform_blocks(void *mem_ctx,
+ struct gl_shader_program *prog,
+ struct gl_shader **shader_list,
+ unsigned num_shaders,
+ struct gl_uniform_block **blocks_ret);
+
/**
* Class for processing all of the leaf fields of an uniform
*
@@ -71,24 +82,60 @@ public:
* \param var The uniform variable that is to be processed
*
* Calls \c ::visit_field for each leaf of the uniform.
+ *
+ * \warning
+ * This entry should only be used with uniform blocks in cases where the
+ * row / column ordering of matrices in the block does not matter. For
+ * example, enumerating the names of members of the block, but not for
+ * determining the offsets of members.
*/
void process(ir_variable *var);
+ /**
+ * Begin processing a uniform of a structured type.
+ *
+ * This flavor of \c process should be used to handle structured types
+ * (i.e., structures, interfaces, or arrays thereof) that need special
+ * name handling. A common usage is to handle cases where the block name
+ * (instead of the instance name) is used for an interface block.
+ *
+ * \param type Type that is to be processed, associated with \c name
+ * \param name Base name of the structured uniform being processed
+ *
+ * \note
+ * \c type must be \c GLSL_TYPE_STRUCT, \c GLSL_TYPE_INTERFACE, or an array
+ * thereof.
+ */
+ void process(const glsl_type *type, const char *name);
+
protected:
/**
* Method invoked for each leaf of the uniform
*
* \param type Type of the field.
* \param name Fully qualified name of the field.
+ * \param row_major For a matrix type, whether it is stored row-major.
+ */
+ virtual void visit_field(const glsl_type *type, const char *name,
+ bool row_major) = 0;
+
+ /**
+ * Visit a record before visiting its fields
+ *
+ * For structures-of-structures or interfaces-of-structures, this visits
+ * the inner structure before visiting its fields.
+ *
+ * The default implementation does nothing.
*/
- virtual void visit_field(const glsl_type *type, const char *name) = 0;
+ virtual void visit_field(const glsl_struct_field *field);
private:
/**
* \param name_length Length of the current name \b not including the
* terminating \c NUL character.
*/
- void recursion(const glsl_type *t, char **name, size_t name_length);
+ void recursion(const glsl_type *t, char **name, size_t name_length,
+ bool row_major);
};
void
diff --git a/mesalib/src/glsl/lower_clip_distance.cpp b/mesalib/src/glsl/lower_clip_distance.cpp
index 09bdc36e1..643807de8 100644
--- a/mesalib/src/glsl/lower_clip_distance.cpp
+++ b/mesalib/src/glsl/lower_clip_distance.cpp
@@ -301,8 +301,8 @@ lower_clip_distance_visitor::visit_leave(ir_call *ir)
this->base_ir->insert_before(temp_clip_distance);
actual_param->replace_with(
new(ctx) ir_dereference_variable(temp_clip_distance));
- if (formal_param->mode == ir_var_in
- || formal_param->mode == ir_var_inout) {
+ if (formal_param->mode == ir_var_function_in
+ || formal_param->mode == ir_var_function_inout) {
/* Copy from gl_ClipDistance to the temporary before the call.
* Since we are going to insert this copy before the current
* instruction, we need to visit it afterwards to make sure it
@@ -314,8 +314,8 @@ lower_clip_distance_visitor::visit_leave(ir_call *ir)
this->base_ir->insert_before(new_assignment);
this->visit_new_assignment(new_assignment);
}
- if (formal_param->mode == ir_var_out
- || formal_param->mode == ir_var_inout) {
+ if (formal_param->mode == ir_var_function_out
+ || formal_param->mode == ir_var_function_inout) {
/* Copy from the temporary to gl_ClipDistance after the call.
* Since visit_list_elements() has already decided which
* instruction it's going to visit next, we need to visit
diff --git a/mesalib/src/glsl/lower_output_reads.cpp b/mesalib/src/glsl/lower_output_reads.cpp
index a6192a517..b93e254ec 100644
--- a/mesalib/src/glsl/lower_output_reads.cpp
+++ b/mesalib/src/glsl/lower_output_reads.cpp
@@ -41,7 +41,7 @@ class output_read_remover : public ir_hierarchical_visitor {
protected:
/**
* A hash table mapping from the original ir_variable shader outputs
- * (ir_var_out mode) to the new temporaries to be used instead.
+ * (ir_var_shader_out mode) to the new temporaries to be used instead.
*/
hash_table *replacements;
@@ -86,7 +86,7 @@ output_read_remover::~output_read_remover()
ir_visitor_status
output_read_remover::visit(ir_dereference_variable *ir)
{
- if (ir->var->mode != ir_var_out)
+ if (ir->var->mode != ir_var_shader_out)
return visit_continue;
ir_variable *temp = (ir_variable *) hash_table_find(replacements, ir->var);
diff --git a/mesalib/src/glsl/lower_packed_varyings.cpp b/mesalib/src/glsl/lower_packed_varyings.cpp
index 9e7f274b7..8a40f5e72 100644
--- a/mesalib/src/glsl/lower_packed_varyings.cpp
+++ b/mesalib/src/glsl/lower_packed_varyings.cpp
@@ -70,6 +70,10 @@
* This lowering pass also packs flat floats, ints, and uints together, by
* using ivec4 as the base type of flat "varyings", and using appropriate
* casts to convert floats and uints into ints.
+ *
+ * This lowering pass also handles varyings whose type is a struct or an array
+ * of struct. Structs are packed in order and with no gaps, so there may be a
+ * performance penalty due to structure elements being double-parked.
*/
#include "glsl_symbol_table.h"
@@ -135,8 +139,8 @@ private:
ir_variable **packed_varyings;
/**
- * Type of varying which is being lowered in this pass (either ir_var_in or
- * ir_var_out).
+ * Type of varying which is being lowered in this pass (either
+ * ir_var_shader_in or ir_var_shader_out).
*/
const ir_variable_mode mode;
@@ -274,10 +278,20 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
ir_variable *unpacked_var,
const char *name)
{
- /* FINISHME: Support for "varying" records in GLSL 1.50. */
- assert(!rvalue->type->is_record());
-
- if (rvalue->type->is_array()) {
+ if (rvalue->type->is_record()) {
+ for (unsigned i = 0; i < rvalue->type->length; i++) {
+ if (i != 0)
+ rvalue = rvalue->clone(this->mem_ctx, NULL);
+ const char *field_name = rvalue->type->fields.structure[i].name;
+ ir_dereference_record *dereference_record = new(this->mem_ctx)
+ ir_dereference_record(rvalue, field_name);
+ char *deref_name
+ = ralloc_asprintf(this->mem_ctx, "%s.%s", name, field_name);
+ fine_location = this->lower_rvalue(dereference_record, fine_location,
+ unpacked_var, deref_name);
+ }
+ return fine_location;
+ } else if (rvalue->type->is_array()) {
/* Arrays are packed/unpacked by considering each array element in
* sequence.
*/
@@ -336,7 +350,7 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
unpacked_var, name));
ir_swizzle *swizzle = new(this->mem_ctx)
ir_swizzle(packed_deref, swizzle_values, components);
- if (this->mode == ir_var_out) {
+ if (this->mode == ir_var_shader_out) {
ir_assignment *assignment
= this->bitwise_assign_pack(swizzle, rvalue);
this->main_instructions->push_tail(assignment);
diff --git a/mesalib/src/glsl/lower_packing_builtins.cpp b/mesalib/src/glsl/lower_packing_builtins.cpp
new file mode 100644
index 000000000..db73c7b0f
--- /dev/null
+++ b/mesalib/src/glsl/lower_packing_builtins.cpp
@@ -0,0 +1,1314 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ir.h"
+#include "ir_builder.h"
+#include "ir_optimization.h"
+#include "ir_rvalue_visitor.h"
+
+namespace {
+
+using namespace ir_builder;
+
+/**
+ * A visitor that lowers built-in floating-point pack/unpack expressions
+ * such as packSnorm2x16.
+ */
+class lower_packing_builtins_visitor : public ir_rvalue_visitor {
+public:
+ /**
+ * \param op_mask is a bitmask of `enum lower_packing_builtins_op`
+ */
+ explicit lower_packing_builtins_visitor(int op_mask)
+ : op_mask(op_mask),
+ progress(false)
+ {
+ /* Mutually exclusive options. */
+ assert(!((op_mask & LOWER_PACK_HALF_2x16) &&
+ (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT)));
+
+ assert(!((op_mask & LOWER_UNPACK_HALF_2x16) &&
+ (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT)));
+
+ factory.instructions = &factory_instructions;
+ }
+
+ virtual ~lower_packing_builtins_visitor()
+ {
+ assert(factory_instructions.is_empty());
+ }
+
+ bool get_progress() { return progress; }
+
+ void handle_rvalue(ir_rvalue **rvalue)
+ {
+ if (!*rvalue)
+ return;
+
+ ir_expression *expr = (*rvalue)->as_expression();
+ if (!expr)
+ return;
+
+ enum lower_packing_builtins_op lowering_op =
+ choose_lowering_op(expr->operation);
+
+ if (lowering_op == LOWER_PACK_UNPACK_NONE)
+ return;
+
+ setup_factory(ralloc_parent(expr));
+
+ ir_rvalue *op0 = expr->operands[0];
+ ralloc_steal(factory.mem_ctx, op0);
+
+ switch (lowering_op) {
+ case LOWER_PACK_SNORM_2x16:
+ *rvalue = lower_pack_snorm_2x16(op0);
+ break;
+ case LOWER_PACK_SNORM_4x8:
+ *rvalue = lower_pack_snorm_4x8(op0);
+ break;
+ case LOWER_PACK_UNORM_2x16:
+ *rvalue = lower_pack_unorm_2x16(op0);
+ break;
+ case LOWER_PACK_UNORM_4x8:
+ *rvalue = lower_pack_unorm_4x8(op0);
+ break;
+ case LOWER_PACK_HALF_2x16:
+ *rvalue = lower_pack_half_2x16(op0);
+ break;
+ case LOWER_PACK_HALF_2x16_TO_SPLIT:
+ *rvalue = split_pack_half_2x16(op0);
+ break;
+ case LOWER_UNPACK_SNORM_2x16:
+ *rvalue = lower_unpack_snorm_2x16(op0);
+ break;
+ case LOWER_UNPACK_SNORM_4x8:
+ *rvalue = lower_unpack_snorm_4x8(op0);
+ break;
+ case LOWER_UNPACK_UNORM_2x16:
+ *rvalue = lower_unpack_unorm_2x16(op0);
+ break;
+ case LOWER_UNPACK_UNORM_4x8:
+ *rvalue = lower_unpack_unorm_4x8(op0);
+ break;
+ case LOWER_UNPACK_HALF_2x16:
+ *rvalue = lower_unpack_half_2x16(op0);
+ break;
+ case LOWER_UNPACK_HALF_2x16_TO_SPLIT:
+ *rvalue = split_unpack_half_2x16(op0);
+ break;
+ case LOWER_PACK_UNPACK_NONE:
+ assert(!"not reached");
+ break;
+ }
+
+ teardown_factory();
+ progress = true;
+ }
+
+private:
+ const int op_mask;
+ bool progress;
+ ir_factory factory;
+ exec_list factory_instructions;
+
+ /**
+ * Determine the needed lowering operation by filtering \a expr_op
+ * through \ref op_mask.
+ */
+ enum lower_packing_builtins_op
+ choose_lowering_op(ir_expression_operation expr_op)
+ {
+ /* C++ regards int and enum as fundamentally different types.
+ * So, we can't simply return from each case; we must cast the return
+ * value.
+ */
+ int result;
+
+ switch (expr_op) {
+ case ir_unop_pack_snorm_2x16:
+ result = op_mask & LOWER_PACK_SNORM_2x16;
+ break;
+ case ir_unop_pack_snorm_4x8:
+ result = op_mask & LOWER_PACK_SNORM_4x8;
+ break;
+ case ir_unop_pack_unorm_2x16:
+ result = op_mask & LOWER_PACK_UNORM_2x16;
+ break;
+ case ir_unop_pack_unorm_4x8:
+ result = op_mask & LOWER_PACK_UNORM_4x8;
+ break;
+ case ir_unop_pack_half_2x16:
+ result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT);
+ break;
+ case ir_unop_unpack_snorm_2x16:
+ result = op_mask & LOWER_UNPACK_SNORM_2x16;
+ break;
+ case ir_unop_unpack_snorm_4x8:
+ result = op_mask & LOWER_UNPACK_SNORM_4x8;
+ break;
+ case ir_unop_unpack_unorm_2x16:
+ result = op_mask & LOWER_UNPACK_UNORM_2x16;
+ break;
+ case ir_unop_unpack_unorm_4x8:
+ result = op_mask & LOWER_UNPACK_UNORM_4x8;
+ break;
+ case ir_unop_unpack_half_2x16:
+ result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
+ break;
+ default:
+ result = LOWER_PACK_UNPACK_NONE;
+ break;
+ }
+
+ return static_cast<enum lower_packing_builtins_op>(result);
+ }
+
+ void
+ setup_factory(void *mem_ctx)
+ {
+ assert(factory.mem_ctx == NULL);
+ assert(factory.instructions->is_empty());
+
+ factory.mem_ctx = mem_ctx;
+ }
+
+ void
+ teardown_factory()
+ {
+ base_ir->insert_before(factory.instructions);
+ assert(factory.instructions->is_empty());
+ factory.mem_ctx = NULL;
+ }
+
+ template <typename T>
+ ir_constant*
+ constant(T x)
+ {
+ return factory.constant(x);
+ }
+
+ /**
+ * \brief Pack two uint16's into a single uint32.
+ *
+ * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32
+ * where the least significant bits specify the first element of the pair.
+ * Return the uint32.
+ */
+ ir_rvalue*
+ pack_uvec2_to_uint(ir_rvalue *uvec2_rval)
+ {
+ assert(uvec2_rval->type == glsl_type::uvec2_type);
+
+ /* uvec2 u = UVEC2_RVAL; */
+ ir_variable *u = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_uvec2_to_uint");
+ factory.emit(assign(u, uvec2_rval));
+
+ /* return (u.y << 16) | (u.x & 0xffff); */
+ return bit_or(lshift(swizzle_y(u), constant(16u)),
+ bit_and(swizzle_x(u), constant(0xffffu)));
+ }
+
+ /**
+ * \brief Pack four uint8's into a single uint32.
+ *
+ * Interpret the given uvec4 as a uint8 4-tuple. Pack the 4-tuple into a
+ * uint32 where the least significant bits specify the first element of the
+ * 4-tuple. Return the uint32.
+ */
+ ir_rvalue*
+ pack_uvec4_to_uint(ir_rvalue *uvec4_rval)
+ {
+ assert(uvec4_rval->type == glsl_type::uvec4_type);
+
+ /* uvec4 u = UVEC4_RVAL; */
+ ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
+ "tmp_pack_uvec4_to_uint");
+ factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));
+
+ /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */
+ return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)),
+ lshift(swizzle_z(u), constant(16u))),
+ bit_or(lshift(swizzle_y(u), constant(8u)),
+ swizzle_x(u)));
+ }
+
+ /**
+ * \brief Unpack a uint32 into two uint16's.
+ *
+ * Interpret the given uint32 as a uint16 pair where the uint32's least
+ * significant bits specify the pair's first element. Return the uint16
+ * pair as a uvec2.
+ */
+ ir_rvalue*
+ unpack_uint_to_uvec2(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ /* uint u = UINT_RVAL; */
+ ir_variable *u = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_uint_to_uvec2_u");
+ factory.emit(assign(u, uint_rval));
+
+ /* uvec2 u2; */
+ ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_uint_to_uvec2_u2");
+
+ /* u2.x = u & 0xffffu; */
+ factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X));
+
+ /* u2.y = u >> 16u; */
+ factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y));
+
+ return deref(u2).val;
+ }
+
+ /**
+ * \brief Unpack a uint32 into four uint8's.
+ *
+ * Interpret the given uint32 as a uint8 4-tuple where the uint32's least
+ * significant bits specify the 4-tuple's first element. Return the uint8
+ * 4-tuple as a uvec4.
+ */
+ ir_rvalue*
+ unpack_uint_to_uvec4(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ /* uint u = UINT_RVAL; */
+ ir_variable *u = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_uint_to_uvec4_u");
+ factory.emit(assign(u, uint_rval));
+
+ /* uvec4 u4; */
+ ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type,
+ "tmp_unpack_uint_to_uvec4_u4");
+
+ /* u4.x = u & 0xffu; */
+ factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
+
+ /* u4.y = (u >> 8u) & 0xffu; */
+ factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
+ constant(0xffu)), WRITEMASK_Y));
+
+ /* u4.z = (u >> 16u) & 0xffu; */
+ factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
+ constant(0xffu)), WRITEMASK_Z));
+
+ /* u4.w = (u >> 24u) */
+ factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
+
+ return deref(u4).val;
+ }
+
+ /**
+ * \brief Lower a packSnorm2x16 expression.
+ *
+ * \param vec2_rval is packSnorm2x16's input
+ * \return packSnorm2x16's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_snorm_2x16(ir_rvalue *vec2_rval)
+ {
+ /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp uint packSnorm2x16(vec2 v)
+ * --------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 16-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec2_to_uint(
+ * uvec2(ivec2(
+ * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f))));
+ *
+ * It is necessary to first convert the vec2 to ivec2 rather than directly
+ * converting vec2 to uvec2 because the latter conversion is undefined.
+ * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to
+ * convert a negative floating point value to an uint".
+ */
+ assert(vec2_rval->type == glsl_type::vec2_type);
+
+ ir_rvalue *result = pack_uvec2_to_uint(
+ i2u(f2i(round_even(mul(clamp(vec2_rval,
+ constant(-1.0f),
+ constant(1.0f)),
+ constant(32767.0f))))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower a packSnorm4x8 expression.
+ *
+ * \param vec4_rval is packSnorm4x8's input
+ * \return packSnorm4x8's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_snorm_4x8(ir_rvalue *vec4_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp uint packSnorm4x8(vec4 v)
+ * -------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 8-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec4_to_uint(
+ * uvec4(ivec4(
+ * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f))));
+ *
+ * It is necessary to first convert the vec4 to ivec4 rather than directly
+ * converting vec4 to uvec4 because the latter conversion is undefined.
+ * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to
+ * convert a negative floating point value to an uint".
+ */
+ assert(vec4_rval->type == glsl_type::vec4_type);
+
+ ir_rvalue *result = pack_uvec4_to_uint(
+ i2u(f2i(round_even(mul(clamp(vec4_rval,
+ constant(-1.0f),
+ constant(1.0f)),
+ constant(127.0f))))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower an unpackSnorm2x16 expression.
+ *
+ * \param uint_rval is unpackSnorm2x16's input
+ * \return unpackSnorm2x16's output as a vec2 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_snorm_2x16(ir_rvalue *uint_rval)
+ {
+ /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp vec2 unpackSnorm2x16 (highp uint p)
+ * -----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into a pair of
+ * 16-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * two-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackSnorm2x16: clamp(f / 32767.0, -1,+1)
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return clamp(
+ * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f,
+ * -1.0f, 1.0f);
+ *
+ * The above IR may appear unnecessarily complex, but the intermediate
+ * conversion to ivec2 and the bit shifts are necessary to correctly unpack
+ * negative floats.
+ *
+ * To see why, consider packing and then unpacking vec2(-1.0, 0.0).
+ * packSnorm2x16 encodes -1.0 as the int16 0x8001. During unpacking, we
+ * place that int16 into an int32, which results in the *positive* integer
+ * 0x00008001. The int16's sign bit becomes, in the int32, the rather
+ * unimportant bit 16. We must now extend the int16's sign bit into bits
+ * 17-32, which is accomplished by left-shifting then right-shifting.
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result =
+ clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)),
+ constant(16)),
+ constant(16u))),
+ constant(32767.0f)),
+ constant(-1.0f),
+ constant(1.0f));
+
+ assert(result->type == glsl_type::vec2_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower an unpackSnorm4x8 expression.
+ *
+ * \param uint_rval is unpackSnorm4x8's input
+ * \return unpackSnorm4x8's output as a vec4 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_snorm_4x8(ir_rvalue *uint_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp vec4 unpackSnorm4x8 (highp uint p)
+ * ----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into four
+ * 8-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * four-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackSnorm4x8: clamp(f / 127.0, -1, +1)
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return clamp(
+ * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f,
+ * -1.0f, 1.0f);
+ *
+ * The above IR may appear unnecessarily complex, but the intermediate
+ * conversion to ivec4 and the bit shifts are necessary to correctly unpack
+ * negative floats.
+ *
+ * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0,
+ * 0.0). packSnorm4x8 encodes -1.0 as the int8 0x81. During unpacking, we
+ * place that int8 into an int32, which results in the *positive* integer
+ * 0x00000081. The int8's sign bit becomes, in the int32, the rather
+ * unimportant bit 8. We must now extend the int8's sign bit into bits
+ * 9-32, which is accomplished by left-shifting then right-shifting.
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result =
+ clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
+ constant(24u)),
+ constant(24u))),
+ constant(127.0f)),
+ constant(-1.0f),
+ constant(1.0f));
+
+ assert(result->type == glsl_type::vec4_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower a packUnorm2x16 expression.
+ *
+ * \param vec2_rval is packUnorm2x16's input
+ * \return packUnorm2x16's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_unorm_2x16(ir_rvalue *vec2_rval)
+ {
+ /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp uint packUnorm2x16 (vec2 v)
+ * ---------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 16-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec2_to_uint(uvec2(
+ * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f)));
+ *
+ * Here it is safe to directly convert the vec2 to uvec2 because the
+ * vec2 has been clamped to a non-negative range.
+ */
+
+ assert(vec2_rval->type == glsl_type::vec2_type);
+
+ ir_rvalue *result = pack_uvec2_to_uint(
+ f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f)))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower a packUnorm4x8 expression.
+ *
+ * \param vec4_rval is packUnorm4x8's input
+ * \return packUnorm4x8's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_unorm_4x8(ir_rvalue *vec4_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp uint packUnorm4x8 (vec4 v)
+ * --------------------------------
+ * First, converts each component of the normalized floating-point value
+ * v into 8-bit integer values. Then, the results are packed into the
+ * returned 32-bit unsigned integer.
+ *
+ * The conversion for component c of v to fixed point is done as
+ * follows:
+ *
+ * packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
+ *
+ * The first component of the vector will be written to the least
+ * significant bits of the output; the last component will be written to
+ * the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return pack_uvec4_to_uint(uvec4(
+ * round(clamp(VEC4_RVALUE, 0.0f, 1.0f) * 255.0f)));
+ *
+ * Here it is safe to directly convert the vec4 to uvec4 because the
+ * vec4 has been clamped to a non-negative range.
+ */
+
+ assert(vec4_rval->type == glsl_type::vec4_type);
+
+ ir_rvalue *result = pack_uvec4_to_uint(
+ f2u(round_even(mul(saturate(vec4_rval), constant(255.0f)))));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower an unpackUnorm2x16 expression.
+ *
+ * \param uint_rval is unpackUnorm2x16's input
+ * \return unpackUnorm2x16's output as a vec2 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_unorm_2x16(ir_rvalue *uint_rval)
+ {
+ /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp vec2 unpackUnorm2x16 (highp uint p)
+ * -----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into a pair of
+ * 16-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * two-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackUnorm2x16: f / 65535.0
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0;
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)),
+ constant(65535.0f));
+
+ assert(result->type == glsl_type::vec2_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower an unpackUnorm4x8 expression.
+ *
+ * \param uint_rval is unpackUnorm4x8's input
+ * \return unpackUnorm4x8's output as a vec4 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_unorm_4x8(ir_rvalue *uint_rval)
+ {
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
+ *
+ * highp vec4 unpackUnorm4x8 (highp uint p)
+ * ----------------------------------------
+ * First, unpacks a single 32-bit unsigned integer p into four
+ * 8-bit unsigned integers. Then, each component is converted to
+ * a normalized floating-point value to generate the returned
+ * four-component vector.
+ *
+ * The conversion for unpacked fixed-point value f to floating point is
+ * done as follows:
+ *
+ * unpackUnorm4x8: f / 255.0
+ *
+ * The first component of the returned vector will be extracted from the
+ * least significant bits of the input; the last component will be
+ * extracted from the most significant bits.
+ *
+ * This function generates IR that approximates the following pseudo-GLSL:
+ *
+ * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0;
+ */
+
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)),
+ constant(255.0f));
+
+ assert(result->type == glsl_type::vec4_type);
+ return result;
+ }
+
+ /**
+ * \brief Lower the component-wise calculation of packHalf2x16.
+ *
+ * \param f_rval is one component of packHalf2x16's input
+ * \param e_rval is the unshifted exponent bits of f_rval
+ * \param m_rval is the unshifted mantissa bits of f_rval
+ *
+ * \return a uint rvalue that encodes a float16 in its lower 16 bits
+ */
+ ir_rvalue*
+ pack_half_1x16_nosign(ir_rvalue *f_rval,
+ ir_rvalue *e_rval,
+ ir_rvalue *m_rval)
+ {
+ assert(e_rval->type == glsl_type::uint_type);
+ assert(m_rval->type == glsl_type::uint_type);
+
+ /* uint u16; */
+ ir_variable *u16 = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_u16");
+
+ /* float f = FLOAT_RVAL; */
+ ir_variable *f = factory.make_temp(glsl_type::float_type,
+ "tmp_pack_half_1x16_f");
+ factory.emit(assign(f, f_rval));
+
+ /* uint e = E_RVAL; */
+ ir_variable *e = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_e");
+ factory.emit(assign(e, e_rval));
+
+ /* uint m = M_RVAL; */
+ ir_variable *m = factory.make_temp(glsl_type::uint_type,
+ "tmp_pack_half_1x16_m");
+ factory.emit(assign(m, m_rval));
+
+ /* Preliminaries
+ * -------------
+ *
+ * For a float16, the bit layout is:
+ *
+ * sign: 15
+ * exponent: 10:14
+ * mantissa: 0:9
+ *
+ * Let f16 be a float16 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1)
+ * if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2)
+ * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
+ * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4)
+ * if e16 = 31 and m16 != 0, then NaN (5)
+ *
+ * where 0 <= m16 < 2^10.
+ *
+ * For a float32, the bit layout is:
+ *
+ * sign: 31
+ * exponent: 23:30
+ * mantissa: 0:22
+ *
+ * Let f32 be a float32 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10)
+ * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11)
+ * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
+ * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13)
+ * if e32 = 255 and m32 != 0, then NaN (14)
+ *
+ * where 0 <= m32 < 2^23.
+ *
+ * The minimum and maximum normal float16 values are
+ *
+ * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20)
+ * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21)
+ *
+ * The step at max_norm16 is
+ *
+ * max_step16 = 2^5 (22)
+ *
+ * Observe that the float16 boundary values in equations 20-21 lie in the
+ * range of normal float32 values.
+ *
+ *
+ * Rounding Behavior
+ * -----------------
+ * Not all float32 values can be exactly represented as a float16. We
+ * round all such intermediate float32 values to the nearest float16; if
+ * the float32 is exactly between two float16 values, we round to the one
+ * with an even mantissa. This rounding behavior has several benefits:
+ *
+ * - It has no sign bias.
+ *
+ * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
+ * GPU ISA.
+ *
+ * - By reproducing the behavior of the GPU (at least on Intel hardware),
+ * compile-time evaluation of constant packHalf2x16 GLSL expressions will
+ * result in the same value as if the expression were executed on the
+ * GPU.
+ *
+ * Calculation
+ * -----------
+ * Our task is to compute s16, e16, m16 given f32. Since this function
+ * ignores the sign bit, assume that s32 = s16 = 0. There are several
+ * cases to consider.
+ */
+
+ factory.emit(
+
+ /* Case 1) f32 is NaN
+ *
+ * The resultant f16 will also be NaN.
+ */
+
+ /* if (e32 == 255 && m32 != 0) { */
+ if_tree(logic_and(equal(e, constant(0xffu << 23u)),
+ logic_not(equal(m, constant(0u)))),
+
+ assign(u16, constant(0x7fffu)),
+
+ /* Case 2) f32 lies in the range [0, min_norm16).
+ *
+ * The resultant float16 will be either zero, subnormal, or normal.
+ *
+ * Solving
+ *
+ * f32 = min_norm16 (30)
+ *
+ * gives
+ *
+ * e32 = 113 and m32 = 0 (31)
+ *
+ * Therefore this case occurs if and only if
+ *
+ * e32 < 113 (32)
+ */
+
+ /* } else if (e32 < 113) { */
+ if_tree(less(e, constant(113u << 23u)),
+
+ /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */
+ assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f),
+ constant((float) (1 << 24)))))),
+
+ /* Case 3) f32 lies in the range
+ * [min_norm16, max_norm16 + max_step16).
+ *
+ * The resultant float16 will be either normal or infinite.
+ *
+ * Solving
+ *
+ * f32 = max_norm16 + max_step16 (40)
+ * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41)
+ * = 2^16 (42)
+ * gives
+ *
+ * e32 = 143 and m32 = 0 (43)
+ *
+ * We already solved the boundary condition f32 = min_norm16 above
+ * in equation 31. Therefore this case occurs if and only if
+ *
+ * 113 <= e32 and e32 < 143
+ */
+
+ /* } else if (e32 < 143) { */
+ if_tree(less(e, constant(143u << 23u)),
+
+ /* The addition below handles the case where the mantissa rounds
+ * up to 1024 and bumps the exponent.
+ *
+ * u16 = ((e - (112u << 23u)) >> 13u)
+ * + uint(round_to_even(float(m) / float(1u << 13u)));
+ */
+ assign(u16, add(rshift(sub(e, constant(112u << 23u)),
+ constant(13u)),
+ f2u(round_even(
+ div(u2f(m), constant((float) (1 << 13))))))),
+
+ /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf].
+ *
+ * The resultant float16 will be infinite.
+ *
+ * The cases above caught all float32 values in the range
+ * [0, max_norm16 + max_step16), so this is the fall-through case.
+ */
+
+ /* } else { */
+
+ assign(u16, constant(31u << 10u))))));
+
+ /* } */
+
+ return deref(u16).val;
+ }
+
+ /**
+ * \brief Lower a packHalf2x16 expression.
+ *
+ * \param vec2_rval is packHalf2x16's input
+ * \return packHalf2x16's output as a uint rvalue
+ */
+ ir_rvalue*
+ lower_pack_half_2x16(ir_rvalue *vec2_rval)
+ {
+ /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * highp uint packHalf2x16 (mediump vec2 v)
+ * ----------------------------------------
+ * Returns an unsigned integer obtained by converting the components of
+ * a two-component floating-point vector to the 16-bit floating-point
+ * representation found in the OpenGL ES Specification, and then packing
+ * these two 16-bit integers into a 32-bit unsigned integer.
+ *
+ * The first vector component specifies the 16 least-significant bits
+ * of the result; the second component specifies the 16 most-significant
+ * bits.
+ */
+
+ assert(vec2_rval->type == glsl_type::vec2_type);
+
+ /* vec2 f = VEC2_RVAL; */
+ ir_variable *f = factory.make_temp(glsl_type::vec2_type,
+ "tmp_pack_half_2x16_f");
+ factory.emit(assign(f, vec2_rval));
+
+ /* uvec2 f32 = bitcast_f2u(f); */
+ ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_f32");
+ factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f)));
+
+ /* uvec2 f16; */
+ ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_f16");
+
+ /* Get f32's unshifted exponent bits.
+ *
+ * uvec2 e = f32 & 0x7f800000u;
+ */
+ ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_e");
+ factory.emit(assign(e, bit_and(f32, constant(0x7f800000u))));
+
+ /* Get f32's unshifted mantissa bits.
+ *
+ * uvec2 m = f32 & 0x007fffffu;
+ */
+ ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_pack_half_2x16_m");
+ factory.emit(assign(m, bit_and(f32, constant(0x007fffffu))));
+
+ /* Set f16's exponent and mantissa bits.
+ *
+ * f16.x = pack_half_1x16_nosign(f.x, e.x, m.x);
+ * f16.y = pack_half_1x16_nosign(f.y, e.y, m.y);
+ */
+ factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f),
+ swizzle_x(e),
+ swizzle_x(m)),
+ WRITEMASK_X));
+ factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f),
+ swizzle_y(e),
+ swizzle_y(m)),
+ WRITEMASK_Y));
+
+ /* Set f16's sign bits.
+ *
+ * f16 |= (f32 & (1u << 31u)) >> 16u;
+ */
+ factory.emit(
+ assign(f16, bit_or(f16,
+ rshift(bit_and(f32, constant(1u << 31u)),
+ constant(16u)))));
+
+
+ /* return (f16.y << 16u) | f16.x; */
+ ir_rvalue *result = bit_or(lshift(swizzle_y(f16),
+ constant(16u)),
+ swizzle_x(f16));
+
+ assert(result->type == glsl_type::uint_type);
+ return result;
+ }
+
+ /**
+ * \brief Split packHalf2x16's vec2 operand into two floats.
+ *
+ * \param vec2_rval is packHalf2x16's input
+ * \return a uint rvalue
+ *
+ * Some code generators, such as the i965 fragment shader, require that all
+ * vector expressions be lowered to a sequence of scalar expressions.
+ * However, packHalf2x16 cannot be scalarized by the same mechanism as
+ * a true vector operation because its input and output have a differing
+ * number of vector components.
+ *
+ * This method scalarizes packHalf2x16 by transforming it from an unary
+ * operation having vector input to a binary operation having scalar input.
+ * That is, it transforms
+ *
+ * packHalf2x16(VEC2_RVAL);
+ *
+ * into
+ *
+ * vec2 v = VEC2_RVAL;
+ * return packHalf2x16_split(v.x, v.y);
+ */
+ ir_rvalue*
+ split_pack_half_2x16(ir_rvalue *vec2_rval)
+ {
+ assert(vec2_rval->type == glsl_type::vec2_type);
+
+ ir_variable *v = factory.make_temp(glsl_type::vec2_type,
+ "tmp_split_pack_half_2x16_v");
+ factory.emit(assign(v, vec2_rval));
+
+ return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v));
+ }
+
+ /**
+ * \brief Lower the component-wise calculation of unpackHalf2x16.
+ *
+ * Given a uint that encodes a float16 in its lower 16 bits, this function
+ * returns a uint that encodes a float32 with the same value. The sign bit
+ * of the float16 is ignored.
+ *
+ * \param e_rval is the unshifted exponent bits of a float16
+ * \param m_rval is the unshifted mantissa bits of a float16
+ * \return a uint rvalue that encodes a float32
+ */
+ ir_rvalue*
+ unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval)
+ {
+ assert(e_rval->type == glsl_type::uint_type);
+ assert(m_rval->type == glsl_type::uint_type);
+
+ /* uint u32; */
+ ir_variable *u32 = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_half_1x16_u32");
+
+ /* uint e = E_RVAL; */
+ ir_variable *e = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_half_1x16_e");
+ factory.emit(assign(e, e_rval));
+
+ /* uint m = M_RVAL; */
+ ir_variable *m = factory.make_temp(glsl_type::uint_type,
+ "tmp_unpack_half_1x16_m");
+ factory.emit(assign(m, m_rval));
+
+ /* Preliminaries
+ * -------------
+ *
+ * For a float16, the bit layout is:
+ *
+ * sign: 15
+ * exponent: 10:14
+ * mantissa: 0:9
+ *
+ * Let f16 be a float16 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1)
+ * if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2)
+ * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
+ * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4)
+ * if e16 = 31 and m16 != 0, then NaN (5)
+ *
+ * where 0 <= m16 < 2^10.
+ *
+ * For a float32, the bit layout is:
+ *
+ * sign: 31
+ * exponent: 23:30
+ * mantissa: 0:22
+ *
+ * Let f32 be a float32 value. The sign, exponent, and mantissa
+ * determine its value thus:
+ *
+ * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10)
+ * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11)
+ * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
+ * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13)
+ * if e32 = 255 and m32 != 0, then NaN (14)
+ *
+ * where 0 <= m32 < 2^23.
+ *
+ * Calculation
+ * -----------
+ * Our task is to compute s32, e32, m32 given f16. Since this function
+ * ignores the sign bit, assume that s32 = s16 = 0. There are several
+ * cases to consider.
+ */
+
+ factory.emit(
+
+ /* Case 1) f16 is zero or subnormal.
+ *
+ * The simplest method of calculating f32 in this case is
+ *
+ * f32 = f16 (20)
+ * = 2^(-14) * (m16 / 2^10) (21)
+ * = m16 / 2^24 (22)
+ */
+
+ /* if (e16 == 0) { */
+ if_tree(equal(e, constant(0u)),
+
+ /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */
+ assign(u32, expr(ir_unop_bitcast_f2u,
+ div(u2f(m), constant((float)(1 << 24))))),
+
+ /* Case 2) f16 is normal.
+ *
+ * The equation
+ *
+ * f32 = f16 (30)
+ * 2^(e32 - 127) * (1 + m32 / 2^23) = (31)
+ * 2^(e16 - 15) * (1 + m16 / 2^10)
+ *
+ * can be decomposed into two
+ *
+ * 2^(e32 - 127) = 2^(e16 - 15) (32)
+ * 1 + m32 / 2^23 = 1 + m16 / 2^10 (33)
+ *
+ * which solve to
+ *
+ * e32 = e16 + 112 (34)
+ * m32 = m16 * 2^13 (35)
+ */
+
+ /* } else if (e16 < 31) { */
+ if_tree(less(e, constant(31u << 10u)),
+
+ /* u32 = ((e + (112 << 10)) | m) << 13;
+ */
+ assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m),
+ constant(13u))),
+
+
+ /* Case 3) f16 is infinite. */
+ if_tree(equal(m, constant(0u)),
+
+ assign(u32, constant(255u << 23u)),
+
+ /* Case 4) f16 is NaN. */
+ /* } else { */
+
+ assign(u32, constant(0x7fffffffu))))));
+
+ /* } */
+
+ return deref(u32).val;
+ }
+
+ /**
+ * \brief Lower an unpackHalf2x16 expression.
+ *
+ * \param uint_rval is unpackHalf2x16's input
+ * \return unpackHalf2x16's output as a vec2 rvalue
+ */
+ ir_rvalue*
+ lower_unpack_half_2x16(ir_rvalue *uint_rval)
+ {
+ /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
+ *
+ * mediump vec2 unpackHalf2x16 (highp uint v)
+ * ------------------------------------------
+ * Returns a two-component floating-point vector with components
+ * obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit
+ * values, interpreting those values as 16-bit floating-point numbers
+ * according to the OpenGL ES Specification, and converting them to
+ * 32-bit floating-point values.
+ *
+ * The first component of the vector is obtained from the
+ * 16 least-significant bits of v; the second component is obtained
+ * from the 16 most-significant bits of v.
+ */
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ /* uint u = RVALUE;
+ * uvec2 f16 = uvec2(u & 0xffff, u >> 16);
+ */
+ ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_f16");
+ factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval)));
+
+ /* uvec2 f32; */
+ ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_f32");
+
+ /* Get f16's unshifted exponent bits.
+ *
+ * uvec2 e = f16 & 0x7c00u;
+ */
+ ir_variable *e = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_e");
+ factory.emit(assign(e, bit_and(f16, constant(0x7c00u))));
+
+ /* Get f16's unshifted mantissa bits.
+ *
+ * uvec2 m = f16 & 0x03ffu;
+ */
+ ir_variable *m = factory.make_temp(glsl_type::uvec2_type,
+ "tmp_unpack_half_2x16_m");
+ factory.emit(assign(m, bit_and(f16, constant(0x03ffu))));
+
+ /* Set f32's exponent and mantissa bits.
+ *
+ * f32.x = unpack_half_1x16_nosign(e.x, m.x);
+ * f32.y = unpack_half_1x16_nosign(e.y, m.y);
+ */
+ factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e),
+ swizzle_x(m)),
+ WRITEMASK_X));
+ factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e),
+ swizzle_y(m)),
+ WRITEMASK_Y));
+
+ /* Set f32's sign bit.
+ *
+ * f32 |= (f16 & 0x8000u) << 16u;
+ */
+ factory.emit(assign(f32, bit_or(f32,
+ lshift(bit_and(f16,
+ constant(0x8000u)),
+ constant(16u)))));
+
+ /* return bitcast_u2f(f32); */
+ ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32);
+ assert(result->type == glsl_type::vec2_type);
+ return result;
+ }
+
+ /**
+ * \brief Split unpackHalf2x16 into two operations.
+ *
+ * \param uint_rval is unpackHalf2x16's input
+ * \return a vec2 rvalue
+ *
+ * Some code generators, such as the i965 fragment shader, require that all
+ * vector expressions be lowered to a sequence of scalar expressions.
+ * However, unpackHalf2x16 cannot be scalarized by the same method as
+ * a true vector operation because the number of components of its input
+ * and output differ.
+ *
+ * This method scalarizes unpackHalf2x16 by transforming it from a single
+ * operation having vec2 output to a pair of operations each having float
+ * output. That is, it transforms
+ *
+ * unpackHalf2x16(UINT_RVAL)
+ *
+ * into
+ *
+ * uint u = UINT_RVAL;
+ * vec2 v;
+ *
+ * v.x = unpackHalf2x16_split_x(u);
+ * v.y = unpackHalf2x16_split_y(u);
+ *
+ * return v;
+ */
+ ir_rvalue*
+ split_unpack_half_2x16(ir_rvalue *uint_rval)
+ {
+ assert(uint_rval->type == glsl_type::uint_type);
+
+ /* uint u = uint_rval; */
+ ir_variable *u = factory.make_temp(glsl_type::uint_type,
+ "tmp_split_unpack_half_2x16_u");
+ factory.emit(assign(u, uint_rval));
+
+ /* vec2 v; */
+ ir_variable *v = factory.make_temp(glsl_type::vec2_type,
+ "tmp_split_unpack_half_2x16_v");
+
+ /* v.x = unpack_half_2x16_split_x(u); */
+ factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u),
+ WRITEMASK_X));
+
+ /* v.y = unpack_half_2x16_split_y(u); */
+ factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u),
+ WRITEMASK_Y));
+
+ return deref(v).val;
+ }
+};
+
+} // namespace anonymous
+
+/**
+ * \brief Lower the builtin packing functions.
+ *
+ * \param op_mask is a bitmask of `enum lower_packing_builtins_op`.
+ */
+bool
+lower_packing_builtins(exec_list *instructions, int op_mask)
+{
+ lower_packing_builtins_visitor v(op_mask);
+ visit_list_elements(&v, instructions, true);
+ return v.get_progress();
+}
diff --git a/mesalib/src/glsl/lower_ubo_reference.cpp b/mesalib/src/glsl/lower_ubo_reference.cpp
index e8d2c4742..026197df7 100644
--- a/mesalib/src/glsl/lower_ubo_reference.cpp
+++ b/mesalib/src/glsl/lower_ubo_reference.cpp
@@ -61,10 +61,58 @@ public:
bool progress;
};
-static inline unsigned int
-align(unsigned int a, unsigned int align)
+/**
+ * Determine the name of the interface block field
+ *
+ * This is the name of the specific member as it would appear in the
+ * \c gl_uniform_buffer_variable::Name field in the shader's
+ * \c UniformBlocks array.
+ *
+ * The dereference chain \c d is walked inward, through record and array
+ * dereferences, until the variable dereference at its root is reached.
+ *
+ * \param mem_ctx    ralloc context used if a new string has to be built
+ * \param base_name  name to return, or to use as the prefix of "name[i]"
+ * \param d          dereference whose root variable is being named
+ *
+ * \return \c base_name itself, or a newly allocated "base_name[index]" when
+ *         the root variable is an array of interface instances and the
+ *         array dereference closest to it has a constant index.  \c NULL if
+ *         the chain does not end in a variable dereference.
+ */
+static const char *
+interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d)
{
- return (a + align - 1) / align * align;
+   /* Constant index of the most recently visited array dereference, or
+    * NULL if none was seen or its index was not constant.
+    */
+   ir_constant *previous_index = NULL;
+
+   while (d != NULL) {
+      switch (d->ir_type) {
+      case ir_type_dereference_variable: {
+         ir_dereference_variable *v = (ir_dereference_variable *) d;
+
+         if (previous_index
+             && v->var->is_interface_instance()
+             && v->var->type->is_array()) {
+            /* The index is fetched as an unsigned component, so it is
+             * printed with %u rather than %d.
+             */
+            return ralloc_asprintf(mem_ctx,
+                                   "%s[%u]",
+                                   base_name,
+                                   previous_index->get_uint_component(0));
+         }
+
+         return base_name;
+      }
+
+      case ir_type_dereference_record: {
+         ir_dereference_record *r = (ir_dereference_record *) d;
+
+         d = r->record->as_dereference();
+         break;
+      }
+
+      case ir_type_dereference_array: {
+         ir_dereference_array *a = (ir_dereference_array *) d;
+
+         d = a->array->as_dereference();
+         previous_index = a->array_index->as_constant();
+         break;
+      }
+
+      default:
+         /* \c d is not advanced on this path, so merely breaking out of
+          * the switch would spin forever in builds where assert() is
+          * compiled out.  Bail out explicitly instead.
+          */
+         assert(!"Should not get here.");
+         return NULL;
+      }
+   }
+
+   assert(!"Should not get here.");
+   return NULL;
}
void
@@ -78,13 +126,30 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
return;
ir_variable *var = deref->variable_referenced();
- if (!var || var->uniform_block == -1)
+ if (!var || !var->is_in_uniform_block())
return;
mem_ctx = ralloc_parent(*rvalue);
- uniform_block = var->uniform_block;
- struct gl_uniform_block *block = &shader->UniformBlocks[uniform_block];
- this->ubo_var = &block->Uniforms[var->location];
+
+ const char *const field_name =
+ interface_field_name(mem_ctx, (char *) var->interface_type->name, deref);
+
+ this->uniform_block = -1;
+ for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
+ if (strcmp(field_name, shader->UniformBlocks[i].Name) == 0) {
+ this->uniform_block = i;
+
+ struct gl_uniform_block *block = &shader->UniformBlocks[i];
+
+ this->ubo_var = var->is_interface_instance()
+ ? &block->Uniforms[0] : &block->Uniforms[var->location];
+
+ break;
+ }
+ }
+
+ assert(this->uniform_block != (unsigned) -1);
+
ir_rvalue *offset = new(mem_ctx) ir_constant(0u);
unsigned const_offset = 0;
bool row_major = ubo_var->RowMajor;
@@ -111,9 +176,21 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
* vector) is handled below in emit_ubo_loads.
*/
array_stride = 4;
+ } else if (deref_array->type->is_interface()) {
+ /* We're processing an array dereference of an interface instance
+ * array. The thing being dereferenced *must* be a variable
+ * dereference because interfaces cannot be embedded in other
+ * types. In terms of calculating the offsets for the lowering
+ * pass, we don't care about the array index. All elements of an
+ * interface instance array will have the same offsets relative to
+ * the base of the block that backs them.
+ */
+ assert(deref_array->array->as_dereference_variable());
+ deref = deref_array->array->as_dereference();
+ break;
} else {
array_stride = deref_array->type->std140_size(row_major);
- array_stride = align(array_stride, 16);
+ array_stride = glsl_align(array_stride, 16);
}
ir_constant *const_index = deref_array->array_index->as_constant();
@@ -138,7 +215,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
const glsl_type *type = struct_type->fields.structure[i].type;
unsigned field_align = type->std140_base_alignment(row_major);
max_field_align = MAX2(field_align, max_field_align);
- intra_struct_offset = align(intra_struct_offset, field_align);
+ intra_struct_offset = glsl_align(intra_struct_offset, field_align);
if (strcmp(struct_type->fields.structure[i].name,
deref_record->field) == 0)
@@ -146,7 +223,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
intra_struct_offset += type->std140_size(row_major);
}
- const_offset = align(const_offset, max_field_align);
+ const_offset = glsl_align(const_offset, max_field_align);
const_offset += intra_struct_offset;
deref = deref_record->record->as_dereference();
@@ -217,8 +294,8 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
field->name);
field_offset =
- align(field_offset,
- field->type->std140_base_alignment(ubo_var->RowMajor));
+ glsl_align(field_offset,
+ field->type->std140_base_alignment(ubo_var->RowMajor));
emit_ubo_loads(field_deref, base_offset, deref_offset + field_offset);
@@ -229,7 +306,8 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
if (deref->type->is_array()) {
unsigned array_stride =
- align(deref->type->fields.array->std140_size(ubo_var->RowMajor), 16);
+ glsl_align(deref->type->fields.array->std140_size(ubo_var->RowMajor),
+ 16);
for (unsigned i = 0; i < deref->type->length; i++) {
ir_constant *element = new(mem_ctx) ir_constant(i);
diff --git a/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp b/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp
index 57771074a..040b0bf83 100644
--- a/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -364,12 +364,16 @@ public:
return this->lower_temps;
case ir_var_uniform:
return this->lower_uniforms;
- case ir_var_in:
+ case ir_var_function_in:
case ir_var_const_in:
- return (var->location == -1) ? this->lower_temps : this->lower_inputs;
- case ir_var_out:
- return (var->location == -1) ? this->lower_temps : this->lower_outputs;
- case ir_var_inout:
+ return this->lower_temps;
+ case ir_var_shader_in:
+ return this->lower_inputs;
+ case ir_var_function_out:
+ return this->lower_temps;
+ case ir_var_shader_out:
+ return this->lower_outputs;
+ case ir_var_function_inout:
return this->lower_temps;
}
diff --git a/mesalib/src/glsl/opt_constant_folding.cpp b/mesalib/src/glsl/opt_constant_folding.cpp
index 7d94d481c..072fefe9a 100644
--- a/mesalib/src/glsl/opt_constant_folding.cpp
+++ b/mesalib/src/glsl/opt_constant_folding.cpp
@@ -127,7 +127,8 @@ ir_constant_folding_visitor::visit_enter(ir_call *ir)
ir_rvalue *param_rval = (ir_rvalue *)iter.get();
ir_variable *sig_param = (ir_variable *)sig_iter.get();
- if (sig_param->mode == ir_var_in || sig_param->mode == ir_var_const_in) {
+ if (sig_param->mode == ir_var_function_in
+ || sig_param->mode == ir_var_const_in) {
ir_rvalue *new_param = param_rval;
handle_rvalue(&new_param);
diff --git a/mesalib/src/glsl/opt_constant_propagation.cpp b/mesalib/src/glsl/opt_constant_propagation.cpp
index a03811999..2f65937fe 100644
--- a/mesalib/src/glsl/opt_constant_propagation.cpp
+++ b/mesalib/src/glsl/opt_constant_propagation.cpp
@@ -285,7 +285,8 @@ ir_constant_propagation_visitor::visit_enter(ir_call *ir)
foreach_iter(exec_list_iterator, iter, ir->actual_parameters) {
ir_variable *sig_param = (ir_variable *)sig_param_iter.get();
ir_rvalue *param = (ir_rvalue *)iter.get();
- if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) {
+ if (sig_param->mode != ir_var_function_out
+ && sig_param->mode != ir_var_function_inout) {
ir_rvalue *new_param = param;
handle_rvalue(&new_param);
if (new_param != param)
diff --git a/mesalib/src/glsl/opt_constant_variable.cpp b/mesalib/src/glsl/opt_constant_variable.cpp
index 1bbaf8e47..cbe6450c6 100644
--- a/mesalib/src/glsl/opt_constant_variable.cpp
+++ b/mesalib/src/glsl/opt_constant_variable.cpp
@@ -137,8 +137,8 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
ir_rvalue *param_rval = (ir_rvalue *)iter.get();
ir_variable *param = (ir_variable *)sig_iter.get();
- if (param->mode == ir_var_out ||
- param->mode == ir_var_inout) {
+ if (param->mode == ir_var_function_out ||
+ param->mode == ir_var_function_inout) {
ir_variable *var = param_rval->variable_referenced();
struct assignment_entry *entry;
diff --git a/mesalib/src/glsl/opt_copy_propagation.cpp b/mesalib/src/glsl/opt_copy_propagation.cpp
index 2952ce594..7282b611e 100644
--- a/mesalib/src/glsl/opt_copy_propagation.cpp
+++ b/mesalib/src/glsl/opt_copy_propagation.cpp
@@ -189,7 +189,8 @@ ir_copy_propagation_visitor::visit_enter(ir_call *ir)
foreach_iter(exec_list_iterator, iter, ir->actual_parameters) {
ir_variable *sig_param = (ir_variable *)sig_param_iter.get();
ir_instruction *ir = (ir_instruction *)iter.get();
- if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) {
+ if (sig_param->mode != ir_var_function_out
+ && sig_param->mode != ir_var_function_inout) {
ir->accept(this);
}
sig_param_iter.next();
diff --git a/mesalib/src/glsl/opt_copy_propagation_elements.cpp b/mesalib/src/glsl/opt_copy_propagation_elements.cpp
index de9f4ef6f..6a19da40d 100644
--- a/mesalib/src/glsl/opt_copy_propagation_elements.cpp
+++ b/mesalib/src/glsl/opt_copy_propagation_elements.cpp
@@ -297,7 +297,8 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
foreach_iter(exec_list_iterator, iter, ir->actual_parameters) {
ir_variable *sig_param = (ir_variable *)sig_param_iter.get();
ir_instruction *ir = (ir_instruction *)iter.get();
- if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) {
+ if (sig_param->mode != ir_var_function_out
+ && sig_param->mode != ir_var_function_inout) {
ir->accept(this);
}
sig_param_iter.next();
diff --git a/mesalib/src/glsl/opt_dead_code.cpp b/mesalib/src/glsl/opt_dead_code.cpp
index 47247e20d..b65e5c2ce 100644
--- a/mesalib/src/glsl/opt_dead_code.cpp
+++ b/mesalib/src/glsl/opt_dead_code.cpp
@@ -77,10 +77,11 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
if (entry->assign) {
/* Remove a single dead assignment to the variable we found.
- * Don't do so if it's a shader output, though.
+ * Don't do so if it's a shader or function output, though.
*/
- if (entry->var->mode != ir_var_out &&
- entry->var->mode != ir_var_inout) {
+ if (entry->var->mode != ir_var_function_out &&
+ entry->var->mode != ir_var_function_inout &&
+ entry->var->mode != ir_var_shader_out) {
entry->assign->remove();
progress = true;
@@ -97,15 +98,10 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
/* uniform initializers are precious, and could get used by another
* stage. Also, once uniform locations have been assigned, the
* declaration cannot be deleted.
- *
- * Also, GL_ARB_uniform_buffer_object says that std140
- * uniforms will not be eliminated. Since we always do
- * std140, just don't eliminate uniforms in UBOs.
*/
if (entry->var->mode == ir_var_uniform &&
(uniform_locations_assigned ||
- entry->var->constant_value ||
- entry->var->uniform_block != -1))
+ entry->var->constant_value))
continue;
entry->var->remove();
diff --git a/mesalib/src/glsl/opt_function_inlining.cpp b/mesalib/src/glsl/opt_function_inlining.cpp
index f9f5bd442..0733d5180 100644
--- a/mesalib/src/glsl/opt_function_inlining.cpp
+++ b/mesalib/src/glsl/opt_function_inlining.cpp
@@ -144,9 +144,9 @@ ir_call::generate_inline(ir_instruction *next_ir)
}
/* Move the actual param into our param variable if it's an 'in' type. */
- if (parameters[i] && (sig_param->mode == ir_var_in ||
+ if (parameters[i] && (sig_param->mode == ir_var_function_in ||
sig_param->mode == ir_var_const_in ||
- sig_param->mode == ir_var_inout)) {
+ sig_param->mode == ir_var_function_inout)) {
ir_assignment *assign;
assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
@@ -202,8 +202,8 @@ ir_call::generate_inline(ir_instruction *next_ir)
const ir_variable *const sig_param = (ir_variable *) sig_param_iter.get();
/* Move our param variable into the actual param if it's an 'out' type. */
- if (parameters[i] && (sig_param->mode == ir_var_out ||
- sig_param->mode == ir_var_inout)) {
+ if (parameters[i] && (sig_param->mode == ir_var_function_out ||
+ sig_param->mode == ir_var_function_inout)) {
ir_assignment *assign;
assign = new(ctx) ir_assignment(param->clone(ctx, NULL)->as_rvalue(),
diff --git a/mesalib/src/glsl/opt_structure_splitting.cpp b/mesalib/src/glsl/opt_structure_splitting.cpp
index 9b3f048e4..806c079e5 100644
--- a/mesalib/src/glsl/opt_structure_splitting.cpp
+++ b/mesalib/src/glsl/opt_structure_splitting.cpp
@@ -104,7 +104,8 @@ ir_structure_reference_visitor::get_variable_entry(ir_variable *var)
{
assert(var);
- if (!var->type->is_record() || var->mode == ir_var_uniform)
+ if (!var->type->is_record() || var->mode == ir_var_uniform
+ || var->mode == ir_var_shader_in || var->mode == ir_var_shader_out)
return NULL;
foreach_iter(exec_list_iterator, iter, this->variable_list) {
diff --git a/mesalib/src/glsl/opt_tree_grafting.cpp b/mesalib/src/glsl/opt_tree_grafting.cpp
index 25b18ea94..113abb7b0 100644
--- a/mesalib/src/glsl/opt_tree_grafting.cpp
+++ b/mesalib/src/glsl/opt_tree_grafting.cpp
@@ -211,7 +211,8 @@ ir_tree_grafting_visitor::visit_enter(ir_call *ir)
ir_rvalue *ir = (ir_rvalue *)iter.get();
ir_rvalue *new_ir = ir;
- if (sig_param->mode != ir_var_in && sig_param->mode != ir_var_const_in) {
+ if (sig_param->mode != ir_var_function_in
+ && sig_param->mode != ir_var_const_in) {
if (check_graft(ir, sig_param) == visit_stop)
return visit_stop;
continue;
@@ -350,8 +351,9 @@ tree_grafting_basic_block(ir_instruction *bb_first,
if (!lhs_var)
continue;
- if (lhs_var->mode == ir_var_out ||
- lhs_var->mode == ir_var_inout)
+ if (lhs_var->mode == ir_var_function_out ||
+ lhs_var->mode == ir_var_function_inout ||
+ lhs_var->mode == ir_var_shader_out)
continue;
ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
diff --git a/mesalib/src/glsl/s_expression.cpp b/mesalib/src/glsl/s_expression.cpp
index 57de9d334..1bdf6bca6 100644
--- a/mesalib/src/glsl/s_expression.cpp
+++ b/mesalib/src/glsl/s_expression.cpp
@@ -66,18 +66,18 @@ read_atom(void *ctx, const char *&src, char *&symbol_buffer)
return NULL; // no atom
// Check for the special symbol '+INF', which means +Infinity. Note: C99
- // requires strtod to parse '+INF' as +Infinity, but we still support some
+ // requires strtof to parse '+INF' as +Infinity, but we still support some
// non-C99-compliant compilers (e.g. MSVC).
if (n == 4 && strncmp(src, "+INF", 4) == 0) {
expr = new(ctx) s_float(std::numeric_limits<float>::infinity());
} else {
// Check if the atom is a number.
char *float_end = NULL;
- double f = glsl_strtod(src, &float_end);
+ float f = glsl_strtof(src, &float_end);
if (float_end != src) {
char *int_end = NULL;
int i = strtol(src, &int_end, 10);
- // If strtod matched more characters, it must have a decimal part
+ // If strtof matched more characters, it must have a decimal part
if (float_end > int_end)
expr = new(ctx) s_float(f);
else
diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp
index 33d3804c6..0fb4f5b16 100644
--- a/mesalib/src/glsl/standalone_scaffolding.cpp
+++ b/mesalib/src/glsl/standalone_scaffolding.cpp
@@ -34,6 +34,24 @@
#include "ralloc.h"
void
+_mesa_warning(struct gl_context *ctx, const char *fmt, ...)
+{
+   va_list args;
+
+   /* The context argument is not used by this stand-in. */
+   (void) ctx;
+
+   /* The message is written in three separate pieces, so this output is
+    * not thread-safe, but that's good enough for the standalone compiler.
+    */
+   fputs("Mesa warning: ", stderr);
+
+   va_start(args, fmt);
+   vfprintf(stderr, fmt, args);
+   va_end(args);
+
+   fputc('\n', stderr);
+}
+
+void
_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
struct gl_shader *sh)
{
@@ -81,6 +99,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
ctx->Extensions.EXT_texture3D = true;
ctx->Extensions.OES_EGL_image_external = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
+ ctx->Extensions.ARB_shading_language_packing = true;
ctx->Extensions.OES_standard_derivatives = true;
ctx->Extensions.ARB_texture_cube_map_array = true;
diff --git a/mesalib/src/glsl/standalone_scaffolding.h b/mesalib/src/glsl/standalone_scaffolding.h
index 41ce35bef..096b2f114 100644
--- a/mesalib/src/glsl/standalone_scaffolding.h
+++ b/mesalib/src/glsl/standalone_scaffolding.h
@@ -34,6 +34,9 @@
#include "main/mtypes.h"
extern "C" void
+_mesa_warning(struct gl_context *ctx, const char *fmtString, ... );
+
+extern "C" void
_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
struct gl_shader *sh);
diff --git a/mesalib/src/glsl/strtod.c b/mesalib/src/glsl/strtod.c
index 47c1f0ed6..5d4346b5a 100644
--- a/mesalib/src/glsl/strtod.c
+++ b/mesalib/src/glsl/strtod.c
@@ -55,3 +55,25 @@ glsl_strtod(const char *s, char **end)
return strtod(s, end);
#endif
}
+
+
+/**
+ * Wrapper around strtof which uses the "C" locale so the decimal
+ * point is always '.'
+ *
+ * Where strtof_l() is not used (see the preprocessor conditions below), the
+ * conversion falls back to plain strtof() or strtod(), which follow the
+ * current locale.
+ *
+ * \param s    string to parse
+ * \param end  passed through to the underlying conversion function, which
+ *             stores a pointer to the first unparsed character there
+ * \return the parsed value as a float
+ */
+float
+glsl_strtof(const char *s, char **end)
+{
+#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \
+   !defined(__HAIKU__) && !defined(__UCLIBC__)
+   static locale_t loc = NULL;
+   if (!loc) {
+      loc = newlocale(LC_CTYPE_MASK, "C", NULL);
+   }
+   if (!loc) {
+      /* newlocale() failed.  Handing a null locale to strtof_l() is not
+       * valid, so fall back to the current locale rather than crash.
+       */
+      return strtof(s, end);
+   }
+   return strtof_l(s, end, loc);
+#elif _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE
+   return strtof(s, end);
+#else
+   return (float) strtod(s, end);
+#endif
+}
diff --git a/mesalib/src/glsl/strtod.h b/mesalib/src/glsl/strtod.h
index 0cf6409d4..ad847dbb0 100644
--- a/mesalib/src/glsl/strtod.h
+++ b/mesalib/src/glsl/strtod.h
@@ -34,6 +34,9 @@ extern "C" {
extern double
glsl_strtod(const char *s, char **end);
+extern float
+glsl_strtof(const char *s, char **end);
+
#ifdef __cplusplus
}
diff --git a/mesalib/src/mesa/Android.libmesa_glsl_utils.mk b/mesalib/src/mesa/Android.libmesa_glsl_utils.mk
index 9c5f3493c..47f2e151b 100644
--- a/mesalib/src/mesa/Android.libmesa_glsl_utils.mk
+++ b/mesalib/src/mesa/Android.libmesa_glsl_utils.mk
@@ -35,10 +35,13 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libmesa_glsl_utils
-LOCAL_C_INCLUDES := $(MESA_TOP)/src/glsl
+LOCAL_C_INCLUDES := \
+ $(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/mapi
LOCAL_SRC_FILES := \
main/hash_table.c \
+ main/imports.c \
program/prog_hash_table.c \
program/symbol_table.c
@@ -54,10 +57,13 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libmesa_glsl_utils
LOCAL_IS_HOST_MODULE := true
-LOCAL_C_INCLUDES := $(MESA_TOP)/src/glsl
+LOCAL_C_INCLUDES := \
+ $(MESA_TOP)/src/glsl \
+ $(MESA_TOP)/src/mapi
LOCAL_SRC_FILES := \
main/hash_table.c \
+ main/imports.c \
program/prog_hash_table.c \
program/symbol_table.c
diff --git a/mesalib/src/mesa/main/extensions.c b/mesalib/src/mesa/main/extensions.c
index 5d01ac8ea..7ae07fb5a 100644
--- a/mesalib/src/mesa/main/extensions.c
+++ b/mesalib/src/mesa/main/extensions.c
@@ -125,6 +125,7 @@ static const struct extension extension_table[] = {
{ "GL_ARB_shader_stencil_export", o(ARB_shader_stencil_export), GL, 2009 },
{ "GL_ARB_shader_texture_lod", o(ARB_shader_texture_lod), GL, 2009 },
{ "GL_ARB_shading_language_100", o(ARB_shading_language_100), GLL, 2003 },
+ { "GL_ARB_shading_language_packing", o(ARB_shading_language_packing), GL, 2011 },
{ "GL_ARB_shadow", o(ARB_shadow), GLL, 2001 },
{ "GL_ARB_sync", o(ARB_sync), GL, 2003 },
{ "GL_ARB_texture_border_clamp", o(ARB_texture_border_clamp), GLL, 2000 },
diff --git a/mesalib/src/mesa/main/getstring.c b/mesalib/src/mesa/main/getstring.c
index 1f23cc0a4..aa3a528fd 100644
--- a/mesalib/src/mesa/main/getstring.c
+++ b/mesalib/src/mesa/main/getstring.c
@@ -74,7 +74,9 @@ shading_language_version(struct gl_context *ctx)
break;
case API_OPENGLES2:
- return (const GLubyte *) "OpenGL ES GLSL ES 1.0.16";
+ return (ctx->Version < 30)
+ ? (const GLubyte *) "OpenGL ES GLSL ES 1.0.16"
+ : (const GLubyte *) "OpenGL ES GLSL ES 3.0";
case API_OPENGLES:
/* fall-through */
diff --git a/mesalib/src/mesa/main/imports.c b/mesalib/src/mesa/main/imports.c
index 76f835e0e..e6f754254 100644
--- a/mesalib/src/mesa/main/imports.c
+++ b/mesalib/src/mesa/main/imports.c
@@ -314,10 +314,43 @@ _mesa_bitcount_64(uint64_t n)
#endif
+/**
+ * Round \c val to the nearest integer, with exact halfway cases going to
+ * the even neighbour.
+ *
+ * The C99 rounding functions are awkward to use for this because the
+ * application may have changed the floating-point environment with
+ * fesetenv() for its own purposes.  Mesa's IROUND macro is close to what we
+ * want, but it rounds away from 0 on n + 0.5, so that one case is
+ * corrected here.
+ */
+int
+_mesa_round_to_even(float val)
+{
+   const int nearest = IROUND(val);
+
+   /* Only an exact tie that IROUND pushed to an odd integer needs fixing. */
+   if (val - floor(val) != 0.5 || nearest % 2 == 0)
+      return nearest;
+
+   /* Step back toward zero to land on the even neighbour. */
+   return val > 0 ? nearest - 1 : nearest + 1;
+}
+
+
/**
* Convert a 4-byte float to a 2-byte half float.
- * Based on code from:
- * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
+ *
+ * Not all float32 values can be represented exactly as a float16 value. We
+ * round such intermediate float32 values to the nearest float16. When the
+ * float32 lies exactly between two float16 values, we round to the one with
+ * an even mantissa.
+ *
+ * This rounding behavior has several benefits:
+ * - It has no sign bias.
+ *
+ * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
+ * GPU ISA.
+ *
+ * - By reproducing the behavior of the GPU (at least on Intel hardware),
+ * compile-time evaluation of constant packHalf2x16 GLSL expressions will
+ * result in the same value as if the expression were executed on the GPU.
*/
GLhalfARB
_mesa_float_to_half(float val)
@@ -356,32 +389,13 @@ _mesa_float_to_half(float val)
else {
/* regular number */
const int new_exp = flt_e - 127;
- if (new_exp < -24) {
- /* this maps to 0 */
- /* m = 0; - already set */
- e = 0;
- }
- else if (new_exp < -14) {
- /* this maps to a denorm */
- unsigned int exp_val = (unsigned int) (-14 - new_exp); /* 2^-exp_val*/
+ if (new_exp < -14) {
+ /* The float32 lies in the range (0.0, min_normal16) and is rounded
+ * to a nearby float16 value. The result will be either zero, subnormal,
+ * or normal.
+ */
e = 0;
- switch (exp_val) {
- case 0:
- _mesa_warning(NULL,
- "float_to_half: logical error in denorm creation!\n");
- /* m = 0; - already set */
- break;
- case 1: m = 512 + (flt_m >> 14); break;
- case 2: m = 256 + (flt_m >> 15); break;
- case 3: m = 128 + (flt_m >> 16); break;
- case 4: m = 64 + (flt_m >> 17); break;
- case 5: m = 32 + (flt_m >> 18); break;
- case 6: m = 16 + (flt_m >> 19); break;
- case 7: m = 8 + (flt_m >> 20); break;
- case 8: m = 4 + (flt_m >> 21); break;
- case 9: m = 2 + (flt_m >> 22); break;
- case 10: m = 1; break;
- }
+ m = _mesa_round_to_even((1 << 24) * fabsf(fi.f));
}
else if (new_exp > 15) {
/* map this value to infinity */
@@ -389,12 +403,26 @@ _mesa_float_to_half(float val)
e = 31;
}
else {
- /* regular */
+ /* The float32 lies in the range
+ * [min_normal16, max_normal16 + max_step16)
+ * and is rounded to a nearby float16 value. The result will be
+ * either normal or infinite.
+ */
e = new_exp + 15;
- m = flt_m >> 13;
+ m = _mesa_round_to_even(flt_m / (float) (1 << 13));
}
}
+ assert(0 <= m && m <= 1024);
+ if (m == 1024) {
+ /* The float32 was rounded upwards into the range of the next exponent,
+ * so bump the exponent. This correctly handles the case where f32
+ * should be rounded up to float16 infinity.
+ */
+ ++e;
+ m = 0;
+ }
+
result = (s << 15) | (e << 10) | m;
return result;
}
diff --git a/mesalib/src/mesa/main/imports.h b/mesalib/src/mesa/main/imports.h
index 8446ea2a3..4b783818b 100644
--- a/mesalib/src/mesa/main/imports.h
+++ b/mesalib/src/mesa/main/imports.h
@@ -548,6 +548,9 @@ _mesa_fls(unsigned int n)
#endif
}
+extern int
+_mesa_round_to_even(float val);
+
extern GLhalfARB
_mesa_float_to_half(float f);
diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h
index d37e6c4c0..3369623f7 100644
--- a/mesalib/src/mesa/main/mtypes.h
+++ b/mesalib/src/mesa/main/mtypes.h
@@ -2273,11 +2273,30 @@ typedef enum
struct gl_uniform_buffer_variable
{
+ /** Name of the variable; may share storage with IndexName (see below). */
char *Name;
+
+ /**
+ * Name of the uniform as seen by glGetUniformIndices.
+ *
+ * glGetUniformIndices requires that the block instance index \b not be
+ * present in the name of queried uniforms.
+ *
+ * \note
+ * \c gl_uniform_buffer_variable::IndexName and
+ * \c gl_uniform_buffer_variable::Name may point to identical storage.
+ */
+ char *IndexName;
+
+ /** GLSL type of the variable. */
const struct glsl_type *Type;
+ /** NOTE(review): presumably the offset within the block's buffer - confirm units. */
unsigned int Offset;
+ /** Row-major flag; the UBO lowering pass feeds it to the std140 size and alignment queries. */
GLboolean RowMajor;
};
+/**
+ * Layout of a uniform block as specified in the shader.
+ *
+ * Stored in \c gl_uniform_block::_Packing.
+ */
+enum gl_uniform_block_packing {
+ ubo_packing_std140,
+ ubo_packing_shared,
+ ubo_packing_packed
+};
+
struct gl_uniform_block
{
/** Declared name of the uniform block */
@@ -2299,6 +2318,14 @@ struct gl_uniform_block
* (GL_UNIFORM_BLOCK_DATA_SIZE).
*/
GLuint UniformBufferSize;
+
+ /**
+ * Layout specified in the shader
+ *
+ * This isn't accessible through the API, but it is used while
+ * cross-validating uniform blocks.
+ */
+ enum gl_uniform_block_packing _Packing;
};
/**
@@ -3042,6 +3069,7 @@ struct gl_extensions
GLboolean ARB_shader_stencil_export;
GLboolean ARB_shader_texture_lod;
GLboolean ARB_shading_language_100;
+ GLboolean ARB_shading_language_packing;
GLboolean ARB_shadow;
GLboolean ARB_sync;
GLboolean ARB_texture_border_clamp;
diff --git a/mesalib/src/mesa/main/remap.c b/mesalib/src/mesa/main/remap.c
index c89fba453..a09870561 100644
--- a/mesalib/src/mesa/main/remap.c
+++ b/mesalib/src/mesa/main/remap.c
@@ -208,8 +208,10 @@ _mesa_do_init_remap_table(const char *pool,
offset = _mesa_map_function_spec(spec);
/* store the dispatch offset in the remap table */
driDispatchRemapTable[i] = offset;
- if (offset < 0)
- _mesa_warning(NULL, "failed to remap index %d", i);
+ if (offset < 0) {
+ const char *name = spec + strlen(spec) + 1;
+ _mesa_warning(NULL, "failed to remap %s", name);
+ }
}
}
diff --git a/mesalib/src/mesa/main/shader_query.cpp b/mesalib/src/mesa/main/shader_query.cpp
index 27b1b8f56..3014a9778 100644
--- a/mesalib/src/mesa/main/shader_query.cpp
+++ b/mesalib/src/mesa/main/shader_query.cpp
@@ -106,7 +106,7 @@ _mesa_GetActiveAttrib(GLhandleARB program, GLuint desired_index,
const ir_variable *const var = ((ir_instruction *) node)->as_variable();
if (var == NULL
- || var->mode != ir_var_in
+ || var->mode != ir_var_shader_in
|| var->location == -1)
continue;
@@ -169,7 +169,7 @@ _mesa_GetAttribLocation(GLhandleARB program, const GLcharARB * name)
* attribute, or if an error occurs, -1 will be returned."
*/
if (var == NULL
- || var->mode != ir_var_in
+ || var->mode != ir_var_shader_in
|| var->location == -1
|| var->location < VERT_ATTRIB_GENERIC0)
continue;
@@ -197,7 +197,7 @@ _mesa_count_active_attribs(struct gl_shader_program *shProg)
const ir_variable *const var = ((ir_instruction *) node)->as_variable();
if (var == NULL
- || var->mode != ir_var_in
+ || var->mode != ir_var_shader_in
|| var->location == -1)
continue;
@@ -223,7 +223,7 @@ _mesa_longest_attribute_name_length(struct gl_shader_program *shProg)
const ir_variable *const var = ((ir_instruction *) node)->as_variable();
if (var == NULL
- || var->mode != ir_var_in
+ || var->mode != ir_var_shader_in
|| var->location == -1)
continue;
@@ -333,7 +333,7 @@ _mesa_GetFragDataIndex(GLuint program, const GLchar *name)
* attribute, or if an error occurs, -1 will be returned."
*/
if (var == NULL
- || var->mode != ir_var_out
+ || var->mode != ir_var_shader_out
|| var->location == -1
|| var->location < FRAG_RESULT_DATA0)
continue;
@@ -389,7 +389,7 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name)
* attribute, or if an error occurs, -1 will be returned."
*/
if (var == NULL
- || var->mode != ir_var_out
+ || var->mode != ir_var_shader_out
|| var->location == -1
|| var->location < FRAG_RESULT_DATA0)
continue;
diff --git a/mesalib/src/mesa/main/texparam.c b/mesalib/src/mesa/main/texparam.c
index 8d0ae16fb..52ede13c0 100644
--- a/mesalib/src/mesa/main/texparam.c
+++ b/mesalib/src/mesa/main/texparam.c
@@ -1388,10 +1388,10 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
if (ctx->API != API_OPENGLES || !ctx->Extensions.OES_draw_texture)
goto invalid_pname;
- params[0] = obj->CropRect[0];
- params[1] = obj->CropRect[1];
- params[2] = obj->CropRect[2];
- params[3] = obj->CropRect[3];
+ params[0] = (GLfloat) obj->CropRect[0];
+ params[1] = (GLfloat) obj->CropRect[1];
+ params[2] = (GLfloat) obj->CropRect[2];
+ params[3] = (GLfloat) obj->CropRect[3];
break;
case GL_TEXTURE_SWIZZLE_R_EXT:
diff --git a/mesalib/src/mesa/main/uniforms.c b/mesalib/src/mesa/main/uniforms.c
index 62c85b3c0..d902407a0 100644
--- a/mesalib/src/mesa/main/uniforms.c
+++ b/mesalib/src/mesa/main/uniforms.c
@@ -695,7 +695,7 @@ _mesa_GetActiveUniformBlockiv(GLuint program,
for (i = 0; i < block->NumUniforms; i++) {
unsigned offset;
params[i] = _mesa_get_uniform_location(ctx, shProg,
- block->Uniforms[i].Name,
+ block->Uniforms[i].IndexName,
&offset);
}
return;
diff --git a/mesalib/src/mesa/main/version.c b/mesalib/src/mesa/main/version.c
index 4373d7b91..e944a5518 100644
--- a/mesalib/src/mesa/main/version.c
+++ b/mesalib/src/mesa/main/version.c
@@ -323,7 +323,30 @@ compute_version_es2(struct gl_context *ctx)
ctx->Extensions.ARB_fragment_shader &&
ctx->Extensions.ARB_texture_non_power_of_two &&
ctx->Extensions.EXT_blend_equation_separate);
- if (ver_2_0) {
+ /* FINISHME: This list isn't quite right. */
+ const GLboolean ver_3_0 = (ctx->Extensions.ARB_half_float_vertex &&
+ ctx->Extensions.ARB_internalformat_query &&
+ ctx->Extensions.ARB_map_buffer_range &&
+ ctx->Extensions.ARB_shader_texture_lod &&
+ ctx->Extensions.ARB_texture_float &&
+ ctx->Extensions.ARB_texture_rg &&
+ ctx->Extensions.ARB_texture_compression_rgtc &&
+ ctx->Extensions.EXT_draw_buffers2 &&
+ /* ctx->Extensions.ARB_framebuffer_object && */
+ ctx->Extensions.EXT_framebuffer_sRGB &&
+ ctx->Extensions.EXT_packed_float &&
+ ctx->Extensions.EXT_texture_array &&
+ ctx->Extensions.EXT_texture_shared_exponent &&
+ ctx->Extensions.EXT_transform_feedback &&
+ ctx->Extensions.NV_conditional_render &&
+ ctx->Extensions.ARB_draw_instanced &&
+ ctx->Extensions.ARB_uniform_buffer_object &&
+ ctx->Extensions.EXT_texture_snorm &&
+ ctx->Extensions.NV_primitive_restart &&
+ ctx->Extensions.OES_depth_texture_cube_map);
+ if (ver_3_0) {
+ ctx->Version = 30;
+ } else if (ver_2_0) {
ctx->Version = 20;
} else {
_mesa_problem(ctx, "Incomplete OpenGL ES 2.0 support.");
diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp
index 0f7439b3b..cd89171da 100644
--- a/mesalib/src/mesa/program/ir_to_mesa.cpp
+++ b/mesalib/src/mesa/program/ir_to_mesa.cpp
@@ -623,10 +623,14 @@ type_size(const struct glsl_type *type)
* at link time.
*/
return 1;
- default:
- assert(0);
- return 0;
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_INTERFACE:
+ assert(!"Invalid type in type_size");
+ break;
}
+
+ return 0;
}
/**
@@ -1427,7 +1431,21 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
case ir_unop_fract:
emit(ir, OPCODE_FRC, result_dst, op[0]);
break;
-
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_snorm_4x8:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_unorm_4x8:
+ case ir_unop_pack_half_2x16:
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_snorm_4x8:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_unorm_4x8:
+ case ir_unop_unpack_half_2x16:
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
+ case ir_binop_pack_half_2x16_split:
+ assert(!"not supported");
+ break;
case ir_binop_min:
emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
break;
@@ -1529,21 +1547,18 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
var->location);
this->variables.push_tail(entry);
break;
- case ir_var_in:
- case ir_var_inout:
+ case ir_var_shader_in:
/* The linker assigns locations for varyings and attributes,
* including deprecated builtins (like gl_Color),
* user-assigned generic attributes (glBindVertexLocation),
* and user-defined varyings.
- *
- * FINISHME: We would hit this path for function arguments. Fix!
*/
assert(var->location != -1);
entry = new(mem_ctx) variable_storage(var,
PROGRAM_INPUT,
var->location);
break;
- case ir_var_out:
+ case ir_var_shader_out:
assert(var->location != -1);
entry = new(mem_ctx) variable_storage(var,
PROGRAM_OUTPUT,
@@ -2378,7 +2393,8 @@ public:
}
private:
- virtual void visit_field(const glsl_type *type, const char *name);
+ virtual void visit_field(const glsl_type *type, const char *name,
+ bool row_major);
struct gl_shader_program *shader_program;
struct gl_program_parameter_list *params;
@@ -2386,10 +2402,13 @@ private:
};
void
-add_uniform_to_shader::visit_field(const glsl_type *type, const char *name)
+add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
+ bool row_major)
{
unsigned int size;
+ (void) row_major;
+
if (type->is_vector() || type->is_scalar()) {
size = type->vector_elements;
} else {
@@ -2459,7 +2478,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
ir_variable *var = ((ir_instruction *) node)->as_variable();
if ((var == NULL) || (var->mode != ir_var_uniform)
- || var->uniform_block != -1 || (strncmp(var->name, "gl_", 3) == 0))
+ || var->is_in_uniform_block() || (strncmp(var->name, "gl_", 3) == 0))
continue;
add.process(var);
@@ -2522,7 +2541,11 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
format = uniform_native;
columns = 1;
break;
- default:
+ case GLSL_TYPE_ARRAY:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_ERROR:
+ case GLSL_TYPE_INTERFACE:
assert(!"Should not get here.");
break;
}
diff --git a/mesalib/src/mesa/program/program.c b/mesalib/src/mesa/program/program.c
index 993803dd5..fb0aeb7ed 100644
--- a/mesalib/src/mesa/program/program.c
+++ b/mesalib/src/mesa/program/program.c
@@ -696,7 +696,7 @@ _mesa_combine_programs(struct gl_context *ctx,
const GLuint newLength = lenA + lenB;
GLboolean usedTemps[MAX_PROGRAM_TEMPS];
GLuint firstTemp = 0;
- GLbitfield inputsB;
+ GLbitfield64 inputsB;
GLuint i;
ASSERT(progA->Target == progB->Target);
@@ -724,7 +724,7 @@ _mesa_combine_programs(struct gl_context *ctx,
if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) {
const struct gl_fragment_program *fprogA, *fprogB;
struct gl_fragment_program *newFprog;
- GLbitfield progB_inputsRead = progB->InputsRead;
+ GLbitfield64 progB_inputsRead = progB->InputsRead;
GLint progB_colorFile, progB_colorIndex;
fprogA = gl_fragment_program_const(progA);
@@ -840,8 +840,8 @@ _mesa_find_used_registers(const struct gl_program *prog,
for (j = 0; j < n; j++) {
if (inst->SrcReg[j].File == file) {
- ASSERT(inst->SrcReg[j].Index < usedSize);
- if(inst->SrcReg[j].Index < usedSize)
+ ASSERT(inst->SrcReg[j].Index < (GLint) usedSize);
+ if (inst->SrcReg[j].Index < (GLint) usedSize)
used[inst->SrcReg[j].Index] = GL_TRUE;
}
}
@@ -908,23 +908,23 @@ _mesa_valid_register_index(const struct gl_context *ctx,
return GL_TRUE; /* XXX or maybe false? */
case PROGRAM_TEMPORARY:
- return index >= 0 && index < c->MaxTemps;
+ return index >= 0 && index < (GLint) c->MaxTemps;
case PROGRAM_ENV_PARAM:
- return index >= 0 && index < c->MaxEnvParams;
+ return index >= 0 && index < (GLint) c->MaxEnvParams;
case PROGRAM_LOCAL_PARAM:
- return index >= 0 && index < c->MaxLocalParams;
+ return index >= 0 && index < (GLint) c->MaxLocalParams;
case PROGRAM_UNIFORM:
case PROGRAM_STATE_VAR:
/* aka constant buffer */
- return index >= 0 && index < c->MaxUniformComponents / 4;
+ return index >= 0 && index < (GLint) c->MaxUniformComponents / 4;
case PROGRAM_CONSTANT:
/* constant buffer w/ possible relative negative addressing */
return (index > (int) c->MaxUniformComponents / -4 &&
- index < c->MaxUniformComponents / 4);
+ index < (int) c->MaxUniformComponents / 4);
case PROGRAM_INPUT:
if (index < 0)
@@ -932,11 +932,11 @@ _mesa_valid_register_index(const struct gl_context *ctx,
switch (shaderType) {
case MESA_SHADER_VERTEX:
- return index < VERT_ATTRIB_GENERIC0 + c->MaxAttribs;
+ return index < VERT_ATTRIB_GENERIC0 + (GLint) c->MaxAttribs;
case MESA_SHADER_FRAGMENT:
- return index < FRAG_ATTRIB_VAR0 + ctx->Const.MaxVarying;
+ return index < FRAG_ATTRIB_VAR0 + (GLint) ctx->Const.MaxVarying;
case MESA_SHADER_GEOMETRY:
- return index < GEOM_ATTRIB_VAR0 + ctx->Const.MaxVarying;
+ return index < GEOM_ATTRIB_VAR0 + (GLint) ctx->Const.MaxVarying;
default:
return GL_FALSE;
}
@@ -947,17 +947,17 @@ _mesa_valid_register_index(const struct gl_context *ctx,
switch (shaderType) {
case MESA_SHADER_VERTEX:
- return index < VERT_RESULT_VAR0 + ctx->Const.MaxVarying;
+ return index < VERT_RESULT_VAR0 + (GLint) ctx->Const.MaxVarying;
case MESA_SHADER_FRAGMENT:
- return index < FRAG_RESULT_DATA0 + ctx->Const.MaxDrawBuffers;
+ return index < FRAG_RESULT_DATA0 + (GLint) ctx->Const.MaxDrawBuffers;
case MESA_SHADER_GEOMETRY:
- return index < GEOM_RESULT_VAR0 + ctx->Const.MaxVarying;
+ return index < GEOM_RESULT_VAR0 + (GLint) ctx->Const.MaxVarying;
default:
return GL_FALSE;
}
case PROGRAM_ADDRESS:
- return index >= 0 && index < c->MaxAddressRegs;
+ return index >= 0 && index < (GLint) c->MaxAddressRegs;
default:
_mesa_problem(ctx,
diff --git a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c
index 843dc5be3..63dbdb29b 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c
@@ -350,9 +350,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
tBot = (GLfloat) height;
}
- u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), vbuf_offset, vbuf,
- (void**)&vertices);
- if (!vbuf) {
+ if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
+ vbuf_offset, vbuf, (void **) &vertices) != PIPE_OK) {
return;
}
diff --git a/mesalib/src/mesa/state_tracker/st_cb_clear.c b/mesalib/src/mesa/state_tracker/st_cb_clear.c
index d01236e28..a5aa8f496 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_clear.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_clear.c
@@ -141,9 +141,8 @@ draw_quad(struct st_context *st,
GLuint i, offset;
float (*vertices)[2][4]; /**< vertex pos + color */
- u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), &offset, &vbuf,
- (void**)&vertices);
- if (!vbuf) {
+ if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]),
+ &offset, &vbuf, (void **) &vertices) != PIPE_OK) {
return;
}
diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
index ff8a9dc43..c944b81f6 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -568,9 +568,8 @@ draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,
struct pipe_resource *buf = NULL;
unsigned offset;
- u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset, &buf,
- (void**)&verts);
- if (!buf) {
+ if (u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset,
+ &buf, (void **) &verts) != PIPE_OK) {
return;
}
@@ -795,7 +794,7 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
y1 = y + height * ctx->Pixel.ZoomY;
/* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
- z = z * 2.0 - 1.0;
+ z = z * 2.0f - 1.0f;
draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex,
normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width,
@@ -1063,7 +1062,7 @@ static void
clamp_size(struct pipe_context *pipe, GLsizei *width, GLsizei *height,
struct gl_pixelstore_attrib *unpack)
{
- const unsigned maxSize =
+ const int maxSize =
1 << (pipe->screen->get_param(pipe->screen,
PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c
index 269068da2..5ca097004 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c
@@ -148,10 +148,9 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
GLfloat *vbuf = NULL;
GLuint attr;
- u_upload_alloc(st->uploader, 0,
- numAttribs * 4 * 4 * sizeof(GLfloat),
- &offset, &vbuffer, (void**)&vbuf);
- if (!vbuffer) {
+ if (u_upload_alloc(st->uploader, 0,
+ numAttribs * 4 * 4 * sizeof(GLfloat),
+ &offset, &vbuffer, (void **) &vbuf) != PIPE_OK) {
return;
}
diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.c b/mesalib/src/mesa/state_tracker/st_cb_texture.c
index 7f07b741e..3cea2df07 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_texture.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_texture.c
@@ -1555,6 +1555,7 @@ void
st_init_texture_functions(struct dd_function_table *functions)
{
functions->ChooseTextureFormat = st_ChooseTextureFormat;
+ functions->QuerySamplesForFormat = st_QuerySamplesForFormat;
functions->TexImage = st_TexImage;
functions->TexSubImage = _mesa_store_texsubimage;
functions->CompressedTexSubImage = _mesa_store_compressed_texsubimage;
diff --git a/mesalib/src/mesa/state_tracker/st_draw.c b/mesalib/src/mesa/state_tracker/st_draw.c
index de539ca5a..de62264a1 100644
--- a/mesalib/src/mesa/state_tracker/st_draw.c
+++ b/mesalib/src/mesa/state_tracker/st_draw.c
@@ -84,7 +84,12 @@ all_varyings_in_vbos(const struct gl_client_array *arrays[])
}
-static void
+/**
+ * Basically, translate Mesa's index buffer information into
+ * a pipe_index_buffer object.
+ * \return TRUE or FALSE for success/failure
+ */
+static boolean
setup_index_buffer(struct st_context *st,
const struct _mesa_index_buffer *ib,
struct pipe_index_buffer *ibuffer)
@@ -100,8 +105,12 @@ setup_index_buffer(struct st_context *st,
ibuffer->offset = pointer_to_offset(ib->ptr);
}
else if (st->indexbuf_uploader) {
- u_upload_data(st->indexbuf_uploader, 0, ib->count * ibuffer->index_size,
- ib->ptr, &ibuffer->offset, &ibuffer->buffer);
+ if (u_upload_data(st->indexbuf_uploader, 0,
+ ib->count * ibuffer->index_size, ib->ptr,
+ &ibuffer->offset, &ibuffer->buffer) != PIPE_OK) {
+ /* out of memory */
+ return FALSE;
+ }
u_upload_unmap(st->indexbuf_uploader);
}
else {
@@ -110,6 +119,7 @@ setup_index_buffer(struct st_context *st,
}
cso_set_index_buffer(st->cso_context, ibuffer);
+ return TRUE;
}
@@ -220,7 +230,10 @@ st_draw_vbo(struct gl_context *ctx,
vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index,
nr_prims);
- setup_index_buffer(st, ib, &ibuffer);
+ if (!setup_index_buffer(st, ib, &ibuffer)) {
+ /* out of memory */
+ return;
+ }
info.indexed = TRUE;
if (min_index != ~0 && max_index != ~0) {
diff --git a/mesalib/src/mesa/state_tracker/st_extensions.c b/mesalib/src/mesa/state_tracker/st_extensions.c
index 18d89815d..af54cf7c8 100644
--- a/mesalib/src/mesa/state_tracker/st_extensions.c
+++ b/mesalib/src/mesa/state_tracker/st_extensions.c
@@ -516,6 +516,7 @@ void st_init_extensions(struct st_context *st)
ctx->Extensions.ARB_fragment_shader = GL_TRUE;
ctx->Extensions.ARB_half_float_pixel = GL_TRUE;
ctx->Extensions.ARB_half_float_vertex = GL_TRUE;
+ ctx->Extensions.ARB_internalformat_query = GL_TRUE;
ctx->Extensions.ARB_map_buffer_range = GL_TRUE;
ctx->Extensions.ARB_shader_objects = GL_TRUE;
ctx->Extensions.ARB_shading_language_100 = GL_TRUE;
@@ -594,9 +595,10 @@ void st_init_extensions(struct st_context *st)
ctx->Const.NativeIntegers = GL_TRUE;
ctx->Const.MaxClipPlanes = 8;
- /* Extensions that only depend on GLSL 1.3. */
+ /* Extensions that either depend on GLSL 1.30 or are a subset thereof. */
ctx->Extensions.ARB_conservative_depth = GL_TRUE;
ctx->Extensions.ARB_shader_bit_encoding = GL_TRUE;
+ ctx->Extensions.OES_depth_texture_cube_map = GL_TRUE;
} else {
/* Optional integer support for GLSL 1.2. */
if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
diff --git a/mesalib/src/mesa/state_tracker/st_format.c b/mesalib/src/mesa/state_tracker/st_format.c
index af81f732d..7ef063953 100644
--- a/mesalib/src/mesa/state_tracker/st_format.c
+++ b/mesalib/src/mesa/state_tracker/st_format.c
@@ -1642,6 +1642,40 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
}
+/**
+ * Called via ctx->Driver.QuerySamplesForFormat().
+ */
+size_t
+st_QuerySamplesForFormat(struct gl_context *ctx, GLenum internalFormat,
+ int samples[16])
+{
+ struct pipe_screen *screen = st_context(ctx)->pipe->screen;
+ enum pipe_format format;
+ unsigned i, bind, num_sample_counts = 0;
+
+ if (_mesa_is_depth_or_stencil_format(internalFormat))
+ bind = PIPE_BIND_DEPTH_STENCIL;
+ else
+ bind = PIPE_BIND_RENDER_TARGET;
+
+ /* Set sample counts in descending order. */
+ for (i = 16; i > 1; i--) {
+ format = st_choose_format(screen, internalFormat, GL_NONE, GL_NONE,
+ PIPE_TEXTURE_2D, i, bind);
+
+ if (format != PIPE_FORMAT_NONE) {
+ samples[num_sample_counts++] = i;
+ }
+ }
+
+ if (!num_sample_counts) {
+ samples[num_sample_counts++] = 1;
+ }
+
+ return num_sample_counts;
+}
+
+
GLboolean
st_sampler_compat_formats(enum pipe_format format1, enum pipe_format format2)
{
diff --git a/mesalib/src/mesa/state_tracker/st_format.h b/mesalib/src/mesa/state_tracker/st_format.h
index 39397b17a..cb6e5bc96 100644
--- a/mesalib/src/mesa/state_tracker/st_format.h
+++ b/mesalib/src/mesa/state_tracker/st_format.h
@@ -67,6 +67,9 @@ st_ChooseTextureFormat(struct gl_context * ctx, GLenum target,
GLint internalFormat,
GLenum format, GLenum type);
+size_t
+st_QuerySamplesForFormat(struct gl_context *ctx, GLenum internalFormat,
+ int samples[16]);
/* can we use a sampler view to translate these formats
only used to make TFP so far */
diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 1d96e905c..c6ac634a2 100644
--- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -984,10 +984,13 @@ type_size(const struct glsl_type *type)
* at link time.
*/
return 1;
- default:
- assert(0);
- return 0;
+ case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_VOID:
+ case GLSL_TYPE_ERROR:
+ assert(!"Invalid type in type_size");
+ break;
}
+ return 0;
}
/**
@@ -1932,10 +1935,23 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
}
break;
}
+ case ir_unop_pack_snorm_2x16:
+ case ir_unop_pack_unorm_2x16:
+ case ir_unop_pack_half_2x16:
+ case ir_unop_pack_snorm_4x8:
+ case ir_unop_pack_unorm_4x8:
+ case ir_unop_unpack_snorm_2x16:
+ case ir_unop_unpack_unorm_2x16:
+ case ir_unop_unpack_half_2x16:
+ case ir_unop_unpack_half_2x16_split_x:
+ case ir_unop_unpack_half_2x16_split_y:
+ case ir_unop_unpack_snorm_4x8:
+ case ir_unop_unpack_unorm_4x8:
+ case ir_binop_pack_half_2x16_split:
case ir_quadop_vector:
- /* This operation should have already been handled.
+ /* This operation is not supported, or should have already been handled.
*/
- assert(!"Should not get here.");
+ assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
break;
}
@@ -2001,21 +2017,18 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
var->location);
this->variables.push_tail(entry);
break;
- case ir_var_in:
- case ir_var_inout:
+ case ir_var_shader_in:
/* The linker assigns locations for varyings and attributes,
* including deprecated builtins (like gl_Color), user-assign
* generic attributes (glBindVertexLocation), and
* user-defined varyings.
- *
- * FINISHME: We would hit this path for function arguments. Fix!
*/
assert(var->location != -1);
entry = new(mem_ctx) variable_storage(var,
PROGRAM_INPUT,
var->location);
break;
- case ir_var_out:
+ case ir_var_shader_out:
assert(var->location != -1);
entry = new(mem_ctx) variable_storage(var,
PROGRAM_OUTPUT,
@@ -2304,7 +2317,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
l.writemask = WRITEMASK_XYZW;
} else if (ir->lhs->type->is_scalar() &&
- ir->lhs->variable_referenced()->mode == ir_var_out) {
+ ir->lhs->variable_referenced()->mode == ir_var_shader_out) {
/* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
* FINISHME: W component of fragment shader output zero, work correctly.
*/
@@ -2581,8 +2594,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
ir_rvalue *param_rval = (ir_rvalue *)iter.get();
ir_variable *param = (ir_variable *)sig_iter.get();
- if (param->mode == ir_var_in ||
- param->mode == ir_var_inout) {
+ if (param->mode == ir_var_function_in ||
+ param->mode == ir_var_function_inout) {
variable_storage *storage = find_variable_storage(param);
assert(storage);
@@ -2617,8 +2630,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
ir_rvalue *param_rval = (ir_rvalue *)iter.get();
ir_variable *param = (ir_variable *)sig_iter.get();
- if (param->mode == ir_var_out ||
- param->mode == ir_var_inout) {
+ if (param->mode == ir_var_function_out ||
+ param->mode == ir_var_function_inout) {
variable_storage *storage = find_variable_storage(param);
assert(storage);
diff --git a/mesalib/src/mesa/swrast/s_texfilter.c b/mesalib/src/mesa/swrast/s_texfilter.c
index 0a91cca06..953300f65 100644
--- a/mesalib/src/mesa/swrast/s_texfilter.c
+++ b/mesalib/src/mesa/swrast/s_texfilter.c
@@ -1647,14 +1647,14 @@ sample_2d_ewa(struct gl_context *ctx,
GLfloat rgba[])
{
GLint level = lod > 0 ? lod : 0;
- GLfloat scaling = 1.0 / (1 << level);
+ GLfloat scaling = 1.0f / (1 << level);
const struct gl_texture_image *img = tObj->Image[0][level];
const struct gl_texture_image *mostDetailedImage =
tObj->Image[0][tObj->BaseLevel];
const struct swrast_texture_image *swImg =
swrast_texture_image_const(mostDetailedImage);
- GLfloat tex_u=-0.5 + texcoord[0] * swImg->WidthScale * scaling;
- GLfloat tex_v=-0.5 + texcoord[1] * swImg->HeightScale * scaling;
+ GLfloat tex_u = -0.5f + texcoord[0] * swImg->WidthScale * scaling;
+ GLfloat tex_v = -0.5f + texcoord[1] * swImg->HeightScale * scaling;
GLfloat ux = dudx * scaling;
GLfloat vx = dvdx * scaling;
@@ -1667,20 +1667,20 @@ sample_2d_ewa(struct gl_context *ctx,
GLfloat A = vx*vx+vy*vy+1;
GLfloat B = -2*(ux*vx+uy*vy);
GLfloat C = ux*ux+uy*uy+1;
- GLfloat F = A*C-B*B/4.0;
+ GLfloat F = A*C-B*B/4.0f;
/* check if it is an ellipse */
/* ASSERT(F > 0.0); */
/* Compute the ellipse's (u,v) bounding box in texture space */
- GLfloat d = -B*B+4.0*C*A;
- GLfloat box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox with */
- GLfloat box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */
+ GLfloat d = -B*B+4.0f*C*A;
+ GLfloat box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox width */
+ GLfloat box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
- GLint u0 = floor(tex_u - box_u);
- GLint u1 = ceil (tex_u + box_u);
- GLint v0 = floor(tex_v - box_v);
- GLint v1 = ceil (tex_v + box_v);
+ GLint u0 = (GLint) floorf(tex_u - box_u);
+ GLint u1 = (GLint) ceilf (tex_u + box_u);
+ GLint v0 = (GLint) floorf(tex_v - box_v);
+ GLint v1 = (GLint) ceilf (tex_v + box_v);
GLfloat num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
GLfloat newCoord[2];
@@ -1692,7 +1692,7 @@ sample_2d_ewa(struct gl_context *ctx,
/* Scale ellipse formula to directly index the Filter Lookup Table.
* i.e. scale so that F = WEIGHT_LUT_SIZE-1
*/
- double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
+ GLfloat formScale = (GLfloat) (WEIGHT_LUT_SIZE - 1) / F;
A *= formScale;
B *= formScale;
C *= formScale;
@@ -1715,7 +1715,7 @@ sample_2d_ewa(struct gl_context *ctx,
/* as a LUT is used, q must never be negative;
* should not happen, though
*/
- const GLint qClamped = q >= 0.0F ? q : 0;
+ const GLint qClamped = q >= 0.0F ? (GLint) q : 0;
GLfloat weight = weightLut[qClamped];
newCoord[0] = u / ((GLfloat) img->Width2);
@@ -1795,19 +1795,19 @@ sample_2d_footprint(struct gl_context *ctx,
/* Calculate the per anisotropic sample offsets in s,t space. */
if (Px2 > Py2) {
- numSamples = ceil(sqrtf(Px2));
+ numSamples = (GLint) ceilf(sqrtf(Px2));
ds = ux / ((GLfloat) img->Width2);
dt = vx / ((GLfloat) img->Height2);
}
else {
- numSamples = ceil(sqrtf(Py2));
+ numSamples = (GLint) ceilf(sqrtf(Py2));
ds = uy / ((GLfloat) img->Width2);
dt = vy / ((GLfloat) img->Height2);
}
for (s = 0; s<numSamples; s++) {
- newCoord[0] = texcoord[0] + ds * ((GLfloat)(s+1) / (numSamples+1) -0.5);
- newCoord[1] = texcoord[1] + dt * ((GLfloat)(s+1) / (numSamples+1) -0.5);
+ newCoord[0] = texcoord[0] + ds * ((GLfloat)(s+1) / (numSamples+1) -0.5f);
+ newCoord[1] = texcoord[1] + dt * ((GLfloat)(s+1) / (numSamples+1) -0.5f);
sample_2d_linear(ctx, samp, img, newCoord, rgba);
num[0] += rgba[0];
@@ -1956,7 +1956,7 @@ sample_lambda_2d_aniso(struct gl_context *ctx,
/* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
* this since 0.5*log(x) = log(sqrt(x))
*/
- lod = 0.5 * LOG2(Pmin2);
+ lod = 0.5f * LOG2(Pmin2);
if (adjustLOD) {
/* from swrast/s_texcombine.c _swrast_texture_span */
@@ -1988,7 +1988,7 @@ sample_lambda_2d_aniso(struct gl_context *ctx,
* seem to be worth the extra running time.
*/
sample_2d_ewa(ctx, samp, tObj, texcoords[i],
- dudx, dvdx, dudy, dvdy, floor(lod), rgba[i]);
+ dudx, dvdx, dudy, dvdy, (GLint) floorf(lod), rgba[i]);
/* unused: */
(void) sample_2d_footprint;
diff --git a/mesalib/src/mesa/vbo/vbo_exec_api.c b/mesalib/src/mesa/vbo/vbo_exec_api.c
index 985f2209c..353f8cfde 100644
--- a/mesalib/src/mesa/vbo/vbo_exec_api.c
+++ b/mesalib/src/mesa/vbo/vbo_exec_api.c
@@ -124,6 +124,11 @@ void vbo_exec_vtx_wrap( struct vbo_exec_context *exec )
*/
vbo_exec_wrap_buffers( exec );
+ if (!exec->vtx.buffer_ptr) {
+ /* probably ran out of memory earlier when allocating the VBO */
+ return;
+ }
+
/* Copy stored stored vertices to start of new list.
*/
assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr);
diff --git a/mkfontscale/configure.ac b/mkfontscale/configure.ac
index 4340f99e1..4c7e599d5 100644
--- a/mkfontscale/configure.ac
+++ b/mkfontscale/configure.ac
@@ -27,6 +27,7 @@ AC_INIT([mkfontscale], [1.1.0],
[mkfontscale])
AC_CONFIG_SRCDIR([Makefile.am])
AC_CONFIG_HEADERS([config.h])
+AC_USE_SYSTEM_EXTENSIONS
# Initialize Automake
AM_INIT_AUTOMAKE([foreign dist-bzip2])
diff --git a/mkfontscale/hash.c b/mkfontscale/hash.c
index c2cf9caa3..3adfb6861 100644
--- a/mkfontscale/hash.c
+++ b/mkfontscale/hash.c
@@ -20,6 +20,8 @@
THE SOFTWARE.
*/
+#include "config.h"
+
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -41,14 +43,11 @@ hash(const char *string)
}
static void
-strcpy_lwr(char *dst, const char *src)
+str_tolower(char *s)
{
- while(1) {
- *dst = tolower(*src);
- if(*src == '\0')
- break;
- src++;
- dst++;
+ while(*s != '\0') {
+ *s = tolower(*s);
+ s++;
}
}
@@ -97,12 +96,11 @@ putHash(HashTablePtr table, char *key, char *value, int prio)
for(bp = table[i]; bp; bp = bp->next) {
if(strcasecmp(bp->key, key) == 0) {
if(prio > bp->prio) {
- keycopy = malloc(strlen(key) + 1);
+ keycopy = strdup(key);
if(keycopy == NULL) goto fail;
- strcpy_lwr(keycopy, key);
- valuecopy = malloc(strlen(value) + 1);
+ str_tolower(keycopy);
+ valuecopy = strdup(value);
if(valuecopy == NULL) goto fail;
- strcpy(valuecopy, value);
free(bp->key);
free(bp->value);
bp->key = keycopy;
@@ -111,14 +109,13 @@ putHash(HashTablePtr table, char *key, char *value, int prio)
return 1;
}
}
- keycopy = malloc(strlen(key) + 1);
+ keycopy = strdup(key);
if(keycopy == NULL)
goto fail;
- strcpy_lwr(keycopy, key);
- valuecopy = malloc(strlen(value) + 1);
+ str_tolower(keycopy);
+ valuecopy = strdup(value);
if(valuecopy == NULL)
goto fail;
- strcpy(valuecopy, value);
bp = malloc(sizeof(HashBucketRec));
if(bp == NULL)
goto fail;
diff --git a/mkfontscale/ident.c b/mkfontscale/ident.c
index bf544832c..41212575e 100644
--- a/mkfontscale/ident.c
+++ b/mkfontscale/ident.c
@@ -315,10 +315,9 @@ pcfIdentify(fontFile *f, char **name)
if(i >= nprops)
goto fail;
- s = malloc(strlen(strings + props[i].value) + 1);
+ s = strdup(strings + props[i].value);
if(s == NULL)
goto fail;
- strcpy(s, strings + props[i].value);
*name = s;
free(strings);
free(props);
diff --git a/mkfontscale/mkfontscale.c b/mkfontscale/mkfontscale.c
index 5cf5cb9af..a67f28338 100644
--- a/mkfontscale/mkfontscale.c
+++ b/mkfontscale/mkfontscale.c
@@ -20,6 +20,8 @@
THE SOFTWARE.
*/
+#include "config.h"
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -896,10 +898,9 @@ doDirectory(const char *dirname_given, int numEncodings, ListPtr encodingsToDo)
BDF_PropertyRec prop;
rc = FT_Get_BDF_Property(face, "FONT", &prop);
if(rc == 0 && prop.type == BDF_PROPERTY_TYPE_ATOM) {
- xlfd_name = malloc(strlen(prop.u.atom) + 1);
+ xlfd_name = strdup(prop.u.atom);
if(xlfd_name == NULL)
goto done;
- strcpy(xlfd_name, prop.u.atom);
}
}
}
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 515e31218..a93e2905b 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -968,6 +968,22 @@ fi
AC_MSG_RESULT($support_for_attribute_constructor)
AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR)
+dnl =====================================
+dnl __float128
+
+support_for_float128=no
+
+AC_MSG_CHECKING(for __float128)
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
+__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; }
+]])], support_for_float128=yes)
+
+if test x$support_for_float128 = xyes; then
+ AC_DEFINE([HAVE_FLOAT128], [], [Whether the tool chain supports __float128])
+fi
+
+AC_MSG_RESULT($support_for_float128)
+
dnl ==================
dnl libpng
diff --git a/pixman/demos/scale.c b/pixman/demos/scale.c
index 9100ff72a..869ada12b 100644
--- a/pixman/demos/scale.c
+++ b/pixman/demos/scale.c
@@ -39,6 +39,7 @@ typedef struct
GtkAdjustment * scale_x_adjustment;
GtkAdjustment * scale_y_adjustment;
GtkAdjustment * rotate_adjustment;
+ GtkAdjustment * subsample_adjustment;
int scaled_width;
int scaled_height;
} app_t;
@@ -236,7 +237,8 @@ rescale (GtkWidget *may_be_null, app_t *app)
get_value (app, filters, "reconstruct_y_combo_box"),
get_value (app, filters, "sample_x_combo_box"),
get_value (app, filters, "sample_y_combo_box"),
- 4, 4);
+ gtk_adjustment_get_value (app->subsample_adjustment),
+ gtk_adjustment_get_value (app->subsample_adjustment));
pixman_image_set_filter (app->original, PIXMAN_FILTER_SEPARABLE_CONVOLUTION, params, n_params);
@@ -360,10 +362,13 @@ app_new (pixman_image_t *original)
GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_y_adjustment"));
app->rotate_adjustment =
GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "rotate_adjustment"));
+ app->subsample_adjustment =
+ GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "subsample_adjustment"));
g_signal_connect (app->scale_x_adjustment, "value_changed", G_CALLBACK (rescale), app);
g_signal_connect (app->scale_y_adjustment, "value_changed", G_CALLBACK (rescale), app);
g_signal_connect (app->rotate_adjustment, "value_changed", G_CALLBACK (rescale), app);
+ g_signal_connect (app->subsample_adjustment, "value_changed", G_CALLBACK (rescale), app);
widget = get_widget (app, "scale_x_scale");
gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL);
diff --git a/pixman/demos/scale.ui b/pixman/demos/scale.ui
index f7c0c805f..b3450d34d 100644
--- a/pixman/demos/scale.ui
+++ b/pixman/demos/scale.ui
@@ -23,6 +23,14 @@
<property name="page_increment">10</property>
<property name="page_size">10</property>
</object>
+ <object class="GtkAdjustment" id="subsample_adjustment">
+ <property name="lower">1</property>
+ <property name="upper">12</property>
+ <property name="step_increment">1</property>
+ <property name="page_increment">1</property>
+ <property name="page_size">0</property>
+ <property name="value">4</property>
+ </object>
<object class="GtkWindow" id="main">
<child>
<object class="GtkHBox" id="u">
@@ -51,6 +59,7 @@
<child>
<object class="GtkVBox" id="box1">
<property name="visible">True</property>
+ <property name="spacing">12</property>
<child>
<object class="GtkHBox" id="box2">
<property name="visible">True</property>
@@ -234,6 +243,17 @@
</packing>
</child>
<child>
+ <object class="GtkLabel" id="label9">
+ <property name="visible">True</property>
+ <property name="xalign">1</property>
+ <property name="label" translatable="yes">&lt;b&gt;Subsample:&lt;/b&gt;</property>
+ <property name="use_markup">True</property>
+ </object>
+ <packing>
+ <property name="top_attach">5</property>
+ </packing>
+ </child>
+ <child>
<object class="GtkComboBox" id="reconstruct_x_combo_box">
<property name="visible">True</property>
</object>
@@ -277,6 +297,16 @@
<property name="top_attach">4</property>
</packing>
</child>
+ <child>
+ <object class="GtkSpinButton" id="subsample_spin_button">
+ <property name="visible">True</property>
+ <property name="adjustment">subsample_adjustment</property>
+ </object>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="top_attach">5</property>
+ </packing>
+ </child>
</object>
<packing>
<property name="expand">False</property>
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c
index c625e0c4a..247aea645 100644
--- a/pixman/pixman/pixman-fast-path.c
+++ b/pixman/pixman/pixman-fast-path.c
@@ -739,36 +739,6 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp,
}
static void
-fast_composite_src_x888_0565 (pixman_implementation_t *imp,
- pixman_composite_info_t *info)
-{
- PIXMAN_COMPOSITE_ARGS (info);
- uint16_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- *dst = convert_8888_to_0565 (s);
- dst++;
- }
- }
-}
-
-static void
fast_composite_add_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
@@ -1243,6 +1213,18 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp,
pixman_composite_func_t func;
pixman_format_code_t mask_format;
uint32_t src_flags, mask_flags;
+ int32_t sx, sy;
+ int32_t width_remain;
+ int32_t num_pixels;
+ int32_t src_width;
+ int32_t i, j;
+ pixman_image_t extended_src_image;
+ uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
+ pixman_bool_t need_src_extension;
+ uint32_t *src_line;
+ int32_t src_stride;
+ int32_t src_bpp;
+ pixman_composite_info_t info2 = *info;
src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
@@ -1258,149 +1240,131 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp,
mask_flags = FAST_PATH_IS_OPAQUE;
}
- if (_pixman_implementation_lookup_composite (
- imp->toplevel, info->op,
- src_image->common.extended_format_code, src_flags,
- mask_format, mask_flags,
- dest_image->common.extended_format_code, info->dest_flags,
- &imp, &func))
+ _pixman_implementation_lookup_composite (
+ imp->toplevel, info->op,
+ src_image->common.extended_format_code, src_flags,
+ mask_format, mask_flags,
+ dest_image->common.extended_format_code, info->dest_flags,
+ &imp, &func);
+
+ src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
+
+ if (src_image->bits.width < REPEAT_MIN_WIDTH &&
+ (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) &&
+ !src_image->bits.indexed)
{
- int32_t sx, sy;
- int32_t width_remain;
- int32_t num_pixels;
- int32_t src_width;
- int32_t i, j;
- pixman_image_t extended_src_image;
- uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
- pixman_bool_t need_src_extension;
- uint32_t *src_line;
- int32_t src_stride;
- int32_t src_bpp;
- pixman_composite_info_t info2 = *info;
-
- src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
-
- if (src_image->bits.width < REPEAT_MIN_WIDTH &&
- (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) &&
- !src_image->bits.indexed)
- {
- sx = src_x;
- sx = MOD (sx, src_image->bits.width);
- sx += width;
- src_width = 0;
+ sx = src_x;
+ sx = MOD (sx, src_image->bits.width);
+ sx += width;
+ src_width = 0;
- while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
- src_width += src_image->bits.width;
+ while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
+ src_width += src_image->bits.width;
- src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
+ src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
- /* Initialize/validate stack-allocated temporary image */
- _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
- src_width, 1, &extended_src[0], src_stride,
- FALSE);
- _pixman_image_validate (&extended_src_image);
+ /* Initialize/validate stack-allocated temporary image */
+ _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
+ src_width, 1, &extended_src[0], src_stride,
+ FALSE);
+ _pixman_image_validate (&extended_src_image);
- info2.src_image = &extended_src_image;
- need_src_extension = TRUE;
- }
- else
- {
- src_width = src_image->bits.width;
- need_src_extension = FALSE;
- }
+ info2.src_image = &extended_src_image;
+ need_src_extension = TRUE;
+ }
+ else
+ {
+ src_width = src_image->bits.width;
+ need_src_extension = FALSE;
+ }
- sx = src_x;
- sy = src_y;
+ sx = src_x;
+ sy = src_y;
- while (--height >= 0)
- {
- sx = MOD (sx, src_width);
- sy = MOD (sy, src_image->bits.height);
+ while (--height >= 0)
+ {
+ sx = MOD (sx, src_width);
+ sy = MOD (sy, src_image->bits.height);
- if (need_src_extension)
+ if (need_src_extension)
+ {
+ if (src_bpp == 32)
{
- if (src_bpp == 32)
- {
- PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
- for (i = 0; i < src_width; )
- {
- for (j = 0; j < src_image->bits.width; j++, i++)
- extended_src[i] = src_line[j];
- }
- }
- else if (src_bpp == 16)
+ for (i = 0; i < src_width; )
{
- uint16_t *src_line_16;
-
- PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
- src_line_16, 1);
- src_line = (uint32_t*)src_line_16;
-
- for (i = 0; i < src_width; )
- {
- for (j = 0; j < src_image->bits.width; j++, i++)
- ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
- }
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ extended_src[i] = src_line[j];
}
- else if (src_bpp == 8)
- {
- uint8_t *src_line_8;
+ }
+ else if (src_bpp == 16)
+ {
+ uint16_t *src_line_16;
- PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
- src_line_8, 1);
- src_line = (uint32_t*)src_line_8;
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
+ src_line_16, 1);
+ src_line = (uint32_t*)src_line_16;
- for (i = 0; i < src_width; )
- {
- for (j = 0; j < src_image->bits.width; j++, i++)
- ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
- }
+ for (i = 0; i < src_width; )
+ {
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
}
-
- info2.src_y = 0;
}
- else
+ else if (src_bpp == 8)
{
- info2.src_y = sy;
+ uint8_t *src_line_8;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
+ src_line_8, 1);
+ src_line = (uint32_t*)src_line_8;
+
+ for (i = 0; i < src_width; )
+ {
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
+ }
}
- width_remain = width;
+ info2.src_y = 0;
+ }
+ else
+ {
+ info2.src_y = sy;
+ }
- while (width_remain > 0)
- {
- num_pixels = src_width - sx;
+ width_remain = width;
- if (num_pixels > width_remain)
- num_pixels = width_remain;
+ while (width_remain > 0)
+ {
+ num_pixels = src_width - sx;
- info2.src_x = sx;
- info2.width = num_pixels;
- info2.height = 1;
+ if (num_pixels > width_remain)
+ num_pixels = width_remain;
- func (imp, &info2);
+ info2.src_x = sx;
+ info2.width = num_pixels;
+ info2.height = 1;
- width_remain -= num_pixels;
- info2.mask_x += num_pixels;
- info2.dest_x += num_pixels;
- sx = 0;
- }
+ func (imp, &info2);
- sx = src_x;
- sy++;
- info2.mask_x = info->mask_x;
- info2.mask_y++;
- info2.dest_x = info->dest_x;
- info2.dest_y++;
+ width_remain -= num_pixels;
+ info2.mask_x += num_pixels;
+ info2.dest_x += num_pixels;
+ sx = 0;
}
- if (need_src_extension)
- _pixman_image_fini (&extended_src_image);
- }
- else
- {
- _pixman_log_error (FUNC, "Didn't find a suitable function ");
+ sx = src_x;
+ sy++;
+ info2.mask_x = info->mask_x;
+ info2.mask_y++;
+ info2.dest_x = info->dest_x;
+ info2.dest_y++;
}
+
+ if (need_src_extension)
+ _pixman_image_fini (&extended_src_image);
}
/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
@@ -1913,10 +1877,6 @@ static const pixman_fast_path_t c_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
@@ -2199,12 +2159,200 @@ fast_path_fill (pixman_implementation_t *imp,
return TRUE;
}
+/*****************************************************************************/
+
+static uint32_t *
+fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int32_t w = iter->width;
+ uint32_t *dst = iter->buffer;
+ const uint16_t *src = (const uint16_t *)iter->bits;
+
+ iter->bits += iter->stride;
+
+ /* Align the source buffer at 4 bytes boundary */
+ if (w > 0 && ((uintptr_t)src & 3))
+ {
+ *dst++ = convert_0565_to_8888 (*src++);
+ w--;
+ }
+ /* Process two pixels per iteration */
+ while ((w -= 2) >= 0)
+ {
+ uint32_t sr, sb, sg, t0, t1;
+ uint32_t s = *(const uint32_t *)src;
+ src += 2;
+ sr = (s >> 8) & 0x00F800F8;
+ sb = (s << 3) & 0x00F800F8;
+ sg = (s >> 3) & 0x00FC00FC;
+ sr |= sr >> 5;
+ sb |= sb >> 5;
+ sg |= sg >> 6;
+ t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
+ (sb & 0xFF) | 0xFF000000;
+ t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
+ (sb >> 16) | 0xFF000000;
+#ifdef WORDS_BIGENDIAN
+ *dst++ = t1;
+ *dst++ = t0;
+#else
+ *dst++ = t0;
+ *dst++ = t1;
+#endif
+ }
+ if (w & 1)
+ {
+ *dst = convert_0565_to_8888 (*src);
+ }
+
+ return iter->buffer;
+}
+
+static uint32_t *
+fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
+{
+ iter->bits += iter->stride;
+ return iter->buffer;
+}
+
+/* Helper function for a workaround, which tries to ensure that 0x1F001F
+ * constant is always allocated in a register on RISC architectures.
+ */
+static force_inline uint32_t
+convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
+{
+ uint32_t a, b;
+ a = (s >> 3) & x1F001F;
+ b = s & 0xFC00;
+ a |= a >> 5;
+ a |= b >> 5;
+ return a;
+}
+
+static void
+fast_write_back_r5g6b5 (pixman_iter_t *iter)
+{
+ int32_t w = iter->width;
+ uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
+ const uint32_t *src = iter->buffer;
+ /* Workaround to ensure that x1F001F variable is allocated in a register */
+ static volatile uint32_t volatile_x1F001F = 0x1F001F;
+ uint32_t x1F001F = volatile_x1F001F;
+
+ while ((w -= 4) >= 0)
+ {
+ uint32_t s1 = *src++;
+ uint32_t s2 = *src++;
+ uint32_t s3 = *src++;
+ uint32_t s4 = *src++;
+ *dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
+ }
+ if (w & 2)
+ {
+ *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
+ }
+ if (w & 1)
+ {
+ *dst = convert_8888_to_0565_workaround (*src, x1F001F);
+ }
+}
+
+typedef struct
+{
+ pixman_format_code_t format;
+ pixman_iter_get_scanline_t get_scanline;
+ pixman_iter_write_back_t write_back;
+} fetcher_info_t;
+
+static const fetcher_info_t fetchers[] =
+{
+ { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
+ { PIXMAN_null }
+};
+
+static pixman_bool_t
+fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+{
+ pixman_image_t *image = iter->image;
+
+#define FLAGS \
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
+ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+
+ if ((iter->iter_flags & ITER_NARROW) &&
+ (iter->image_flags & FLAGS) == FLAGS)
+ {
+ const fetcher_info_t *f;
+
+ for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+ {
+ if (image->common.extended_format_code == f->format)
+ {
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4;
+
+ iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ iter->stride = s;
+
+ iter->get_scanline = f->get_scanline;
+ return TRUE;
+ }
+ }
+ }
+
+ return FALSE;
+}
+
+static pixman_bool_t
+fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+{
+ pixman_image_t *image = iter->image;
+
+ if ((iter->iter_flags & ITER_NARROW) &&
+ (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
+ {
+ const fetcher_info_t *f;
+
+ for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+ {
+ if (image->common.extended_format_code == f->format)
+ {
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4;
+
+ iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ iter->stride = s;
+
+ if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
+ (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
+ {
+ iter->get_scanline = fast_dest_fetch_noop;
+ }
+ else
+ {
+ iter->get_scanline = f->get_scanline;
+ }
+ iter->write_back = f->write_back;
+ return TRUE;
+ }
+ }
+ }
+ return FALSE;
+}
+
+
pixman_implementation_t *
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
imp->fill = fast_path_fill;
+ imp->src_iter_init = fast_src_iter_init;
+ imp->dest_iter_init = fast_dest_iter_init;
return imp;
}
diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c
index f175d771e..93a1b9acf 100644
--- a/pixman/pixman/pixman-general.c
+++ b/pixman/pixman/pixman-general.c
@@ -188,9 +188,6 @@ general_composite_rect (pixman_implementation_t *imp,
compose = _pixman_implementation_lookup_combiner (
imp->toplevel, op, component_alpha, narrow);
- if (!compose)
- return;
-
for (i = 0; i < height; ++i)
{
uint32_t *s, *m, *d;
diff --git a/pixman/pixman/pixman-glyph.c b/pixman/pixman/pixman-glyph.c
index 6d2c8bbb7..5a271b64b 100644
--- a/pixman/pixman/pixman-glyph.c
+++ b/pixman/pixman/pixman-glyph.c
@@ -463,16 +463,13 @@ pixman_composite_glyphs_no_mask (pixman_op_t op,
{
glyph_format = glyph_img->common.extended_format_code;
glyph_flags = glyph_img->common.flags;
-
+
_pixman_implementation_lookup_composite (
get_implementation(), op,
src->common.extended_format_code, src->common.flags,
glyph_format, glyph_flags | extra,
dest_format, dest_flags,
&implementation, &func);
-
- if (!func)
- goto out;
}
info.src_x = src_x + composite_box.x1 - dest_x;
@@ -582,9 +579,6 @@ add_glyphs (pixman_glyph_cache_t *cache,
mask_format, info.mask_flags,
dest_format, dest_flags,
&implementation, &func);
-
- if (!func)
- goto out;
}
glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x;
diff --git a/pixman/pixman/pixman-implementation.c b/pixman/pixman/pixman-implementation.c
index ec467a619..c0a643633 100644
--- a/pixman/pixman/pixman-implementation.c
+++ b/pixman/pixman/pixman-implementation.c
@@ -65,7 +65,13 @@ typedef struct
PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
-pixman_bool_t
+static void
+dummy_composite_rect (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+}
+
+void
_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
pixman_op_t op,
pixman_format_code_t src_format,
@@ -142,7 +148,11 @@ _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
++info;
}
}
- return FALSE;
+
+ /* We should never reach this point */
+ _pixman_log_error (FUNC, "No known composite function\n");
+ *out_imp = NULL;
+ *out_func = dummy_composite_rect;
update_cache:
if (i)
@@ -160,8 +170,16 @@ update_cache:
cache->cache[0].fast_path.dest_flags = dest_flags;
cache->cache[0].fast_path.func = *out_func;
}
+}
- return TRUE;
+static void
+dummy_combine (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
}
pixman_combine_32_func_t
@@ -199,7 +217,9 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
imp = imp->fallback;
}
- return NULL;
+ /* We should never reach this point */
+ _pixman_log_error (FUNC, "No known combine function\n");
+ return dummy_combine;
}
pixman_bool_t
diff --git a/pixman/pixman/pixman-inlines.h b/pixman/pixman/pixman-inlines.h
index ab4def0dc..dd1c2f17f 100644
--- a/pixman/pixman/pixman-inlines.h
+++ b/pixman/pixman/pixman-inlines.h
@@ -88,6 +88,42 @@ pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
((1 << BILINEAR_INTERPOLATION_BITS) - 1);
}
+#if BILINEAR_INTERPOLATION_BITS <= 4
+/* Inspired by Filter_32_opaque from Skia */
+static force_inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+ uint32_t bl, uint32_t br,
+ int distx, int disty)
+{
+ int distxy, distxiy, distixy, distixiy;
+ uint32_t lo, hi;
+
+ distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
+ disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
+
+ distxy = distx * disty;
+ distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */
+ distixy = (disty << 4) - distxy; /* disty * (16 - distx) */
+ distixiy =
+ 16 * 16 - (disty << 4) -
+ (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
+
+ lo = (tl & 0xff00ff) * distixiy;
+ hi = ((tl >> 8) & 0xff00ff) * distixiy;
+
+ lo += (tr & 0xff00ff) * distxiy;
+ hi += ((tr >> 8) & 0xff00ff) * distxiy;
+
+ lo += (bl & 0xff00ff) * distixy;
+ hi += ((bl >> 8) & 0xff00ff) * distixy;
+
+ lo += (br & 0xff00ff) * distxy;
+ hi += ((br >> 8) & 0xff00ff) * distxy;
+
+ return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
+}
+
+#else
#if SIZEOF_LONG > 4
static force_inline uint32_t
@@ -184,6 +220,7 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
}
#endif
+#endif // BILINEAR_INTERPOLATION_BITS <= 4
/*
* For each scanline fetched from source image with PAD repeat:
diff --git a/pixman/pixman/pixman-matrix.c b/pixman/pixman/pixman-matrix.c
index cd2f1b5b8..89b96826b 100644
--- a/pixman/pixman/pixman-matrix.c
+++ b/pixman/pixman/pixman-matrix.c
@@ -34,6 +34,338 @@
#define F(x) pixman_int_to_fixed (x)
+static force_inline int
+count_leading_zeros (uint32_t x)
+{
+#ifdef __GNUC__
+ return __builtin_clz (x);
+#else
+ int n = 0;
+ while (x)
+ {
+ n++;
+ x >>= 1;
+ }
+ return 32 - n;
+#endif
+}
+
+/*
+ * Large signed/unsigned integer division with rounding for the platforms with
+ * only 64-bit integer data type supported (no 128-bit data type).
+ *
+ * Arguments:
+ * hi, lo - high and low 64-bit parts of the dividend
+ * div - 48-bit divisor
+ *
+ * Returns: lowest 64 bits of the result as a return value and highest 64
+ * bits of the result to "result_hi" pointer
+ */
+
+/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */
+static force_inline uint64_t
+rounded_udiv_128_by_48 (uint64_t hi,
+ uint64_t lo,
+ uint64_t div,
+ uint64_t *result_hi)
+{
+ uint64_t tmp, remainder, result_lo;
+ assert(div < ((uint64_t)1 << 48));
+
+ remainder = hi % div;
+ *result_hi = hi / div;
+
+ tmp = (remainder << 16) + (lo >> 48);
+ result_lo = tmp / div;
+ remainder = tmp % div;
+
+ tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF);
+ result_lo = (result_lo << 16) + (tmp / div);
+ remainder = tmp % div;
+
+ tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF);
+ result_lo = (result_lo << 16) + (tmp / div);
+ remainder = tmp % div;
+
+ tmp = (remainder << 16) + (lo & 0xFFFF);
+ result_lo = (result_lo << 16) + (tmp / div);
+ remainder = tmp % div;
+
+ /* round to nearest */
+ if (remainder * 2 >= div && ++result_lo == 0)
+ *result_hi += 1;
+
+ return result_lo;
+}
+
+/* signed division (128-bit by 49-bit) with rounding to nearest */
+static inline int64_t
+rounded_sdiv_128_by_49 (int64_t hi,
+ uint64_t lo,
+ int64_t div,
+ int64_t *signed_result_hi)
+{
+ uint64_t result_lo, result_hi;
+ int sign = 0;
+ if (div < 0)
+ {
+ div = -div;
+ sign ^= 1;
+ }
+ if (hi < 0)
+ {
+ if (lo != 0)
+ hi++;
+ hi = -hi;
+ lo = -lo;
+ sign ^= 1;
+ }
+ result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi);
+ if (sign)
+ {
+ if (result_lo != 0)
+ result_hi++;
+ result_hi = -result_hi;
+ result_lo = -result_lo;
+ }
+ if (signed_result_hi)
+ {
+ *signed_result_hi = result_hi;
+ }
+ return result_lo;
+}
+
+/*
+ * Multiply 64.16 fixed point value by (2^scalebits) and convert
+ * to 128-bit integer.
+ */
+static force_inline void
+fixed_64_16_to_int128 (int64_t hi,
+ int64_t lo,
+ int64_t *rhi,
+ int64_t *rlo,
+ int scalebits)
+{
+ /* separate integer and fractional parts */
+ hi += lo >> 16;
+ lo &= 0xFFFF;
+
+ if (scalebits <= 0)
+ {
+ *rlo = hi >> (-scalebits);
+ *rhi = *rlo >> 63;
+ }
+ else
+ {
+ *rhi = hi >> (64 - scalebits);
+ *rlo = (uint64_t)hi << scalebits;
+ if (scalebits < 16)
+ *rlo += lo >> (16 - scalebits);
+ else
+ *rlo += lo << (scalebits - 16);
+ }
+}
+
+/*
+ * Convert 112.16 fixed point value to 48.16 with clamping for the out
+ * of range values.
+ */
+static force_inline pixman_fixed_48_16_t
+fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag)
+{
+ if ((lo >> 63) != hi)
+ {
+ *clampflag = TRUE;
+ return hi >= 0 ? INT64_MAX : INT64_MIN;
+ }
+ else
+ {
+ return lo;
+ }
+}
+
+/*
+ * Transform a point with 31.16 fixed point coordinates from the destination
+ * space to a point with 48.16 fixed point coordinates in the source space.
+ * No overflows are possible for affine transformations and the results are
+ * accurate including the least significant bit. Projective transformations
+ * may overflow, in this case the results are just clamped to return maximum
+ * or minimum 48.16 values (so that the caller can at least handle the NONE
+ * and PAD repeats correctly) and the return value is FALSE to indicate that
+ * such clamping has happened.
+ */
+PIXMAN_EXPORT pixman_bool_t
+pixman_transform_point_31_16 (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result)
+{
+ pixman_bool_t clampflag = FALSE;
+ int i;
+ int64_t tmp[3][2], divint;
+ uint16_t divfrac;
+
+ /* input vector values must have no more than 31 bits (including sign)
+ * in the integer part */
+ assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+
+ for (i = 0; i < 3; i++)
+ {
+ tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
+ tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
+ tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
+ tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
+ tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
+ tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
+ }
+
+ /*
+ * separate 64-bit integer and 16-bit fractional parts for the divisor,
+ * which is also scaled by 65536 after fixed point multiplication.
+ */
+ divint = tmp[2][0] + (tmp[2][1] >> 16);
+ divfrac = tmp[2][1] & 0xFFFF;
+
+ if (divint == pixman_fixed_1 && divfrac == 0)
+ {
+ /*
+ * this is a simple affine transformation
+ */
+ result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
+ result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
+ result->v[2] = pixman_fixed_1;
+ }
+ else if (divint == 0 && divfrac == 0)
+ {
+ /*
+ * handle zero divisor (if the values are non-zero, set the
+ * results to maximum positive or minimum negative)
+ */
+ clampflag = TRUE;
+
+ result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
+ result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
+
+ if (result->v[0] > 0)
+ result->v[0] = INT64_MAX;
+ else if (result->v[0] < 0)
+ result->v[0] = INT64_MIN;
+
+ if (result->v[1] > 0)
+ result->v[1] = INT64_MAX;
+ else if (result->v[1] < 0)
+ result->v[1] = INT64_MIN;
+ }
+ else
+ {
+ /*
+ * projective transformation, analyze the top 32 bits of the divisor
+ */
+ int32_t hi32divbits = divint >> 32;
+ if (hi32divbits < 0)
+ hi32divbits = ~hi32divbits;
+
+ if (hi32divbits == 0)
+ {
+ /* the divisor is small, we can actually keep all the bits */
+ int64_t hi, rhi, lo, rlo;
+ int64_t div = (divint << 16) + divfrac;
+
+ fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32);
+ rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+ result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+
+ fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32);
+ rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+ result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+ }
+ else
+ {
+ /* the divisor needs to be reduced to 48 bits */
+ int64_t hi, rhi, lo, rlo, div;
+ int shift = 32 - count_leading_zeros (hi32divbits);
+ fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift);
+
+ fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift);
+ rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+ result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+
+ fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift);
+ rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+ result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+ }
+ }
+ result->v[2] = pixman_fixed_1;
+ return !clampflag;
+}
+
+PIXMAN_EXPORT void
+pixman_transform_point_31_16_affine (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result)
+{
+ int64_t hi0, lo0, hi1, lo1;
+
+ /* input vector values must have no more than 31 bits (including sign)
+ * in the integer part */
+ assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+
+ hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16);
+ lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF);
+ hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16);
+ lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF);
+ hi0 += (int64_t)t->matrix[0][2];
+
+ hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16);
+ lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF);
+ hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16);
+ lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF);
+ hi1 += (int64_t)t->matrix[1][2];
+
+ result->v[0] = hi0 + ((lo0 + 0x8000) >> 16);
+ result->v[1] = hi1 + ((lo1 + 0x8000) >> 16);
+ result->v[2] = pixman_fixed_1;
+}
+
+PIXMAN_EXPORT void
+pixman_transform_point_31_16_3d (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result)
+{
+ int i;
+ int64_t tmp[3][2];
+
+ /* input vector values must have no more than 31 bits (including sign)
+ * in the integer part */
+ assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+
+ for (i = 0; i < 3; i++)
+ {
+ tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
+ tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
+ tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
+ tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
+ tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
+ tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
+ }
+
+ result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
+ result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
+ result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16);
+}
+
PIXMAN_EXPORT void
pixman_transform_init_identity (struct pixman_transform *matrix)
{
@@ -50,69 +382,41 @@ PIXMAN_EXPORT pixman_bool_t
pixman_transform_point_3d (const struct pixman_transform *transform,
struct pixman_vector * vector)
{
- struct pixman_vector result;
- pixman_fixed_32_32_t partial;
- pixman_fixed_48_16_t v;
- int i, j;
+ pixman_vector_48_16_t tmp;
+ tmp.v[0] = vector->vector[0];
+ tmp.v[1] = vector->vector[1];
+ tmp.v[2] = vector->vector[2];
- for (j = 0; j < 3; j++)
- {
- v = 0;
- for (i = 0; i < 3; i++)
- {
- partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] *
- (pixman_fixed_48_16_t) vector->vector[i]);
- v += (partial + 0x8000) >> 16;
- }
-
- if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
- return FALSE;
-
- result.vector[j] = (pixman_fixed_t) v;
- }
-
- *vector = result;
+ pixman_transform_point_31_16_3d (transform, &tmp, &tmp);
- if (!result.vector[2])
- return FALSE;
+ vector->vector[0] = tmp.v[0];
+ vector->vector[1] = tmp.v[1];
+ vector->vector[2] = tmp.v[2];
- return TRUE;
+ return vector->vector[0] == tmp.v[0] &&
+ vector->vector[1] == tmp.v[1] &&
+ vector->vector[2] == tmp.v[2];
}
PIXMAN_EXPORT pixman_bool_t
pixman_transform_point (const struct pixman_transform *transform,
struct pixman_vector * vector)
{
- pixman_fixed_32_32_t partial;
- pixman_fixed_34_30_t v[3];
- pixman_fixed_48_16_t quo;
- int i, j;
+ pixman_vector_48_16_t tmp;
+ tmp.v[0] = vector->vector[0];
+ tmp.v[1] = vector->vector[1];
+ tmp.v[2] = vector->vector[2];
- for (j = 0; j < 3; j++)
- {
- v[j] = 0;
-
- for (i = 0; i < 3; i++)
- {
- partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] *
- (pixman_fixed_32_32_t) vector->vector[i]);
- v[j] += (partial + 2) >> 2;
- }
- }
-
- if (!((v[2] + 0x8000) >> 16))
- return FALSE;
+ if (!pixman_transform_point_31_16 (transform, &tmp, &tmp))
+ return FALSE;
- for (j = 0; j < 2; j++)
- {
- quo = v[j] / ((v[2] + 0x8000) >> 16);
- if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16)
- return FALSE;
- vector->vector[j] = (pixman_fixed_t) quo;
- }
-
- vector->vector[2] = pixman_fixed_1;
- return TRUE;
+ vector->vector[0] = tmp.v[0];
+ vector->vector[1] = tmp.v[1];
+ vector->vector[2] = tmp.v[2];
+
+ return vector->vector[0] == tmp.v[0] &&
+ vector->vector[1] == tmp.v[1] &&
+ vector->vector[2] == tmp.v[2];
}
PIXMAN_EXPORT pixman_bool_t
diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h
index e5ab873ed..cb78a2ed8 100644
--- a/pixman/pixman/pixman-private.h
+++ b/pixman/pixman/pixman-private.h
@@ -497,7 +497,7 @@ pixman_implementation_t *
_pixman_implementation_create (pixman_implementation_t *fallback,
const pixman_fast_path_t *fast_paths);
-pixman_bool_t
+void
_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
pixman_op_t op,
pixman_format_code_t src_format,
@@ -1052,7 +1052,7 @@ _pixman_log_error (const char *function, const char *message);
#else
-#define _pixman_log_error(f,m) do { } while (0) \
+#define _pixman_log_error(f,m) do { } while (0)
#define return_if_fail(expr) \
do \
@@ -1078,6 +1078,27 @@ _pixman_log_error (const char *function, const char *message);
#endif
/*
+ * Matrix
+ */
+
+typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;
+
+pixman_bool_t
+pixman_transform_point_31_16 (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result);
+
+void
+pixman_transform_point_31_16_3d (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result);
+
+void
+pixman_transform_point_31_16_affine (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result);
+
+/*
* Timers
*/
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index 5a0e0626a..fc873cc96 100644
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -4523,7 +4523,163 @@ sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
sse2_combine_add_u (imp, op, dst, src, NULL, width);
}
+}
+
+static void
+sse2_composite_add_n_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t *dst_line, *dst, src;
+ int dst_stride;
+
+ __m128i xmm_src;
+
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+ if (src == 0)
+ return;
+
+ if (src == ~0)
+ {
+ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32,
+ dest_x, dest_y, width, height, ~0);
+
+ return;
+ }
+
+ xmm_src = _mm_set_epi32 (src, src, src, src);
+ while (height--)
+ {
+ int w = width;
+ uint32_t d;
+
+ dst = dst_line;
+ dst_line += dst_stride;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ d = *dst;
+ *dst++ =
+ _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d)));
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ save_128_aligned
+ ((__m128i*)dst,
+ _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
+
+ dst += 4;
+ w -= 4;
+ }
+
+ while (w--)
+ {
+ d = *dst;
+ *dst++ =
+ _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src,
+ _mm_cvtsi32_si128 (d)));
+ }
+ }
+}
+
+static void
+sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t *dst_line, *dst;
+ uint8_t *mask_line, *mask;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint32_t src;
+
+ __m128i xmm_src;
+
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+ if (src == 0)
+ return;
+ xmm_src = expand_pixel_32_1x128 (src);
+
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ uint8_t m = *mask++;
+ if (m)
+ {
+ *dst = pack_1x128_32
+ (_mm_adds_epu16
+ (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
+ unpack_32_1x128 (*dst)));
+ }
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ uint32_t m = *(uint32_t*)mask;
+ if (m)
+ {
+ __m128i xmm_mask_lo, xmm_mask_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+
+ __m128i xmm_dst = load_128_aligned ((__m128i*)dst);
+ __m128i xmm_mask =
+ _mm_unpacklo_epi8 (unpack_32_1x128(m),
+ _mm_setzero_si128 ());
+
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ pix_multiply_2x128 (&xmm_src, &xmm_src,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
+ xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+ w -= 4;
+ dst += 4;
+ mask += 4;
+ }
+
+ while (w)
+ {
+ uint8_t m = *mask++;
+ if (m)
+ {
+ *dst = pack_1x128_32
+ (_mm_adds_epu16
+ (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
+ unpack_32_1x128 (*dst)));
+ }
+ dst++;
+ w--;
+ }
+ }
}
static pixman_bool_t
@@ -5786,6 +5942,121 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
uint32_t, uint8_t, uint32_t,
NORMAL, FLAG_HAVE_NON_SOLID_MASK)
+static force_inline void /* OVER-composite one scanline: interpolated source pixels, solid mask, into dst */
+scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, /* updated in place, w pixels */
+ const uint32_t * mask, /* only (*mask >> 24) is used */
+ const uint32_t * src_top, /* src_top..max_vx are not named in this body; presumably consumed by the BILINEAR_* macros -- confirm */
+ const uint32_t * src_bottom,
+ int32_t w, /* pixels left to process */
+ int wt,
+ int wb,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src) /* nonzero: return without touching dst */
+{
+ BILINEAR_DECLARE_VARIABLES; /* macro defined elsewhere; assumed to declare the state used by BILINEAR_INTERPOLATE_ONE_PIXEL */
+ uint32_t pix1, pix2, pix3, pix4;
+ __m128i xmm_mask;
+
+ if (zero_src || (*mask >> 24) == 0) /* nothing to do when zero_src is set or the mask's top byte is 0 */
+ return;
+
+ xmm_mask = create_mask_16_128 (*mask >> 24); /* top byte of the mask, expanded once and reused as the mask operand below */
+
+ while (w && ((uintptr_t)dst & 15)) /* head: one pixel at a time until dst is 16-byte aligned */
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); /* yields the next source pixel in pix1; presumably also steps vx -- confirm in the macro */
+ if (pix1) /* an all-zero source pixel leaves *dst unchanged */
+ {
+ uint32_t d = *dst;
+
+ __m128i ms = unpack_32_1x128 (pix1);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i dest = xmm_mask; /* NOTE: despite its name this is the mask operand (3rd in_over_1x128 argument) */
+ __m128i alpha_dst = unpack_32_1x128 (d); /* NOTE: despite its name this is the unpacked destination pixel (4th argument) */
+
+ *dst = pack_1x128_32
+ (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
+ }
+
+ dst++;
+ w--;
+ }
+
+ while (w >= 4) /* body: four pixels per iteration using aligned 128-bit load/store */
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+
+ if (pix1 | pix2 | pix3 | pix4) /* skip the load/store when all four source pixels are zero */
+ {
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+ xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); /* _mm_set_epi32 takes the highest lane first, so pix1 lands in the lowest lane */
+
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_mask, &xmm_mask, /* same solid mask for both halves */
+ &xmm_dst_lo, &xmm_dst_hi); /* xmm_dst_lo/hi are what gets stored below, so in_over_2x128 presumably writes its result there */
+
+ save_128_aligned
+ ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ dst += 4;
+ w -= 4;
+ }
+
+ while (w) /* tail: remaining pixels, same per-pixel path as the head loop */
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ if (pix1)
+ {
+ uint32_t d = *dst;
+
+ __m128i ms = unpack_32_1x128 (pix1);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i dest = xmm_mask; /* mask operand, see NOTE in the head loop */
+ __m128i alpha_dst = unpack_32_1x128 (d); /* unpacked destination pixel */
+
+ *dst = pack_1x128_32
+ (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
+ }
+
+ dst++;
+ w--;
+ }
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, /* four instantiations that differ only in this name and in the COVER/PAD/NONE/NORMAL argument */
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER, /* scanline worker shared by all four */
+ uint32_t, uint32_t, uint32_t, /* presumably source, mask and destination pixel types -- confirm against the macro definition */
+ COVER, FLAG_HAVE_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ PAD, FLAG_HAVE_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NONE, FLAG_HAVE_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NORMAL, FLAG_HAVE_SOLID_MASK)
+
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
@@ -5848,6 +6119,14 @@ static const pixman_fast_path_t sse2_fast_paths[] =
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
+ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888),
/* PIXMAN_OP_SRC */
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
@@ -5912,6 +6191,11 @@ static const pixman_fast_path_t sse2_fast_paths[] =
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
+
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c
index 3fabed161..184f0c4e6 100644
--- a/pixman/pixman/pixman.c
+++ b/pixman/pixman/pixman.c
@@ -581,11 +581,13 @@ pixman_image_composite32 (pixman_op_t op,
int32_t height)
{
pixman_format_code_t src_format, mask_format, dest_format;
- uint32_t src_flags, mask_flags, dest_flags;
pixman_region32_t region;
pixman_box32_t extents;
pixman_implementation_t *imp;
pixman_composite_func_t func;
+ pixman_composite_info_t info;
+ const pixman_box32_t *pbox;
+ int n;
_pixman_image_validate (src);
if (mask)
@@ -593,27 +595,27 @@ pixman_image_composite32 (pixman_op_t op,
_pixman_image_validate (dest);
src_format = src->common.extended_format_code;
- src_flags = src->common.flags;
+ info.src_flags = src->common.flags;
if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE))
{
mask_format = mask->common.extended_format_code;
- mask_flags = mask->common.flags;
+ info.mask_flags = mask->common.flags;
}
else
{
mask_format = PIXMAN_null;
- mask_flags = FAST_PATH_IS_OPAQUE;
+ info.mask_flags = FAST_PATH_IS_OPAQUE;
}
dest_format = dest->common.extended_format_code;
- dest_flags = dest->common.flags;
+ info.dest_flags = dest->common.flags;
/* Check for pixbufs */
if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) &&
(src->type == BITS && src->bits.bits == mask->bits.bits) &&
(src->common.repeat == mask->common.repeat) &&
- (src_flags & mask_flags & FAST_PATH_ID_TRANSFORM) &&
+ (info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM) &&
(src_x == mask_x && src_y == mask_y))
{
if (src_format == PIXMAN_x8b8g8r8)
@@ -638,7 +640,7 @@ pixman_image_composite32 (pixman_op_t op,
extents.x2 -= dest_x - src_x;
extents.y2 -= dest_y - src_y;
- if (!analyze_extent (src, &extents, &src_flags))
+ if (!analyze_extent (src, &extents, &info.src_flags))
goto out;
extents.x1 -= src_x - mask_x;
@@ -646,7 +648,7 @@ pixman_image_composite32 (pixman_op_t op,
extents.x2 -= src_x - mask_x;
extents.y2 -= src_y - mask_y;
- if (!analyze_extent (mask, &extents, &mask_flags))
+ if (!analyze_extent (mask, &extents, &info.mask_flags))
goto out;
/* If the clip is within the source samples, and the samples are
@@ -659,16 +661,16 @@ pixman_image_composite32 (pixman_op_t op,
FAST_PATH_BILINEAR_FILTER | \
FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR)
- if ((src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
- (src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+ if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+ (info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
{
- src_flags |= FAST_PATH_IS_OPAQUE;
+ info.src_flags |= FAST_PATH_IS_OPAQUE;
}
- if ((mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
- (mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+ if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+ (info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
{
- mask_flags |= FAST_PATH_IS_OPAQUE;
+ info.mask_flags |= FAST_PATH_IS_OPAQUE;
}
/*
@@ -676,42 +678,35 @@ pixman_image_composite32 (pixman_op_t op,
* if the src or dest are opaque. The output operator should be
* mathematically equivalent to the source.
*/
- op = optimize_operator (op, src_flags, mask_flags, dest_flags);
+ info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags);
- if (_pixman_implementation_lookup_composite (
- get_implementation (), op,
- src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags,
- &imp, &func))
- {
- pixman_composite_info_t info;
- const pixman_box32_t *pbox;
- int n;
+ _pixman_implementation_lookup_composite (
+ get_implementation (), info.op,
+ src_format, info.src_flags,
+ mask_format, info.mask_flags,
+ dest_format, info.dest_flags,
+ &imp, &func);
- info.op = op;
- info.src_image = src;
- info.mask_image = mask;
- info.dest_image = dest;
- info.src_flags = src_flags;
- info.mask_flags = mask_flags;
- info.dest_flags = dest_flags;
+ info.src_image = src;
+ info.mask_image = mask;
+ info.dest_image = dest;
- pbox = pixman_region32_rectangles (&region, &n);
+ pbox = pixman_region32_rectangles (&region, &n);
- while (n--)
- {
- info.src_x = pbox->x1 + src_x - dest_x;
- info.src_y = pbox->y1 + src_y - dest_y;
- info.mask_x = pbox->x1 + mask_x - dest_x;
- info.mask_y = pbox->y1 + mask_y - dest_y;
- info.dest_x = pbox->x1;
- info.dest_y = pbox->y1;
- info.width = pbox->x2 - pbox->x1;
- info.height = pbox->y2 - pbox->y1;
-
- func (imp, &info);
-
- pbox++;
- }
+ while (n--)
+ {
+ info.src_x = pbox->x1 + src_x - dest_x;
+ info.src_y = pbox->y1 + src_y - dest_y;
+ info.mask_x = pbox->x1 + mask_x - dest_x;
+ info.mask_y = pbox->y1 + mask_y - dest_y;
+ info.dest_x = pbox->x1;
+ info.dest_y = pbox->y1;
+ info.width = pbox->x2 - pbox->x1;
+ info.height = pbox->y2 - pbox->y1;
+
+ func (imp, &info);
+
+ pbox++;
}
out:
diff --git a/pixman/test/Makefile.sources b/pixman/test/Makefile.sources
index 8c0b505df..e323a8e8c 100644
--- a/pixman/test/Makefile.sources
+++ b/pixman/test/Makefile.sources
@@ -17,6 +17,7 @@ TESTPROGRAMS = \
gradient-crash-test \
region-contains-test \
alphamap \
+ matrix-test \
stress-test \
composite-traps-test \
blitters-test \
diff --git a/pixman/test/affine-test.c b/pixman/test/affine-test.c
index 678fbe844..2506250db 100644
--- a/pixman/test/affine-test.c
+++ b/pixman/test/affine-test.c
@@ -307,11 +307,11 @@ test_composite (int testnum,
}
#if BILINEAR_INTERPOLATION_BITS == 8
-#define CHECKSUM 0x97097336
+#define CHECKSUM 0x2CDF1F07
#elif BILINEAR_INTERPOLATION_BITS == 7
-#define CHECKSUM 0x31D2DC21
+#define CHECKSUM 0xBC00B1DF
#elif BILINEAR_INTERPOLATION_BITS == 4
-#define CHECKSUM 0x8B925154
+#define CHECKSUM 0xA227306B
#else
#define CHECKSUM 0x00000000
#endif
diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c
index 2f97b7b24..7336fa0d5 100644
--- a/pixman/test/lowlevel-blt-bench.c
+++ b/pixman/test/lowlevel-blt-bench.c
@@ -630,6 +630,8 @@ tests_tbl[] =
{ "src_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
{ "src_1555_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
{ "src_0565_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
+ { "src_8_8", PIXMAN_a8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 },
+ { "src_n_8", PIXMAN_a8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 },
{ "src_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
{ "src_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
{ "src_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 },
@@ -772,7 +774,7 @@ main (int argc, char *argv[])
for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++)
{
- if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern))
+ if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0)
{
bench_composite (tests_tbl[i].testname,
tests_tbl[i].src_fmt,
diff --git a/pixman/test/matrix-test.c b/pixman/test/matrix-test.c
new file mode 100644
index 000000000..8437dd291
--- /dev/null
+++ b/pixman/test/matrix-test.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "utils.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef HAVE_FLOAT128
+
+#define pixman_fixed_to_float128(x) (((__float128)(x)) / 65536.0Q) /* convert to __float128, then scale down by 65536 (2^16) */
+
+typedef struct { __float128 v[3]; } pixman_vector_f128_t; /* 3-component vector of __float128 */
+typedef struct { __float128 m[3][3]; } pixman_transform_f128_t; /* 3x3 matrix; pixman_transform_point_f128 uses the first index as the row */
+
+pixman_bool_t /* TRUE when the result could be normalized, FALSE when its third component is 0 */
+pixman_transform_point_f128 (const pixman_transform_f128_t *t, /* matrix applied to v */
+ const pixman_vector_f128_t *v, /* input vector, not modified */
+ pixman_vector_f128_t *result) /* receives t * v; overwritten in every case */
+{
+ int i;
+ for (i = 0; i < 3; i++) /* result = t * v, one row per iteration */
+ {
+ result->v[i] = t->m[i][0] * v->v[0] +
+ t->m[i][1] * v->v[1] +
+ t->m[i][2] * v->v[2];
+ }
+ if (result->v[2] != 0) /* divide through by the third component so it becomes 1 */
+ {
+ result->v[0] /= result->v[2];
+ result->v[1] /= result->v[2];
+ result->v[2] = 1;
+ return TRUE;
+ }
+ else
+ {
+ return FALSE; /* result keeps the un-normalized product */
+ }
+}
+
+pixman_bool_t does_it_fit_fixed_48_16 (__float128 x) /* FALSE when x >= 2^47 or x <= -2^47, TRUE otherwise */
+{
+ if (x >= 65536.0Q * 65536.0Q * 32768.0Q) /* 2^16 * 2^16 * 2^15 = 2^47 */
+ return FALSE;
+ if (x <= -65536.0Q * 65536.0Q * 32768.0Q) /* -2^47 itself is rejected too */
+ return FALSE;
+ return TRUE;
+}
+
+#endif
+
+uint32_t /* returns a CRC32 accumulated over the 100 fixed-point results produced for this seed */
+test_matrix (int testnum, int verbose) /* testnum seeds the PRNG; verbose is not referenced in this body */
+{
+ uint32_t crc32 = 0;
+ int i, j, k;
+ pixman_bool_t is_affine;
+
+ prng_srand (testnum);
+
+ for (i = 0; i < 100; i++) /* 100 random transform/vector pairs per test number */
+ {
+ pixman_bool_t transform_ok;
+ pixman_transform_t ti;
+ pixman_vector_48_16_t vi, result_i;
+#ifdef HAVE_FLOAT128
+ pixman_transform_f128_t tf;
+ pixman_vector_f128_t vf, result_f;
+#endif
+ prng_randmemset (&ti, sizeof(ti), 0); /* start from PRNG-filled bytes; the meaning of the trailing 0 is not visible here */
+ prng_randmemset (&vi, sizeof(vi), 0);
+
+ for (j = 0; j < 3; j++)
+ {
+ /* make sure that "vi" contains 31.16 fixed point data */
+ vi.v[j] >>= 17;
+ /* and apply random shift */
+ if (prng_rand_n (3) == 0) /* only on some draws, shrink the component further */
+ vi.v[j] >>= prng_rand_n (46);
+ }
+
+ if (prng_rand_n (2))
+ {
+ /* random shift for the matrix */
+ for (j = 0; j < 3; j++)
+ for (k = 0; k < 3; k++)
+ ti.matrix[j][k] >>= prng_rand_n (30); /* shift amount is drawn separately for every element */
+ }
+
+ if (prng_rand_n (2))
+ {
+ /* affine matrix */
+ ti.matrix[2][0] = 0;
+ ti.matrix[2][1] = 0;
+ ti.matrix[2][2] = pixman_fixed_1;
+ }
+
+ if (prng_rand_n (2))
+ {
+ /* cartesian coordinates */
+ vi.v[2] = pixman_fixed_1;
+ }
+
+ is_affine = (ti.matrix[2][0] == 0 && ti.matrix[2][1] == 0 && /* requires both the matrix bottom row and the vector's third component to qualify */
+ ti.matrix[2][2] == pixman_fixed_1 &&
+ vi.v[2] == pixman_fixed_1);
+
+ transform_ok = TRUE; /* stays TRUE on the affine path, where no return value is captured */
+ if (is_affine && prng_rand_n (2)) /* coin flip, so affine inputs also exercise the general routine */
+ pixman_transform_point_31_16_affine (&ti, &vi, &result_i);
+ else
+ transform_ok = pixman_transform_point_31_16 (&ti, &vi, &result_i);
+
+ crc32 = compute_crc32 (crc32, &result_i, sizeof(result_i)); /* result_i is hashed regardless of transform_ok */
+
+#ifdef HAVE_FLOAT128
+ /* compare with a reference 128-bit floating point implementation */
+ for (j = 0; j < 3; j++)
+ {
+ vf.v[j] = pixman_fixed_to_float128 (vi.v[j]);
+ for (k = 0; k < 3; k++)
+ {
+ tf.m[j][k] = pixman_fixed_to_float128 (ti.matrix[j][k]);
+ }
+ }
+
+ if (pixman_transform_point_f128 (&tf, &vf, &result_f)) /* no comparison when the reference result has a zero third component */
+ {
+ if (transform_ok || /* compare if the fixed-point call succeeded, or if it failed although the reference result is in range */
+ (does_it_fit_fixed_48_16 (result_f.v[0]) &&
+ does_it_fit_fixed_48_16 (result_f.v[1]) &&
+ does_it_fit_fixed_48_16 (result_f.v[2])))
+ {
+ for (j = 0; j < 3; j++)
+ {
+ double diff = fabs (result_f.v[j] - /* the __float128 difference is narrowed to double by fabs() */
+ pixman_fixed_to_float128 (result_i.v[j]));
+
+ if (is_affine && diff > (0.51 / 65536.0)) /* tolerance in units of 1/65536: 0.51 for affine inputs */
+ {
+ printf ("%d:%d: bad precision for affine (%.12f)\n",
+ testnum, i, diff);
+ abort ();
+ }
+ else if (diff > (0.71 / 65536.0)) /* also evaluated for affine inputs that passed the check above; 0.71 is the looser bound */
+ {
+ printf ("%d:%d: bad precision for projective (%.12f)\n",
+ testnum, i, diff);
+ abort ();
+ }
+ }
+ }
+ }
+#endif
+ }
+ return crc32;
+}
+
+int /* exit status is whatever fuzzer_test_main returns */
+main (int argc, const char *argv[])
+{
+ return fuzzer_test_main ("matrix", 20000, /* test name and a count; presumably the default number of test runs -- confirm in utils.h */
+ 0xBEBF98C3, /* presumably the expected combined checksum -- confirm in utils.h */
+ test_matrix, argc, argv); /* test_matrix is invoked by the driver with (testnum, verbose) */
+}