From 69c8cec54b01ed522bf10baf20da70304bac701a Mon Sep 17 00:00:00 2001 From: marha Date: Mon, 28 Jan 2013 07:55:57 +0100 Subject: mesa mkfontscale pixman git update 28 jan 2013 mesa: 87592cff57feef29565150b9203e220b50623f30 mkfontscale: b3af8de8d25128f565c2ed2f7c63b6e4099eb84e pixman: 65fc1adb6545737058e938105ae948a3607c277c --- mesalib/configure.ac | 61 +- mesalib/include/GLES3/gl3.h | 4 +- mesalib/src/gallium/auxiliary/Makefile.am | 6 - mesalib/src/gallium/auxiliary/util/u_debug.c | 2 +- mesalib/src/gallium/auxiliary/util/u_tile.c | 4 +- mesalib/src/gallium/auxiliary/util/u_tile.h | 8 +- mesalib/src/gallium/auxiliary/util/u_upload_mgr.c | 9 +- mesalib/src/gallium/auxiliary/util/u_vbuf.c | 76 +- mesalib/src/glsl/Makefile.am | 2 + mesalib/src/glsl/Makefile.sources | 3 + mesalib/src/glsl/SConscript | 2 + mesalib/src/glsl/ast.h | 29 +- mesalib/src/glsl/ast_function.cpp | 15 +- mesalib/src/glsl/ast_to_hir.cpp | 333 +++-- mesalib/src/glsl/builtin_compiler/Makefile.am | 1 + mesalib/src/glsl/builtin_types.h | 74 +- mesalib/src/glsl/builtin_variables.cpp | 94 +- mesalib/src/glsl/glcpp/glcpp-parse.y | 3 + mesalib/src/glsl/glsl_lexer.ll | 10 +- mesalib/src/glsl/glsl_parser.yy | 82 +- mesalib/src/glsl/glsl_parser_extras.cpp | 1 + mesalib/src/glsl/glsl_parser_extras.h | 2 + mesalib/src/glsl/glsl_symbol_table.cpp | 14 +- mesalib/src/glsl/glsl_symbol_table.h | 1 - mesalib/src/glsl/glsl_types.cpp | 94 +- mesalib/src/glsl/glsl_types.h | 43 +- mesalib/src/glsl/hir_field_selection.cpp | 3 +- mesalib/src/glsl/ir.cpp | 52 +- mesalib/src/glsl/ir.h | 100 +- mesalib/src/glsl/ir_builder.cpp | 159 +++ mesalib/src/glsl/ir_builder.h | 72 ++ mesalib/src/glsl/ir_clone.cpp | 12 +- mesalib/src/glsl/ir_constant_expression.cpp | 376 +++++- mesalib/src/glsl/ir_function.cpp | 6 +- mesalib/src/glsl/ir_optimization.h | 26 + mesalib/src/glsl/ir_print_visitor.cpp | 3 +- mesalib/src/glsl/ir_reader.cpp | 10 +- mesalib/src/glsl/ir_set_program_inouts.cpp | 6 +- mesalib/src/glsl/ir_validate.cpp | 42 +- .../src/glsl/link_uniform_block_active_visitor.cpp | 162 +++ .../src/glsl/link_uniform_block_active_visitor.h | 62 + mesalib/src/glsl/link_uniform_blocks.cpp | 313 +++++ mesalib/src/glsl/link_uniform_initializers.cpp | 6 +- mesalib/src/glsl/link_uniforms.cpp | 255 ++-- mesalib/src/glsl/link_varyings.cpp | 42 +- mesalib/src/glsl/linker.cpp | 58 +- mesalib/src/glsl/linker.h | 51 +- mesalib/src/glsl/lower_clip_distance.cpp | 8 +- mesalib/src/glsl/lower_output_reads.cpp | 4 +- mesalib/src/glsl/lower_packed_varyings.cpp | 28 +- mesalib/src/glsl/lower_packing_builtins.cpp | 1314 ++++++++++++++++++++ mesalib/src/glsl/lower_ubo_reference.cpp | 104 +- .../glsl/lower_variable_index_to_cond_assign.cpp | 14 +- mesalib/src/glsl/opt_constant_folding.cpp | 3 +- mesalib/src/glsl/opt_constant_propagation.cpp | 3 +- mesalib/src/glsl/opt_constant_variable.cpp | 4 +- mesalib/src/glsl/opt_copy_propagation.cpp | 3 +- mesalib/src/glsl/opt_copy_propagation_elements.cpp | 3 +- mesalib/src/glsl/opt_dead_code.cpp | 14 +- mesalib/src/glsl/opt_function_inlining.cpp | 8 +- mesalib/src/glsl/opt_structure_splitting.cpp | 3 +- mesalib/src/glsl/opt_tree_grafting.cpp | 8 +- mesalib/src/glsl/s_expression.cpp | 6 +- mesalib/src/glsl/standalone_scaffolding.cpp | 19 + mesalib/src/glsl/standalone_scaffolding.h | 3 + mesalib/src/glsl/strtod.c | 22 + mesalib/src/glsl/strtod.h | 3 + mesalib/src/mesa/Android.libmesa_glsl_utils.mk | 10 +- mesalib/src/mesa/main/extensions.c | 1 + mesalib/src/mesa/main/getstring.c | 4 +- mesalib/src/mesa/main/imports.c | 86 +- mesalib/src/mesa/main/imports.h | 3 + mesalib/src/mesa/main/mtypes.h | 28 + mesalib/src/mesa/main/remap.c | 6 +- mesalib/src/mesa/main/shader_query.cpp | 12 +- mesalib/src/mesa/main/texparam.c | 8 +- mesalib/src/mesa/main/uniforms.c | 2 +- mesalib/src/mesa/main/version.c | 25 +- mesalib/src/mesa/program/ir_to_mesa.cpp | 49 +- mesalib/src/mesa/program/program.c | 32 +- mesalib/src/mesa/state_tracker/st_cb_bitmap.c | 5 +- mesalib/src/mesa/state_tracker/st_cb_clear.c | 5 +- mesalib/src/mesa/state_tracker/st_cb_drawpixels.c | 9 +- mesalib/src/mesa/state_tracker/st_cb_drawtex.c | 7 +- mesalib/src/mesa/state_tracker/st_cb_texture.c | 1 + mesalib/src/mesa/state_tracker/st_draw.c | 21 +- mesalib/src/mesa/state_tracker/st_extensions.c | 4 +- mesalib/src/mesa/state_tracker/st_format.c | 34 + mesalib/src/mesa/state_tracker/st_format.h | 3 + mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 43 +- mesalib/src/mesa/swrast/s_texfilter.c | 38 +- mesalib/src/mesa/vbo/vbo_exec_api.c | 5 + mkfontscale/configure.ac | 1 + mkfontscale/hash.c | 27 +- mkfontscale/ident.c | 3 +- mkfontscale/mkfontscale.c | 5 +- pixman/configure.ac | 16 + pixman/demos/scale.c | 7 +- pixman/demos/scale.ui | 30 + pixman/pixman/pixman-fast-path.c | 446 ++++--- pixman/pixman/pixman-general.c | 3 - pixman/pixman/pixman-glyph.c | 8 +- pixman/pixman/pixman-implementation.c | 28 +- pixman/pixman/pixman-inlines.h | 37 + pixman/pixman/pixman-matrix.c | 408 +++++- pixman/pixman/pixman-private.h | 25 +- pixman/pixman/pixman-sse2.c | 284 +++++ pixman/pixman/pixman.c | 87 +- pixman/test/Makefile.sources | 1 + pixman/test/affine-test.c | 6 +- pixman/test/lowlevel-blt-bench.c | 4 +- pixman/test/matrix-test.c | 186 +++ 112 files changed, 5425 insertions(+), 991 deletions(-) create mode 100644 mesalib/src/glsl/link_uniform_block_active_visitor.cpp create mode 100644 mesalib/src/glsl/link_uniform_block_active_visitor.h create mode 100644 mesalib/src/glsl/link_uniform_blocks.cpp create mode 100644 mesalib/src/glsl/lower_packing_builtins.cpp create mode 100644 pixman/test/matrix-test.c diff --git a/mesalib/configure.ac b/mesalib/configure.ac index e769edadb..9cc5c4ae5 100644 --- a/mesalib/configure.ac +++ b/mesalib/configure.ac @@ -608,8 +608,10 @@ AC_ARG_ENABLE([vdpau], [enable_vdpau=auto]) AC_ARG_ENABLE([opencl], [AS_HELP_STRING([--enable-opencl], - [enable OpenCL library @<:@default=no@:>@])], - [enable_opencl="$enableval"], + [enable OpenCL library NOTE: Enabling this option will also enable + --with-llvm-shared-libs + @<:@default=no@:>@])], + [enable_opencl="$enableval" with_llvm_shared_libs="$enableval"], [enable_opencl=no]) AC_ARG_ENABLE([xlib_glx], [AS_HELP_STRING([--enable-xlib-glx], @@ -1660,10 +1662,7 @@ if test "x$enable_gallium_llvm" = xyes; then if test "x$LLVM_CONFIG" != xno; then LLVM_VERSION=`$LLVM_CONFIG --version | sed 's/svn.*//g'` LLVM_VERSION_INT=`echo $LLVM_VERSION | sed -e 's/\([[0-9]]\)\.\([[0-9]]\)/\10\2/g'` - if test "x$with_llvm_shared_libs" = xyes; then - dnl We can't use $LLVM_VERSION because it has 'svn' stripped out, - LLVM_LIBS="-lLLVM-`$LLVM_CONFIG --version`" - else + if test "x$with_llvm_shared_libs" != xyes; then LLVM_COMPONENTS="engine bitwriter" if $LLVM_CONFIG --components | grep -q '\'; then LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit" @@ -1672,7 +1671,6 @@ if test "x$enable_gallium_llvm" = xyes; then if test "x$enable_opencl" = xyes; then LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation" fi - LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`" fi LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags` LLVM_BINDIR=`$LLVM_CONFIG --bindir` @@ -1797,7 +1795,7 @@ radeon_llvm_check() { configure flag]) fi AC_MSG_WARN([Please ensure you use the latest llvm tree from git://people.freedesktop.org/~tstellar/llvm master before submitting a bug]) - LLVM_LIBS="$LLVM_LIBS `$LLVM_CONFIG --libs r600`" + LLVM_COMPONENTS="${LLVM_COMPONENTS} r600" } dnl Gallium drivers @@ -1836,12 +1834,13 @@ if test "x$with_gallium_drivers" != x; then if test "x$enable_r600_llvm" = xyes -o "x$enable_opencl" = xyes; then radeon_llvm_check NEED_RADEON_GALLIUM=yes; + LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo" fi if test "x$enable_r600_llvm" = xyes; then USE_R600_LLVM_COMPILER=yes; fi if test "x$enable_opencl" = xyes -a "x$with_llvm_shared_libs" = xno; then - LLVM_LIBS="${LLVM_LIBS} `$LLVM_CONFIG --libs bitreader asmparser`" + LLVM_COMPONENTS="${LLVM_COMPONENTS} bitreader asmparser" fi gallium_check_st "radeon/drm" "dri-r600" "xorg-r600" "" "xvmc-r600" "vdpau-r600" ;; @@ -1891,6 +1890,50 @@ if test "x$with_gallium_drivers" != x; then esac done fi + +dnl Set LLVM_LIBS - This is done after the driver configuration so +dnl that drivers can add additonal components to LLVM_COMPONENTS. +dnl Previously, gallium drivers were updating LLVM_LIBS directly +dnl by calling llvm-config --libs ${DRIVER_LLVM_COMPONENTS}, but +dnl this was causing the same libraries to be appear multiple times +dnl in LLVM_LIBS. + +if test "x$MESA_LLVM" != x0; then + + LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`" + + if test "x$with_llvm_shared_libs" = xyes; then + dnl We can't use $LLVM_VERSION because it has 'svn' stripped out, + LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version` + AC_CHECK_FILE("$LLVM_LIBDIR/lib$LLVM_SO_NAME.so", llvm_have_one_so=yes,) + + if test "x$llvm_have_one_so" = xyes; then + dnl LLVM was built using auto*, so there is only one shared object. + LLVM_LIBS="-l$LLVM_SO_NAME" + else + dnl If LLVM was built with CMake, there will be one shared object per + dnl component. + AC_CHECK_FILE("$LLVM_LIBDIR/libLLVMTarget.so",, + AC_MSG_ERROR([Could not find llvm shared libraries: + Please make sure you have built llvm with the --enable-shared option + and that your llvm libraries are installed in $LLVM_LIBDIR + If you have installed your llvm libraries to a different directory you + can use the --with-llvm-prefix= configure flag to specify this directory. + NOTE: Mesa is attempting to use llvm shared libraries because you have + passed one of the following options to configure: + --with-llvm-shared-libs + --enable-opencl + If you do not want to build with llvm shared libraries and instead want to + use llvm static libraries then remove these options from your configure + invocation and reconfigure.])) + + dnl We don't need to update LLVM_LIBS in this case because the LLVM + dnl install uses a shared object for each compoenent and we have + dnl already added all of these objects to LLVM_LIBS. + fi + fi +fi + AM_CONDITIONAL(HAVE_GALLIUM_SVGA, test "x$HAVE_GALLIUM_SVGA" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_I915, test "x$HAVE_GALLIUM_I915" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes) diff --git a/mesalib/include/GLES3/gl3.h b/mesalib/include/GLES3/gl3.h index b9399e994..09f2b5333 100644 --- a/mesalib/include/GLES3/gl3.h +++ b/mesalib/include/GLES3/gl3.h @@ -2,7 +2,7 @@ #define __gl3_h_ /* - * gl3.h last updated on $Date: 2012-09-12 10:13:02 -0700 (Wed, 12 Sep 2012) $ + * gl3.h last updated on $Date: 2012-10-03 07:52:40 -0700 (Wed, 03 Oct 2012) $ */ #include @@ -796,7 +796,7 @@ typedef struct __GLsync *GLsync; #define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F #define GL_MAX_ELEMENT_INDEX 0x8D6B #define GL_NUM_SAMPLE_COUNTS 0x9380 -#define GL_TEXTURE_IMMUTABLE_LEVELS 0x8D63 +#define GL_TEXTURE_IMMUTABLE_LEVELS 0x82DF /*------------------------------------------------------------------------- * Entrypoint definitions diff --git a/mesalib/src/gallium/auxiliary/Makefile.am b/mesalib/src/gallium/auxiliary/Makefile.am index 49792930a..a4eee4773 100644 --- a/mesalib/src/gallium/auxiliary/Makefile.am +++ b/mesalib/src/gallium/auxiliary/Makefile.am @@ -45,9 +45,3 @@ util/u_format_srgb.c: $(srcdir)/util/u_format_srgb.py util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv $(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@ - -# XXX: As a work around for https://bugs.freedesktop.org/show_bug.cgi?id=59334 -# clover needs to link against libgallium.a. Delete this once we have a real -# fix for this bug. -all-local: libgallium.la - ln -f $(builddir)/.libs/libgallium.a $(builddir)/libgallium.a diff --git a/mesalib/src/gallium/auxiliary/util/u_debug.c b/mesalib/src/gallium/auxiliary/util/u_debug.c index 6e8c5b993..f4670f28c 100644 --- a/mesalib/src/gallium/auxiliary/util/u_debug.c +++ b/mesalib/src/gallium/auxiliary/util/u_debug.c @@ -232,7 +232,7 @@ debug_get_flags_option(const char *name, unsigned long result; const char *str; const struct debug_named_value *orig = flags; - int namealign = 0; + unsigned namealign = 0; str = os_get_option(name); if(!str) diff --git a/mesalib/src/gallium/auxiliary/util/u_tile.c b/mesalib/src/gallium/auxiliary/util/u_tile.c index 6c618a674..62298cdab 100644 --- a/mesalib/src/gallium/auxiliary/util/u_tile.c +++ b/mesalib/src/gallium/auxiliary/util/u_tile.c @@ -806,7 +806,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, for (j = 0; j < w; j++) { /* convert 32-bit integer Z to float Z */ const double scale = 1.0 / 0xffffffffU; - pDest[j] = ptrc[j] * scale; + pDest[j] = (float) (ptrc[j] * scale); } pDest += pt->stride/4; ptrc += srcStride; @@ -820,7 +820,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, for (j = 0; j < w; j++) { /* convert 32-bit integer Z to float Z */ const double scale = 1.0 / 0xffffffffU; - pDest[j*2] = ptrc[j] * scale; + pDest[j*2] = (float) (ptrc[j] * scale); } pDest += pt->stride/4; ptrc += srcStride; diff --git a/mesalib/src/gallium/auxiliary/util/u_tile.h b/mesalib/src/gallium/auxiliary/util/u_tile.h index abcd402c8..9e8194459 100644 --- a/mesalib/src/gallium/auxiliary/util/u_tile.h +++ b/mesalib/src/gallium/auxiliary/util/u_tile.h @@ -45,13 +45,13 @@ struct pipe_transfer; static INLINE boolean u_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_box *box) { - if (x >= box->width) + if ((int) x >= box->width) return TRUE; - if (y >= box->height) + if ((int) y >= box->height) return TRUE; - if (x + *w > box->width) + if ((int) (x + *w) > box->width) *w = box->width - x; - if (y + *h > box->height) + if ((int) (y + *h) > box->height) *h = box->height - y; return FALSE; } diff --git a/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c b/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c index ee1c6881e..6859751c5 100644 --- a/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c @@ -163,6 +163,13 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload, unsigned alloc_offset = align(min_out_offset, upload->alignment); unsigned offset; + /* Init these return values here in case we fail below to make + * sure the caller doesn't get garbage values. + */ + *out_offset = ~0; + pipe_resource_reference(outbuf, NULL); + *ptr = NULL; + /* Make sure we have enough space in the upload buffer * for the sub-allocation. */ if (MAX2(upload->offset, alloc_offset) + alloc_size > upload->size) { @@ -182,8 +189,6 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload, PIPE_TRANSFER_UNSYNCHRONIZED, &upload->transfer); if (!upload->map) { - pipe_resource_reference(outbuf, NULL); - *ptr = NULL; upload->transfer = NULL; return PIPE_ERROR_OUT_OF_MEMORY; } diff --git a/mesalib/src/gallium/auxiliary/util/u_vbuf.c b/mesalib/src/gallium/auxiliary/util/u_vbuf.c index b712b52de..244b04d2a 100644 --- a/mesalib/src/gallium/auxiliary/util/u_vbuf.c +++ b/mesalib/src/gallium/auxiliary/util/u_vbuf.c @@ -323,7 +323,7 @@ void u_vbuf_destroy(struct u_vbuf *mgr) FREE(mgr); } -static void +static enum pipe_error u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, unsigned vb_mask, unsigned out_vb, int start_vertex, unsigned num_vertices, @@ -335,6 +335,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, struct pipe_resource *out_buffer = NULL; uint8_t *out_map; unsigned out_offset, mask; + enum pipe_error err; /* Get a translate object. */ tr = translate_cache_find(mgr->translate_cache, key); @@ -381,6 +382,14 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, assert((ib->buffer || ib->user_buffer) && ib->index_size); + /* Create and map the output buffer. */ + err = u_upload_alloc(mgr->uploader, 0, + key->output_stride * num_indices, + &out_offset, &out_buffer, + (void**)&out_map); + if (err != PIPE_OK) + return err; + if (ib->user_buffer) { map = (uint8_t*)ib->user_buffer + offset; } else { @@ -389,12 +398,6 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, PIPE_TRANSFER_READ, &transfer); } - /* Create and map the output buffer. */ - u_upload_alloc(mgr->uploader, 0, - key->output_stride * num_indices, - &out_offset, &out_buffer, - (void**)&out_map); - switch (ib->index_size) { case 4: tr->run_elts(tr, (unsigned*)map, num_indices, 0, out_map); @@ -412,11 +415,13 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, } } else { /* Create and map the output buffer. */ - u_upload_alloc(mgr->uploader, - key->output_stride * start_vertex, - key->output_stride * num_vertices, - &out_offset, &out_buffer, - (void**)&out_map); + err = u_upload_alloc(mgr->uploader, + key->output_stride * start_vertex, + key->output_stride * num_vertices, + &out_offset, &out_buffer, + (void**)&out_map); + if (err != PIPE_OK) + return err; out_offset -= key->output_stride * start_vertex; @@ -441,6 +446,8 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, pipe_resource_reference( &mgr->real_vertex_buffer[out_vb].buffer, NULL); mgr->real_vertex_buffer[out_vb].buffer = out_buffer; + + return PIPE_OK; } static boolean @@ -588,11 +595,14 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, /* Translate buffers. */ for (type = 0; type < VB_NUM; type++) { if (key[type].nr_elements) { - u_vbuf_translate_buffers(mgr, &key[type], mask[type], - mgr->fallback_vbs[type], - start[type], num[type], - start_index, num_indices, min_index, - unroll_indices && type == VB_VERTEX); + enum pipe_error err; + err = u_vbuf_translate_buffers(mgr, &key[type], mask[type], + mgr->fallback_vbs[type], + start[type], num[type], + start_index, num_indices, min_index, + unroll_indices && type == VB_VERTEX); + if (err != PIPE_OK) + return FALSE; /* Fixup the stride for constant attribs. */ if (type == VB_CONST) { @@ -884,7 +894,7 @@ void u_vbuf_set_index_buffer(struct u_vbuf *mgr, pipe->set_index_buffer(pipe, ib); } -static void +static enum pipe_error u_vbuf_upload_buffers(struct u_vbuf *mgr, int start_vertex, unsigned num_vertices, int start_instance, unsigned num_instances) @@ -953,6 +963,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr, unsigned start, end; struct pipe_vertex_buffer *real_vb; const uint8_t *ptr; + enum pipe_error err; i = u_bit_scan(&buffer_mask); @@ -963,11 +974,15 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr, real_vb = &mgr->real_vertex_buffer[i]; ptr = mgr->vertex_buffer[i].user_buffer; - u_upload_data(mgr->uploader, start, end - start, ptr + start, - &real_vb->buffer_offset, &real_vb->buffer); + err = u_upload_data(mgr->uploader, start, end - start, ptr + start, + &real_vb->buffer_offset, &real_vb->buffer); + if (err != PIPE_OK) + return err; real_vb->buffer_offset -= start; } + + return PIPE_OK; } static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr) @@ -1176,11 +1191,13 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) if (unroll_indices || incompatible_vb_mask || mgr->ve->incompatible_elem_mask) { - /* XXX check the return value */ - u_vbuf_translate_begin(mgr, start_vertex, num_vertices, - info->start_instance, info->instance_count, - info->start, info->count, min_index, - unroll_indices); + if (!u_vbuf_translate_begin(mgr, start_vertex, num_vertices, + info->start_instance, info->instance_count, + info->start, info->count, min_index, + unroll_indices)) { + debug_warn_once("u_vbuf_translate_begin() failed"); + return; + } user_vb_mask &= ~(incompatible_vb_mask | mgr->ve->incompatible_vb_mask_all); @@ -1188,8 +1205,13 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) /* Upload user buffers. */ if (user_vb_mask) { - u_vbuf_upload_buffers(mgr, start_vertex, num_vertices, - info->start_instance, info->instance_count); + if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices, + info->start_instance, + info->instance_count) != PIPE_OK) { + debug_warn_once("u_vbuf_upload_buffers() failed"); + return; + } + mgr->dirty_real_vb_mask |= user_vb_mask; } diff --git a/mesalib/src/glsl/Makefile.am b/mesalib/src/glsl/Makefile.am index 058d8aed3..d0e5cd1d0 100644 --- a/mesalib/src/glsl/Makefile.am +++ b/mesalib/src/glsl/Makefile.am @@ -52,6 +52,7 @@ check_PROGRAMS = \ tests_uniform_initializer_test_SOURCES = \ $(top_srcdir)/src/mesa/main/hash_table.c \ + $(top_srcdir)/src/mesa/main/imports.c \ $(top_srcdir)/src/mesa/program/prog_hash_table.c\ $(top_srcdir)/src/mesa/program/symbol_table.c \ tests/copy_constant_to_storage_tests.cpp \ @@ -100,6 +101,7 @@ endif glsl_test_SOURCES = \ $(top_srcdir)/src/mesa/main/hash_table.c \ + $(top_srcdir)/src/mesa/main/imports.c \ $(top_srcdir)/src/mesa/program/prog_hash_table.c \ $(top_srcdir)/src/mesa/program/symbol_table.c \ $(GLSL_SRCDIR)/standalone_scaffolding.cpp \ diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources index de63c3246..c294aa429 100644 --- a/mesalib/src/glsl/Makefile.sources +++ b/mesalib/src/glsl/Makefile.sources @@ -47,6 +47,8 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/link_functions.cpp \ $(GLSL_SRCDIR)/link_uniforms.cpp \ $(GLSL_SRCDIR)/link_uniform_initializers.cpp \ + $(GLSL_SRCDIR)/link_uniform_block_active_visitor.cpp \ + $(GLSL_SRCDIR)/link_uniform_blocks.cpp \ $(GLSL_SRCDIR)/link_varyings.cpp \ $(GLSL_SRCDIR)/loop_analysis.cpp \ $(GLSL_SRCDIR)/loop_controls.cpp \ @@ -60,6 +62,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/lower_mat_op_to_vec.cpp \ $(GLSL_SRCDIR)/lower_noise.cpp \ $(GLSL_SRCDIR)/lower_packed_varyings.cpp \ + $(GLSL_SRCDIR)/lower_packing_builtins.cpp \ $(GLSL_SRCDIR)/lower_texture_projection.cpp \ $(GLSL_SRCDIR)/lower_variable_index_to_cond_assign.cpp \ $(GLSL_SRCDIR)/lower_vec_index_to_cond_assign.cpp \ diff --git a/mesalib/src/glsl/SConscript b/mesalib/src/glsl/SConscript index 6981f041b..c4ab97c1e 100644 --- a/mesalib/src/glsl/SConscript +++ b/mesalib/src/glsl/SConscript @@ -59,6 +59,7 @@ else: # Copy these files to avoid generation object files into src/mesa/program env.Prepend(CPPPATH = ['#src/mesa/main']) env.Command('hash_table.c', '#src/mesa/main/hash_table.c', Copy('$TARGET', '$SOURCE')) + env.Command('imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', '$SOURCE')) # Copy these files to avoid generation object files into src/mesa/program env.Prepend(CPPPATH = ['#src/mesa/program']) env.Command('prog_hash_table.c', '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE')) @@ -68,6 +69,7 @@ else: mesa_objs = env.StaticObject([ 'hash_table.c', + 'imports.c', 'prog_hash_table.c', 'symbol_table.c', ]) diff --git a/mesalib/src/glsl/ast.h b/mesalib/src/glsl/ast.h index 50747822d..1a28963c4 100644 --- a/mesalib/src/glsl/ast.h +++ b/mesalib/src/glsl/ast.h @@ -804,11 +804,12 @@ public: class ast_uniform_block : public ast_node { public: ast_uniform_block(ast_type_qualifier layout, - const char *block_name, - ast_declarator_list *member_list) - : layout(layout), block_name(block_name) + const char *instance_name, + ast_expression *array_size) + : layout(layout), block_name(NULL), instance_name(instance_name), + array_size(array_size) { - declarations.push_degenerate_list_at_head(&member_list->link); + /* empty */ } virtual ir_rvalue *hir(exec_list *instructions, @@ -816,8 +817,28 @@ public: ast_type_qualifier layout; const char *block_name; + + /** + * Declared name of the block instance, if specified. + * + * If the block does not have an instance name, this field will be + * \c NULL. + */ + const char *instance_name; + /** List of ast_declarator_list * */ exec_list declarations; + + /** + * Declared array size of the block instance + * + * If the block is not declared as an array, this field will be \c NULL. + * + * \note + * A block can only be an array if it also has an instance name. If this + * field is not \c NULL, ::instance_name must also not be \c NULL. + */ + ast_expression *array_size; }; /*@}*/ diff --git a/mesalib/src/glsl/ast_function.cpp b/mesalib/src/glsl/ast_function.cpp index dc7a58bf2..26f72cf8e 100644 --- a/mesalib/src/glsl/ast_function.cpp +++ b/mesalib/src/glsl/ast_function.cpp @@ -132,12 +132,13 @@ verify_parameter_modes(_mesa_glsl_parse_state *state, } /* Verify that 'out' and 'inout' actual parameters are lvalues. */ - if (formal->mode == ir_var_out || formal->mode == ir_var_inout) { + if (formal->mode == ir_var_function_out + || formal->mode == ir_var_function_inout) { const char *mode = NULL; switch (formal->mode) { - case ir_var_out: mode = "out"; break; - case ir_var_inout: mode = "inout"; break; - default: assert(false); break; + case ir_var_function_out: mode = "out"; break; + case ir_var_function_inout: mode = "inout"; break; + default: assert(false); break; } /* This AST-based check catches errors like f(i++). The IR-based @@ -210,13 +211,13 @@ generate_call(exec_list *instructions, ir_function_signature *sig, if (formal->type->is_numeric() || formal->type->is_boolean()) { switch (formal->mode) { case ir_var_const_in: - case ir_var_in: { + case ir_var_function_in: { ir_rvalue *converted = convert_component(actual, formal->type); actual->replace_with(converted); break; } - case ir_var_out: + case ir_var_function_out: if (actual->type != formal->type) { /* To convert an out parameter, we need to create a * temporary variable to hold the value before conversion, @@ -254,7 +255,7 @@ generate_call(exec_list *instructions, ir_function_signature *sig, actual->replace_with(deref_tmp_2); } break; - case ir_var_inout: + case ir_var_function_inout: /* Inout parameters should never require conversion, since that * would require an implicit conversion to exist both to and * from the formal parameter type, and there are no diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp index de3ce902e..49093d88f 100644 --- a/mesalib/src/glsl/ast_to_hir.cpp +++ b/mesalib/src/glsl/ast_to_hir.cpp @@ -857,14 +857,11 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) case GLSL_TYPE_ERROR: case GLSL_TYPE_VOID: case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_INTERFACE: /* I assume a comparison of a struct containing a sampler just * ignores the sampler present in the type. */ break; - - default: - assert(!"Should not get here."); - break; } if (cmp == NULL) @@ -1625,6 +1622,15 @@ ast_expression::hir(exec_list *instructions, } } else if (array->type->array_size() == 0) { _mesa_glsl_error(&loc, state, "unsized array index must be constant"); + } else if (array->type->is_array() + && array->type->fields.array->is_interface()) { + /* Page 46 in section 4.3.7 of the OpenGL ES 3.00 spec says: + * + * "All indexes used to index a uniform block array must be + * constant integral expressions." + */ + _mesa_glsl_error(&loc, state, + "uniform block array index must be constant"); } else { if (array->type->is_array()) { /* whole_variable_referenced can return NULL if the array is a @@ -1924,11 +1930,11 @@ is_varying_var(ir_variable *var, _mesa_glsl_parser_targets target) { switch (target) { case vertex_shader: - return var->mode == ir_var_out; + return var->mode == ir_var_shader_out; case fragment_shader: - return var->mode == ir_var_in; + return var->mode == ir_var_shader_in; default: - return var->mode == ir_var_out || var->mode == ir_var_in; + return var->mode == ir_var_shader_out || var->mode == ir_var_shader_in; } } @@ -1997,13 +2003,16 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, * the setting alone. */ if (qual->flags.q.in && qual->flags.q.out) - var->mode = ir_var_inout; - else if (qual->flags.q.attribute || qual->flags.q.in + var->mode = ir_var_function_inout; + else if (qual->flags.q.in) + var->mode = is_parameter ? ir_var_function_in : ir_var_shader_in; + else if (qual->flags.q.attribute || (qual->flags.q.varying && (state->target == fragment_shader))) - var->mode = ir_var_in; - else if (qual->flags.q.out - || (qual->flags.q.varying && (state->target == vertex_shader))) - var->mode = ir_var_out; + var->mode = ir_var_shader_in; + else if (qual->flags.q.out) + var->mode = is_parameter ? ir_var_function_out : ir_var_shader_out; + else if (qual->flags.q.varying && (state->target == vertex_shader)) + var->mode = ir_var_shader_out; else if (qual->flags.q.uniform) var->mode = ir_var_uniform; @@ -2028,10 +2037,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, * Similar text exists in the section on vertex shader outputs. * * Similar text exists in the GLSL ES 3.00 spec, except that the GLSL ES - * 3.00 spec claims to allow structs as well. However, this is likely - * an error, since section 11 of the spec ("Counting of Inputs and - * Outputs") enumerates all possible types of interstage linkage - * variables, and it does not mention structs. + * 3.00 spec allows structs as well. Varying structs are also allowed + * in GLSL 1.50. */ switch (var->type->get_scalar_type()->base_type) { case GLSL_TYPE_FLOAT: @@ -2046,6 +2053,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, state->get_version_string()); break; case GLSL_TYPE_STRUCT: + if (state->is_version(150, 300)) + break; _mesa_glsl_error(loc, state, "varying variables may not be of type struct"); break; @@ -2058,15 +2067,16 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, if (state->all_invariant && (state->current_function == NULL)) { switch (state->target) { case vertex_shader: - if (var->mode == ir_var_out) + if (var->mode == ir_var_shader_out) var->invariant = true; break; case geometry_shader: - if ((var->mode == ir_var_in) || (var->mode == ir_var_out)) + if ((var->mode == ir_var_shader_in) + || (var->mode == ir_var_shader_out)) var->invariant = true; break; case fragment_shader: - if (var->mode == ir_var_in) + if (var->mode == ir_var_shader_in) var->invariant = true; break; } @@ -2082,8 +2092,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, var->interpolation = INTERP_QUALIFIER_NONE; if (var->interpolation != INTERP_QUALIFIER_NONE && - !(state->target == vertex_shader && var->mode == ir_var_out) && - !(state->target == fragment_shader && var->mode == ir_var_in)) { + !(state->target == vertex_shader && var->mode == ir_var_shader_out) && + !(state->target == fragment_shader && var->mode == ir_var_shader_in)) { _mesa_glsl_error(loc, state, "interpolation qualifier `%s' can only be applied to " "vertex shader outputs and fragment shader inputs.", @@ -2116,7 +2126,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, */ switch (state->target) { case vertex_shader: - if (!global_scope || (var->mode != ir_var_in)) { + if (!global_scope || (var->mode != ir_var_shader_in)) { fail = true; string = "input"; } @@ -2129,7 +2139,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, break; case fragment_shader: - if (!global_scope || (var->mode != ir_var_out)) { + if (!global_scope || (var->mode != ir_var_shader_out)) { fail = true; string = "output"; } @@ -2440,7 +2450,7 @@ process_initializer(ir_variable *var, ast_declaration *decl, "cannot initialize samplers"); } - if ((var->mode == ir_var_in) && (state->current_function == NULL)) { + if ((var->mode == ir_var_shader_in) && (state->current_function == NULL)) { _mesa_glsl_error(& initializer_loc, state, "cannot initialize %s shader input / %s", _mesa_glsl_shader_target_name(state->target), @@ -2579,12 +2589,12 @@ ast_declarator_list::hir(exec_list *instructions, "Undeclared variable `%s' cannot be marked " "invariant\n", decl->identifier); } else if ((state->target == vertex_shader) - && (earlier->mode != ir_var_out)) { + && (earlier->mode != ir_var_shader_out)) { _mesa_glsl_error(& loc, state, "`%s' cannot be marked invariant, vertex shader " "outputs only\n", decl->identifier); } else if ((state->target == fragment_shader) - && (earlier->mode != ir_var_in)) { + && (earlier->mode != ir_var_shader_in)) { _mesa_glsl_error(& loc, state, "`%s' cannot be marked invariant, fragment shader " "inputs only\n", decl->identifier); @@ -2707,16 +2717,13 @@ ast_declarator_list::hir(exec_list *instructions, & loc, this->ubo_qualifiers_valid, false); if (this->type->qualifier.flags.q.invariant) { - if ((state->target == vertex_shader) && !(var->mode == ir_var_out || - var->mode == ir_var_inout)) { - /* FINISHME: Note that this doesn't work for invariant on - * a function signature outval - */ + if ((state->target == vertex_shader) && + var->mode != ir_var_shader_out) { _mesa_glsl_error(& loc, state, "`%s' cannot be marked invariant, vertex shader " "outputs only\n", var->name); } else if ((state->target == fragment_shader) && - !(var->mode == ir_var_in || var->mode == ir_var_inout)) { + var->mode != ir_var_shader_in) { /* FINISHME: Note that this doesn't work for invariant on * a function signature inval */ @@ -2753,7 +2760,7 @@ ast_declarator_list::hir(exec_list *instructions, "global scope%s", mode, var->name, extra); } - } else if (var->mode == ir_var_in) { + } else if (var->mode == ir_var_shader_in) { var->read_only = true; if (state->target == vertex_shader) { @@ -2833,7 +2840,7 @@ ast_declarator_list::hir(exec_list *instructions, && state->target == vertex_shader && state->current_function == NULL && var->type->is_integer() - && var->mode == ir_var_out + && var->mode == ir_var_shader_out && var->interpolation != INTERP_QUALIFIER_FLAT) { _mesa_glsl_error(&loc, state, "If a vertex output is an integer, " @@ -3137,7 +3144,8 @@ ast_parameter_declarator::hir(exec_list *instructions, } is_void = false; - ir_variable *var = new(ctx) ir_variable(type, this->identifier, ir_var_in); + ir_variable *var = new(ctx) + ir_variable(type, this->identifier, ir_var_function_in); /* Apply any specified qualifiers to the parameter declaration. Note that * for function parameters the default mode is 'in'. @@ -3151,7 +3159,7 @@ ast_parameter_declarator::hir(exec_list *instructions, * as out or inout function parameters, nor can they be assigned * into." */ - if ((var->mode == ir_var_inout || var->mode == ir_var_out) + if ((var->mode == ir_var_function_inout || var->mode == ir_var_function_out) && type->contains_sampler()) { _mesa_glsl_error(&loc, state, "out and inout parameters cannot contain samplers"); type = glsl_type::error_type; @@ -3171,7 +3179,7 @@ ast_parameter_declarator::hir(exec_list *instructions, * So for GLSL 1.10, passing an array as an out or inout parameter is not * allowed. This restriction is removed in GLSL 1.20, and in GLSL ES. */ - if ((var->mode == ir_var_inout || var->mode == ir_var_out) + if ((var->mode == ir_var_function_inout || var->mode == ir_var_function_out) && type->is_array() && !state->check_version(120, 100, &loc, "Arrays cannot be out or inout parameters")) { @@ -4018,35 +4026,50 @@ ast_type_specifier::hir(exec_list *instructions, } -ir_rvalue * -ast_struct_specifier::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) +/** + * Process a structure or interface block tree into an array of structure fields + * + * After parsing, where there are some syntax differnces, structures and + * interface blocks are almost identical. They are similar enough that the + * AST for each can be processed the same way into a set of + * \c glsl_struct_field to describe the members. + * + * \return + * The number of fields processed. A pointer to the array structure fields is + * stored in \c *fields_ret. + */ +unsigned +ast_process_structure_or_interface_block(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + exec_list *declarations, + YYLTYPE &loc, + glsl_struct_field **fields_ret, + bool is_interface, + bool block_row_major) { unsigned decl_count = 0; - /* Make an initial pass over the list of structure fields to determine how + /* Make an initial pass over the list of fields to determine how * many there are. Each element in this list is an ast_declarator_list. * This means that we actually need to count the number of elements in the * 'declarations' list in each of the elements. */ - foreach_list_typed (ast_declarator_list, decl_list, link, - &this->declarations) { + foreach_list_typed (ast_declarator_list, decl_list, link, declarations) { foreach_list_const (decl_ptr, & decl_list->declarations) { decl_count++; } } - /* Allocate storage for the structure fields and process the field + /* Allocate storage for the fields and process the field * declarations. As the declarations are processed, try to also convert * the types to HIR. This ensures that structure definitions embedded in - * other structure definitions are processed. + * other structure definitions or in interface blocks are processed. */ glsl_struct_field *const fields = ralloc_array(state, glsl_struct_field, decl_count); unsigned i = 0; - foreach_list_typed (ast_declarator_list, decl_list, link, - &this->declarations) { + foreach_list_typed (ast_declarator_list, decl_list, link, declarations) { const char *type_name; decl_list->type->specifier->hir(instructions, state); @@ -4055,7 +4078,6 @@ ast_struct_specifier::hir(exec_list *instructions, * embedded structure definitions have been removed from the language. */ if (state->es_shader && decl_list->type->specifier->structure != NULL) { - YYLTYPE loc = this->get_location(); _mesa_glsl_error(&loc, state, "Embedded structure definitions are " "not allowed in GLSL ES 1.00."); } @@ -4065,25 +4087,88 @@ ast_struct_specifier::hir(exec_list *instructions, foreach_list_typed (ast_declaration, decl, link, &decl_list->declarations) { - const struct glsl_type *field_type = decl_type; + /* From the GL_ARB_uniform_buffer_object spec: + * + * "Sampler types are not allowed inside of uniform + * blocks. All other types, arrays, and structures + * allowed for uniforms are allowed within a uniform + * block." + */ + const struct glsl_type *field_type = decl_type; + + if (is_interface && field_type->contains_sampler()) { + YYLTYPE loc = decl_list->get_location(); + _mesa_glsl_error(&loc, state, + "Uniform in non-default uniform block contains sampler\n"); + } + + const struct ast_type_qualifier *const qual = + & decl_list->type->qualifier; + if (qual->flags.q.std140 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(&loc, state, + "uniform block layout qualifiers std140, packed, and " + "shared can only be applied to uniform blocks, not " + "members"); + } + if (decl->is_array) { - YYLTYPE loc = decl->get_location(); field_type = process_array_type(&loc, decl_type, decl->array_size, state); } fields[i].type = (field_type != NULL) ? field_type : glsl_type::error_type; fields[i].name = decl->identifier; + + if (qual->flags.q.row_major || qual->flags.q.column_major) { + if (!field_type->is_matrix() && !field_type->is_record()) { + _mesa_glsl_error(&loc, state, + "uniform block layout qualifiers row_major and " + "column_major can only be applied to matrix and " + "structure types"); + } else + validate_matrix_layout_for_type(state, &loc, field_type); + } + + if (field_type->is_matrix() || + (field_type->is_array() && field_type->fields.array->is_matrix())) { + fields[i].row_major = block_row_major; + if (qual->flags.q.row_major) + fields[i].row_major = true; + else if (qual->flags.q.column_major) + fields[i].row_major = false; + } + i++; } } assert(i == decl_count); + *fields_ret = fields; + return decl_count; +} + + +ir_rvalue * +ast_struct_specifier::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + YYLTYPE loc = this->get_location(); + glsl_struct_field *fields; + unsigned decl_count = + ast_process_structure_or_interface_block(instructions, + state, + &this->declarations, + loc, + &fields, + false, + false); + const glsl_type *t = glsl_type::get_record_instance(fields, decl_count, this->name); - YYLTYPE loc = this->get_location(); if (!state->symbols->add_type(name, t)) { _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name); } else { @@ -4102,96 +4187,98 @@ ast_struct_specifier::hir(exec_list *instructions, return NULL; } -static struct gl_uniform_block * -get_next_uniform_block(struct _mesa_glsl_parse_state *state) -{ - if (state->num_uniform_blocks >= state->uniform_block_array_size) { - state->uniform_block_array_size *= 2; - if (state->uniform_block_array_size <= 4) - state->uniform_block_array_size = 4; - - state->uniform_blocks = reralloc(state, - state->uniform_blocks, - struct gl_uniform_block, - state->uniform_block_array_size); - } - - memset(&state->uniform_blocks[state->num_uniform_blocks], - 0, sizeof(*state->uniform_blocks)); - return &state->uniform_blocks[state->num_uniform_blocks++]; -} - ir_rvalue * ast_uniform_block::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) { + YYLTYPE loc = this->get_location(); + /* The ast_uniform_block has a list of ast_declarator_lists. We * need to turn those into ir_variables with an association * with this uniform block. */ - struct gl_uniform_block *ubo = get_next_uniform_block(state); - ubo->Name = ralloc_strdup(state->uniform_blocks, this->block_name); + enum glsl_interface_packing packing; + if (this->layout.flags.q.shared) { + packing = GLSL_INTERFACE_PACKING_SHARED; + } else if (this->layout.flags.q.packed) { + packing = GLSL_INTERFACE_PACKING_PACKED; + } else { + /* The default layout is std140. + */ + packing = GLSL_INTERFACE_PACKING_STD140; + } - if (!state->symbols->add_uniform_block(ubo)) { + bool block_row_major = this->layout.flags.q.row_major; + exec_list declared_variables; + glsl_struct_field *fields; + unsigned int num_variables = + ast_process_structure_or_interface_block(&declared_variables, + state, + &this->declarations, + loc, + &fields, + true, + block_row_major); + + const glsl_type *block_type = + glsl_type::get_interface_instance(fields, + num_variables, + packing, + this->block_name); + + if (!state->symbols->add_type(block_type->name, block_type)) { YYLTYPE loc = this->get_location(); _mesa_glsl_error(&loc, state, "Uniform block name `%s' already taken in " - "the current scope.\n", ubo->Name); + "the current scope.\n", this->block_name); } - unsigned int num_variables = 0; - foreach_list_typed(ast_declarator_list, decl_list, link, &declarations) { - foreach_list_const(node, &decl_list->declarations) { - num_variables++; - } - } - - bool block_row_major = this->layout.flags.q.row_major; - - ubo->Uniforms = rzalloc_array(state->uniform_blocks, - struct gl_uniform_buffer_variable, - num_variables); - - foreach_list_typed(ast_declarator_list, decl_list, link, &declarations) { - exec_list declared_variables; - - decl_list->hir(&declared_variables, state); + /* Since interface blocks cannot contain statements, it should be + * impossible for the block to generate any instructions. + */ + assert(declared_variables.is_empty()); - foreach_list_const(node, &declared_variables) { - ir_variable *var = (ir_variable *)node; + /* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec + * says: + * + * "If an instance name (instance-name) is used, then it puts all the + * members inside a scope within its own name space, accessed with the + * field selector ( . ) operator (analogously to structures)." + */ + if (this->instance_name) { + ir_variable *var; - struct gl_uniform_buffer_variable *ubo_var = - &ubo->Uniforms[ubo->NumUniforms++]; + if (this->array_size != NULL) { + const glsl_type *block_array_type = + process_array_type(&loc, block_type, this->array_size, state); - var->uniform_block = ubo - state->uniform_blocks; + var = new(state) ir_variable(block_array_type, + this->instance_name, + ir_var_uniform); + } else { + var = new(state) ir_variable(block_type, + this->instance_name, + ir_var_uniform); + } - ubo_var->Name = ralloc_strdup(state->uniform_blocks, var->name); - ubo_var->Type = var->type; - ubo_var->Offset = 0; /* Assigned at link time. */ + var->interface_type = block_type; + state->symbols->add_variable(var); + instructions->push_tail(var); + } else { + /* In order to have an array size, the block must also be declared with + * an instane name. + */ + assert(this->array_size == NULL); - if (var->type->is_matrix() || - (var->type->is_array() && var->type->fields.array->is_matrix())) { - ubo_var->RowMajor = block_row_major; - if (decl_list->type->qualifier.flags.q.row_major) - ubo_var->RowMajor = true; - else if (decl_list->type->qualifier.flags.q.column_major) - ubo_var->RowMajor = false; - } + for (unsigned i = 0; i < num_variables; i++) { + ir_variable *var = + new(state) ir_variable(fields[i].type, + ralloc_strdup(state, fields[i].name), + ir_var_uniform); + var->interface_type = block_type; - /* From the GL_ARB_uniform_buffer_object spec: - * - * "Sampler types are not allowed inside of uniform - * blocks. All other types, arrays, and structures - * allowed for uniforms are allowed within a uniform - * block." - */ - if (var->type->contains_sampler()) { - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, - "Uniform in non-default uniform block contains sampler\n"); - } + state->symbols->add_variable(var); + instructions->push_tail(var); } - - instructions->append_list(&declared_variables); } return NULL; @@ -4222,7 +4309,7 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, gl_FragData_assigned = true; else if (strncmp(var->name, "gl_", 3) != 0) { if (state->target == fragment_shader && - (var->mode == ir_var_out || var->mode == ir_var_inout)) { + var->mode == ir_var_shader_out) { user_defined_fs_output_assigned = true; user_defined_fs_output = var; } diff --git a/mesalib/src/glsl/builtin_compiler/Makefile.am b/mesalib/src/glsl/builtin_compiler/Makefile.am index 1a863b228..976640822 100644 --- a/mesalib/src/glsl/builtin_compiler/Makefile.am +++ b/mesalib/src/glsl/builtin_compiler/Makefile.am @@ -55,6 +55,7 @@ libglslcore_la_SOURCES = \ builtin_compiler_SOURCES = \ $(top_srcdir)/src/mesa/main/hash_table.c \ + $(top_srcdir)/src/mesa/main/imports.c \ $(top_srcdir)/src/mesa/program/prog_hash_table.c\ $(top_srcdir)/src/mesa/program/symbol_table.c \ $(BUILTIN_COMPILER_CXX_FILES) \ diff --git a/mesalib/src/glsl/builtin_types.h b/mesalib/src/glsl/builtin_types.h index a4c995fd1..c78c2d270 100644 --- a/mesalib/src/glsl/builtin_types.h +++ b/mesalib/src/glsl/builtin_types.h @@ -89,9 +89,9 @@ const glsl_type *const glsl_type::mat4_type = & builtin_core_types[14]; /*@{*/ static const struct glsl_struct_field gl_DepthRangeParameters_fields[] = { - { glsl_type::float_type, "near" }, - { glsl_type::float_type, "far" }, - { glsl_type::float_type, "diff" }, + { glsl_type::float_type, "near", false }, + { glsl_type::float_type, "far", false }, + { glsl_type::float_type, "diff", false }, }; const glsl_type glsl_type::builtin_structure_types[] = { @@ -106,58 +106,58 @@ const glsl_type glsl_type::builtin_structure_types[] = { /*@{*/ static const struct glsl_struct_field gl_PointParameters_fields[] = { - { glsl_type::float_type, "size" }, - { glsl_type::float_type, "sizeMin" }, - { glsl_type::float_type, "sizeMax" }, - { glsl_type::float_type, "fadeThresholdSize" }, - { glsl_type::float_type, "distanceConstantAttenuation" }, - { glsl_type::float_type, "distanceLinearAttenuation" }, - { glsl_type::float_type, "distanceQuadraticAttenuation" }, + { glsl_type::float_type, "size", false }, + { glsl_type::float_type, "sizeMin", false }, + { glsl_type::float_type, "sizeMax", false }, + { glsl_type::float_type, "fadeThresholdSize", false }, + { glsl_type::float_type, "distanceConstantAttenuation", false }, + { glsl_type::float_type, "distanceLinearAttenuation", false }, + { glsl_type::float_type, "distanceQuadraticAttenuation", false }, }; static const struct glsl_struct_field gl_MaterialParameters_fields[] = { - { glsl_type::vec4_type, "emission" }, - { glsl_type::vec4_type, "ambient" }, - { glsl_type::vec4_type, "diffuse" }, - { glsl_type::vec4_type, "specular" }, - { glsl_type::float_type, "shininess" }, + { glsl_type::vec4_type, "emission", false }, + { glsl_type::vec4_type, "ambient", false }, + { glsl_type::vec4_type, "diffuse", false }, + { glsl_type::vec4_type, "specular", false }, + { glsl_type::float_type, "shininess", false }, }; static const struct glsl_struct_field gl_LightSourceParameters_fields[] = { - { glsl_type::vec4_type, "ambient" }, - { glsl_type::vec4_type, "diffuse" }, - { glsl_type::vec4_type, "specular" }, - { glsl_type::vec4_type, "position" }, - { glsl_type::vec4_type, "halfVector" }, - { glsl_type::vec3_type, "spotDirection" }, - { glsl_type::float_type, "spotExponent" }, - { glsl_type::float_type, "spotCutoff" }, - { glsl_type::float_type, "spotCosCutoff" }, - { glsl_type::float_type, "constantAttenuation" }, - { glsl_type::float_type, "linearAttenuation" }, - { glsl_type::float_type, "quadraticAttenuation" }, + { glsl_type::vec4_type, "ambient", false }, + { glsl_type::vec4_type, "diffuse", false }, + { glsl_type::vec4_type, "specular", false }, + { glsl_type::vec4_type, "position", false }, + { glsl_type::vec4_type, "halfVector", false }, + { glsl_type::vec3_type, "spotDirection", false }, + { glsl_type::float_type, "spotExponent", false }, + { glsl_type::float_type, "spotCutoff", false }, + { glsl_type::float_type, "spotCosCutoff", false }, + { glsl_type::float_type, "constantAttenuation", false }, + { glsl_type::float_type, "linearAttenuation", false }, + { glsl_type::float_type, "quadraticAttenuation", false }, }; static const struct glsl_struct_field gl_LightModelParameters_fields[] = { - { glsl_type::vec4_type, "ambient" }, + { glsl_type::vec4_type, "ambient", false }, }; static const struct glsl_struct_field gl_LightModelProducts_fields[] = { - { glsl_type::vec4_type, "sceneColor" }, + { glsl_type::vec4_type, "sceneColor", false }, }; static const struct glsl_struct_field gl_LightProducts_fields[] = { - { glsl_type::vec4_type, "ambient" }, - { glsl_type::vec4_type, "diffuse" }, - { glsl_type::vec4_type, "specular" }, + { glsl_type::vec4_type, "ambient", false }, + { glsl_type::vec4_type, "diffuse", false }, + { glsl_type::vec4_type, "specular", false }, }; static const struct glsl_struct_field gl_FogParameters_fields[] = { - { glsl_type::vec4_type, "color" }, - { glsl_type::float_type, "density" }, - { glsl_type::float_type, "start" }, - { glsl_type::float_type, "end" }, - { glsl_type::float_type, "scale" }, + { glsl_type::vec4_type, "color", false }, + { glsl_type::float_type, "density", false }, + { glsl_type::float_type, "start", false }, + { glsl_type::float_type, "end", false }, + { glsl_type::float_type, "scale", false }, }; const glsl_type glsl_type::builtin_110_deprecated_structure_types[] = { diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp index e7769419f..ccee7746e 100644 --- a/mesalib/src/glsl/builtin_variables.cpp +++ b/mesalib/src/glsl/builtin_variables.cpp @@ -47,18 +47,18 @@ struct builtin_variable { }; static const builtin_variable builtin_core_vs_variables[] = { - { ir_var_out, VERT_RESULT_HPOS, "vec4", "gl_Position" }, - { ir_var_out, VERT_RESULT_PSIZ, "float", "gl_PointSize" }, + { ir_var_shader_out, VERT_RESULT_HPOS, "vec4", "gl_Position" }, + { ir_var_shader_out, VERT_RESULT_PSIZ, "float", "gl_PointSize" }, }; static const builtin_variable builtin_core_fs_variables[] = { - { ir_var_in, FRAG_ATTRIB_WPOS, "vec4", "gl_FragCoord" }, - { ir_var_in, FRAG_ATTRIB_FACE, "bool", "gl_FrontFacing" }, - { ir_var_out, FRAG_RESULT_COLOR, "vec4", "gl_FragColor" }, + { ir_var_shader_in, FRAG_ATTRIB_WPOS, "vec4", "gl_FragCoord" }, + { ir_var_shader_in, FRAG_ATTRIB_FACE, "bool", "gl_FrontFacing" }, + { ir_var_shader_out, FRAG_RESULT_COLOR, "vec4", "gl_FragColor" }, }; static const builtin_variable builtin_100ES_fs_variables[] = { - { ir_var_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" }, + { ir_var_shader_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" }, }; static const builtin_variable builtin_300ES_vs_variables[] = { @@ -66,46 +66,46 @@ static const builtin_variable builtin_300ES_vs_variables[] = { }; static const builtin_variable builtin_300ES_fs_variables[] = { - { ir_var_in, FRAG_ATTRIB_WPOS, "vec4", "gl_FragCoord" }, - { ir_var_in, FRAG_ATTRIB_FACE, "bool", "gl_FrontFacing" }, - { ir_var_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" }, - { ir_var_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" }, + { ir_var_shader_in, FRAG_ATTRIB_WPOS, "vec4", "gl_FragCoord" }, + { ir_var_shader_in, FRAG_ATTRIB_FACE, "bool", "gl_FrontFacing" }, + { ir_var_shader_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" }, + { ir_var_shader_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" }, }; static const builtin_variable builtin_110_fs_variables[] = { - { ir_var_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" }, + { ir_var_shader_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" }, }; static const builtin_variable builtin_110_deprecated_fs_variables[] = { - { ir_var_in, FRAG_ATTRIB_COL0, "vec4", "gl_Color" }, - { ir_var_in, FRAG_ATTRIB_COL1, "vec4", "gl_SecondaryColor" }, - { ir_var_in, FRAG_ATTRIB_FOGC, "float", "gl_FogFragCoord" }, + { ir_var_shader_in, FRAG_ATTRIB_COL0, "vec4", "gl_Color" }, + { ir_var_shader_in, FRAG_ATTRIB_COL1, "vec4", "gl_SecondaryColor" }, + { ir_var_shader_in, FRAG_ATTRIB_FOGC, "float", "gl_FogFragCoord" }, }; static const builtin_variable builtin_110_deprecated_vs_variables[] = { - { ir_var_in, VERT_ATTRIB_POS, "vec4", "gl_Vertex" }, - { ir_var_in, VERT_ATTRIB_NORMAL, "vec3", "gl_Normal" }, - { ir_var_in, VERT_ATTRIB_COLOR0, "vec4", "gl_Color" }, - { ir_var_in, VERT_ATTRIB_COLOR1, "vec4", "gl_SecondaryColor" }, - { ir_var_in, VERT_ATTRIB_TEX0, "vec4", "gl_MultiTexCoord0" }, - { ir_var_in, VERT_ATTRIB_TEX1, "vec4", "gl_MultiTexCoord1" }, - { ir_var_in, VERT_ATTRIB_TEX2, "vec4", "gl_MultiTexCoord2" }, - { ir_var_in, VERT_ATTRIB_TEX3, "vec4", "gl_MultiTexCoord3" }, - { ir_var_in, VERT_ATTRIB_TEX4, "vec4", "gl_MultiTexCoord4" }, - { ir_var_in, VERT_ATTRIB_TEX5, "vec4", "gl_MultiTexCoord5" }, - { ir_var_in, VERT_ATTRIB_TEX6, "vec4", "gl_MultiTexCoord6" }, - { ir_var_in, VERT_ATTRIB_TEX7, "vec4", "gl_MultiTexCoord7" }, - { ir_var_in, VERT_ATTRIB_FOG, "float", "gl_FogCoord" }, - { ir_var_out, VERT_RESULT_CLIP_VERTEX, "vec4", "gl_ClipVertex" }, - { ir_var_out, VERT_RESULT_COL0, "vec4", "gl_FrontColor" }, - { ir_var_out, VERT_RESULT_BFC0, "vec4", "gl_BackColor" }, - { ir_var_out, VERT_RESULT_COL1, "vec4", "gl_FrontSecondaryColor" }, - { ir_var_out, VERT_RESULT_BFC1, "vec4", "gl_BackSecondaryColor" }, - { ir_var_out, VERT_RESULT_FOGC, "float", "gl_FogFragCoord" }, + { ir_var_shader_in, VERT_ATTRIB_POS, "vec4", "gl_Vertex" }, + { ir_var_shader_in, VERT_ATTRIB_NORMAL, "vec3", "gl_Normal" }, + { ir_var_shader_in, VERT_ATTRIB_COLOR0, "vec4", "gl_Color" }, + { ir_var_shader_in, VERT_ATTRIB_COLOR1, "vec4", "gl_SecondaryColor" }, + { ir_var_shader_in, VERT_ATTRIB_TEX0, "vec4", "gl_MultiTexCoord0" }, + { ir_var_shader_in, VERT_ATTRIB_TEX1, "vec4", "gl_MultiTexCoord1" }, + { ir_var_shader_in, VERT_ATTRIB_TEX2, "vec4", "gl_MultiTexCoord2" }, + { ir_var_shader_in, VERT_ATTRIB_TEX3, "vec4", "gl_MultiTexCoord3" }, + { ir_var_shader_in, VERT_ATTRIB_TEX4, "vec4", "gl_MultiTexCoord4" }, + { ir_var_shader_in, VERT_ATTRIB_TEX5, "vec4", "gl_MultiTexCoord5" }, + { ir_var_shader_in, VERT_ATTRIB_TEX6, "vec4", "gl_MultiTexCoord6" }, + { ir_var_shader_in, VERT_ATTRIB_TEX7, "vec4", "gl_MultiTexCoord7" }, + { ir_var_shader_in, VERT_ATTRIB_FOG, "float", "gl_FogCoord" }, + { ir_var_shader_out, VERT_RESULT_CLIP_VERTEX, "vec4", "gl_ClipVertex" }, + { ir_var_shader_out, VERT_RESULT_COL0, "vec4", "gl_FrontColor" }, + { ir_var_shader_out, VERT_RESULT_BFC0, "vec4", "gl_BackColor" }, + { ir_var_shader_out, VERT_RESULT_COL1, "vec4", "gl_FrontSecondaryColor" }, + { ir_var_shader_out, VERT_RESULT_BFC1, "vec4", "gl_BackSecondaryColor" }, + { ir_var_shader_out, VERT_RESULT_FOGC, "float", "gl_FogFragCoord" }, }; static const builtin_variable builtin_120_fs_variables[] = { - { ir_var_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" }, + { ir_var_shader_in, FRAG_ATTRIB_PNTC, "vec2", "gl_PointCoord" }, }; static const builtin_variable builtin_130_vs_variables[] = { @@ -403,16 +403,18 @@ add_variable(exec_list *instructions, glsl_symbol_table *symtab, switch (var->mode) { case ir_var_auto: - case ir_var_in: - case ir_var_const_in: + case ir_var_shader_in: case ir_var_uniform: case ir_var_system_value: var->read_only = true; break; - case ir_var_inout: - case ir_var_out: + case ir_var_shader_out: break; default: + /* The only variables that are added using this function should be + * uniforms, shader inputs, and shader outputs, constants (which use + * ir_var_auto), and system values. + */ assert(0); break; } @@ -752,7 +754,8 @@ generate_110_vs_variables(exec_list *instructions, glsl_type::get_array_instance(glsl_type::vec4_type, 0); add_variable(instructions, state->symbols, - "gl_TexCoord", vec4_array_type, ir_var_out, VERT_RESULT_TEX0); + "gl_TexCoord", vec4_array_type, ir_var_shader_out, + VERT_RESULT_TEX0); generate_ARB_draw_buffers_variables(instructions, state, false, vertex_shader); @@ -812,7 +815,7 @@ generate_130_vs_variables(exec_list *instructions, glsl_type::get_array_instance(glsl_type::float_type, 0); add_variable(instructions, state->symbols, - "gl_ClipDistance", clip_distance_array_type, ir_var_out, + "gl_ClipDistance", clip_distance_array_type, ir_var_shader_out, VERT_RESULT_CLIP_DIST0); } @@ -937,7 +940,8 @@ generate_110_fs_variables(exec_list *instructions, glsl_type::get_array_instance(glsl_type::vec4_type, 0); add_variable(instructions, state->symbols, - "gl_TexCoord", vec4_array_type, ir_var_in, FRAG_ATTRIB_TEX0); + "gl_TexCoord", vec4_array_type, ir_var_shader_in, + FRAG_ATTRIB_TEX0); generate_ARB_draw_buffers_variables(instructions, state, false, fragment_shader); @@ -969,7 +973,7 @@ generate_ARB_draw_buffers_variables(exec_list *instructions, ir_variable *const fd = add_variable(instructions, state->symbols, "gl_FragData", vec4_array_type, - ir_var_out, FRAG_RESULT_DATA0); + ir_var_shader_out, FRAG_RESULT_DATA0); if (warn) fd->warn_extension = "GL_ARB_draw_buffers"; @@ -1026,7 +1030,7 @@ generate_ARB_shader_stencil_export_variables(exec_list *instructions, ir_variable *const fd = add_variable(instructions, state->symbols, "gl_FragStencilRefARB", glsl_type::int_type, - ir_var_out, FRAG_RESULT_STENCIL); + ir_var_shader_out, FRAG_RESULT_STENCIL); if (warn) fd->warn_extension = "GL_ARB_shader_stencil_export"; @@ -1042,7 +1046,7 @@ generate_AMD_shader_stencil_export_variables(exec_list *instructions, ir_variable *const fd = add_variable(instructions, state->symbols, "gl_FragStencilRefAMD", glsl_type::int_type, - ir_var_out, FRAG_RESULT_STENCIL); + ir_var_shader_out, FRAG_RESULT_STENCIL); if (warn) fd->warn_extension = "GL_AMD_shader_stencil_export"; @@ -1083,7 +1087,7 @@ generate_fs_clipdistance(exec_list *instructions, glsl_type::get_array_instance(glsl_type::float_type, 0); add_variable(instructions, state->symbols, - "gl_ClipDistance", clip_distance_array_type, ir_var_in, + "gl_ClipDistance", clip_distance_array_type, ir_var_shader_in, FRAG_ATTRIB_CLIP_DIST0); } diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y index 8fba923a2..e927c7cb7 100644 --- a/mesalib/src/glsl/glcpp/glcpp-parse.y +++ b/mesalib/src/glsl/glcpp/glcpp-parse.y @@ -1227,6 +1227,9 @@ glcpp_parser_create (const struct gl_extensions *extensions, int api) if (extensions->ARB_texture_cube_map_array) add_builtin_define(parser, "GL_ARB_texture_cube_map_array", 1); + + if (extensions->ARB_shading_language_packing) + add_builtin_define(parser, "GL_ARB_shading_language_packing", 1); } } diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll index 2f66c5828..ddc9f8073 100644 --- a/mesalib/src/glsl/glsl_lexer.ll +++ b/mesalib/src/glsl/glsl_lexer.ll @@ -399,23 +399,23 @@ layout { } [0-9]+\.[0-9]+([eE][+-]?[0-9]+)?[fF]? { - yylval->real = glsl_strtod(yytext, NULL); + yylval->real = glsl_strtof(yytext, NULL); return FLOATCONSTANT; } \.[0-9]+([eE][+-]?[0-9]+)?[fF]? { - yylval->real = glsl_strtod(yytext, NULL); + yylval->real = glsl_strtof(yytext, NULL); return FLOATCONSTANT; } [0-9]+\.([eE][+-]?[0-9]+)?[fF]? { - yylval->real = glsl_strtod(yytext, NULL); + yylval->real = glsl_strtof(yytext, NULL); return FLOATCONSTANT; } [0-9]+[eE][+-]?[0-9]+[fF]? { - yylval->real = glsl_strtod(yytext, NULL); + yylval->real = glsl_strtof(yytext, NULL); return FLOATCONSTANT; } [0-9]+[fF] { - yylval->real = glsl_strtod(yytext, NULL); + yylval->real = glsl_strtof(yytext, NULL); return FLOATCONSTANT; } diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy index 88aae64d4..154ce2d09 100644 --- a/mesalib/src/glsl/glsl_parser.yy +++ b/mesalib/src/glsl/glsl_parser.yy @@ -79,6 +79,7 @@ static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg) ast_case_label_list *case_label_list; ast_case_statement *case_statement; ast_case_statement_list *case_statement_list; + ast_uniform_block *uniform_block; struct { ast_node *cond; @@ -112,6 +113,7 @@ static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg) %token STRUCT VOID_TOK WHILE %token IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER %type any_identifier +%type instance_name_opt %token FLOATCONSTANT %token INTCONSTANT UINTCONSTANT BOOLCONSTANT %token FIELD_SELECTION @@ -221,6 +223,7 @@ static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg) %type declaration_statement %type jump_statement %type uniform_block +%type basic_uniform_block %type struct_specifier %type struct_declaration_list %type struct_declaration @@ -1884,31 +1887,27 @@ function_definition: /* layout_qualifieropt is packed into this rule */ uniform_block: - UNIFORM NEW_IDENTIFIER '{' member_list '}' ';' + basic_uniform_block { - void *ctx = state; - $$ = new(ctx) ast_uniform_block(*state->default_uniform_qualifier, - $2, $4); - - if (!state->ARB_uniform_buffer_object_enable) { - _mesa_glsl_error(& @1, state, - "#version 140 / GL_ARB_uniform_buffer_object " - "required for defining uniform blocks\n"); - } else if (state->ARB_uniform_buffer_object_warn) { - _mesa_glsl_warning(& @1, state, - "#version 140 / GL_ARB_uniform_buffer_object " - "required for defining uniform blocks\n"); - } + $$ = $1; } - | layout_qualifier UNIFORM NEW_IDENTIFIER '{' member_list '}' ';' + | layout_qualifier basic_uniform_block { - void *ctx = state; - - ast_type_qualifier qual = *state->default_uniform_qualifier; - if (!qual.merge_qualifier(& @1, state, $1)) { + ast_uniform_block *block = $2; + if (!block->layout.merge_qualifier(& @1, state, $1)) { YYERROR; } - $$ = new(ctx) ast_uniform_block(qual, $3, $5); + $$ = block; + } + ; + +basic_uniform_block: + UNIFORM NEW_IDENTIFIER '{' member_list '}' instance_name_opt ';' + { + ast_uniform_block *const block = $6; + + block->block_name = $2; + block->declarations.push_degenerate_list_at_head(& $4->link); if (!state->ARB_uniform_buffer_object_enable) { _mesa_glsl_error(& @1, state, @@ -1919,6 +1918,49 @@ uniform_block: "#version 140 / GL_ARB_uniform_buffer_object " "required for defining uniform blocks\n"); } + + /* Since block arrays require names, and both features are added in + * the same language versions, we don't have to explicitly + * version-check both things. + */ + if (block->instance_name != NULL + && !(state->language_version == 300 && state->es_shader)) { + _mesa_glsl_error(& @1, state, + "#version 300 es required for using uniform " + "blocks with an instance name\n"); + } + + $$ = block; + } + ; + +instance_name_opt: + /* empty */ + { + $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier, + NULL, + NULL); + } + | NEW_IDENTIFIER + { + $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier, + $1, + NULL); + } + | NEW_IDENTIFIER '[' constant_expression ']' + { + $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier, + $1, + $3); + } + | NEW_IDENTIFIER '[' ']' + { + _mesa_glsl_error(& @1, state, + "instance block arrays must be explicitly sized\n"); + + $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier, + $1, + NULL); } ; diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp index b460c8619..c8dbc89ff 100644 --- a/mesalib/src/glsl/glsl_parser_extras.cpp +++ b/mesalib/src/glsl/glsl_parser_extras.cpp @@ -462,6 +462,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_uniform_buffer_object, true, false, true, true, false, ARB_uniform_buffer_object), EXT(OES_standard_derivatives, false, false, true, false, true, OES_standard_derivatives), EXT(ARB_texture_cube_map_array, true, false, true, true, false, ARB_texture_cube_map_array), + EXT(ARB_shading_language_packing, true, false, true, true, false, ARB_shading_language_packing), }; #undef EXT diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h index 2e6bb0b0a..53df149d8 100644 --- a/mesalib/src/glsl/glsl_parser_extras.h +++ b/mesalib/src/glsl/glsl_parser_extras.h @@ -272,6 +272,8 @@ struct _mesa_glsl_parse_state { bool OES_standard_derivatives_warn; bool ARB_texture_cube_map_array_enable; bool ARB_texture_cube_map_array_warn; + bool ARB_shading_language_packing_enable; + bool ARB_shading_language_packing_warn; /*@}*/ /** Extensions supported by the OpenGL implementation. */ diff --git a/mesalib/src/glsl/glsl_symbol_table.cpp b/mesalib/src/glsl/glsl_symbol_table.cpp index eb275b12e..8d34547c6 100644 --- a/mesalib/src/glsl/glsl_symbol_table.cpp +++ b/mesalib/src/glsl/glsl_symbol_table.cpp @@ -41,15 +41,13 @@ public: ralloc_free(entry); } - symbol_table_entry(ir_variable *v) : v(v), f(0), t(0), u(0) {} - symbol_table_entry(ir_function *f) : v(0), f(f), t(0), u(0) {} - symbol_table_entry(const glsl_type *t) : v(0), f(0), t(t), u(0) {} - symbol_table_entry(struct gl_uniform_block *u) : v(0), f(0), t(0), u(u) {} + symbol_table_entry(ir_variable *v) : v(v), f(0), t(0) {} + symbol_table_entry(ir_function *f) : v(0), f(f), t(0) {} + symbol_table_entry(const glsl_type *t) : v(0), f(0), t(t) {} ir_variable *v; ir_function *f; const glsl_type *t; - struct gl_uniform_block *u; }; glsl_symbol_table::glsl_symbol_table() @@ -134,12 +132,6 @@ bool glsl_symbol_table::add_function(ir_function *f) return _mesa_symbol_table_add_symbol(table, -1, f->name, entry) == 0; } -bool glsl_symbol_table::add_uniform_block(struct gl_uniform_block *u) -{ - symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(u); - return _mesa_symbol_table_add_symbol(table, -1, u->Name, entry) == 0; -} - void glsl_symbol_table::add_global_function(ir_function *f) { symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f); diff --git a/mesalib/src/glsl/glsl_symbol_table.h b/mesalib/src/glsl/glsl_symbol_table.h index f95fb8a01..9f5602787 100644 --- a/mesalib/src/glsl/glsl_symbol_table.h +++ b/mesalib/src/glsl/glsl_symbol_table.h @@ -99,7 +99,6 @@ public: bool add_variable(ir_variable *v); bool add_type(const char *name, const glsl_type *t); bool add_function(ir_function *f); - bool add_uniform_block(struct gl_uniform_block *u); /*@}*/ /** diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp index 71b185027..4a2c87907 100644 --- a/mesalib/src/glsl/glsl_types.cpp +++ b/mesalib/src/glsl/glsl_types.cpp @@ -34,6 +34,7 @@ extern "C" { hash_table *glsl_type::array_types = NULL; hash_table *glsl_type::record_types = NULL; +hash_table *glsl_type::interface_types = NULL; void *glsl_type::mem_ctx = NULL; void @@ -51,7 +52,7 @@ glsl_type::glsl_type(GLenum gl_type, gl_type(gl_type), base_type(base_type), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), + sampler_type(0), interface_packing(0), vector_elements(vector_elements), matrix_columns(matrix_columns), length(0) { @@ -69,7 +70,7 @@ glsl_type::glsl_type(GLenum gl_type, gl_type(gl_type), base_type(GLSL_TYPE_SAMPLER), sampler_dimensionality(dim), sampler_shadow(shadow), - sampler_array(array), sampler_type(type), + sampler_array(array), sampler_type(type), interface_packing(0), vector_elements(0), matrix_columns(0), length(0) { @@ -82,7 +83,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, const char *name) : base_type(GLSL_TYPE_STRUCT), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), + sampler_type(0), interface_packing(0), vector_elements(0), matrix_columns(0), length(num_fields) { @@ -96,6 +97,29 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, this->fields.structure[i].type = fields[i].type; this->fields.structure[i].name = ralloc_strdup(this->fields.structure, fields[i].name); + this->fields.structure[i].row_major = fields[i].row_major; + } +} + +glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, + enum glsl_interface_packing packing, const char *name) : + base_type(GLSL_TYPE_INTERFACE), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing((unsigned) packing), + vector_elements(0), matrix_columns(0), + length(num_fields) +{ + unsigned int i; + + init_ralloc_type_ctx(); + this->name = ralloc_strdup(this->mem_ctx, name); + this->fields.structure = ralloc_array(this->mem_ctx, + glsl_struct_field, length); + for (i = 0; i < length; i++) { + this->fields.structure[i].type = fields[i].type; + this->fields.structure[i].name = ralloc_strdup(this->fields.structure, + fields[i].name); + this->fields.structure[i].row_major = fields[i].row_major; } } @@ -429,7 +453,7 @@ _mesa_glsl_release_types(void) glsl_type::glsl_type(const glsl_type *array, unsigned length) : base_type(GLSL_TYPE_ARRAY), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), + sampler_type(0), interface_packing(0), vector_elements(0), matrix_columns(0), name(NULL), length(length) { @@ -561,12 +585,18 @@ glsl_type::record_key_compare(const void *a, const void *b) if (key1->length != key2->length) return 1; + if (key1->interface_packing != key2->interface_packing) + return 1; + for (unsigned i = 0; i < key1->length; i++) { if (key1->fields.structure[i].type != key2->fields.structure[i].type) return 1; if (strcmp(key1->fields.structure[i].name, key2->fields.structure[i].name) != 0) return 1; + if (key1->fields.structure[i].row_major + != key2->fields.structure[i].row_major) + return 1; } return 0; @@ -620,10 +650,38 @@ glsl_type::get_record_instance(const glsl_struct_field *fields, } +const glsl_type * +glsl_type::get_interface_instance(const glsl_struct_field *fields, + unsigned num_fields, + enum glsl_interface_packing packing, + const char *name) +{ + const glsl_type key(fields, num_fields, packing, name); + + if (interface_types == NULL) { + interface_types = hash_table_ctor(64, record_key_hash, record_key_compare); + } + + const glsl_type *t = (glsl_type *) hash_table_find(interface_types, & key); + if (t == NULL) { + t = new glsl_type(fields, num_fields, packing, name); + + hash_table_insert(interface_types, (void *) t, t); + } + + assert(t->base_type == GLSL_TYPE_INTERFACE); + assert(t->length == num_fields); + assert(strcmp(t->name, name) == 0); + + return t; +} + + const glsl_type * glsl_type::field_type(const char *name) const { - if (this->base_type != GLSL_TYPE_STRUCT) + if (this->base_type != GLSL_TYPE_STRUCT + && this->base_type != GLSL_TYPE_INTERFACE) return error_type; for (unsigned i = 0; i < this->length; i++) { @@ -638,7 +696,8 @@ glsl_type::field_type(const char *name) const int glsl_type::field_index(const char *name) const { - if (this->base_type != GLSL_TYPE_STRUCT) + if (this->base_type != GLSL_TYPE_STRUCT + && this->base_type != GLSL_TYPE_INTERFACE) return -1; for (unsigned i = 0; i < this->length; i++) { @@ -660,7 +719,8 @@ glsl_type::component_slots() const case GLSL_TYPE_BOOL: return this->components(); - case GLSL_TYPE_STRUCT: { + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { unsigned size = 0; for (unsigned i = 0; i < this->length; i++) @@ -672,9 +732,13 @@ glsl_type::component_slots() const case GLSL_TYPE_ARRAY: return this->length * this->fields.array->component_slots(); - default: - return 0; + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + break; } + + return 0; } bool @@ -799,12 +863,6 @@ glsl_type::std140_base_alignment(bool row_major) const return -1; } -static unsigned -align(unsigned val, unsigned align) -{ - return (val + align - 1) / align * align; -} - unsigned glsl_type::std140_size(bool row_major) const { @@ -906,11 +964,11 @@ glsl_type::std140_size(bool row_major) const for (unsigned i = 0; i < this->length; i++) { const struct glsl_type *field_type = this->fields.structure[i].type; unsigned align = field_type->std140_base_alignment(row_major); - size = (size + align - 1) / align * align; + size = glsl_align(size, align); size += field_type->std140_size(row_major); } - size = align(size, - this->fields.structure[0].type->std140_base_alignment(row_major)); + size = glsl_align(size, + this->fields.structure[0].type->std140_base_alignment(row_major)); return size; } diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h index d6f5c105e..b0db2bf11 100644 --- a/mesalib/src/glsl/glsl_types.h +++ b/mesalib/src/glsl/glsl_types.h @@ -54,6 +54,7 @@ enum glsl_base_type { GLSL_TYPE_BOOL, GLSL_TYPE_SAMPLER, GLSL_TYPE_STRUCT, + GLSL_TYPE_INTERFACE, GLSL_TYPE_ARRAY, GLSL_TYPE_VOID, GLSL_TYPE_ERROR @@ -69,6 +70,12 @@ enum glsl_sampler_dim { GLSL_SAMPLER_DIM_EXTERNAL }; +enum glsl_interface_packing { + GLSL_INTERFACE_PACKING_STD140, + GLSL_INTERFACE_PACKING_SHARED, + GLSL_INTERFACE_PACKING_PACKED +}; + #ifdef __cplusplus #include "GL/gl.h" #include "ralloc.h" @@ -84,6 +91,7 @@ struct glsl_type { * only \c GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT, * and \c GLSL_TYPE_UINT are valid. */ + unsigned interface_packing:2; /* Callers of this ralloc-based new need not call delete. It's * easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */ @@ -130,8 +138,9 @@ struct glsl_type { /** * For \c GLSL_TYPE_ARRAY, this is the length of the array. For - * \c GLSL_TYPE_STRUCT, it is the number of elements in the structure and - * the number of values pointed to by \c fields.structure (below). + * \c GLSL_TYPE_STRUCT or \c GLSL_TYPE_INTERFACE, it is the number of + * elements in the structure and the number of values pointed to by + * \c fields.structure (below). */ unsigned length; @@ -231,6 +240,14 @@ struct glsl_type { unsigned num_fields, const char *name); + /** + * Get the instance of an interface block type + */ + static const glsl_type *get_interface_instance(const glsl_struct_field *fields, + unsigned num_fields, + enum glsl_interface_packing packing, + const char *name); + /** * Query the total number of scalars that make up a scalar, vector or matrix */ @@ -393,6 +410,14 @@ struct glsl_type { return base_type == GLSL_TYPE_STRUCT; } + /** + * Query whether or not a type is an interface + */ + bool is_interface() const + { + return base_type == GLSL_TYPE_INTERFACE; + } + /** * Query whether or not a type is the void type singleton. */ @@ -491,6 +516,10 @@ private: glsl_type(const glsl_struct_field *fields, unsigned num_fields, const char *name); + /** Constructor for interface types */ + glsl_type(const glsl_struct_field *fields, unsigned num_fields, + enum glsl_interface_packing packing, const char *name); + /** Constructor for array types */ glsl_type(const glsl_type *array, unsigned length); @@ -500,6 +529,9 @@ private: /** Hash table containing the known record types. */ static struct hash_table *record_types; + /** Hash table containing the known interface types. */ + static struct hash_table *interface_types; + static int record_key_compare(const void *a, const void *b); static unsigned record_key_hash(const void *key); @@ -566,8 +598,15 @@ private: struct glsl_struct_field { const struct glsl_type *type; const char *name; + bool row_major; }; +static inline unsigned int +glsl_align(unsigned int a, unsigned int align) +{ + return (a + align - 1) / align * align; +} + #endif /* __cplusplus */ #endif /* GLSL_TYPES_H */ diff --git a/mesalib/src/glsl/hir_field_selection.cpp b/mesalib/src/glsl/hir_field_selection.cpp index ac416d5da..0035a5f81 100644 --- a/mesalib/src/glsl/hir_field_selection.cpp +++ b/mesalib/src/glsl/hir_field_selection.cpp @@ -61,7 +61,8 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr, _mesa_glsl_error(& loc, state, "Invalid swizzle / mask `%s'", expr->primary_expression.identifier); } - } else if (op->type->base_type == GLSL_TYPE_STRUCT) { + } else if (op->type->base_type == GLSL_TYPE_STRUCT + || op->type->base_type == GLSL_TYPE_INTERFACE) { result = new(ctx) ir_dereference_record(op, expr->primary_expression.identifier); diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp index 703f5ec58..954995db3 100644 --- a/mesalib/src/glsl/ir.cpp +++ b/mesalib/src/glsl/ir.cpp @@ -306,6 +306,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) break; case ir_unop_noise: + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: this->type = glsl_type::float_type; break; @@ -313,6 +315,25 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) this->type = glsl_type::bool_type; break; + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: + case ir_unop_pack_half_2x16: + this->type = glsl_type::uint_type; + break; + + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: + this->type = glsl_type::vec2_type; + break; + + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + this->type = glsl_type::vec4_type; + break; + default: assert(!"not reached: missing automatic type setup for ir_expression"); this->type = op0->type; @@ -364,10 +385,15 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) case ir_binop_bit_and: case ir_binop_bit_xor: case ir_binop_bit_or: + assert(!op0->type->is_matrix()); + assert(!op1->type->is_matrix()); if (op0->type->is_scalar()) { - this->type = op1->type; + this->type = op1->type; } else if (op1->type->is_scalar()) { - this->type = op0->type; + this->type = op0->type; + } else { + assert(op0->type->vector_elements == op1->type->vector_elements); + this->type = op0->type; } break; @@ -386,6 +412,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) this->type = glsl_type::float_type; break; + case ir_binop_pack_half_2x16_split: + this->type = glsl_type::uint_type; + break; + case ir_binop_lshift: case ir_binop_rshift: this->type = op0->type; @@ -454,6 +484,18 @@ static const char *const operator_strs[] = { "cos_reduced", "dFdx", "dFdy", + "packSnorm2x16", + "packSnorm4x8", + "packUnorm2x16", + "packUnorm4x8", + "packHalf2x16", + "unpackSnorm2x16", + "unpackSnorm4x8", + "unpackUnorm2x16", + "unpackUnorm4x8", + "unpackHalf2x16", + "unpackHalf2x16_split_x", + "unpackHalf2x16_split_y", "noise", "+", "-", @@ -480,6 +522,7 @@ static const char *const operator_strs[] = { "min", "max", "pow", + "packHalf2x16_split", "ubo_load", "vector", }; @@ -1493,7 +1536,6 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name, this->has_initializer = false; this->location = -1; this->location_frac = 0; - this->uniform_block = -1; this->warn_extension = NULL; this->constant_value = NULL; this->constant_initializer = NULL; @@ -1553,8 +1595,8 @@ modes_match(unsigned a, unsigned b) return true; /* Accept "in" vs. "const in" */ - if ((a == ir_var_const_in && b == ir_var_in) || - (b == ir_var_const_in && a == ir_var_in)) + if ((a == ir_var_const_in && b == ir_var_function_in) || + (b == ir_var_const_in && a == ir_var_function_in)) return true; return false; diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h index 85fc5ce95..efd80dad8 100644 --- a/mesalib/src/glsl/ir.h +++ b/mesalib/src/glsl/ir.h @@ -265,9 +265,11 @@ protected: enum ir_variable_mode { ir_var_auto = 0, /**< Function local variables and globals. */ ir_var_uniform, /**< Variable declared as a uniform. */ - ir_var_in, - ir_var_out, - ir_var_inout, + ir_var_shader_in, + ir_var_shader_out, + ir_var_function_in, + ir_var_function_out, + ir_var_function_inout, ir_var_const_in, /**< "in" param that must be a constant expression */ ir_var_system_value, /**< Ex: front-face, instance-id, etc. */ ir_var_temporary /**< Temporary variable generated during compilation. */ @@ -347,6 +349,41 @@ public: */ glsl_interp_qualifier determine_interpolation_mode(bool flat_shade); + /** + * Determine whether or not a variable is part of a uniform block. + */ + inline bool is_in_uniform_block() const + { + return this->mode == ir_var_uniform && this->interface_type != NULL; + } + + /** + * Determine whether or not a variable is the declaration of an interface + * block + * + * For the first declaration below, there will be an \c ir_variable named + * "instance" whose type and whose instance_type will be the same + * \cglsl_type. For the second declaration, there will be an \c ir_variable + * named "f" whose type is float and whose instance_type is B2. + * + * "instance" is an interface instance variable, but "f" is not. + * + * uniform B1 { + * float f; + * } instance; + * + * uniform B2 { + * float f; + * }; + */ + inline bool is_interface_instance() const + { + const glsl_type *const t = this->type; + + return (t == this->interface_type) + || (t->is_array() && t->fields.array == this->interface_type); + } + /** * Declared type of the variable */ @@ -401,7 +438,7 @@ public: * * \sa ir_variable_mode */ - unsigned mode:3; + unsigned mode:4; /** * Interpolation mode for shader inputs / outputs @@ -480,16 +517,6 @@ public: */ int location; - /** - * Uniform block number for uniforms. - * - * This index is into the shader's list of uniform blocks, not the - * linked program's merged list. - * - * If the variable is not in a uniform block, the value will be -1. - */ - int uniform_block; - /** * output index for dual source blending. */ @@ -530,6 +557,14 @@ public: * objects. */ ir_constant *constant_initializer; + + /** + * For variables that are in an interface block or are an instance of an + * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. + * + * \sa ir_variable::location + */ + const glsl_type *interface_type; }; @@ -908,7 +943,7 @@ public: unsigned write_mask:4; }; -/* Update ir_expression::num_operands() and operator_strs when +/* Update ir_expression::get_num_operands() and operator_strs when * updating this list. */ enum ir_expression_operation { @@ -969,6 +1004,32 @@ enum ir_expression_operation { ir_unop_dFdy, /*@}*/ + /** + * \name Floating point pack and unpack operations. + */ + /*@{*/ + ir_unop_pack_snorm_2x16, + ir_unop_pack_snorm_4x8, + ir_unop_pack_unorm_2x16, + ir_unop_pack_unorm_4x8, + ir_unop_pack_half_2x16, + ir_unop_unpack_snorm_2x16, + ir_unop_unpack_snorm_4x8, + ir_unop_unpack_unorm_2x16, + ir_unop_unpack_unorm_4x8, + ir_unop_unpack_half_2x16, + /*@}*/ + + /** + * \name Lowered floating point unpacking operations. + * + * \see lower_packing_builtins_visitor::split_unpack_half_2x16 + */ + /*@{*/ + ir_unop_unpack_half_2x16_split_x, + ir_unop_unpack_half_2x16_split_y, + /*@}*/ + ir_unop_noise, /** @@ -1035,6 +1096,15 @@ enum ir_expression_operation { ir_binop_pow, + /** + * \name Lowered floating point packing operations. + * + * \see lower_packing_builtins_visitor::split_pack_half_2x16 + */ + /*@{*/ + ir_binop_pack_half_2x16_split, + /*@}*/ + /** * Load a value the size of a given GLSL type from a uniform block. * diff --git a/mesalib/src/glsl/ir_builder.cpp b/mesalib/src/glsl/ir_builder.cpp index c62f0b115..8fb30a02a 100644 --- a/mesalib/src/glsl/ir_builder.cpp +++ b/mesalib/src/glsl/ir_builder.cpp @@ -188,11 +188,27 @@ ir_expression *mul(operand a, operand b) return expr(ir_binop_mul, a, b); } +ir_expression *div(operand a, operand b) +{ + return expr(ir_binop_div, a, b); +} + +ir_expression *round_even(operand a) +{ + return expr(ir_unop_round_even, a); +} + ir_expression *dot(operand a, operand b) { return expr(ir_binop_dot, a, b); } +ir_expression* +clamp(operand a, operand b, operand c) +{ + return expr(ir_binop_min, expr(ir_binop_max, a, b), c); +} + ir_expression * saturate(operand a) { @@ -203,4 +219,147 @@ saturate(operand a) new(mem_ctx) ir_constant(0.0f)); } +ir_expression* +equal(operand a, operand b) +{ + return expr(ir_binop_equal, a, b); +} + +ir_expression* +less(operand a, operand b) +{ + return expr(ir_binop_less, a, b); +} + +ir_expression* +greater(operand a, operand b) +{ + return expr(ir_binop_greater, a, b); +} + +ir_expression* +lequal(operand a, operand b) +{ + return expr(ir_binop_lequal, a, b); +} + +ir_expression* +gequal(operand a, operand b) +{ + return expr(ir_binop_gequal, a, b); +} + +ir_expression* +logic_not(operand a) +{ + return expr(ir_unop_logic_not, a); +} + +ir_expression* +logic_and(operand a, operand b) +{ + return expr(ir_binop_logic_and, a, b); +} + +ir_expression* +logic_or(operand a, operand b) +{ + return expr(ir_binop_logic_or, a, b); +} + +ir_expression* +bit_not(operand a) +{ + return expr(ir_unop_bit_not, a); +} + +ir_expression* +bit_and(operand a, operand b) +{ + return expr(ir_binop_bit_and, a, b); +} + +ir_expression* +bit_or(operand a, operand b) +{ + return expr(ir_binop_bit_or, a, b); +} + +ir_expression* +lshift(operand a, operand b) +{ + return expr(ir_binop_lshift, a, b); +} + +ir_expression* +rshift(operand a, operand b) +{ + return expr(ir_binop_rshift, a, b); +} + +ir_expression* +f2i(operand a) +{ + return expr(ir_unop_f2i, a); +} + +ir_expression* +i2f(operand a) +{ + return expr(ir_unop_i2f, a); +} + +ir_expression* +i2u(operand a) +{ + return expr(ir_unop_i2u, a); +} + +ir_expression* +u2i(operand a) +{ + return expr(ir_unop_u2i, a); +} + +ir_expression* +f2u(operand a) +{ + return expr(ir_unop_f2u, a); +} + +ir_expression* +u2f(operand a) +{ + return expr(ir_unop_u2f, a); +} + +ir_if* +if_tree(operand condition, + ir_instruction *then_branch) +{ + assert(then_branch != NULL); + + void *mem_ctx = ralloc_parent(condition.val); + + ir_if *result = new(mem_ctx) ir_if(condition.val); + result->then_instructions.push_tail(then_branch); + return result; +} + +ir_if* +if_tree(operand condition, + ir_instruction *then_branch, + ir_instruction *else_branch) +{ + assert(then_branch != NULL); + assert(else_branch != NULL); + + void *mem_ctx = ralloc_parent(condition.val); + + ir_if *result = new(mem_ctx) ir_if(condition.val); + result->then_instructions.push_tail(then_branch); + result->else_instructions.push_tail(else_branch); + return result; +} + } /* namespace ir_builder */ diff --git a/mesalib/src/glsl/ir_builder.h b/mesalib/src/glsl/ir_builder.h index 067858df4..690ac74eb 100644 --- a/mesalib/src/glsl/ir_builder.h +++ b/mesalib/src/glsl/ir_builder.h @@ -25,6 +25,15 @@ namespace ir_builder { +#ifndef WRITEMASK_X +enum writemask { + WRITEMASK_X = 0x1, + WRITEMASK_Y = 0x2, + WRITEMASK_Z = 0x4, + WRITEMASK_W = 0x8, +}; +#endif + /** * This little class exists to let the helper expression generators * take either an ir_rvalue * or an ir_variable * to be automatically @@ -73,9 +82,40 @@ public: class ir_factory { public: + ir_factory() + : instructions(NULL), + mem_ctx(NULL) + { + return; + } + void emit(ir_instruction *ir); ir_variable *make_temp(const glsl_type *type, const char *name); + ir_constant* + constant(float f) + { + return new(mem_ctx) ir_constant(f); + } + + ir_constant* + constant(int i) + { + return new(mem_ctx) ir_constant(i); + } + + ir_constant* + constant(unsigned u) + { + return new(mem_ctx) ir_constant(u); + } + + ir_constant* + constant(bool b) + { + return new(mem_ctx) ir_constant(b); + } + exec_list *instructions; void *mem_ctx; }; @@ -88,9 +128,35 @@ ir_expression *expr(ir_expression_operation op, operand a, operand b); ir_expression *add(operand a, operand b); ir_expression *sub(operand a, operand b); ir_expression *mul(operand a, operand b); +ir_expression *div(operand a, operand b); +ir_expression *round_even(operand a); ir_expression *dot(operand a, operand b); +ir_expression *clamp(operand a, operand b, operand c); ir_expression *saturate(operand a); +ir_expression *equal(operand a, operand b); +ir_expression *less(operand a, operand b); +ir_expression *greater(operand a, operand b); +ir_expression *lequal(operand a, operand b); +ir_expression *gequal(operand a, operand b); + +ir_expression *logic_not(operand a); +ir_expression *logic_and(operand a, operand b); +ir_expression *logic_or(operand a, operand b); + +ir_expression *bit_not(operand a); +ir_expression *bit_or(operand a, operand b); +ir_expression *bit_and(operand a, operand b); +ir_expression *lshift(operand a, operand b); +ir_expression *rshift(operand a, operand b); + +ir_expression *f2i(operand a); +ir_expression *i2f(operand a); +ir_expression *f2u(operand a); +ir_expression *u2f(operand a); +ir_expression *i2u(operand a); +ir_expression *u2i(operand a); + /** * Swizzle away later components, but preserve the ordering. */ @@ -108,4 +174,10 @@ ir_swizzle *swizzle_xy(operand a); ir_swizzle *swizzle_xyz(operand a); ir_swizzle *swizzle_xyzw(operand a); +ir_if *if_tree(operand condition, + ir_instruction *then_branch); +ir_if *if_tree(operand condition, + ir_instruction *then_branch, + ir_instruction *else_branch); + } /* namespace ir_builder */ diff --git a/mesalib/src/glsl/ir_clone.cpp b/mesalib/src/glsl/ir_clone.cpp index c62c1fc20..b94ff05df 100644 --- a/mesalib/src/glsl/ir_clone.cpp +++ b/mesalib/src/glsl/ir_clone.cpp @@ -50,7 +50,6 @@ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const var->interpolation = this->interpolation; var->location = this->location; var->index = this->index; - var->uniform_block = this->uniform_block; var->warn_extension = this->warn_extension; var->origin_upper_left = this->origin_upper_left; var->pixel_center_integer = this->pixel_center_integer; @@ -77,6 +76,8 @@ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const var->constant_initializer = this->constant_initializer->clone(mem_ctx, ht); + var->interface_type = this->interface_type; + if (ht) { hash_table_insert(ht, var, (void *)const_cast(this)); } @@ -375,10 +376,15 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const return c; } - default: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: assert(!"Should not get here."); - return NULL; + break; } + + return NULL; } diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp index 17b54b923..86b863f31 100644 --- a/mesalib/src/glsl/ir_constant_expression.cpp +++ b/mesalib/src/glsl/ir_constant_expression.cpp @@ -40,25 +40,6 @@ #include "glsl_types.h" #include "program/hash_table.h" -/* Using C99 rounding functions for roundToEven() implementation is - * difficult, because round(), rint, and nearbyint() are affected by - * fesetenv(), which the application may have done for its own - * purposes. Mesa's IROUND macro is close to what we want, but it - * rounds away from 0 on n + 0.5. - */ -static int -round_to_even(float val) -{ - int rounded = IROUND(val); - - if (val - floor(val) == 0.5) { - if (rounded % 2 != 0) - rounded += val > 0 ? -1 : 1; - } - - return rounded; -} - static float dot(ir_constant *op0, ir_constant *op1) { @@ -94,6 +75,297 @@ bitcast_f2u(float f) return u; } +/** + * Evaluate one component of a floating-point 4x8 unpacking function. + */ +typedef uint8_t +(*pack_1x8_func_t)(float); + +/** + * Evaluate one component of a floating-point 2x16 unpacking function. + */ +typedef uint16_t +(*pack_1x16_func_t)(float); + +/** + * Evaluate one component of a floating-point 4x8 unpacking function. + */ +typedef float +(*unpack_1x8_func_t)(uint8_t); + +/** + * Evaluate one component of a floating-point 2x16 unpacking function. + */ +typedef float +(*unpack_1x16_func_t)(uint16_t); + +/** + * Evaluate a 2x16 floating-point packing function. + */ +static uint32_t +pack_2x16(pack_1x16_func_t pack_1x16, + float x, float y) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packSnorm2x16 + * ------------- + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * The specifications for the other packing functions contain similar + * language. + */ + uint32_t u = 0; + u |= ((uint32_t) pack_1x16(x) << 0); + u |= ((uint32_t) pack_1x16(y) << 16); + return u; +} + +/** + * Evaluate a 4x8 floating-point packing function. + */ +static uint32_t +pack_4x8(pack_1x8_func_t pack_1x8, + float x, float y, float z, float w) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packSnorm4x8 + * ------------ + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * The specifications for the other packing functions contain similar + * language. + */ + uint32_t u = 0; + u |= ((uint32_t) pack_1x8(x) << 0); + u |= ((uint32_t) pack_1x8(y) << 8); + u |= ((uint32_t) pack_1x8(z) << 16); + u |= ((uint32_t) pack_1x8(w) << 24); + return u; +} + +/** + * Evaluate a 2x16 floating-point unpacking function. + */ +static void +unpack_2x16(unpack_1x16_func_t unpack_1x16, + uint32_t u, + float *x, float *y) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackSnorm2x16 + * --------------- + * The first component of the returned vector will be extracted from + * the least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * The specifications for the other unpacking functions contain similar + * language. + */ + *x = unpack_1x16((uint16_t) (u & 0xffff)); + *y = unpack_1x16((uint16_t) (u >> 16)); +} + +/** + * Evaluate a 4x8 floating-point unpacking function. + */ +static void +unpack_4x8(unpack_1x8_func_t unpack_1x8, uint32_t u, + float *x, float *y, float *z, float *w) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackSnorm4x8 + * -------------- + * The first component of the returned vector will be extracted from + * the least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * The specifications for the other unpacking functions contain similar + * language. + */ + *x = unpack_1x8((uint8_t) (u & 0xff)); + *y = unpack_1x8((uint8_t) (u >> 8)); + *z = unpack_1x8((uint8_t) (u >> 16)); + *w = unpack_1x8((uint8_t) (u >> 24)); +} + +/** + * Evaluate one component of packSnorm4x8. + */ +static uint8_t +pack_snorm_1x8(float x) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packSnorm4x8 + * ------------ + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + * + * We must first cast the float to an int, because casting a negative + * float to a uint is undefined. + */ + return (uint8_t) (int8_t) + _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 127.0f); +} + +/** + * Evaluate one component of packSnorm2x16. + */ +static uint16_t +pack_snorm_1x16(float x) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packSnorm2x16 + * ------------- + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) + * + * We must first cast the float to an int, because casting a negative + * float to a uint is undefined. + */ + return (uint16_t) (int16_t) + _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f); +} + +/** + * Evaluate one component of unpackSnorm4x8. + */ +static float +unpack_snorm_1x8(uint8_t u) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackSnorm4x8 + * -------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm4x8: clamp(f / 127.0, -1, +1) + */ + return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); +} + +/** + * Evaluate one component of unpackSnorm2x16. + */ +static float +unpack_snorm_1x16(uint16_t u) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackSnorm2x16 + * --------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm2x16: clamp(f / 32767.0, -1, +1) + */ + return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f); +} + +/** + * Evaluate one component packUnorm4x8. + */ +static uint8_t +pack_unorm_1x8(float x) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packUnorm4x8 + * ------------ + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) + */ + return (uint8_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 255.0f); +} + +/** + * Evaluate one component packUnorm2x16. + */ +static uint16_t +pack_unorm_1x16(float x) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packUnorm2x16 + * ------------- + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) + */ + return (uint16_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f); +} + +/** + * Evaluate one component of unpackUnorm4x8. + */ +static float +unpack_unorm_1x8(uint8_t u) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackUnorm4x8 + * -------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm4x8: f / 255.0 + */ + return (float) u / 255.0f; +} + +/** + * Evaluate one component of unpackUnorm2x16. + */ +static float +unpack_unorm_1x16(uint16_t u) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackUnorm2x16 + * --------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm2x16: f / 65535.0 + */ + return (float) u / 65535.0f; +} + +/** + * Evaluate one component of packHalf2x16. + */ +static uint16_t +pack_half_1x16(float x) +{ + return _mesa_float_to_half(x); +} + +/** + * Evaluate one component of unpackHalf2x16. + */ +static float +unpack_half_1x16(uint16_t u) +{ + return _mesa_half_to_float(u); +} + ir_constant * ir_rvalue::constant_expression_value(struct hash_table *variable_context) { @@ -279,7 +551,7 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) case ir_unop_round_even: assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = round_to_even(op[0]->value.f[c]); + data.f[c] = _mesa_round_to_even(op[0]->value.f[c]); } break; @@ -459,6 +731,70 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) } break; + case ir_unop_pack_snorm_2x16: + assert(op[0]->type == glsl_type::vec2_type); + data.u[0] = pack_2x16(pack_snorm_1x16, + op[0]->value.f[0], + op[0]->value.f[1]); + break; + case ir_unop_pack_snorm_4x8: + assert(op[0]->type == glsl_type::vec4_type); + data.u[0] = pack_4x8(pack_snorm_1x8, + op[0]->value.f[0], + op[0]->value.f[1], + op[0]->value.f[2], + op[0]->value.f[3]); + break; + case ir_unop_unpack_snorm_2x16: + assert(op[0]->type == glsl_type::uint_type); + unpack_2x16(unpack_snorm_1x16, + op[0]->value.u[0], + &data.f[0], &data.f[1]); + break; + case ir_unop_unpack_snorm_4x8: + assert(op[0]->type == glsl_type::uint_type); + unpack_4x8(unpack_snorm_1x8, + op[0]->value.u[0], + &data.f[0], &data.f[1], &data.f[2], &data.f[3]); + break; + case ir_unop_pack_unorm_2x16: + assert(op[0]->type == glsl_type::vec2_type); + data.u[0] = pack_2x16(pack_unorm_1x16, + op[0]->value.f[0], + op[0]->value.f[1]); + break; + case ir_unop_pack_unorm_4x8: + assert(op[0]->type == glsl_type::vec4_type); + data.u[0] = pack_4x8(pack_unorm_1x8, + op[0]->value.f[0], + op[0]->value.f[1], + op[0]->value.f[2], + op[0]->value.f[3]); + break; + case ir_unop_unpack_unorm_2x16: + assert(op[0]->type == glsl_type::uint_type); + unpack_2x16(unpack_unorm_1x16, + op[0]->value.u[0], + &data.f[0], &data.f[1]); + break; + case ir_unop_unpack_unorm_4x8: + assert(op[0]->type == glsl_type::uint_type); + unpack_4x8(unpack_unorm_1x8, + op[0]->value.u[0], + &data.f[0], &data.f[1], &data.f[2], &data.f[3]); + break; + case ir_unop_pack_half_2x16: + assert(op[0]->type == glsl_type::vec2_type); + data.u[0] = pack_2x16(pack_half_1x16, + op[0]->value.f[0], + op[0]->value.f[1]); + break; + case ir_unop_unpack_half_2x16: + assert(op[0]->type == glsl_type::uint_type); + unpack_2x16(unpack_half_1x16, + op[0]->value.u[0], + &data.f[0], &data.f[1]); + break; case ir_binop_pow: assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { diff --git a/mesalib/src/glsl/ir_function.cpp b/mesalib/src/glsl/ir_function.cpp index a525693ed..fe4209c77 100644 --- a/mesalib/src/glsl/ir_function.cpp +++ b/mesalib/src/glsl/ir_function.cpp @@ -78,17 +78,17 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b) return PARAMETER_LIST_NO_MATCH; case ir_var_const_in: - case ir_var_in: + case ir_var_function_in: if (!actual->type->can_implicitly_convert_to(param->type)) return PARAMETER_LIST_NO_MATCH; break; - case ir_var_out: + case ir_var_function_out: if (!param->type->can_implicitly_convert_to(actual->type)) return PARAMETER_LIST_NO_MATCH; break; - case ir_var_inout: + case ir_var_function_inout: /* Since there are no bi-directional automatic conversions (e.g., * there is int -> float but no float -> int), inout parameters must * be exact matches. diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h index 6b9519174..8f3301840 100644 --- a/mesalib/src/glsl/ir_optimization.h +++ b/mesalib/src/glsl/ir_optimization.h @@ -37,6 +37,31 @@ #define MOD_TO_FRACT 0x20 #define INT_DIV_TO_MUL_RCP 0x40 +/** + * \see class lower_packing_builtins_visitor + */ +enum lower_packing_builtins_op { + LOWER_PACK_UNPACK_NONE = 0x0000, + + LOWER_PACK_SNORM_2x16 = 0x0001, + LOWER_UNPACK_SNORM_2x16 = 0x0002, + + LOWER_PACK_UNORM_2x16 = 0x0004, + LOWER_UNPACK_UNORM_2x16 = 0x0008, + + LOWER_PACK_HALF_2x16 = 0x0010, + LOWER_UNPACK_HALF_2x16 = 0x0020, + + LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040, + LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080, + + LOWER_PACK_SNORM_4x8 = 0x0100, + LOWER_UNPACK_SNORM_4x8 = 0x0200, + + LOWER_PACK_UNORM_4x8 = 0x0400, + LOWER_UNPACK_UNORM_4x8 = 0x0800, +}; + bool do_common_optimization(exec_list *ir, bool linked, bool uniform_locations_assigned, unsigned max_unroll_iterations); @@ -74,6 +99,7 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions, bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); bool lower_clip_distance(gl_shader *shader); void lower_output_reads(exec_list *instructions); +bool lower_packing_builtins(exec_list *instructions, int op_mask); void lower_ubo_reference(struct gl_shader *shader, exec_list *instructions); void lower_packed_varyings(void *mem_ctx, unsigned location_base, unsigned locations_used, ir_variable_mode mode, diff --git a/mesalib/src/glsl/ir_print_visitor.cpp b/mesalib/src/glsl/ir_print_visitor.cpp index 8aa26e5d0..acc92dbf1 100644 --- a/mesalib/src/glsl/ir_print_visitor.cpp +++ b/mesalib/src/glsl/ir_print_visitor.cpp @@ -146,7 +146,8 @@ void ir_print_visitor::visit(ir_variable *ir) const char *const cent = (ir->centroid) ? "centroid " : ""; const char *const inv = (ir->invariant) ? "invariant " : ""; - const char *const mode[] = { "", "uniform ", "in ", "out ", "inout ", + const char *const mode[] = { "", "uniform ", "shader_in ", "shader_out ", + "in ", "out ", "inout ", "const_in ", "sys ", "temporary " }; const char *const interp[] = { "", "flat", "noperspective" }; diff --git a/mesalib/src/glsl/ir_reader.cpp b/mesalib/src/glsl/ir_reader.cpp index 03dbb67c3..405e75b64 100644 --- a/mesalib/src/glsl/ir_reader.cpp +++ b/mesalib/src/glsl/ir_reader.cpp @@ -400,13 +400,17 @@ ir_reader::read_declaration(s_expression *expr) } else if (strcmp(qualifier->value(), "auto") == 0) { var->mode = ir_var_auto; } else if (strcmp(qualifier->value(), "in") == 0) { - var->mode = ir_var_in; + var->mode = ir_var_function_in; + } else if (strcmp(qualifier->value(), "shader_in") == 0) { + var->mode = ir_var_shader_in; } else if (strcmp(qualifier->value(), "const_in") == 0) { var->mode = ir_var_const_in; } else if (strcmp(qualifier->value(), "out") == 0) { - var->mode = ir_var_out; + var->mode = ir_var_function_out; + } else if (strcmp(qualifier->value(), "shader_out") == 0) { + var->mode = ir_var_shader_out; } else if (strcmp(qualifier->value(), "inout") == 0) { - var->mode = ir_var_inout; + var->mode = ir_var_function_inout; } else if (strcmp(qualifier->value(), "temporary") == 0) { var->mode = ir_var_temporary; } else if (strcmp(qualifier->value(), "smooth") == 0) { diff --git a/mesalib/src/glsl/ir_set_program_inouts.cpp b/mesalib/src/glsl/ir_set_program_inouts.cpp index e5de07e01..1e102bfbb 100644 --- a/mesalib/src/glsl/ir_set_program_inouts.cpp +++ b/mesalib/src/glsl/ir_set_program_inouts.cpp @@ -85,7 +85,7 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len, for (int i = 0; i < len; i++) { GLbitfield64 bitfield = BITFIELD64_BIT(var->location + var->index + offset + i); - if (var->mode == ir_var_in) { + if (var->mode == ir_var_shader_in) { prog->InputsRead |= bitfield; if (is_fragment_shader) { gl_fragment_program *fprog = (gl_fragment_program *) prog; @@ -152,8 +152,8 @@ ir_set_program_inouts_visitor::visit_enter(ir_dereference_array *ir) ir_visitor_status ir_set_program_inouts_visitor::visit(ir_variable *ir) { - if (ir->mode == ir_var_in || - ir->mode == ir_var_out || + if (ir->mode == ir_var_shader_in || + ir->mode == ir_var_shader_out || ir->mode == ir_var_system_value) { hash_table_insert(this->ht, ir, ir); } diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp index ad57a3149..d8cafd55f 100644 --- a/mesalib/src/glsl/ir_validate.cpp +++ b/mesalib/src/glsl/ir_validate.cpp @@ -329,6 +329,38 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type == ir->type); break; + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_half_2x16: + assert(ir->type == glsl_type::uint_type); + assert(ir->operands[0]->type == glsl_type::vec2_type); + break; + + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_4x8: + assert(ir->type == glsl_type::uint_type); + assert(ir->operands[0]->type == glsl_type::vec4_type); + break; + + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: + assert(ir->type == glsl_type::vec2_type); + assert(ir->operands[0]->type == glsl_type::uint_type); + break; + + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + assert(ir->type == glsl_type::vec4_type); + assert(ir->operands[0]->type == glsl_type::uint_type); + break; + + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: + assert(ir->type == glsl_type::float_type); + assert(ir->operands[0]->type == glsl_type::uint_type); + break; + case ir_unop_noise: /* XXX what can we assert here? */ break; @@ -423,6 +455,12 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[0]->type == ir->operands[1]->type); break; + case ir_binop_pack_half_2x16_split: + assert(ir->type == glsl_type::uint_type); + assert(ir->operands[0]->type == glsl_type::float_type); + assert(ir->operands[1]->type == glsl_type::float_type); + break; + case ir_binop_ubo_load: assert(ir->operands[0]->as_constant()); assert(ir->operands[0]->type == glsl_type::uint_type); @@ -605,8 +643,8 @@ ir_validate::visit_enter(ir_call *ir) printf("ir_call parameter type mismatch:\n"); goto dump_ir; } - if (formal_param->mode == ir_var_out - || formal_param->mode == ir_var_inout) { + if (formal_param->mode == ir_var_function_out + || formal_param->mode == ir_var_function_inout) { if (!actual_param->is_lvalue()) { printf("ir_call out/inout parameters must be lvalues:\n"); goto dump_ir; diff --git a/mesalib/src/glsl/link_uniform_block_active_visitor.cpp b/mesalib/src/glsl/link_uniform_block_active_visitor.cpp new file mode 100644 index 000000000..56a8384e9 --- /dev/null +++ b/mesalib/src/glsl/link_uniform_block_active_visitor.cpp @@ -0,0 +1,162 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "link_uniform_block_active_visitor.h" +#include "program.h" + +link_uniform_block_active * +process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var) +{ + const uint32_t h = _mesa_hash_string(var->interface_type->name); + const hash_entry *const existing_block = + _mesa_hash_table_search(ht, h, var->interface_type->name); + + const glsl_type *const block_type = var->is_interface_instance() + ? var->type : var->interface_type; + + + /* If a block with this block-name has not previously been seen, add it. + * If a block with this block-name has been seen, it must be identical to + * the block currently being examined. + */ + if (existing_block == NULL) { + link_uniform_block_active *const b = + rzalloc(mem_ctx, struct link_uniform_block_active); + + b->type = block_type; + b->has_instance_name = var->is_interface_instance(); + + _mesa_hash_table_insert(ht, h, var->interface_type->name, + (void *) b); + return b; + } else { + link_uniform_block_active *const b = + (link_uniform_block_active *) existing_block->data; + + if (b->type != block_type + || b->has_instance_name != var->is_interface_instance()) + return NULL; + else + return b; + } + + assert(!"Should not get here."); + return NULL; +} + +ir_visitor_status +link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir) +{ + ir_dereference_variable *const d = ir->array->as_dereference_variable(); + ir_variable *const var = (d == NULL) ? NULL : d->var; + + /* If the r-value being dereferenced is not a variable (e.g., a field of a + * structure) or is not a uniform block instance, continue. + * + * WARNING: It is not enough for the variable to be part of uniform block. + * It must represent the entire block. Arrays (or matrices) inside blocks + * that lack an instance name are handled by the ir_dereference_variable + * function. + */ + if (var == NULL + || !var->is_in_uniform_block() + || !var->is_interface_instance()) + return visit_continue; + + /* Process the block. Bail if there was an error. + */ + link_uniform_block_active *const b = + process_block(this->mem_ctx, this->ht, var); + if (b == NULL) { + linker_error(prog, + "uniform block `%s' has mismatching definitions", + var->interface_type->name); + this->success = false; + return visit_stop; + } + + /* Block arrays must be declared with an instance name. + */ + assert(b->has_instance_name); + assert((b->num_array_elements == 0) == (b->array_elements == NULL)); + assert(b->type != NULL); + + /* Determine whether or not this array index has already been added to the + * list of active array indices. At this point all constant folding must + * have occured, and the array index must be a constant. + */ + ir_constant *c = ir->array_index->as_constant(); + assert(c != NULL); + + const unsigned idx = c->get_uint_component(0); + + unsigned i; + for (i = 0; i < b->num_array_elements; i++) { + if (b->array_elements[i] == idx) + break; + } + + assert(i <= b->num_array_elements); + + if (i == b->num_array_elements) { + b->array_elements = reralloc(this->mem_ctx, + b->array_elements, + unsigned, + b->num_array_elements + 1); + + b->array_elements[b->num_array_elements] = idx; + + b->num_array_elements++; + } + + return visit_continue_with_parent; +} + +ir_visitor_status +link_uniform_block_active_visitor::visit(ir_dereference_variable *ir) +{ + ir_variable *var = ir->var; + + if (!var->is_in_uniform_block()) + return visit_continue; + + assert(!var->is_interface_instance() || !var->type->is_array()); + + /* Process the block. Bail if there was an error. + */ + link_uniform_block_active *const b = + process_block(this->mem_ctx, this->ht, var); + if (b == NULL) { + linker_error(this->prog, + "uniform block `%s' has mismatching definitions", + var->interface_type->name); + this->success = false; + return visit_stop; + } + + assert(b->num_array_elements == 0); + assert(b->array_elements == NULL); + assert(b->type != NULL); + + return visit_continue; +} diff --git a/mesalib/src/glsl/link_uniform_block_active_visitor.h b/mesalib/src/glsl/link_uniform_block_active_visitor.h new file mode 100644 index 000000000..fba628a8f --- /dev/null +++ b/mesalib/src/glsl/link_uniform_block_active_visitor.h @@ -0,0 +1,62 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H +#define LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H + +#include "ir.h" +#include "ir_visitor.h" +#include "glsl_types.h" +#include "main/hash_table.h" + +struct link_uniform_block_active { + const glsl_type *type; + + unsigned *array_elements; + unsigned num_array_elements; + + bool has_instance_name; +}; + +class link_uniform_block_active_visitor : public ir_hierarchical_visitor { +public: + link_uniform_block_active_visitor(void *mem_ctx, struct hash_table *ht, + struct gl_shader_program *prog) + : success(true), prog(prog), ht(ht), mem_ctx(mem_ctx) + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit(ir_dereference_variable *); + + bool success; + +private: + struct gl_shader_program *prog; + struct hash_table *ht; + void *mem_ctx; +}; + +#endif /* LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H */ diff --git a/mesalib/src/glsl/link_uniform_blocks.cpp b/mesalib/src/glsl/link_uniform_blocks.cpp new file mode 100644 index 000000000..74fe1e29f --- /dev/null +++ b/mesalib/src/glsl/link_uniform_blocks.cpp @@ -0,0 +1,313 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/core.h" +#include "ir.h" +#include "linker.h" +#include "ir_uniform.h" +#include "link_uniform_block_active_visitor.h" +#include "main/hash_table.h" +#include "program.h" + +class ubo_visitor : public uniform_field_visitor { +public: + ubo_visitor(void *mem_ctx, gl_uniform_buffer_variable *variables, + unsigned num_variables) + : index(0), offset(0), buffer_size(0), variables(variables), + num_variables(num_variables), mem_ctx(mem_ctx), is_array_instance(false) + { + /* empty */ + } + + void process(const glsl_type *type, const char *name) + { + this->offset = 0; + this->buffer_size = 0; + this->is_array_instance = strchr(name, ']') != NULL; + this->uniform_field_visitor::process(type, name); + } + + unsigned index; + unsigned offset; + unsigned buffer_size; + gl_uniform_buffer_variable *variables; + unsigned num_variables; + void *mem_ctx; + bool is_array_instance; + +private: + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + assert(this->index < this->num_variables); + + gl_uniform_buffer_variable *v = &this->variables[this->index++]; + + v->Name = ralloc_strdup(mem_ctx, name); + v->Type = type; + v->RowMajor = row_major; + + if (this->is_array_instance) { + v->IndexName = ralloc_strdup(mem_ctx, name); + + char *open_bracket = strchr(v->IndexName, '['); + assert(open_bracket != NULL); + + char *close_bracket = strchr(open_bracket, ']'); + assert(close_bracket != NULL); + + /* Length of the tail without the ']' but with the NUL. + */ + unsigned len = strlen(close_bracket + 1) + 1; + + memmove(open_bracket, close_bracket + 1, len); + } else { + v->IndexName = v->Name; + } + + unsigned alignment = type->std140_base_alignment(v->RowMajor); + unsigned size = type->std140_size(v->RowMajor); + + this->offset = glsl_align(this->offset, alignment); + v->Offset = this->offset; + this->offset += size; + + /* From the GL_ARB_uniform_buffer_object spec: + * + * "For uniform blocks laid out according to [std140] rules, the + * minimum buffer object size returned by the + * UNIFORM_BLOCK_DATA_SIZE query is derived by taking the offset of + * the last basic machine unit consumed by the last uniform of the + * uniform block (including any end-of-array or end-of-structure + * padding), adding one, and rounding up to the next multiple of + * the base alignment required for a vec4." + */ + this->buffer_size = glsl_align(this->offset, 16); + } + + virtual void visit_field(const glsl_struct_field *field) + { + this->offset = glsl_align(this->offset, + field->type->std140_base_alignment(false)); + } +}; + +class count_block_size : public uniform_field_visitor { +public: + count_block_size() : num_active_uniforms(0) + { + /* empty */ + } + + unsigned num_active_uniforms; + +private: + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + (void) type; + (void) name; + (void) row_major; + this->num_active_uniforms++; + } +}; + +struct block { + const glsl_type *type; + bool has_instance_name; +}; + +int +link_uniform_blocks(void *mem_ctx, + struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders, + struct gl_uniform_block **blocks_ret) +{ + /* This hash table will track all of the uniform blocks that have been + * encountered. Since blocks with the same block-name must be the same, + * the hash is organized by block-name. + */ + struct hash_table *block_hash = + _mesa_hash_table_create(mem_ctx, _mesa_key_string_equal); + + /* Determine which uniform blocks are active. + */ + link_uniform_block_active_visitor v(mem_ctx, block_hash, prog); + for (unsigned i = 0; i < num_shaders; i++) { + visit_list_elements(&v, shader_list[i]->ir); + } + + /* Count the number of active uniform blocks. Count the total number of + * active slots in those uniform blocks. + */ + unsigned num_blocks = 0; + unsigned num_variables = 0; + count_block_size block_size; + struct hash_entry *entry; + + hash_table_foreach (block_hash, entry) { + const struct link_uniform_block_active *const b = + (const struct link_uniform_block_active *) entry->data; + + const glsl_type *const block_type = + b->type->is_array() ? b->type->fields.array : b->type; + + assert((b->num_array_elements > 0) == b->type->is_array()); + + block_size.num_active_uniforms = 0; + block_size.process(block_type, ""); + + if (b->num_array_elements > 0) { + num_blocks += b->num_array_elements; + num_variables += b->num_array_elements + * block_size.num_active_uniforms; + } else { + num_blocks++; + num_variables += block_size.num_active_uniforms; + } + + } + + if (num_blocks == 0) { + assert(num_variables == 0); + _mesa_hash_table_destroy(block_hash, NULL); + return 0; + } + + assert(num_variables != 0); + + /* Allocate storage to hold all of the informatation related to uniform + * blocks that can be queried through the API. + */ + gl_uniform_block *blocks = + ralloc_array(mem_ctx, gl_uniform_block, num_blocks); + gl_uniform_buffer_variable *variables = + ralloc_array(blocks, gl_uniform_buffer_variable, num_variables); + + /* Add each variable from each uniform block to the API tracking + * structures. + */ + unsigned i = 0; + ubo_visitor parcel(blocks, variables, num_variables); + + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140) + == unsigned(ubo_packing_std140)); + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED) + == unsigned(ubo_packing_shared)); + STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED) + == unsigned(ubo_packing_packed)); + + + hash_table_foreach (block_hash, entry) { + const struct link_uniform_block_active *const b = + (const struct link_uniform_block_active *) entry->data; + const glsl_type *block_type = b->type; + + if (b->num_array_elements > 0) { + const char *const name = block_type->fields.array->name; + + assert(b->has_instance_name); + for (unsigned j = 0; j < b->num_array_elements; j++) { + blocks[i].Name = ralloc_asprintf(blocks, "%s[%u]", name, + b->array_elements[j]); + blocks[i].Uniforms = &variables[parcel.index]; + blocks[i].Binding = 0; + blocks[i].UniformBufferSize = 0; + blocks[i]._Packing = + gl_uniform_block_packing(block_type->interface_packing); + + parcel.process(block_type->fields.array, + blocks[i].Name); + + blocks[i].UniformBufferSize = parcel.buffer_size; + + blocks[i].NumUniforms = + (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); + + i++; + } + } else { + blocks[i].Name = ralloc_strdup(blocks, block_type->name); + blocks[i].Uniforms = &variables[parcel.index]; + blocks[i].Binding = 0; + blocks[i].UniformBufferSize = 0; + blocks[i]._Packing = + gl_uniform_block_packing(block_type->interface_packing); + + parcel.process(block_type, + b->has_instance_name ? block_type->name : ""); + + blocks[i].UniformBufferSize = parcel.buffer_size; + + blocks[i].NumUniforms = + (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); + + i++; + } + } + + assert(parcel.index == num_variables); + + _mesa_hash_table_destroy(block_hash, NULL); + + *blocks_ret = blocks; + return num_blocks; +} + +bool +link_uniform_blocks_are_compatible(const gl_uniform_block *a, + const gl_uniform_block *b) +{ + assert(strcmp(a->Name, b->Name) == 0); + + /* Page 35 (page 42 of the PDF) in section 4.3.7 of the GLSL 1.50 spec says: + * + * "Matched block names within an interface (as defined above) must + * match in terms of having the same number of declarations with the + * same sequence of types and the same sequence of member names, as + * well as having the same member-wise layout qualification....if a + * matching block is declared as an array, then the array sizes must + * also match... Any mismatch will generate a link error." + * + * Arrays are not yet supported, so there is no check for that. + */ + if (a->NumUniforms != b->NumUniforms) + return false; + + if (a->_Packing != b->_Packing) + return false; + + for (unsigned i = 0; i < a->NumUniforms; i++) { + if (strcmp(a->Uniforms[i].Name, b->Uniforms[i].Name) != 0) + return false; + + if (a->Uniforms[i].Type != b->Uniforms[i].Type) + return false; + + if (a->Uniforms[i].RowMajor != b->Uniforms[i].RowMajor) + return false; + } + + return true; +} diff --git a/mesalib/src/glsl/link_uniform_initializers.cpp b/mesalib/src/glsl/link_uniform_initializers.cpp index 849e08097..836a360fa 100644 --- a/mesalib/src/glsl/link_uniform_initializers.cpp +++ b/mesalib/src/glsl/link_uniform_initializers.cpp @@ -67,7 +67,11 @@ copy_constant_to_storage(union gl_constant_value *storage, case GLSL_TYPE_BOOL: storage[i].b = int(val->value.b[i]); break; - default: + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: /* All other types should have already been filtered by other * paths in the caller. */ diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp index 07d9c18de..f1284adb2 100644 --- a/mesalib/src/glsl/link_uniforms.cpp +++ b/mesalib/src/glsl/link_uniforms.cpp @@ -29,12 +29,6 @@ #include "program/hash_table.h" #include "program.h" -static inline unsigned int -align(unsigned int a, unsigned int align) -{ - return (a + align - 1) / align * align; -} - /** * \file link_uniforms.cpp * Assign locations for GLSL uniforms. @@ -57,24 +51,50 @@ values_for_type(const glsl_type *type) } } +void +uniform_field_visitor::process(const glsl_type *type, const char *name) +{ + assert(type->is_record() + || (type->is_array() && type->fields.array->is_record()) + || type->is_interface() + || (type->is_array() && type->fields.array->is_interface())); + + char *name_copy = ralloc_strdup(NULL, name); + recursion(type, &name_copy, strlen(name), false); + ralloc_free(name_copy); +} + void uniform_field_visitor::process(ir_variable *var) { const glsl_type *t = var->type; + /* false is always passed for the row_major parameter to the other + * processing functions because no information is available to do + * otherwise. See the warning in linker.h. + */ + /* Only strdup the name if we actually will need to modify it. */ if (t->is_record() || (t->is_array() && t->fields.array->is_record())) { char *name = ralloc_strdup(NULL, var->name); - recursion(var->type, &name, strlen(name)); + recursion(var->type, &name, strlen(name), false); + ralloc_free(name); + } else if (t->is_interface()) { + char *name = ralloc_strdup(NULL, var->type->name); + recursion(var->type, &name, strlen(name), false); + ralloc_free(name); + } else if (t->is_array() && t->fields.array->is_interface()) { + char *name = ralloc_strdup(NULL, var->type->fields.array->name); + recursion(var->type, &name, strlen(name), false); ralloc_free(name); } else { - this->visit_field(t, var->name); + this->visit_field(t, var->name, false); } } void uniform_field_visitor::recursion(const glsl_type *t, char **name, - size_t name_length) + size_t name_length, bool row_major) { /* Records need to have each field processed individually. * @@ -82,30 +102,47 @@ uniform_field_visitor::recursion(const glsl_type *t, char **name, * individually, then each field of the resulting array elements processed * individually. */ - if (t->is_record()) { + if (t->is_record() || t->is_interface()) { for (unsigned i = 0; i < t->length; i++) { const char *field = t->fields.structure[i].name; size_t new_length = name_length; - /* Append '.field' to the current uniform name. */ - ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); + if (t->fields.structure[i].type->is_record()) + this->visit_field(&t->fields.structure[i]); + + /* Append '.field' to the current uniform name. */ + if (name_length == 0) { + ralloc_asprintf_rewrite_tail(name, &new_length, "%s", field); + } else { + ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); + } - recursion(t->fields.structure[i].type, name, new_length); + recursion(t->fields.structure[i].type, name, new_length, + t->fields.structure[i].row_major); } - } else if (t->is_array() && t->fields.array->is_record()) { + } else if (t->is_array() && (t->fields.array->is_record() + || t->fields.array->is_interface())) { for (unsigned i = 0; i < t->length; i++) { size_t new_length = name_length; /* Append the subscript to the current uniform name */ ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); - recursion(t->fields.array, name, new_length); + recursion(t->fields.array, name, new_length, + t->fields.structure[i].row_major); } } else { - this->visit_field(t, *name); + this->visit_field(t, *name, row_major); } } +void +uniform_field_visitor::visit_field(const glsl_struct_field *field) +{ + (void) field; + /* empty */ +} + /** * Class to help calculate the storage requirements for a set of uniforms * @@ -131,6 +168,15 @@ public: this->num_shader_uniform_components = 0; } + void process(ir_variable *var) + { + if (var->is_interface_instance()) + uniform_field_visitor::process(var->interface_type, + var->interface_type->name); + else + uniform_field_visitor::process(var); + } + /** * Total number of active uniforms counted */ @@ -152,10 +198,15 @@ public: unsigned num_shader_uniform_components; private: - virtual void visit_field(const glsl_type *type, const char *name) + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) { assert(!type->is_record()); assert(!(type->is_array() && type->fields.array->is_record())); + assert(!type->is_interface()); + assert(!(type->is_array() && type->fields.array->is_interface())); + + (void) row_major; /* Count the number of samplers regardless of whether the uniform is * already in the hash table. The hash table prevents adding the same @@ -224,42 +275,77 @@ public: } void set_and_process(struct gl_shader_program *prog, - struct gl_shader *shader, ir_variable *var) { - ubo_var = NULL; - if (var->uniform_block != -1) { - struct gl_uniform_block *block = - &shader->UniformBlocks[var->uniform_block]; - - ubo_block_index = -1; - for (unsigned i = 0; i < prog->NumUniformBlocks; i++) { - if (!strcmp(prog->UniformBlocks[i].Name, - shader->UniformBlocks[var->uniform_block].Name)) { - ubo_block_index = i; - break; + ubo_block_index = -1; + if (var->is_in_uniform_block()) { + if (var->is_interface_instance() && var->type->is_array()) { + unsigned l = strlen(var->interface_type->name); + + for (unsigned i = 0; i < prog->NumUniformBlocks; i++) { + if (strncmp(var->interface_type->name, + prog->UniformBlocks[i].Name, + l) == 0 + && prog->UniformBlocks[i].Name[l] == '[') { + ubo_block_index = i; + break; + } + } + } else { + for (unsigned i = 0; i < prog->NumUniformBlocks; i++) { + if (strcmp(var->interface_type->name, + prog->UniformBlocks[i].Name) == 0) { + ubo_block_index = i; + break; + } } } assert(ubo_block_index != -1); - ubo_var_index = var->location; - ubo_var = &block->Uniforms[var->location]; - ubo_byte_offset = ubo_var->Offset; - } - - process(var); + /* Uniform blocks that were specified with an instance name must be + * handled a little bit differently. The name of the variable is the + * name used to reference the uniform block instead of being the name + * of a variable within the block. Therefore, searching for the name + * within the block will fail. + */ + if (var->is_interface_instance()) { + ubo_byte_offset = 0; + ubo_row_major = false; + } else { + const struct gl_uniform_block *const block = + &prog->UniformBlocks[ubo_block_index]; + + assert(var->location != -1); + + const struct gl_uniform_buffer_variable *const ubo_var = + &block->Uniforms[var->location]; + + ubo_row_major = ubo_var->RowMajor; + ubo_byte_offset = ubo_var->Offset; + } + + if (var->is_interface_instance()) + process(var->interface_type, var->interface_type->name); + else + process(var); + } else + process(var); } - struct gl_uniform_buffer_variable *ubo_var; int ubo_block_index; - int ubo_var_index; int ubo_byte_offset; + bool ubo_row_major; private: - virtual void visit_field(const glsl_type *type, const char *name) + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) { assert(!type->is_record()); assert(!(type->is_array() && type->fields.array->is_record())); + assert(!type->is_interface()); + assert(!(type->is_array() && type->fields.array->is_interface())); + + (void) row_major; unsigned id; bool found = this->map->get(id, name); @@ -330,17 +416,17 @@ private: this->uniforms[id].num_driver_storage = 0; this->uniforms[id].driver_storage = NULL; this->uniforms[id].storage = this->values; - if (this->ubo_var) { + if (this->ubo_block_index != -1) { this->uniforms[id].block_index = this->ubo_block_index; - unsigned alignment = type->std140_base_alignment(ubo_var->RowMajor); - this->ubo_byte_offset = align(this->ubo_byte_offset, alignment); + unsigned alignment = type->std140_base_alignment(ubo_row_major); + this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment); this->uniforms[id].offset = this->ubo_byte_offset; - this->ubo_byte_offset += type->std140_size(ubo_var->RowMajor); + this->ubo_byte_offset += type->std140_size(ubo_row_major); if (type->is_array()) { this->uniforms[id].array_stride = - align(type->fields.array->std140_size(ubo_var->RowMajor), 16); + glsl_align(type->fields.array->std140_size(ubo_row_major), 16); } else { this->uniforms[id].array_stride = 0; } @@ -348,7 +434,7 @@ private: if (type->is_matrix() || (type->is_array() && type->fields.array->is_matrix())) { this->uniforms[id].matrix_stride = 16; - this->uniforms[id].row_major = ubo_var->RowMajor; + this->uniforms[id].row_major = ubo_row_major; } else { this->uniforms[id].matrix_stride = 0; this->uniforms[id].row_major = false; @@ -399,26 +485,10 @@ link_cross_validate_uniform_block(void *mem_ctx, { for (unsigned int i = 0; i < *num_linked_blocks; i++) { struct gl_uniform_block *old_block = &(*linked_blocks)[i]; - if (strcmp(old_block->Name, new_block->Name) == 0) { - if (old_block->NumUniforms != new_block->NumUniforms) { - return -1; - } - for (unsigned j = 0; j < old_block->NumUniforms; j++) { - if (strcmp(old_block->Uniforms[j].Name, - new_block->Uniforms[j].Name) != 0) - return -1; - - if (old_block->Uniforms[j].Offset != - new_block->Uniforms[j].Offset) - return -1; - - if (old_block->Uniforms[j].RowMajor != - new_block->Uniforms[j].RowMajor) - return -1; - } - return i; - } + if (strcmp(old_block->Name, new_block->Name) == 0) + return link_uniform_blocks_are_compatible(old_block, new_block) + ? i : -1; } *linked_blocks = reralloc(mem_ctx, *linked_blocks, @@ -440,7 +510,13 @@ link_cross_validate_uniform_block(void *mem_ctx, struct gl_uniform_buffer_variable *ubo_var = &linked_block->Uniforms[i]; - ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name); + if (ubo_var->Name == ubo_var->IndexName) { + ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name); + ubo_var->IndexName = ubo_var->Name; + } else { + ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name); + ubo_var->IndexName = ralloc_strdup(*linked_blocks, ubo_var->IndexName); + } } return linked_block_index; @@ -458,17 +534,47 @@ link_update_uniform_buffer_variables(struct gl_shader *shader) foreach_list(node, shader->ir) { ir_variable *const var = ((ir_instruction *) node)->as_variable(); - if ((var == NULL) || (var->uniform_block == -1)) + if ((var == NULL) || !var->is_in_uniform_block()) continue; assert(var->mode == ir_var_uniform); + if (var->is_interface_instance()) { + var->location = 0; + continue; + } + bool found = false; + char sentinel = '\0'; + + if (var->type->is_record()) { + sentinel = '.'; + } else if (var->type->is_array() + && var->type->fields.array->is_record()) { + sentinel = '['; + } + + const unsigned l = strlen(var->name); for (unsigned i = 0; i < shader->NumUniformBlocks; i++) { for (unsigned j = 0; j < shader->UniformBlocks[i].NumUniforms; j++) { - if (!strcmp(var->name, shader->UniformBlocks[i].Uniforms[j].Name)) { + if (sentinel) { + const char *begin = shader->UniformBlocks[i].Uniforms[j].Name; + const char *end = strchr(begin, sentinel); + + if (end == NULL) + continue; + + if (l != (end - begin)) + continue; + + if (strncmp(var->name, begin, l) == 0) { + found = true; + var->location = j; + break; + } + } else if (!strcmp(var->name, + shader->UniformBlocks[i].Uniforms[j].Name)) { found = true; - var->uniform_block = i; var->location = j; break; } @@ -494,7 +600,7 @@ link_assign_uniform_block_offsets(struct gl_shader *shader) unsigned alignment = type->std140_base_alignment(ubo_var->RowMajor); unsigned size = type->std140_size(ubo_var->RowMajor); - offset = align(offset, alignment); + offset = glsl_align(offset, alignment); ubo_var->Offset = offset; offset += size; } @@ -510,7 +616,7 @@ link_assign_uniform_block_offsets(struct gl_shader *shader) * and rounding up to the next multiple of the base * alignment required for a vec4." */ - block->UniformBufferSize = align(offset, 16); + block->UniformBufferSize = glsl_align(offset, 16); } } @@ -538,13 +644,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog) */ memset(prog->SamplerUnits, 0, sizeof(prog->SamplerUnits)); - for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { - if (prog->_LinkedShaders[i] == NULL) - continue; - - link_update_uniform_buffer_variables(prog->_LinkedShaders[i]); - } - /* First pass: Count the uniform resources used by the user-defined * uniforms. While this happens, each active uniform will have an index * assigned to it. @@ -557,6 +656,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog) if (prog->_LinkedShaders[i] == NULL) continue; + link_update_uniform_buffer_variables(prog->_LinkedShaders[i]); + /* Reset various per-shader target counts. */ uniform_size.start_shader(); @@ -620,7 +721,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog) if (strncmp("gl_", var->name, 3) == 0) continue; - parcel.set_and_process(prog, prog->_LinkedShaders[i], var); + parcel.set_and_process(prog, var); } prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used; diff --git a/mesalib/src/glsl/link_varyings.cpp b/mesalib/src/glsl/link_varyings.cpp index 5c27f231e..25681d618 100644 --- a/mesalib/src/glsl/link_varyings.cpp +++ b/mesalib/src/glsl/link_varyings.cpp @@ -54,10 +54,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, foreach_list(node, producer->ir) { ir_variable *const var = ((ir_instruction *) node)->as_variable(); - /* FINISHME: For geometry shaders, this should also look for inout - * FINISHME: variables. - */ - if ((var == NULL) || (var->mode != ir_var_out)) + if ((var == NULL) || (var->mode != ir_var_shader_out)) continue; parameters.add_variable(var); @@ -71,10 +68,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, foreach_list(node, consumer->ir) { ir_variable *const input = ((ir_instruction *) node)->as_variable(); - /* FINISHME: For geometry shaders, this should also look for inout - * FINISHME: variables. - */ - if ((input == NULL) || (input->mode != ir_var_in)) + if ((input == NULL) || (input->mode != ir_var_shader_in)) continue; ir_variable *const output = parameters.get_variable(input->name); @@ -417,8 +411,17 @@ tfeedback_decl::find_output_var(gl_shader_program *prog, const char *name = this->is_clip_distance_mesa ? "gl_ClipDistanceMESA" : this->var_name; ir_variable *var = producer->symbols->get_variable(name); - if (var && var->mode == ir_var_out) + if (var && var->mode == ir_var_shader_out) { + const glsl_type *type = var->type; + while (type->base_type == GLSL_TYPE_ARRAY) + type = type->fields.array; + if (type->base_type == GLSL_TYPE_STRUCT) { + linker_error(prog, "Transform feedback of varying structs not " + "implemented yet."); + return NULL; + } return var; + } /* From GL_EXT_transform_feedback: * A program will fail to link if: @@ -810,16 +813,15 @@ varying_matches::compute_packing_order(ir_variable *var) { const glsl_type *element_type = var->type; - /* FINISHME: Support for "varying" records in GLSL 1.50. */ while (element_type->base_type == GLSL_TYPE_ARRAY) { element_type = element_type->fields.array; } - switch (element_type->vector_elements) { + switch (element_type->component_slots() % 4) { case 1: return PACKING_ORDER_SCALAR; case 2: return PACKING_ORDER_VEC2; case 3: return PACKING_ORDER_VEC3; - case 4: return PACKING_ORDER_VEC4; + case 0: return PACKING_ORDER_VEC4; default: assert(!"Unexpected value of vector_elements"); return PACKING_ORDER_VEC4; @@ -854,7 +856,7 @@ is_varying_var(GLenum shaderType, const ir_variable *var) { /* Only fragment shaders will take a varying variable as an input */ if (shaderType == GL_FRAGMENT_SHADER && - var->mode == ir_var_in) { + var->mode == ir_var_shader_in) { switch (var->location) { case FRAG_ATTRIB_WPOS: case FRAG_ATTRIB_FACE: @@ -915,13 +917,13 @@ assign_varying_locations(struct gl_context *ctx, foreach_list(node, producer->ir) { ir_variable *const output_var = ((ir_instruction *) node)->as_variable(); - if ((output_var == NULL) || (output_var->mode != ir_var_out)) + if ((output_var == NULL) || (output_var->mode != ir_var_shader_out)) continue; ir_variable *input_var = consumer ? consumer->symbols->get_variable(output_var->name) : NULL; - if (input_var && input_var->mode != ir_var_in) + if (input_var && input_var->mode != ir_var_shader_in) input_var = NULL; if (input_var) { @@ -965,11 +967,11 @@ assign_varying_locations(struct gl_context *ctx, */ assert(!ctx->Extensions.EXT_transform_feedback); } else { - lower_packed_varyings(mem_ctx, producer_base, slots_used, ir_var_out, - producer); + lower_packed_varyings(mem_ctx, producer_base, slots_used, + ir_var_shader_out, producer); if (consumer) { - lower_packed_varyings(mem_ctx, consumer_base, slots_used, ir_var_in, - consumer); + lower_packed_varyings(mem_ctx, consumer_base, slots_used, + ir_var_shader_in, consumer); } } @@ -979,7 +981,7 @@ assign_varying_locations(struct gl_context *ctx, foreach_list(node, consumer->ir) { ir_variable *const var = ((ir_instruction *) node)->as_variable(); - if ((var == NULL) || (var->mode != ir_var_in)) + if ((var == NULL) || (var->mode != ir_var_shader_in)) continue; if (var->is_unmatched_generic_inout) { diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp index 63548e071..63ce178f4 100644 --- a/mesalib/src/glsl/linker.cpp +++ b/mesalib/src/glsl/linker.cpp @@ -107,8 +107,8 @@ public: ir_rvalue *param_rval = (ir_rvalue *)iter.get(); ir_variable *sig_param = (ir_variable *)sig_iter.get(); - if (sig_param->mode == ir_var_out || - sig_param->mode == ir_var_inout) { + if (sig_param->mode == ir_var_function_out || + sig_param->mode == ir_var_function_inout) { ir_variable *var = param_rval->variable_referenced(); if (var && strcmp(name, var->name) == 0) { found = true; @@ -212,10 +212,10 @@ link_invalidate_variable_locations(gl_shader *sh, int input_base, int base; switch (var->mode) { - case ir_var_in: + case ir_var_shader_in: base = input_base; break; - case ir_var_out: + case ir_var_shader_out: base = output_base; break; default: @@ -393,10 +393,9 @@ mode_string(const ir_variable *var) case ir_var_auto: return (var->read_only) ? "global constant" : "global variable"; - case ir_var_uniform: return "uniform"; - case ir_var_in: return "shader input"; - case ir_var_out: return "shader output"; - case ir_var_inout: return "shader inout"; + case ir_var_uniform: return "uniform"; + case ir_var_shader_in: return "shader input"; + case ir_var_shader_out: return "shader output"; case ir_var_const_in: case ir_var_temporary: @@ -874,7 +873,6 @@ link_intrastage_shaders(void *mem_ctx, unsigned num_shaders) { struct gl_uniform_block *uniform_blocks = NULL; - unsigned num_uniform_blocks = 0; /* Check that global variables defined in multiple shaders are consistent. */ @@ -882,23 +880,11 @@ link_intrastage_shaders(void *mem_ctx, return NULL; /* Check that uniform blocks between shaders for a stage agree. */ - for (unsigned i = 0; i < num_shaders; i++) { - struct gl_shader *sh = shader_list[i]; - - for (unsigned j = 0; j < sh->NumUniformBlocks; j++) { - link_assign_uniform_block_offsets(sh); - - int index = link_cross_validate_uniform_block(mem_ctx, - &uniform_blocks, - &num_uniform_blocks, - &sh->UniformBlocks[j]); - if (index == -1) { - linker_error(prog, "uniform block `%s' has mismatching definitions", - sh->UniformBlocks[j].Name); - return NULL; - } - } - } + const int num_uniform_blocks = + link_uniform_blocks(mem_ctx, prog, shader_list, num_shaders, + &uniform_blocks); + if (num_uniform_blocks < 0) + return NULL; /* Check that there is only a single definition of each function signature * across all shaders. @@ -1069,8 +1055,8 @@ update_array_sizes(struct gl_shader_program *prog) ir_variable *const var = ((ir_instruction *) node)->as_variable(); if ((var == NULL) || (var->mode != ir_var_uniform && - var->mode != ir_var_in && - var->mode != ir_var_out) || + var->mode != ir_var_shader_in && + var->mode != ir_var_shader_out) || !var->type->is_array()) continue; @@ -1078,7 +1064,7 @@ update_array_sizes(struct gl_shader_program *prog) * will not be eliminated. Since we always do std140, just * don't resize arrays in UBOs. */ - if (var->uniform_block != -1) + if (var->is_in_uniform_block()) continue; unsigned int size = var->max_array_access; @@ -1206,7 +1192,8 @@ assign_attribute_or_color_locations(gl_shader_program *prog, ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0; const enum ir_variable_mode direction = - (target_index == MESA_SHADER_VERTEX) ? ir_var_in : ir_var_out; + (target_index == MESA_SHADER_VERTEX) + ? ir_var_shader_in : ir_var_shader_out; /* Temporary storage for the set of attributes that need locations assigned. @@ -1428,7 +1415,7 @@ store_fragdepth_layout(struct gl_shader_program *prog) foreach_list(node, ir) { ir_variable *const var = ((ir_instruction *) node)->as_variable(); - if (var == NULL || var->mode != ir_var_out) { + if (var == NULL || var->mode != ir_var_shader_out) { continue; } @@ -1809,7 +1796,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->_LinkedShaders[MESA_SHADER_VERTEX] != NULL) { demote_shader_inputs_and_outputs(prog->_LinkedShaders[MESA_SHADER_VERTEX], - ir_var_out); + ir_var_shader_out); /* Eliminate code that is now dead due to unused vertex outputs being * demoted. @@ -1821,9 +1808,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] != NULL) { gl_shader *const sh = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - demote_shader_inputs_and_outputs(sh, ir_var_in); - demote_shader_inputs_and_outputs(sh, ir_var_inout); - demote_shader_inputs_and_outputs(sh, ir_var_out); + demote_shader_inputs_and_outputs(sh, ir_var_shader_in); + demote_shader_inputs_and_outputs(sh, ir_var_shader_out); /* Eliminate code that is now dead due to unused geometry outputs being * demoted. @@ -1835,7 +1821,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] != NULL) { gl_shader *const sh = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - demote_shader_inputs_and_outputs(sh, ir_var_in); + demote_shader_inputs_and_outputs(sh, ir_var_shader_in); /* Eliminate code that is now dead due to unused fragment inputs being * demoted. This shouldn't actually do anything other than remove diff --git a/mesalib/src/glsl/linker.h b/mesalib/src/glsl/linker.h index 67c7f3488..14eb9c1cd 100644 --- a/mesalib/src/glsl/linker.h +++ b/mesalib/src/glsl/linker.h @@ -49,6 +49,17 @@ link_cross_validate_uniform_block(void *mem_ctx, void link_assign_uniform_block_offsets(struct gl_shader *shader); +extern bool +link_uniform_blocks_are_compatible(const gl_uniform_block *a, + const gl_uniform_block *b); + +extern int +link_uniform_blocks(void *mem_ctx, + struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders, + struct gl_uniform_block **blocks_ret); + /** * Class for processing all of the leaf fields of an uniform * @@ -71,24 +82,60 @@ public: * \param var The uniform variable that is to be processed * * Calls \c ::visit_field for each leaf of the uniform. + * + * \warning + * This entry should only be used with uniform blocks in cases where the + * row / column ordering of matrices in the block does not matter. For + * example, enumerating the names of members of the block, but not for + * determining the offsets of members. */ void process(ir_variable *var); + /** + * Begin processing a uniform of a structured type. + * + * This flavor of \c process should be used to handle structured types + * (i.e., structures, interfaces, or arrays there of) that need special + * name handling. A common usage is to handle cases where the block name + * (instead of the instance name) is used for an interface block. + * + * \param type Type that is to be processed, associated with \c name + * \param name Base name of the structured uniform being processed + * + * \note + * \c type must be \c GLSL_TYPE_RECORD, \c GLSL_TYPE_INTERFACE, or an array + * there of. + */ + void process(const glsl_type *type, const char *name); + protected: /** * Method invoked for each leaf of the uniform * * \param type Type of the field. * \param name Fully qualified name of the field. + * \param row_major For a matrix type, is it stored row-major. + */ + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) = 0; + + /** + * Visit a record before visiting its fields + * + * For structures-of-structures or interfaces-of-structures, this visits + * the inner structure before visiting its fields. + * + * The default implementation does nothing. */ - virtual void visit_field(const glsl_type *type, const char *name) = 0; + virtual void visit_field(const glsl_struct_field *field); private: /** * \param name_length Length of the current name \b not including the * terminating \c NUL character. */ - void recursion(const glsl_type *t, char **name, size_t name_length); + void recursion(const glsl_type *t, char **name, size_t name_length, + bool row_major); }; void diff --git a/mesalib/src/glsl/lower_clip_distance.cpp b/mesalib/src/glsl/lower_clip_distance.cpp index 09bdc36e1..643807de8 100644 --- a/mesalib/src/glsl/lower_clip_distance.cpp +++ b/mesalib/src/glsl/lower_clip_distance.cpp @@ -301,8 +301,8 @@ lower_clip_distance_visitor::visit_leave(ir_call *ir) this->base_ir->insert_before(temp_clip_distance); actual_param->replace_with( new(ctx) ir_dereference_variable(temp_clip_distance)); - if (formal_param->mode == ir_var_in - || formal_param->mode == ir_var_inout) { + if (formal_param->mode == ir_var_function_in + || formal_param->mode == ir_var_function_inout) { /* Copy from gl_ClipDistance to the temporary before the call. * Since we are going to insert this copy before the current * instruction, we need to visit it afterwards to make sure it @@ -314,8 +314,8 @@ lower_clip_distance_visitor::visit_leave(ir_call *ir) this->base_ir->insert_before(new_assignment); this->visit_new_assignment(new_assignment); } - if (formal_param->mode == ir_var_out - || formal_param->mode == ir_var_inout) { + if (formal_param->mode == ir_var_function_out + || formal_param->mode == ir_var_function_inout) { /* Copy from the temporary to gl_ClipDistance after the call. * Since visit_list_elements() has already decided which * instruction it's going to visit next, we need to visit diff --git a/mesalib/src/glsl/lower_output_reads.cpp b/mesalib/src/glsl/lower_output_reads.cpp index a6192a517..b93e254ec 100644 --- a/mesalib/src/glsl/lower_output_reads.cpp +++ b/mesalib/src/glsl/lower_output_reads.cpp @@ -41,7 +41,7 @@ class output_read_remover : public ir_hierarchical_visitor { protected: /** * A hash table mapping from the original ir_variable shader outputs - * (ir_var_out mode) to the new temporaries to be used instead. + * (ir_var_shader_out mode) to the new temporaries to be used instead. */ hash_table *replacements; @@ -86,7 +86,7 @@ output_read_remover::~output_read_remover() ir_visitor_status output_read_remover::visit(ir_dereference_variable *ir) { - if (ir->var->mode != ir_var_out) + if (ir->var->mode != ir_var_shader_out) return visit_continue; ir_variable *temp = (ir_variable *) hash_table_find(replacements, ir->var); diff --git a/mesalib/src/glsl/lower_packed_varyings.cpp b/mesalib/src/glsl/lower_packed_varyings.cpp index 9e7f274b7..8a40f5e72 100644 --- a/mesalib/src/glsl/lower_packed_varyings.cpp +++ b/mesalib/src/glsl/lower_packed_varyings.cpp @@ -70,6 +70,10 @@ * This lowering pass also packs flat floats, ints, and uints together, by * using ivec4 as the base type of flat "varyings", and using appropriate * casts to convert floats and uints into ints. + * + * This lowering pass also handles varyings whose type is a struct or an array + * of struct. Structs are packed in order and with no gaps, so there may be a + * performance penalty due to structure elements being double-parked. */ #include "glsl_symbol_table.h" @@ -135,8 +139,8 @@ private: ir_variable **packed_varyings; /** - * Type of varying which is being lowered in this pass (either ir_var_in or - * ir_var_out). + * Type of varying which is being lowered in this pass (either + * ir_var_shader_in or ir_var_shader_out). */ const ir_variable_mode mode; @@ -274,10 +278,20 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue, ir_variable *unpacked_var, const char *name) { - /* FINISHME: Support for "varying" records in GLSL 1.50. */ - assert(!rvalue->type->is_record()); - - if (rvalue->type->is_array()) { + if (rvalue->type->is_record()) { + for (unsigned i = 0; i < rvalue->type->length; i++) { + if (i != 0) + rvalue = rvalue->clone(this->mem_ctx, NULL); + const char *field_name = rvalue->type->fields.structure[i].name; + ir_dereference_record *dereference_record = new(this->mem_ctx) + ir_dereference_record(rvalue, field_name); + char *deref_name + = ralloc_asprintf(this->mem_ctx, "%s.%s", name, field_name); + fine_location = this->lower_rvalue(dereference_record, fine_location, + unpacked_var, deref_name); + } + return fine_location; + } else if (rvalue->type->is_array()) { /* Arrays are packed/unpacked by considering each array element in * sequence. */ @@ -336,7 +350,7 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue, unpacked_var, name)); ir_swizzle *swizzle = new(this->mem_ctx) ir_swizzle(packed_deref, swizzle_values, components); - if (this->mode == ir_var_out) { + if (this->mode == ir_var_shader_out) { ir_assignment *assignment = this->bitwise_assign_pack(swizzle, rvalue); this->main_instructions->push_tail(assignment); diff --git a/mesalib/src/glsl/lower_packing_builtins.cpp b/mesalib/src/glsl/lower_packing_builtins.cpp new file mode 100644 index 000000000..db73c7b0f --- /dev/null +++ b/mesalib/src/glsl/lower_packing_builtins.cpp @@ -0,0 +1,1314 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" +#include "ir_builder.h" +#include "ir_optimization.h" +#include "ir_rvalue_visitor.h" + +namespace { + +using namespace ir_builder; + +/** + * A visitor that lowers built-in floating-point pack/unpack expressions + * such packSnorm2x16. + */ +class lower_packing_builtins_visitor : public ir_rvalue_visitor { +public: + /** + * \param op_mask is a bitmask of `enum lower_packing_builtins_op` + */ + explicit lower_packing_builtins_visitor(int op_mask) + : op_mask(op_mask), + progress(false) + { + /* Mutually exclusive options. */ + assert(!((op_mask & LOWER_PACK_HALF_2x16) && + (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT))); + + assert(!((op_mask & LOWER_UNPACK_HALF_2x16) && + (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT))); + + factory.instructions = &factory_instructions; + } + + virtual ~lower_packing_builtins_visitor() + { + assert(factory_instructions.is_empty()); + } + + bool get_progress() { return progress; } + + void handle_rvalue(ir_rvalue **rvalue) + { + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr) + return; + + enum lower_packing_builtins_op lowering_op = + choose_lowering_op(expr->operation); + + if (lowering_op == LOWER_PACK_UNPACK_NONE) + return; + + setup_factory(ralloc_parent(expr)); + + ir_rvalue *op0 = expr->operands[0]; + ralloc_steal(factory.mem_ctx, op0); + + switch (lowering_op) { + case LOWER_PACK_SNORM_2x16: + *rvalue = lower_pack_snorm_2x16(op0); + break; + case LOWER_PACK_SNORM_4x8: + *rvalue = lower_pack_snorm_4x8(op0); + break; + case LOWER_PACK_UNORM_2x16: + *rvalue = lower_pack_unorm_2x16(op0); + break; + case LOWER_PACK_UNORM_4x8: + *rvalue = lower_pack_unorm_4x8(op0); + break; + case LOWER_PACK_HALF_2x16: + *rvalue = lower_pack_half_2x16(op0); + break; + case LOWER_PACK_HALF_2x16_TO_SPLIT: + *rvalue = split_pack_half_2x16(op0); + break; + case LOWER_UNPACK_SNORM_2x16: + *rvalue = lower_unpack_snorm_2x16(op0); + break; + case LOWER_UNPACK_SNORM_4x8: + *rvalue = lower_unpack_snorm_4x8(op0); + break; + case LOWER_UNPACK_UNORM_2x16: + *rvalue = lower_unpack_unorm_2x16(op0); + break; + case LOWER_UNPACK_UNORM_4x8: + *rvalue = lower_unpack_unorm_4x8(op0); + break; + case LOWER_UNPACK_HALF_2x16: + *rvalue = lower_unpack_half_2x16(op0); + break; + case LOWER_UNPACK_HALF_2x16_TO_SPLIT: + *rvalue = split_unpack_half_2x16(op0); + break; + case LOWER_PACK_UNPACK_NONE: + assert(!"not reached"); + break; + } + + teardown_factory(); + progress = true; + } + +private: + const int op_mask; + bool progress; + ir_factory factory; + exec_list factory_instructions; + + /** + * Determine the needed lowering operation by filtering \a expr_op + * through \ref op_mask. + */ + enum lower_packing_builtins_op + choose_lowering_op(ir_expression_operation expr_op) + { + /* C++ regards int and enum as fundamentally different types. + * So, we can't simply return from each case; we must cast the return + * value. + */ + int result; + + switch (expr_op) { + case ir_unop_pack_snorm_2x16: + result = op_mask & LOWER_PACK_SNORM_2x16; + break; + case ir_unop_pack_snorm_4x8: + result = op_mask & LOWER_PACK_SNORM_4x8; + break; + case ir_unop_pack_unorm_2x16: + result = op_mask & LOWER_PACK_UNORM_2x16; + break; + case ir_unop_pack_unorm_4x8: + result = op_mask & LOWER_PACK_UNORM_4x8; + break; + case ir_unop_pack_half_2x16: + result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT); + break; + case ir_unop_unpack_snorm_2x16: + result = op_mask & LOWER_UNPACK_SNORM_2x16; + break; + case ir_unop_unpack_snorm_4x8: + result = op_mask & LOWER_UNPACK_SNORM_4x8; + break; + case ir_unop_unpack_unorm_2x16: + result = op_mask & LOWER_UNPACK_UNORM_2x16; + break; + case ir_unop_unpack_unorm_4x8: + result = op_mask & LOWER_UNPACK_UNORM_4x8; + break; + case ir_unop_unpack_half_2x16: + result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT); + break; + default: + result = LOWER_PACK_UNPACK_NONE; + break; + } + + return static_cast(result); + } + + void + setup_factory(void *mem_ctx) + { + assert(factory.mem_ctx == NULL); + assert(factory.instructions->is_empty()); + + factory.mem_ctx = mem_ctx; + } + + void + teardown_factory() + { + base_ir->insert_before(factory.instructions); + assert(factory.instructions->is_empty()); + factory.mem_ctx = NULL; + } + + template + ir_constant* + constant(T x) + { + return factory.constant(x); + } + + /** + * \brief Pack two uint16's into a single uint32. + * + * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32 + * where the least significant bits specify the first element of the pair. + * Return the uint32. + */ + ir_rvalue* + pack_uvec2_to_uint(ir_rvalue *uvec2_rval) + { + assert(uvec2_rval->type == glsl_type::uvec2_type); + + /* uvec2 u = UVEC2_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_uvec2_to_uint"); + factory.emit(assign(u, uvec2_rval)); + + /* return (u.y << 16) | (u.x & 0xffff); */ + return bit_or(lshift(swizzle_y(u), constant(16u)), + bit_and(swizzle_x(u), constant(0xffffu))); + } + + /** + * \brief Pack four uint8's into a single uint32. + * + * Interpret the given uvec4 as a uint32 4-typle. Pack the 4-tuple into a + * uint32 where the least significant bits specify the first element of the + * 4-tuple. Return the uint32. + */ + ir_rvalue* + pack_uvec4_to_uint(ir_rvalue *uvec4_rval) + { + assert(uvec4_rval->type == glsl_type::uvec4_type); + + /* uvec4 u = UVEC4_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uvec4_type, + "tmp_pack_uvec4_to_uint"); + factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); + + /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ + return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)), + lshift(swizzle_z(u), constant(16u))), + bit_or(lshift(swizzle_y(u), constant(8u)), + swizzle_x(u))); + } + + /** + * \brief Unpack a uint32 into two uint16's. + * + * Interpret the given uint32 as a uint16 pair where the uint32's least + * significant bits specify the pair's first element. Return the uint16 + * pair as a uvec2. + */ + ir_rvalue* + unpack_uint_to_uvec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = UINT_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_uint_to_uvec2_u"); + factory.emit(assign(u, uint_rval)); + + /* uvec2 u2; */ + ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_uint_to_uvec2_u2"); + + /* u2.x = u & 0xffffu; */ + factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X)); + + /* u2.y = u >> 16u; */ + factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y)); + + return deref(u2).val; + } + + /** + * \brief Unpack a uint32 into four uint8's. + * + * Interpret the given uint32 as a uint8 4-tuple where the uint32's least + * significant bits specify the 4-tuple's first element. Return the uint8 + * 4-tuple as a uvec4. + */ + ir_rvalue* + unpack_uint_to_uvec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = UINT_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_uint_to_uvec4_u"); + factory.emit(assign(u, uint_rval)); + + /* uvec4 u4; */ + ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, + "tmp_unpack_uint_to_uvec4_u4"); + + /* u4.x = u & 0xffu; */ + factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); + + /* u4.y = (u >> 8u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), + constant(0xffu)), WRITEMASK_Y)); + + /* u4.z = (u >> 16u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), + constant(0xffu)), WRITEMASK_Z)); + + /* u4.w = (u >> 24u) */ + factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); + + return deref(u4).val; + } + + /** + * \brief Lower a packSnorm2x16 expression. + * + * \param vec2_rval is packSnorm2x16's input + * \return packSnorm2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_snorm_2x16(ir_rvalue *vec2_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packSnorm2x16(vec2 v) + * -------------------------------- + * First, converts each component of the normalized floating-point value + * v into 16-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec2_to_uint( + * uvec2(ivec2( + * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f)))); + * + * It is necessary to first convert the vec2 to ivec2 rather than directly + * converting vec2 to uvec2 because the latter conversion is undefined. + * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to + * convert a negative floating point value to an uint". + */ + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_rvalue *result = pack_uvec2_to_uint( + i2u(f2i(round_even(mul(clamp(vec2_rval, + constant(-1.0f), + constant(1.0f)), + constant(32767.0f)))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower a packSnorm4x8 expression. + * + * \param vec4_rval is packSnorm4x8's input + * \return packSnorm4x8's output as a uint rvalue + */ + ir_rvalue* + lower_pack_snorm_4x8(ir_rvalue *vec4_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp uint packSnorm4x8(vec4 v) + * ------------------------------- + * First, converts each component of the normalized floating-point value + * v into 8-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec4_to_uint( + * uvec4(ivec4( + * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); + * + * It is necessary to first convert the vec4 to ivec4 rather than directly + * converting vec4 to uvec4 because the latter conversion is undefined. + * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to + * convert a negative floating point value to an uint". + */ + assert(vec4_rval->type == glsl_type::vec4_type); + + ir_rvalue *result = pack_uvec4_to_uint( + i2u(f2i(round_even(mul(clamp(vec4_rval, + constant(-1.0f), + constant(1.0f)), + constant(127.0f)))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower an unpackSnorm2x16 expression. + * + * \param uint_rval is unpackSnorm2x16's input + * \return unpackSnorm2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_snorm_2x16(ir_rvalue *uint_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp vec2 unpackSnorm2x16 (highp uint p) + * ----------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into a pair of + * 16-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm2x16: clamp(f / 32767.0, -1,+1) + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return clamp( + * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f, + * -1.0f, 1.0f); + * + * The above IR may appear unnecessarily complex, but the intermediate + * conversion to ivec2 and the bit shifts are necessary to correctly unpack + * negative floats. + * + * To see why, consider packing and then unpacking vec2(-1.0, 0.0). + * packSnorm2x16 encodes -1.0 as the int16 0xffff. During unpacking, we + * place that int16 into an int32, which results in the *positive* integer + * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather + * unimportant bit 16. We must now extend the int16's sign bit into bits + * 17-32, which is accomplished by left-shifting then right-shifting. + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = + clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), + constant(16)), + constant(16u))), + constant(32767.0f)), + constant(-1.0f), + constant(1.0f)); + + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Lower an unpackSnorm4x8 expression. + * + * \param uint_rval is unpackSnorm4x8's input + * \return unpackSnorm4x8's output as a vec4 rvalue + */ + ir_rvalue* + lower_unpack_snorm_4x8(ir_rvalue *uint_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp vec4 unpackSnorm4x8 (highp uint p) + * ---------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into four + * 8-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * four-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm4x8: clamp(f / 127.0, -1, +1) + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return clamp( + * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, + * -1.0f, 1.0f); + * + * The above IR may appear unnecessarily complex, but the intermediate + * conversion to ivec4 and the bit shifts are necessary to correctly unpack + * negative floats. + * + * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, + * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we + * place that int8 into an int32, which results in the *positive* integer + * 0x000000ff. The int8's sign bit becomes, in the int32, the rather + * unimportant bit 8. We must now extend the int8's sign bit into bits + * 9-32, which is accomplished by left-shifting then right-shifting. + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = + clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), + constant(24u)), + constant(24u))), + constant(127.0f)), + constant(-1.0f), + constant(1.0f)); + + assert(result->type == glsl_type::vec4_type); + return result; + } + + /** + * \brief Lower a packUnorm2x16 expression. + * + * \param vec2_rval is packUnorm2x16's input + * \return packUnorm2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_unorm_2x16(ir_rvalue *vec2_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packUnorm2x16 (vec2 v) + * --------------------------------- + * First, converts each component of the normalized floating-point value + * v into 16-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec2_to_uint(uvec2( + * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f))); + * + * Here it is safe to directly convert the vec2 to uvec2 because the the + * vec2 has been clamped to a non-negative range. + */ + + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_rvalue *result = pack_uvec2_to_uint( + f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower a packUnorm4x8 expression. + * + * \param vec4_rval is packUnorm4x8's input + * \return packUnorm4x8's output as a uint rvalue + */ + ir_rvalue* + lower_pack_unorm_4x8(ir_rvalue *vec4_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp uint packUnorm4x8 (vec4 v) + * -------------------------------- + * First, converts each component of the normalized floating-point value + * v into 8-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec4_to_uint(uvec4( + * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f))); + * + * Here it is safe to directly convert the vec4 to uvec4 because the the + * vec4 has been clamped to a non-negative range. + */ + + assert(vec4_rval->type == glsl_type::vec4_type); + + ir_rvalue *result = pack_uvec4_to_uint( + f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower an unpackUnorm2x16 expression. + * + * \param uint_rval is unpackUnorm2x16's input + * \return unpackUnorm2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_unorm_2x16(ir_rvalue *uint_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * highp vec2 unpackUnorm2x16 (highp uint p) + * ----------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into a pair of + * 16-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm2x16: f / 65535.0 + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0; + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)), + constant(65535.0f)); + + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Lower an unpackUnorm4x8 expression. + * + * \param uint_rval is unpackUnorm4x8's input + * \return unpackUnorm4x8's output as a vec4 rvalue + */ + ir_rvalue* + lower_unpack_unorm_4x8(ir_rvalue *uint_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp vec4 unpackUnorm4x8 (highp uint p) + * ---------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into four + * 8-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm4x8: f / 255.0 + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0; + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)), + constant(255.0f)); + + assert(result->type == glsl_type::vec4_type); + return result; + } + + /** + * \brief Lower the component-wise calculation of packHalf2x16. + * + * \param f_rval is one component of packHafl2x16's input + * \param e_rval is the unshifted exponent bits of f_rval + * \param m_rval is the unshifted mantissa bits of f_rval + * + * \return a uint rvalue that encodes a float16 in its lower 16 bits + */ + ir_rvalue* + pack_half_1x16_nosign(ir_rvalue *f_rval, + ir_rvalue *e_rval, + ir_rvalue *m_rval) + { + assert(e_rval->type == glsl_type::uint_type); + assert(m_rval->type == glsl_type::uint_type); + + /* uint u16; */ + ir_variable *u16 = factory.make_temp(glsl_type::uint_type, + "tmp_pack_half_1x16_u16"); + + /* float f = FLOAT_RVAL; */ + ir_variable *f = factory.make_temp(glsl_type::float_type, + "tmp_pack_half_1x16_f"); + factory.emit(assign(f, f_rval)); + + /* uint e = E_RVAL; */ + ir_variable *e = factory.make_temp(glsl_type::uint_type, + "tmp_pack_half_1x16_e"); + factory.emit(assign(e, e_rval)); + + /* uint m = M_RVAL; */ + ir_variable *m = factory.make_temp(glsl_type::uint_type, + "tmp_pack_half_1x16_m"); + factory.emit(assign(m, m_rval)); + + /* Preliminaries + * ------------- + * + * For a float16, the bit layout is: + * + * sign: 15 + * exponent: 10:14 + * mantissa: 0:9 + * + * Let f16 be a float16 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) + * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) + * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) + * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) + * if e16 = 31 and m16 != 0, then NaN (5) + * + * where 0 <= m16 < 2^10. + * + * For a float32, the bit layout is: + * + * sign: 31 + * exponent: 23:30 + * mantissa: 0:22 + * + * Let f32 be a float32 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) + * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) + * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) + * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) + * if e32 = 255 and m32 != 0, then NaN (14) + * + * where 0 <= m32 < 2^23. + * + * The minimum and maximum normal float16 values are + * + * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20) + * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21) + * + * The step at max_norm16 is + * + * max_step16 = 2^5 (22) + * + * Observe that the float16 boundary values in equations 20-21 lie in the + * range of normal float32 values. + * + * + * Rounding Behavior + * ----------------- + * Not all float32 values can be exactly represented as a float16. We + * round all such intermediate float32 values to the nearest float16; if + * the float32 is exactly between to float16 values, we round to the one + * with an even mantissa. This rounding behavior has several benefits: + * + * - It has no sign bias. + * + * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's + * GPU ISA. + * + * - By reproducing the behavior of the GPU (at least on Intel hardware), + * compile-time evaluation of constant packHalf2x16 GLSL expressions will + * result in the same value as if the expression were executed on the + * GPU. + * + * Calculation + * ----------- + * Our task is to compute s16, e16, m16 given f32. Since this function + * ignores the sign bit, assume that s32 = s16 = 0. There are several + * cases consider. + */ + + factory.emit( + + /* Case 1) f32 is NaN + * + * The resultant f16 will also be NaN. + */ + + /* if (e32 == 255 && m32 != 0) { */ + if_tree(logic_and(equal(e, constant(0xffu << 23u)), + logic_not(equal(m, constant(0u)))), + + assign(u16, constant(0x7fffu)), + + /* Case 2) f32 lies in the range [0, min_norm16). + * + * The resultant float16 will be either zero, subnormal, or normal. + * + * Solving + * + * f32 = min_norm16 (30) + * + * gives + * + * e32 = 113 and m32 = 0 (31) + * + * Therefore this case occurs if and only if + * + * e32 < 113 (32) + */ + + /* } else if (e32 < 113) { */ + if_tree(less(e, constant(113u << 23u)), + + /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */ + assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f), + constant((float) (1 << 24)))))), + + /* Case 3) f32 lies in the range + * [min_norm16, max_norm16 + max_step16). + * + * The resultant float16 will be either normal or infinite. + * + * Solving + * + * f32 = max_norm16 + max_step16 (40) + * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41) + * = 2^16 (42) + * gives + * + * e32 = 143 and m32 = 0 (43) + * + * We already solved the boundary condition f32 = min_norm16 above + * in equation 31. Therefore this case occurs if and only if + * + * 113 <= e32 and e32 < 143 + */ + + /* } else if (e32 < 143) { */ + if_tree(less(e, constant(143u << 23u)), + + /* The addition below handles the case where the mantissa rounds + * up to 1024 and bumps the exponent. + * + * u16 = ((e - (112u << 23u)) >> 13u) + * + round_to_even((float(m) / (1u << 13u)); + */ + assign(u16, add(rshift(sub(e, constant(112u << 23u)), + constant(13u)), + f2u(round_even( + div(u2f(m), constant((float) (1 << 13))))))), + + /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf]. + * + * The resultant float16 will be infinite. + * + * The cases above caught all float32 values in the range + * [0, max_norm16 + max_step16), so this is the fall-through case. + */ + + /* } else { */ + + assign(u16, constant(31u << 10u)))))); + + /* } */ + + return deref(u16).val; + } + + /** + * \brief Lower a packHalf2x16 expression. + * + * \param vec2_rval is packHalf2x16's input + * \return packHalf2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_half_2x16(ir_rvalue *vec2_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packHalf2x16 (mediump vec2 v) + * ---------------------------------------- + * Returns an unsigned integer obtained by converting the components of + * a two-component floating-point vector to the 16-bit floating-point + * representation found in the OpenGL ES Specification, and then packing + * these two 16-bit integers into a 32-bit unsigned integer. + * + * The first vector component specifies the 16 least- significant bits + * of the result; the second component specifies the 16 most-significant + * bits. + */ + + assert(vec2_rval->type == glsl_type::vec2_type); + + /* vec2 f = VEC2_RVAL; */ + ir_variable *f = factory.make_temp(glsl_type::vec2_type, + "tmp_pack_half_2x16_f"); + factory.emit(assign(f, vec2_rval)); + + /* uvec2 f32 = bitcast_f2u(f); */ + ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_f32"); + factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f))); + + /* uvec2 f16; */ + ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_f16"); + + /* Get f32's unshifted exponent bits. + * + * uvec2 e = f32 & 0x7f800000u; + */ + ir_variable *e = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_e"); + factory.emit(assign(e, bit_and(f32, constant(0x7f800000u)))); + + /* Get f32's unshifted mantissa bits. + * + * uvec2 m = f32 & 0x007fffffu; + */ + ir_variable *m = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_m"); + factory.emit(assign(m, bit_and(f32, constant(0x007fffffu)))); + + /* Set f16's exponent and mantissa bits. + * + * f16.x = pack_half_1x16_nosign(e.x, m.x); + * f16.y = pack_half_1y16_nosign(e.y, m.y); + */ + factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f), + swizzle_x(e), + swizzle_x(m)), + WRITEMASK_X)); + factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f), + swizzle_y(e), + swizzle_y(m)), + WRITEMASK_Y)); + + /* Set f16's sign bits. + * + * f16 |= (f32 & (1u << 31u) >> 16u; + */ + factory.emit( + assign(f16, bit_or(f16, + rshift(bit_and(f32, constant(1u << 31u)), + constant(16u))))); + + + /* return (f16.y << 16u) | f16.x; */ + ir_rvalue *result = bit_or(lshift(swizzle_y(f16), + constant(16u)), + swizzle_x(f16)); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Split packHalf2x16's vec2 operand into two floats. + * + * \param vec2_rval is packHalf2x16's input + * \return a uint rvalue + * + * Some code generators, such as the i965 fragment shader, require that all + * vector expressions be lowered to a sequence of scalar expressions. + * However, packHalf2x16 cannot be scalarized by the same mechanism as + * a true vector operation because its input and output have a differing + * number of vector components. + * + * This method scalarizes packHalf2x16 by transforming it from an unary + * operation having vector input to a binary operation having scalar input. + * That is, it transforms + * + * packHalf2x16(VEC2_RVAL); + * + * into + * + * vec2 v = VEC2_RVAL; + * return packHalf2x16_split(v.x, v.y); + */ + ir_rvalue* + split_pack_half_2x16(ir_rvalue *vec2_rval) + { + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_variable *v = factory.make_temp(glsl_type::vec2_type, + "tmp_split_pack_half_2x16_v"); + factory.emit(assign(v, vec2_rval)); + + return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v)); + } + + /** + * \brief Lower the component-wise calculation of unpackHalf2x16. + * + * Given a uint that encodes a float16 in its lower 16 bits, this function + * returns a uint that encodes a float32 with the same value. The sign bit + * of the float16 is ignored. + * + * \param e_rval is the unshifted exponent bits of a float16 + * \param m_rval is the unshifted mantissa bits of a float16 + * \param a uint rvalue that encodes a float32 + */ + ir_rvalue* + unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval) + { + assert(e_rval->type == glsl_type::uint_type); + assert(m_rval->type == glsl_type::uint_type); + + /* uint u32; */ + ir_variable *u32 = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_u32"); + + /* uint e = E_RVAL; */ + ir_variable *e = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_e"); + factory.emit(assign(e, e_rval)); + + /* uint m = M_RVAL; */ + ir_variable *m = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_m"); + factory.emit(assign(m, m_rval)); + + /* Preliminaries + * ------------- + * + * For a float16, the bit layout is: + * + * sign: 15 + * exponent: 10:14 + * mantissa: 0:9 + * + * Let f16 be a float16 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) + * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) + * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) + * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) + * if e16 = 31 and m16 != 0, then NaN (5) + * + * where 0 <= m16 < 2^10. + * + * For a float32, the bit layout is: + * + * sign: 31 + * exponent: 23:30 + * mantissa: 0:22 + * + * Let f32 be a float32 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) + * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) + * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) + * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) + * if e32 = 255 and m32 != 0, then NaN (14) + * + * where 0 <= m32 < 2^23. + * + * Calculation + * ----------- + * Our task is to compute s32, e32, m32 given f16. Since this function + * ignores the sign bit, assume that s32 = s16 = 0. There are several + * cases consider. + */ + + factory.emit( + + /* Case 1) f16 is zero or subnormal. + * + * The simplest method of calcuating f32 in this case is + * + * f32 = f16 (20) + * = 2^(-14) * (m16 / 2^10) (21) + * = m16 / 2^(-24) (22) + */ + + /* if (e16 == 0) { */ + if_tree(equal(e, constant(0u)), + + /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */ + assign(u32, expr(ir_unop_bitcast_f2u, + div(u2f(m), constant((float)(1 << 24))))), + + /* Case 2) f16 is normal. + * + * The equation + * + * f32 = f16 (30) + * 2^(e32 - 127) * (1 + m32 / 2^23) = (31) + * 2^(e16 - 15) * (1 + m16 / 2^10) + * + * can be decomposed into two + * + * 2^(e32 - 127) = 2^(e16 - 15) (32) + * 1 + m32 / 2^23 = 1 + m16 / 2^10 (33) + * + * which solve to + * + * e32 = e16 + 112 (34) + * m32 = m16 * 2^13 (35) + */ + + /* } else if (e16 < 31)) { */ + if_tree(less(e, constant(31u << 10u)), + + /* u32 = ((e + (112 << 10)) | m) << 13; + */ + assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m), + constant(13u))), + + + /* Case 3) f16 is infinite. */ + if_tree(equal(m, constant(0u)), + + assign(u32, constant(255u << 23u)), + + /* Case 4) f16 is NaN. */ + /* } else { */ + + assign(u32, constant(0x7fffffffu)))))); + + /* } */ + + return deref(u32).val; + } + + /** + * \brief Lower an unpackHalf2x16 expression. + * + * \param uint_rval is unpackHalf2x16's input + * \return unpackHalf2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_half_2x16(ir_rvalue *uint_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * mediump vec2 unpackHalf2x16 (highp uint v) + * ------------------------------------------ + * Returns a two-component floating-point vector with components + * obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit + * values, interpreting those values as 16-bit floating-point numbers + * according to the OpenGL ES Specification, and converting them to + * 32-bit floating-point values. + * + * The first component of the vector is obtained from the + * 16 least-significant bits of v; the second component is obtained + * from the 16 most-significant bits of v. + */ + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = RVALUE; + * uvec2 f16 = uvec2(u.x & 0xffff, u.y >> 16); + */ + ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_f16"); + factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval))); + + /* uvec2 f32; */ + ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_f32"); + + /* Get f16's unshifted exponent bits. + * + * uvec2 e = f16 & 0x7c00u; + */ + ir_variable *e = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_e"); + factory.emit(assign(e, bit_and(f16, constant(0x7c00u)))); + + /* Get f16's unshifted mantissa bits. + * + * uvec2 m = f16 & 0x03ffu; + */ + ir_variable *m = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_m"); + factory.emit(assign(m, bit_and(f16, constant(0x03ffu)))); + + /* Set f32's exponent and mantissa bits. + * + * f32.x = unpack_half_1x16_nosign(e.x, m.x); + * f32.y = unpack_half_1x16_nosign(e.y, m.y); + */ + factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e), + swizzle_x(m)), + WRITEMASK_X)); + factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e), + swizzle_y(m)), + WRITEMASK_Y)); + + /* Set f32's sign bit. + * + * f32 |= (f16 & 0x8000u) << 16u; + */ + factory.emit(assign(f32, bit_or(f32, + lshift(bit_and(f16, + constant(0x8000u)), + constant(16u))))); + + /* return bitcast_u2f(f32); */ + ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32); + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Split unpackHalf2x16 into two operations. + * + * \param uint_rval is unpackHalf2x16's input + * \return a vec2 rvalue + * + * Some code generators, such as the i965 fragment shader, require that all + * vector expressions be lowered to a sequence of scalar expressions. + * However, unpackHalf2x16 cannot be scalarized by the same method as + * a true vector operation because the number of components of its input + * and output differ. + * + * This method scalarizes unpackHalf2x16 by transforming it from a single + * operation having vec2 output to a pair of operations each having float + * output. That is, it transforms + * + * unpackHalf2x16(UINT_RVAL) + * + * into + * + * uint u = UINT_RVAL; + * vec2 v; + * + * v.x = unpackHalf2x16_split_x(u); + * v.y = unpackHalf2x16_split_y(u); + * + * return v; + */ + ir_rvalue* + split_unpack_half_2x16(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = uint_rval; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_split_unpack_half_2x16_u"); + factory.emit(assign(u, uint_rval)); + + /* vec2 v; */ + ir_variable *v = factory.make_temp(glsl_type::vec2_type, + "tmp_split_unpack_half_2x16_v"); + + /* v.x = unpack_half_2x16_split_x(u); */ + factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u), + WRITEMASK_X)); + + /* v.y = unpack_half_2x16_split_y(u); */ + factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u), + WRITEMASK_Y)); + + return deref(v).val; + } +}; + +} // namespace anonymous + +/** + * \brief Lower the builtin packing functions. + * + * \param op_mask is a bitmask of `enum lower_packing_builtins_op`. + */ +bool +lower_packing_builtins(exec_list *instructions, int op_mask) +{ + lower_packing_builtins_visitor v(op_mask); + visit_list_elements(&v, instructions, true); + return v.get_progress(); +} diff --git a/mesalib/src/glsl/lower_ubo_reference.cpp b/mesalib/src/glsl/lower_ubo_reference.cpp index e8d2c4742..026197df7 100644 --- a/mesalib/src/glsl/lower_ubo_reference.cpp +++ b/mesalib/src/glsl/lower_ubo_reference.cpp @@ -61,10 +61,58 @@ public: bool progress; }; -static inline unsigned int -align(unsigned int a, unsigned int align) +/** + * Determine the name of the interface block field + * + * This is the name of the specific member as it would appear in the + * \c gl_uniform_buffer_variable::Name field in the shader's + * \c UniformBlocks array. + */ +static const char * +interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d) { - return (a + align - 1) / align * align; + ir_constant *previous_index = NULL; + + while (d != NULL) { + switch (d->ir_type) { + case ir_type_dereference_variable: { + ir_dereference_variable *v = (ir_dereference_variable *) d; + if (previous_index + && v->var->is_interface_instance() + && v->var->type->is_array()) + return ralloc_asprintf(mem_ctx, + "%s[%d]", + base_name, + previous_index->get_uint_component(0)); + else + return base_name; + + break; + } + + case ir_type_dereference_record: { + ir_dereference_record *r = (ir_dereference_record *) d; + + d = r->record->as_dereference(); + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *a = (ir_dereference_array *) d; + + d = a->array->as_dereference(); + previous_index = a->array_index->as_constant(); + break; + } + + default: + assert(!"Should not get here."); + break; + } + } + + assert(!"Should not get here."); + return NULL; } void @@ -78,13 +126,30 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) return; ir_variable *var = deref->variable_referenced(); - if (!var || var->uniform_block == -1) + if (!var || !var->is_in_uniform_block()) return; mem_ctx = ralloc_parent(*rvalue); - uniform_block = var->uniform_block; - struct gl_uniform_block *block = &shader->UniformBlocks[uniform_block]; - this->ubo_var = &block->Uniforms[var->location]; + + const char *const field_name = + interface_field_name(mem_ctx, (char *) var->interface_type->name, deref); + + this->uniform_block = -1; + for (unsigned i = 0; i < shader->NumUniformBlocks; i++) { + if (strcmp(field_name, shader->UniformBlocks[i].Name) == 0) { + this->uniform_block = i; + + struct gl_uniform_block *block = &shader->UniformBlocks[i]; + + this->ubo_var = var->is_interface_instance() + ? &block->Uniforms[0] : &block->Uniforms[var->location]; + + break; + } + } + + assert(this->uniform_block != (unsigned) -1); + ir_rvalue *offset = new(mem_ctx) ir_constant(0u); unsigned const_offset = 0; bool row_major = ubo_var->RowMajor; @@ -111,9 +176,21 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) * vector) is handled below in emit_ubo_loads. */ array_stride = 4; + } else if (deref_array->type->is_interface()) { + /* We're processing an array dereference of an interface instance + * array. The thing being dereferenced *must* be a variable + * dereference because intefaces cannot be embedded an other + * types. In terms of calculating the offsets for the lowering + * pass, we don't care about the array index. All elements of an + * interface instance array will have the same offsets relative to + * the base of the block that backs them. + */ + assert(deref_array->array->as_dereference_variable()); + deref = deref_array->array->as_dereference(); + break; } else { array_stride = deref_array->type->std140_size(row_major); - array_stride = align(array_stride, 16); + array_stride = glsl_align(array_stride, 16); } ir_constant *const_index = deref_array->array_index->as_constant(); @@ -138,7 +215,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) const glsl_type *type = struct_type->fields.structure[i].type; unsigned field_align = type->std140_base_alignment(row_major); max_field_align = MAX2(field_align, max_field_align); - intra_struct_offset = align(intra_struct_offset, field_align); + intra_struct_offset = glsl_align(intra_struct_offset, field_align); if (strcmp(struct_type->fields.structure[i].name, deref_record->field) == 0) @@ -146,7 +223,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) intra_struct_offset += type->std140_size(row_major); } - const_offset = align(const_offset, max_field_align); + const_offset = glsl_align(const_offset, max_field_align); const_offset += intra_struct_offset; deref = deref_record->record->as_dereference(); @@ -217,8 +294,8 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, field->name); field_offset = - align(field_offset, - field->type->std140_base_alignment(ubo_var->RowMajor)); + glsl_align(field_offset, + field->type->std140_base_alignment(ubo_var->RowMajor)); emit_ubo_loads(field_deref, base_offset, deref_offset + field_offset); @@ -229,7 +306,8 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, if (deref->type->is_array()) { unsigned array_stride = - align(deref->type->fields.array->std140_size(ubo_var->RowMajor), 16); + glsl_align(deref->type->fields.array->std140_size(ubo_var->RowMajor), + 16); for (unsigned i = 0; i < deref->type->length; i++) { ir_constant *element = new(mem_ctx) ir_constant(i); diff --git a/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp b/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp index 57771074a..040b0bf83 100644 --- a/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -364,12 +364,16 @@ public: return this->lower_temps; case ir_var_uniform: return this->lower_uniforms; - case ir_var_in: + case ir_var_function_in: case ir_var_const_in: - return (var->location == -1) ? this->lower_temps : this->lower_inputs; - case ir_var_out: - return (var->location == -1) ? this->lower_temps : this->lower_outputs; - case ir_var_inout: + return this->lower_temps; + case ir_var_shader_in: + return this->lower_inputs; + case ir_var_function_out: + return this->lower_temps; + case ir_var_shader_out: + return this->lower_outputs; + case ir_var_function_inout: return this->lower_temps; } diff --git a/mesalib/src/glsl/opt_constant_folding.cpp b/mesalib/src/glsl/opt_constant_folding.cpp index 7d94d481c..072fefe9a 100644 --- a/mesalib/src/glsl/opt_constant_folding.cpp +++ b/mesalib/src/glsl/opt_constant_folding.cpp @@ -127,7 +127,8 @@ ir_constant_folding_visitor::visit_enter(ir_call *ir) ir_rvalue *param_rval = (ir_rvalue *)iter.get(); ir_variable *sig_param = (ir_variable *)sig_iter.get(); - if (sig_param->mode == ir_var_in || sig_param->mode == ir_var_const_in) { + if (sig_param->mode == ir_var_function_in + || sig_param->mode == ir_var_const_in) { ir_rvalue *new_param = param_rval; handle_rvalue(&new_param); diff --git a/mesalib/src/glsl/opt_constant_propagation.cpp b/mesalib/src/glsl/opt_constant_propagation.cpp index a03811999..2f65937fe 100644 --- a/mesalib/src/glsl/opt_constant_propagation.cpp +++ b/mesalib/src/glsl/opt_constant_propagation.cpp @@ -285,7 +285,8 @@ ir_constant_propagation_visitor::visit_enter(ir_call *ir) foreach_iter(exec_list_iterator, iter, ir->actual_parameters) { ir_variable *sig_param = (ir_variable *)sig_param_iter.get(); ir_rvalue *param = (ir_rvalue *)iter.get(); - if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) { + if (sig_param->mode != ir_var_function_out + && sig_param->mode != ir_var_function_inout) { ir_rvalue *new_param = param; handle_rvalue(&new_param); if (new_param != param) diff --git a/mesalib/src/glsl/opt_constant_variable.cpp b/mesalib/src/glsl/opt_constant_variable.cpp index 1bbaf8e47..cbe6450c6 100644 --- a/mesalib/src/glsl/opt_constant_variable.cpp +++ b/mesalib/src/glsl/opt_constant_variable.cpp @@ -137,8 +137,8 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir) ir_rvalue *param_rval = (ir_rvalue *)iter.get(); ir_variable *param = (ir_variable *)sig_iter.get(); - if (param->mode == ir_var_out || - param->mode == ir_var_inout) { + if (param->mode == ir_var_function_out || + param->mode == ir_var_function_inout) { ir_variable *var = param_rval->variable_referenced(); struct assignment_entry *entry; diff --git a/mesalib/src/glsl/opt_copy_propagation.cpp b/mesalib/src/glsl/opt_copy_propagation.cpp index 2952ce594..7282b611e 100644 --- a/mesalib/src/glsl/opt_copy_propagation.cpp +++ b/mesalib/src/glsl/opt_copy_propagation.cpp @@ -189,7 +189,8 @@ ir_copy_propagation_visitor::visit_enter(ir_call *ir) foreach_iter(exec_list_iterator, iter, ir->actual_parameters) { ir_variable *sig_param = (ir_variable *)sig_param_iter.get(); ir_instruction *ir = (ir_instruction *)iter.get(); - if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) { + if (sig_param->mode != ir_var_function_out + && sig_param->mode != ir_var_function_inout) { ir->accept(this); } sig_param_iter.next(); diff --git a/mesalib/src/glsl/opt_copy_propagation_elements.cpp b/mesalib/src/glsl/opt_copy_propagation_elements.cpp index de9f4ef6f..6a19da40d 100644 --- a/mesalib/src/glsl/opt_copy_propagation_elements.cpp +++ b/mesalib/src/glsl/opt_copy_propagation_elements.cpp @@ -297,7 +297,8 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir) foreach_iter(exec_list_iterator, iter, ir->actual_parameters) { ir_variable *sig_param = (ir_variable *)sig_param_iter.get(); ir_instruction *ir = (ir_instruction *)iter.get(); - if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) { + if (sig_param->mode != ir_var_function_out + && sig_param->mode != ir_var_function_inout) { ir->accept(this); } sig_param_iter.next(); diff --git a/mesalib/src/glsl/opt_dead_code.cpp b/mesalib/src/glsl/opt_dead_code.cpp index 47247e20d..b65e5c2ce 100644 --- a/mesalib/src/glsl/opt_dead_code.cpp +++ b/mesalib/src/glsl/opt_dead_code.cpp @@ -77,10 +77,11 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned) if (entry->assign) { /* Remove a single dead assignment to the variable we found. - * Don't do so if it's a shader output, though. + * Don't do so if it's a shader or function output, though. */ - if (entry->var->mode != ir_var_out && - entry->var->mode != ir_var_inout) { + if (entry->var->mode != ir_var_function_out && + entry->var->mode != ir_var_function_inout && + entry->var->mode != ir_var_shader_out) { entry->assign->remove(); progress = true; @@ -97,15 +98,10 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned) /* uniform initializers are precious, and could get used by another * stage. Also, once uniform locations have been assigned, the * declaration cannot be deleted. - * - * Also, GL_ARB_uniform_buffer_object says that std140 - * uniforms will not be eliminated. Since we always do - * std140, just don't eliminate uniforms in UBOs. */ if (entry->var->mode == ir_var_uniform && (uniform_locations_assigned || - entry->var->constant_value || - entry->var->uniform_block != -1)) + entry->var->constant_value)) continue; entry->var->remove(); diff --git a/mesalib/src/glsl/opt_function_inlining.cpp b/mesalib/src/glsl/opt_function_inlining.cpp index f9f5bd442..0733d5180 100644 --- a/mesalib/src/glsl/opt_function_inlining.cpp +++ b/mesalib/src/glsl/opt_function_inlining.cpp @@ -144,9 +144,9 @@ ir_call::generate_inline(ir_instruction *next_ir) } /* Move the actual param into our param variable if it's an 'in' type. */ - if (parameters[i] && (sig_param->mode == ir_var_in || + if (parameters[i] && (sig_param->mode == ir_var_function_in || sig_param->mode == ir_var_const_in || - sig_param->mode == ir_var_inout)) { + sig_param->mode == ir_var_function_inout)) { ir_assignment *assign; assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]), @@ -202,8 +202,8 @@ ir_call::generate_inline(ir_instruction *next_ir) const ir_variable *const sig_param = (ir_variable *) sig_param_iter.get(); /* Move our param variable into the actual param if it's an 'out' type. */ - if (parameters[i] && (sig_param->mode == ir_var_out || - sig_param->mode == ir_var_inout)) { + if (parameters[i] && (sig_param->mode == ir_var_function_out || + sig_param->mode == ir_var_function_inout)) { ir_assignment *assign; assign = new(ctx) ir_assignment(param->clone(ctx, NULL)->as_rvalue(), diff --git a/mesalib/src/glsl/opt_structure_splitting.cpp b/mesalib/src/glsl/opt_structure_splitting.cpp index 9b3f048e4..806c079e5 100644 --- a/mesalib/src/glsl/opt_structure_splitting.cpp +++ b/mesalib/src/glsl/opt_structure_splitting.cpp @@ -104,7 +104,8 @@ ir_structure_reference_visitor::get_variable_entry(ir_variable *var) { assert(var); - if (!var->type->is_record() || var->mode == ir_var_uniform) + if (!var->type->is_record() || var->mode == ir_var_uniform + || var->mode == ir_var_shader_in || var->mode == ir_var_shader_out) return NULL; foreach_iter(exec_list_iterator, iter, this->variable_list) { diff --git a/mesalib/src/glsl/opt_tree_grafting.cpp b/mesalib/src/glsl/opt_tree_grafting.cpp index 25b18ea94..113abb7b0 100644 --- a/mesalib/src/glsl/opt_tree_grafting.cpp +++ b/mesalib/src/glsl/opt_tree_grafting.cpp @@ -211,7 +211,8 @@ ir_tree_grafting_visitor::visit_enter(ir_call *ir) ir_rvalue *ir = (ir_rvalue *)iter.get(); ir_rvalue *new_ir = ir; - if (sig_param->mode != ir_var_in && sig_param->mode != ir_var_const_in) { + if (sig_param->mode != ir_var_function_in + && sig_param->mode != ir_var_const_in) { if (check_graft(ir, sig_param) == visit_stop) return visit_stop; continue; @@ -350,8 +351,9 @@ tree_grafting_basic_block(ir_instruction *bb_first, if (!lhs_var) continue; - if (lhs_var->mode == ir_var_out || - lhs_var->mode == ir_var_inout) + if (lhs_var->mode == ir_var_function_out || + lhs_var->mode == ir_var_function_inout || + lhs_var->mode == ir_var_shader_out) continue; ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var); diff --git a/mesalib/src/glsl/s_expression.cpp b/mesalib/src/glsl/s_expression.cpp index 57de9d334..1bdf6bca6 100644 --- a/mesalib/src/glsl/s_expression.cpp +++ b/mesalib/src/glsl/s_expression.cpp @@ -66,18 +66,18 @@ read_atom(void *ctx, const char *&src, char *&symbol_buffer) return NULL; // no atom // Check for the special symbol '+INF', which means +Infinity. Note: C99 - // requires strtod to parse '+INF' as +Infinity, but we still support some + // requires strtof to parse '+INF' as +Infinity, but we still support some // non-C99-compliant compilers (e.g. MSVC). if (n == 4 && strncmp(src, "+INF", 4) == 0) { expr = new(ctx) s_float(std::numeric_limits::infinity()); } else { // Check if the atom is a number. char *float_end = NULL; - double f = glsl_strtod(src, &float_end); + float f = glsl_strtof(src, &float_end); if (float_end != src) { char *int_end = NULL; int i = strtol(src, &int_end, 10); - // If strtod matched more characters, it must have a decimal part + // If strtof matched more characters, it must have a decimal part if (float_end > int_end) expr = new(ctx) s_float(f); else diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp index 33d3804c6..0fb4f5b16 100644 --- a/mesalib/src/glsl/standalone_scaffolding.cpp +++ b/mesalib/src/glsl/standalone_scaffolding.cpp @@ -33,6 +33,24 @@ #include #include "ralloc.h" +void +_mesa_warning(struct gl_context *ctx, const char *fmt, ...) +{ + va_list vargs; + (void) ctx; + + va_start(vargs, fmt); + + /* This output is not thread-safe, but that's good enough for the + * standalone compiler. + */ + fprintf(stderr, "Mesa warning: "); + vfprintf(stderr, fmt, vargs); + fprintf(stderr, "\n"); + + va_end(vargs); +} + void _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, struct gl_shader *sh) @@ -81,6 +99,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) ctx->Extensions.EXT_texture3D = true; ctx->Extensions.OES_EGL_image_external = true; ctx->Extensions.ARB_shader_bit_encoding = true; + ctx->Extensions.ARB_shading_language_packing = true; ctx->Extensions.OES_standard_derivatives = true; ctx->Extensions.ARB_texture_cube_map_array = true; diff --git a/mesalib/src/glsl/standalone_scaffolding.h b/mesalib/src/glsl/standalone_scaffolding.h index 41ce35bef..096b2f114 100644 --- a/mesalib/src/glsl/standalone_scaffolding.h +++ b/mesalib/src/glsl/standalone_scaffolding.h @@ -33,6 +33,9 @@ #include "main/mtypes.h" +extern "C" void +_mesa_warning(struct gl_context *ctx, const char *fmtString, ... ); + extern "C" void _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, struct gl_shader *sh); diff --git a/mesalib/src/glsl/strtod.c b/mesalib/src/glsl/strtod.c index 47c1f0ed6..5d4346b5a 100644 --- a/mesalib/src/glsl/strtod.c +++ b/mesalib/src/glsl/strtod.c @@ -55,3 +55,25 @@ glsl_strtod(const char *s, char **end) return strtod(s, end); #endif } + + +/** + * Wrapper around strtof which uses the "C" locale so the decimal + * point is always '.' + */ +float +glsl_strtof(const char *s, char **end) +{ +#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \ + !defined(__HAIKU__) && !defined(__UCLIBC__) + static locale_t loc = NULL; + if (!loc) { + loc = newlocale(LC_CTYPE_MASK, "C", NULL); + } + return strtof_l(s, end, loc); +#elif _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE + return strtof(s, end); +#else + return (float) strtod(s, end); +#endif +} diff --git a/mesalib/src/glsl/strtod.h b/mesalib/src/glsl/strtod.h index 0cf6409d4..ad847dbb0 100644 --- a/mesalib/src/glsl/strtod.h +++ b/mesalib/src/glsl/strtod.h @@ -34,6 +34,9 @@ extern "C" { extern double glsl_strtod(const char *s, char **end); +extern float +glsl_strtof(const char *s, char **end); + #ifdef __cplusplus } diff --git a/mesalib/src/mesa/Android.libmesa_glsl_utils.mk b/mesalib/src/mesa/Android.libmesa_glsl_utils.mk index 9c5f3493c..47f2e151b 100644 --- a/mesalib/src/mesa/Android.libmesa_glsl_utils.mk +++ b/mesalib/src/mesa/Android.libmesa_glsl_utils.mk @@ -35,10 +35,13 @@ include $(CLEAR_VARS) LOCAL_MODULE := libmesa_glsl_utils -LOCAL_C_INCLUDES := $(MESA_TOP)/src/glsl +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/glsl \ + $(MESA_TOP)/src/mapi LOCAL_SRC_FILES := \ main/hash_table.c \ + main/imports.c \ program/prog_hash_table.c \ program/symbol_table.c @@ -54,10 +57,13 @@ include $(CLEAR_VARS) LOCAL_MODULE := libmesa_glsl_utils LOCAL_IS_HOST_MODULE := true -LOCAL_C_INCLUDES := $(MESA_TOP)/src/glsl +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/glsl \ + $(MESA_TOP)/src/mapi LOCAL_SRC_FILES := \ main/hash_table.c \ + main/imports.c \ program/prog_hash_table.c \ program/symbol_table.c diff --git a/mesalib/src/mesa/main/extensions.c b/mesalib/src/mesa/main/extensions.c index 5d01ac8ea..7ae07fb5a 100644 --- a/mesalib/src/mesa/main/extensions.c +++ b/mesalib/src/mesa/main/extensions.c @@ -125,6 +125,7 @@ static const struct extension extension_table[] = { { "GL_ARB_shader_stencil_export", o(ARB_shader_stencil_export), GL, 2009 }, { "GL_ARB_shader_texture_lod", o(ARB_shader_texture_lod), GL, 2009 }, { "GL_ARB_shading_language_100", o(ARB_shading_language_100), GLL, 2003 }, + { "GL_ARB_shading_language_packing", o(ARB_shading_language_packing), GL, 2011 }, { "GL_ARB_shadow", o(ARB_shadow), GLL, 2001 }, { "GL_ARB_sync", o(ARB_sync), GL, 2003 }, { "GL_ARB_texture_border_clamp", o(ARB_texture_border_clamp), GLL, 2000 }, diff --git a/mesalib/src/mesa/main/getstring.c b/mesalib/src/mesa/main/getstring.c index 1f23cc0a4..aa3a528fd 100644 --- a/mesalib/src/mesa/main/getstring.c +++ b/mesalib/src/mesa/main/getstring.c @@ -74,7 +74,9 @@ shading_language_version(struct gl_context *ctx) break; case API_OPENGLES2: - return (const GLubyte *) "OpenGL ES GLSL ES 1.0.16"; + return (ctx->Version < 30) + ? (const GLubyte *) "OpenGL ES GLSL ES 1.0.16" + : (const GLubyte *) "OpenGL ES GLSL ES 3.0"; case API_OPENGLES: /* fall-through */ diff --git a/mesalib/src/mesa/main/imports.c b/mesalib/src/mesa/main/imports.c index 76f835e0e..e6f754254 100644 --- a/mesalib/src/mesa/main/imports.c +++ b/mesalib/src/mesa/main/imports.c @@ -314,10 +314,43 @@ _mesa_bitcount_64(uint64_t n) #endif +/* Using C99 rounding functions for roundToEven() implementation is + * difficult, because round(), rint, and nearbyint() are affected by + * fesetenv(), which the application may have done for its own + * purposes. Mesa's IROUND macro is close to what we want, but it + * rounds away from 0 on n + 0.5. + */ +int +_mesa_round_to_even(float val) +{ + int rounded = IROUND(val); + + if (val - floor(val) == 0.5) { + if (rounded % 2 != 0) + rounded += val > 0 ? -1 : 1; + } + + return rounded; +} + + /** * Convert a 4-byte float to a 2-byte half float. - * Based on code from: - * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html + * + * Not all float32 values can be represented exactly as a float16 value. We + * round such intermediate float32 values to the nearest float16. When the + * float32 lies exactly between to float16 values, we round to the one with + * an even mantissa. + * + * This rounding behavior has several benefits: + * - It has no sign bias. + * + * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's + * GPU ISA. + * + * - By reproducing the behavior of the GPU (at least on Intel hardware), + * compile-time evaluation of constant packHalf2x16 GLSL expressions will + * result in the same value as if the expression were executed on the GPU. */ GLhalfARB _mesa_float_to_half(float val) @@ -356,32 +389,13 @@ _mesa_float_to_half(float val) else { /* regular number */ const int new_exp = flt_e - 127; - if (new_exp < -24) { - /* this maps to 0 */ - /* m = 0; - already set */ - e = 0; - } - else if (new_exp < -14) { - /* this maps to a denorm */ - unsigned int exp_val = (unsigned int) (-14 - new_exp); /* 2^-exp_val*/ + if (new_exp < -14) { + /* The float32 lies in the range (0.0, min_normal16) and is rounded + * to a nearby float16 value. The result will be either zero, subnormal, + * or normal. + */ e = 0; - switch (exp_val) { - case 0: - _mesa_warning(NULL, - "float_to_half: logical error in denorm creation!\n"); - /* m = 0; - already set */ - break; - case 1: m = 512 + (flt_m >> 14); break; - case 2: m = 256 + (flt_m >> 15); break; - case 3: m = 128 + (flt_m >> 16); break; - case 4: m = 64 + (flt_m >> 17); break; - case 5: m = 32 + (flt_m >> 18); break; - case 6: m = 16 + (flt_m >> 19); break; - case 7: m = 8 + (flt_m >> 20); break; - case 8: m = 4 + (flt_m >> 21); break; - case 9: m = 2 + (flt_m >> 22); break; - case 10: m = 1; break; - } + m = _mesa_round_to_even((1 << 24) * fabsf(fi.f)); } else if (new_exp > 15) { /* map this value to infinity */ @@ -389,12 +403,26 @@ _mesa_float_to_half(float val) e = 31; } else { - /* regular */ + /* The float32 lies in the range + * [min_normal16, max_normal16 + max_step16) + * and is rounded to a nearby float16 value. The result will be + * either normal or infinite. + */ e = new_exp + 15; - m = flt_m >> 13; + m = _mesa_round_to_even(flt_m / (float) (1 << 13)); } } + assert(0 <= m && m <= 1024); + if (m == 1024) { + /* The float32 was rounded upwards into the range of the next exponent, + * so bump the exponent. This correctly handles the case where f32 + * should be rounded up to float16 infinity. + */ + ++e; + m = 0; + } + result = (s << 15) | (e << 10) | m; return result; } diff --git a/mesalib/src/mesa/main/imports.h b/mesalib/src/mesa/main/imports.h index 8446ea2a3..4b783818b 100644 --- a/mesalib/src/mesa/main/imports.h +++ b/mesalib/src/mesa/main/imports.h @@ -548,6 +548,9 @@ _mesa_fls(unsigned int n) #endif } +extern int +_mesa_round_to_even(float val); + extern GLhalfARB _mesa_float_to_half(float f); diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h index d37e6c4c0..3369623f7 100644 --- a/mesalib/src/mesa/main/mtypes.h +++ b/mesalib/src/mesa/main/mtypes.h @@ -2273,11 +2273,30 @@ typedef enum struct gl_uniform_buffer_variable { char *Name; + + /** + * Name of the uniform as seen by glGetUniformIndices. + * + * glGetUniformIndices requires that the block instance index \b not be + * present in the name of queried uniforms. + * + * \note + * \c gl_uniform_buffer_variable::IndexName and + * \c gl_uniform_buffer_variable::Name may point to identical storage. + */ + char *IndexName; + const struct glsl_type *Type; unsigned int Offset; GLboolean RowMajor; }; +enum gl_uniform_block_packing { + ubo_packing_std140, + ubo_packing_shared, + ubo_packing_packed +}; + struct gl_uniform_block { /** Declared name of the uniform block */ @@ -2299,6 +2318,14 @@ struct gl_uniform_block * (GL_UNIFORM_BLOCK_DATA_SIZE). */ GLuint UniformBufferSize; + + /** + * Layout specified in the shader + * + * This isn't accessible through the API, but it is used while + * cross-validating uniform blocks. + */ + enum gl_uniform_block_packing _Packing; }; /** @@ -3042,6 +3069,7 @@ struct gl_extensions GLboolean ARB_shader_stencil_export; GLboolean ARB_shader_texture_lod; GLboolean ARB_shading_language_100; + GLboolean ARB_shading_language_packing; GLboolean ARB_shadow; GLboolean ARB_sync; GLboolean ARB_texture_border_clamp; diff --git a/mesalib/src/mesa/main/remap.c b/mesalib/src/mesa/main/remap.c index c89fba453..a09870561 100644 --- a/mesalib/src/mesa/main/remap.c +++ b/mesalib/src/mesa/main/remap.c @@ -208,8 +208,10 @@ _mesa_do_init_remap_table(const char *pool, offset = _mesa_map_function_spec(spec); /* store the dispatch offset in the remap table */ driDispatchRemapTable[i] = offset; - if (offset < 0) - _mesa_warning(NULL, "failed to remap index %d", i); + if (offset < 0) { + const char *name = spec + strlen(spec) + 1; + _mesa_warning(NULL, "failed to remap %s", name); + } } } diff --git a/mesalib/src/mesa/main/shader_query.cpp b/mesalib/src/mesa/main/shader_query.cpp index 27b1b8f56..3014a9778 100644 --- a/mesalib/src/mesa/main/shader_query.cpp +++ b/mesalib/src/mesa/main/shader_query.cpp @@ -106,7 +106,7 @@ _mesa_GetActiveAttrib(GLhandleARB program, GLuint desired_index, const ir_variable *const var = ((ir_instruction *) node)->as_variable(); if (var == NULL - || var->mode != ir_var_in + || var->mode != ir_var_shader_in || var->location == -1) continue; @@ -169,7 +169,7 @@ _mesa_GetAttribLocation(GLhandleARB program, const GLcharARB * name) * attribute, or if an error occurs, -1 will be returned." */ if (var == NULL - || var->mode != ir_var_in + || var->mode != ir_var_shader_in || var->location == -1 || var->location < VERT_ATTRIB_GENERIC0) continue; @@ -197,7 +197,7 @@ _mesa_count_active_attribs(struct gl_shader_program *shProg) const ir_variable *const var = ((ir_instruction *) node)->as_variable(); if (var == NULL - || var->mode != ir_var_in + || var->mode != ir_var_shader_in || var->location == -1) continue; @@ -223,7 +223,7 @@ _mesa_longest_attribute_name_length(struct gl_shader_program *shProg) const ir_variable *const var = ((ir_instruction *) node)->as_variable(); if (var == NULL - || var->mode != ir_var_in + || var->mode != ir_var_shader_in || var->location == -1) continue; @@ -333,7 +333,7 @@ _mesa_GetFragDataIndex(GLuint program, const GLchar *name) * attribute, or if an error occurs, -1 will be returned." */ if (var == NULL - || var->mode != ir_var_out + || var->mode != ir_var_shader_out || var->location == -1 || var->location < FRAG_RESULT_DATA0) continue; @@ -389,7 +389,7 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name) * attribute, or if an error occurs, -1 will be returned." */ if (var == NULL - || var->mode != ir_var_out + || var->mode != ir_var_shader_out || var->location == -1 || var->location < FRAG_RESULT_DATA0) continue; diff --git a/mesalib/src/mesa/main/texparam.c b/mesalib/src/mesa/main/texparam.c index 8d0ae16fb..52ede13c0 100644 --- a/mesalib/src/mesa/main/texparam.c +++ b/mesalib/src/mesa/main/texparam.c @@ -1388,10 +1388,10 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params ) if (ctx->API != API_OPENGLES || !ctx->Extensions.OES_draw_texture) goto invalid_pname; - params[0] = obj->CropRect[0]; - params[1] = obj->CropRect[1]; - params[2] = obj->CropRect[2]; - params[3] = obj->CropRect[3]; + params[0] = (GLfloat) obj->CropRect[0]; + params[1] = (GLfloat) obj->CropRect[1]; + params[2] = (GLfloat) obj->CropRect[2]; + params[3] = (GLfloat) obj->CropRect[3]; break; case GL_TEXTURE_SWIZZLE_R_EXT: diff --git a/mesalib/src/mesa/main/uniforms.c b/mesalib/src/mesa/main/uniforms.c index 62c85b3c0..d902407a0 100644 --- a/mesalib/src/mesa/main/uniforms.c +++ b/mesalib/src/mesa/main/uniforms.c @@ -695,7 +695,7 @@ _mesa_GetActiveUniformBlockiv(GLuint program, for (i = 0; i < block->NumUniforms; i++) { unsigned offset; params[i] = _mesa_get_uniform_location(ctx, shProg, - block->Uniforms[i].Name, + block->Uniforms[i].IndexName, &offset); } return; diff --git a/mesalib/src/mesa/main/version.c b/mesalib/src/mesa/main/version.c index 4373d7b91..e944a5518 100644 --- a/mesalib/src/mesa/main/version.c +++ b/mesalib/src/mesa/main/version.c @@ -323,7 +323,30 @@ compute_version_es2(struct gl_context *ctx) ctx->Extensions.ARB_fragment_shader && ctx->Extensions.ARB_texture_non_power_of_two && ctx->Extensions.EXT_blend_equation_separate); - if (ver_2_0) { + /* FINISHME: This list isn't quite right. */ + const GLboolean ver_3_0 = (ctx->Extensions.ARB_half_float_vertex && + ctx->Extensions.ARB_internalformat_query && + ctx->Extensions.ARB_map_buffer_range && + ctx->Extensions.ARB_shader_texture_lod && + ctx->Extensions.ARB_texture_float && + ctx->Extensions.ARB_texture_rg && + ctx->Extensions.ARB_texture_compression_rgtc && + ctx->Extensions.EXT_draw_buffers2 && + /* ctx->Extensions.ARB_framebuffer_object && */ + ctx->Extensions.EXT_framebuffer_sRGB && + ctx->Extensions.EXT_packed_float && + ctx->Extensions.EXT_texture_array && + ctx->Extensions.EXT_texture_shared_exponent && + ctx->Extensions.EXT_transform_feedback && + ctx->Extensions.NV_conditional_render && + ctx->Extensions.ARB_draw_instanced && + ctx->Extensions.ARB_uniform_buffer_object && + ctx->Extensions.EXT_texture_snorm && + ctx->Extensions.NV_primitive_restart && + ctx->Extensions.OES_depth_texture_cube_map); + if (ver_3_0) { + ctx->Version = 30; + } else if (ver_2_0) { ctx->Version = 20; } else { _mesa_problem(ctx, "Incomplete OpenGL ES 2.0 support."); diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp index 0f7439b3b..cd89171da 100644 --- a/mesalib/src/mesa/program/ir_to_mesa.cpp +++ b/mesalib/src/mesa/program/ir_to_mesa.cpp @@ -623,10 +623,14 @@ type_size(const struct glsl_type *type) * at link time. */ return 1; - default: - assert(0); - return 0; + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + assert(!"Invalid type in type_size"); + break; } + + return 0; } /** @@ -1427,7 +1431,21 @@ ir_to_mesa_visitor::visit(ir_expression *ir) case ir_unop_fract: emit(ir, OPCODE_FRC, result_dst, op[0]); break; - + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: + case ir_unop_pack_half_2x16: + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_unorm_4x8: + case ir_unop_unpack_half_2x16: + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: + case ir_binop_pack_half_2x16_split: + assert(!"not supported"); + break; case ir_binop_min: emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); break; @@ -1529,21 +1547,18 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir) var->location); this->variables.push_tail(entry); break; - case ir_var_in: - case ir_var_inout: + case ir_var_shader_in: /* The linker assigns locations for varyings and attributes, * including deprecated builtins (like gl_Color), * user-assigned generic attributes (glBindVertexLocation), * and user-defined varyings. - * - * FINISHME: We would hit this path for function arguments. Fix! */ assert(var->location != -1); entry = new(mem_ctx) variable_storage(var, PROGRAM_INPUT, var->location); break; - case ir_var_out: + case ir_var_shader_out: assert(var->location != -1); entry = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT, @@ -2378,7 +2393,8 @@ public: } private: - virtual void visit_field(const glsl_type *type, const char *name); + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major); struct gl_shader_program *shader_program; struct gl_program_parameter_list *params; @@ -2386,10 +2402,13 @@ private: }; void -add_uniform_to_shader::visit_field(const glsl_type *type, const char *name) +add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, + bool row_major) { unsigned int size; + (void) row_major; + if (type->is_vector() || type->is_scalar()) { size = type->vector_elements; } else { @@ -2459,7 +2478,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program ir_variable *var = ((ir_instruction *) node)->as_variable(); if ((var == NULL) || (var->mode != ir_var_uniform) - || var->uniform_block != -1 || (strncmp(var->name, "gl_", 3) == 0)) + || var->is_in_uniform_block() || (strncmp(var->name, "gl_", 3) == 0)) continue; add.process(var); @@ -2522,7 +2541,11 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, format = uniform_native; columns = 1; break; - default: + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_VOID: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: assert(!"Should not get here."); break; } diff --git a/mesalib/src/mesa/program/program.c b/mesalib/src/mesa/program/program.c index 993803dd5..fb0aeb7ed 100644 --- a/mesalib/src/mesa/program/program.c +++ b/mesalib/src/mesa/program/program.c @@ -696,7 +696,7 @@ _mesa_combine_programs(struct gl_context *ctx, const GLuint newLength = lenA + lenB; GLboolean usedTemps[MAX_PROGRAM_TEMPS]; GLuint firstTemp = 0; - GLbitfield inputsB; + GLbitfield64 inputsB; GLuint i; ASSERT(progA->Target == progB->Target); @@ -724,7 +724,7 @@ _mesa_combine_programs(struct gl_context *ctx, if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) { const struct gl_fragment_program *fprogA, *fprogB; struct gl_fragment_program *newFprog; - GLbitfield progB_inputsRead = progB->InputsRead; + GLbitfield64 progB_inputsRead = progB->InputsRead; GLint progB_colorFile, progB_colorIndex; fprogA = gl_fragment_program_const(progA); @@ -840,8 +840,8 @@ _mesa_find_used_registers(const struct gl_program *prog, for (j = 0; j < n; j++) { if (inst->SrcReg[j].File == file) { - ASSERT(inst->SrcReg[j].Index < usedSize); - if(inst->SrcReg[j].Index < usedSize) + ASSERT(inst->SrcReg[j].Index < (GLint) usedSize); + if (inst->SrcReg[j].Index < (GLint) usedSize) used[inst->SrcReg[j].Index] = GL_TRUE; } } @@ -908,23 +908,23 @@ _mesa_valid_register_index(const struct gl_context *ctx, return GL_TRUE; /* XXX or maybe false? */ case PROGRAM_TEMPORARY: - return index >= 0 && index < c->MaxTemps; + return index >= 0 && index < (GLint) c->MaxTemps; case PROGRAM_ENV_PARAM: - return index >= 0 && index < c->MaxEnvParams; + return index >= 0 && index < (GLint) c->MaxEnvParams; case PROGRAM_LOCAL_PARAM: - return index >= 0 && index < c->MaxLocalParams; + return index >= 0 && index < (GLint) c->MaxLocalParams; case PROGRAM_UNIFORM: case PROGRAM_STATE_VAR: /* aka constant buffer */ - return index >= 0 && index < c->MaxUniformComponents / 4; + return index >= 0 && index < (GLint) c->MaxUniformComponents / 4; case PROGRAM_CONSTANT: /* constant buffer w/ possible relative negative addressing */ return (index > (int) c->MaxUniformComponents / -4 && - index < c->MaxUniformComponents / 4); + index < (int) c->MaxUniformComponents / 4); case PROGRAM_INPUT: if (index < 0) @@ -932,11 +932,11 @@ _mesa_valid_register_index(const struct gl_context *ctx, switch (shaderType) { case MESA_SHADER_VERTEX: - return index < VERT_ATTRIB_GENERIC0 + c->MaxAttribs; + return index < VERT_ATTRIB_GENERIC0 + (GLint) c->MaxAttribs; case MESA_SHADER_FRAGMENT: - return index < FRAG_ATTRIB_VAR0 + ctx->Const.MaxVarying; + return index < FRAG_ATTRIB_VAR0 + (GLint) ctx->Const.MaxVarying; case MESA_SHADER_GEOMETRY: - return index < GEOM_ATTRIB_VAR0 + ctx->Const.MaxVarying; + return index < GEOM_ATTRIB_VAR0 + (GLint) ctx->Const.MaxVarying; default: return GL_FALSE; } @@ -947,17 +947,17 @@ _mesa_valid_register_index(const struct gl_context *ctx, switch (shaderType) { case MESA_SHADER_VERTEX: - return index < VERT_RESULT_VAR0 + ctx->Const.MaxVarying; + return index < VERT_RESULT_VAR0 + (GLint) ctx->Const.MaxVarying; case MESA_SHADER_FRAGMENT: - return index < FRAG_RESULT_DATA0 + ctx->Const.MaxDrawBuffers; + return index < FRAG_RESULT_DATA0 + (GLint) ctx->Const.MaxDrawBuffers; case MESA_SHADER_GEOMETRY: - return index < GEOM_RESULT_VAR0 + ctx->Const.MaxVarying; + return index < GEOM_RESULT_VAR0 + (GLint) ctx->Const.MaxVarying; default: return GL_FALSE; } case PROGRAM_ADDRESS: - return index >= 0 && index < c->MaxAddressRegs; + return index >= 0 && index < (GLint) c->MaxAddressRegs; default: _mesa_problem(ctx, diff --git a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c index 843dc5be3..63dbdb29b 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c +++ b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c @@ -350,9 +350,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized, tBot = (GLfloat) height; } - u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), vbuf_offset, vbuf, - (void**)&vertices); - if (!vbuf) { + if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), + vbuf_offset, vbuf, (void **) &vertices) != PIPE_OK) { return; } diff --git a/mesalib/src/mesa/state_tracker/st_cb_clear.c b/mesalib/src/mesa/state_tracker/st_cb_clear.c index d01236e28..a5aa8f496 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_clear.c +++ b/mesalib/src/mesa/state_tracker/st_cb_clear.c @@ -141,9 +141,8 @@ draw_quad(struct st_context *st, GLuint i, offset; float (*vertices)[2][4]; /**< vertex pos + color */ - u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), &offset, &vbuf, - (void**)&vertices); - if (!vbuf) { + if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), + &offset, &vbuf, (void **) &vertices) != PIPE_OK) { return; } diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c index ff8a9dc43..c944b81f6 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c @@ -568,9 +568,8 @@ draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z, struct pipe_resource *buf = NULL; unsigned offset; - u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset, &buf, - (void**)&verts); - if (!buf) { + if (u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset, + &buf, (void **) &verts) != PIPE_OK) { return; } @@ -795,7 +794,7 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, y1 = y + height * ctx->Pixel.ZoomY; /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ - z = z * 2.0 - 1.0; + z = z * 2.0f - 1.0f; draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex, normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width, @@ -1063,7 +1062,7 @@ static void clamp_size(struct pipe_context *pipe, GLsizei *width, GLsizei *height, struct gl_pixelstore_attrib *unpack) { - const unsigned maxSize = + const int maxSize = 1 << (pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c index 269068da2..5ca097004 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c +++ b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c @@ -148,10 +148,9 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, GLfloat *vbuf = NULL; GLuint attr; - u_upload_alloc(st->uploader, 0, - numAttribs * 4 * 4 * sizeof(GLfloat), - &offset, &vbuffer, (void**)&vbuf); - if (!vbuffer) { + if (u_upload_alloc(st->uploader, 0, + numAttribs * 4 * 4 * sizeof(GLfloat), + &offset, &vbuffer, (void **) &vbuf) != PIPE_OK) { return; } diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.c b/mesalib/src/mesa/state_tracker/st_cb_texture.c index 7f07b741e..3cea2df07 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_texture.c +++ b/mesalib/src/mesa/state_tracker/st_cb_texture.c @@ -1555,6 +1555,7 @@ void st_init_texture_functions(struct dd_function_table *functions) { functions->ChooseTextureFormat = st_ChooseTextureFormat; + functions->QuerySamplesForFormat = st_QuerySamplesForFormat; functions->TexImage = st_TexImage; functions->TexSubImage = _mesa_store_texsubimage; functions->CompressedTexSubImage = _mesa_store_compressed_texsubimage; diff --git a/mesalib/src/mesa/state_tracker/st_draw.c b/mesalib/src/mesa/state_tracker/st_draw.c index de539ca5a..de62264a1 100644 --- a/mesalib/src/mesa/state_tracker/st_draw.c +++ b/mesalib/src/mesa/state_tracker/st_draw.c @@ -84,7 +84,12 @@ all_varyings_in_vbos(const struct gl_client_array *arrays[]) } -static void +/** + * Basically, translate Mesa's index buffer information into + * a pipe_index_buffer object. + * \return TRUE or FALSE for success/failure + */ +static boolean setup_index_buffer(struct st_context *st, const struct _mesa_index_buffer *ib, struct pipe_index_buffer *ibuffer) @@ -100,8 +105,12 @@ setup_index_buffer(struct st_context *st, ibuffer->offset = pointer_to_offset(ib->ptr); } else if (st->indexbuf_uploader) { - u_upload_data(st->indexbuf_uploader, 0, ib->count * ibuffer->index_size, - ib->ptr, &ibuffer->offset, &ibuffer->buffer); + if (u_upload_data(st->indexbuf_uploader, 0, + ib->count * ibuffer->index_size, ib->ptr, + &ibuffer->offset, &ibuffer->buffer) != PIPE_OK) { + /* out of memory */ + return FALSE; + } u_upload_unmap(st->indexbuf_uploader); } else { @@ -110,6 +119,7 @@ setup_index_buffer(struct st_context *st, } cso_set_index_buffer(st->cso_context, ibuffer); + return TRUE; } @@ -220,7 +230,10 @@ st_draw_vbo(struct gl_context *ctx, vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims); - setup_index_buffer(st, ib, &ibuffer); + if (!setup_index_buffer(st, ib, &ibuffer)) { + /* out of memory */ + return; + } info.indexed = TRUE; if (min_index != ~0 && max_index != ~0) { diff --git a/mesalib/src/mesa/state_tracker/st_extensions.c b/mesalib/src/mesa/state_tracker/st_extensions.c index 18d89815d..af54cf7c8 100644 --- a/mesalib/src/mesa/state_tracker/st_extensions.c +++ b/mesalib/src/mesa/state_tracker/st_extensions.c @@ -516,6 +516,7 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.ARB_fragment_shader = GL_TRUE; ctx->Extensions.ARB_half_float_pixel = GL_TRUE; ctx->Extensions.ARB_half_float_vertex = GL_TRUE; + ctx->Extensions.ARB_internalformat_query = GL_TRUE; ctx->Extensions.ARB_map_buffer_range = GL_TRUE; ctx->Extensions.ARB_shader_objects = GL_TRUE; ctx->Extensions.ARB_shading_language_100 = GL_TRUE; @@ -594,9 +595,10 @@ void st_init_extensions(struct st_context *st) ctx->Const.NativeIntegers = GL_TRUE; ctx->Const.MaxClipPlanes = 8; - /* Extensions that only depend on GLSL 1.3. */ + /* Extensions that either depend on GLSL 1.30 or are a subset thereof. */ ctx->Extensions.ARB_conservative_depth = GL_TRUE; ctx->Extensions.ARB_shader_bit_encoding = GL_TRUE; + ctx->Extensions.OES_depth_texture_cube_map = GL_TRUE; } else { /* Optional integer support for GLSL 1.2. */ if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX, diff --git a/mesalib/src/mesa/state_tracker/st_format.c b/mesalib/src/mesa/state_tracker/st_format.c index af81f732d..7ef063953 100644 --- a/mesalib/src/mesa/state_tracker/st_format.c +++ b/mesalib/src/mesa/state_tracker/st_format.c @@ -1642,6 +1642,40 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target, } +/** + * Called via ctx->Driver.ChooseTextureFormat(). + */ +size_t +st_QuerySamplesForFormat(struct gl_context *ctx, GLenum internalFormat, + int samples[16]) +{ + struct pipe_screen *screen = st_context(ctx)->pipe->screen; + enum pipe_format format; + unsigned i, bind, num_sample_counts = 0; + + if (_mesa_is_depth_or_stencil_format(internalFormat)) + bind = PIPE_BIND_DEPTH_STENCIL; + else + bind = PIPE_BIND_RENDER_TARGET; + + /* Set sample counts in descending order. */ + for (i = 16; i > 1; i--) { + format = st_choose_format(screen, internalFormat, GL_NONE, GL_NONE, + PIPE_TEXTURE_2D, i, bind); + + if (format != PIPE_FORMAT_NONE) { + samples[num_sample_counts++] = i; + } + } + + if (!num_sample_counts) { + samples[num_sample_counts++] = 1; + } + + return num_sample_counts; +} + + GLboolean st_sampler_compat_formats(enum pipe_format format1, enum pipe_format format2) { diff --git a/mesalib/src/mesa/state_tracker/st_format.h b/mesalib/src/mesa/state_tracker/st_format.h index 39397b17a..cb6e5bc96 100644 --- a/mesalib/src/mesa/state_tracker/st_format.h +++ b/mesalib/src/mesa/state_tracker/st_format.h @@ -67,6 +67,9 @@ st_ChooseTextureFormat(struct gl_context * ctx, GLenum target, GLint internalFormat, GLenum format, GLenum type); +size_t +st_QuerySamplesForFormat(struct gl_context *ctx, GLenum internalFormat, + int samples[16]); /* can we use a sampler view to translate these formats only used to make TFP so far */ diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 1d96e905c..c6ac634a2 100644 --- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -984,10 +984,13 @@ type_size(const struct glsl_type *type) * at link time. */ return 1; - default: - assert(0); - return 0; + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + assert(!"Invalid type in type_size"); + break; } + return 0; } /** @@ -1932,10 +1935,23 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) } break; } + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_half_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_4x8: + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + case ir_binop_pack_half_2x16_split: case ir_quadop_vector: - /* This operation should have already been handled. + /* This operation is not supported, or should have already been handled. */ - assert(!"Should not get here."); + assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()"); break; } @@ -2001,21 +2017,18 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) var->location); this->variables.push_tail(entry); break; - case ir_var_in: - case ir_var_inout: + case ir_var_shader_in: /* The linker assigns locations for varyings and attributes, * including deprecated builtins (like gl_Color), user-assign * generic attributes (glBindVertexLocation), and * user-defined varyings. - * - * FINISHME: We would hit this path for function arguments. Fix! */ assert(var->location != -1); entry = new(mem_ctx) variable_storage(var, PROGRAM_INPUT, var->location); break; - case ir_var_out: + case ir_var_shader_out: assert(var->location != -1); entry = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT, @@ -2304,7 +2317,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); l.writemask = WRITEMASK_XYZW; } else if (ir->lhs->type->is_scalar() && - ir->lhs->variable_referenced()->mode == ir_var_out) { + ir->lhs->variable_referenced()->mode == ir_var_shader_out) { /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the * FINISHME: W component of fragment shader output zero, work correctly. */ @@ -2581,8 +2594,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) ir_rvalue *param_rval = (ir_rvalue *)iter.get(); ir_variable *param = (ir_variable *)sig_iter.get(); - if (param->mode == ir_var_in || - param->mode == ir_var_inout) { + if (param->mode == ir_var_function_in || + param->mode == ir_var_function_inout) { variable_storage *storage = find_variable_storage(param); assert(storage); @@ -2617,8 +2630,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) ir_rvalue *param_rval = (ir_rvalue *)iter.get(); ir_variable *param = (ir_variable *)sig_iter.get(); - if (param->mode == ir_var_out || - param->mode == ir_var_inout) { + if (param->mode == ir_var_function_out || + param->mode == ir_var_function_inout) { variable_storage *storage = find_variable_storage(param); assert(storage); diff --git a/mesalib/src/mesa/swrast/s_texfilter.c b/mesalib/src/mesa/swrast/s_texfilter.c index 0a91cca06..953300f65 100644 --- a/mesalib/src/mesa/swrast/s_texfilter.c +++ b/mesalib/src/mesa/swrast/s_texfilter.c @@ -1647,14 +1647,14 @@ sample_2d_ewa(struct gl_context *ctx, GLfloat rgba[]) { GLint level = lod > 0 ? lod : 0; - GLfloat scaling = 1.0 / (1 << level); + GLfloat scaling = 1.0f / (1 << level); const struct gl_texture_image *img = tObj->Image[0][level]; const struct gl_texture_image *mostDetailedImage = tObj->Image[0][tObj->BaseLevel]; const struct swrast_texture_image *swImg = swrast_texture_image_const(mostDetailedImage); - GLfloat tex_u=-0.5 + texcoord[0] * swImg->WidthScale * scaling; - GLfloat tex_v=-0.5 + texcoord[1] * swImg->HeightScale * scaling; + GLfloat tex_u = -0.5f + texcoord[0] * swImg->WidthScale * scaling; + GLfloat tex_v = -0.5f + texcoord[1] * swImg->HeightScale * scaling; GLfloat ux = dudx * scaling; GLfloat vx = dvdx * scaling; @@ -1667,20 +1667,20 @@ sample_2d_ewa(struct gl_context *ctx, GLfloat A = vx*vx+vy*vy+1; GLfloat B = -2*(ux*vx+uy*vy); GLfloat C = ux*ux+uy*uy+1; - GLfloat F = A*C-B*B/4.0; + GLfloat F = A*C-B*B/4.0f; /* check if it is an ellipse */ /* ASSERT(F > 0.0); */ /* Compute the ellipse's (u,v) bounding box in texture space */ - GLfloat d = -B*B+4.0*C*A; - GLfloat box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox with */ - GLfloat box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */ + GLfloat d = -B*B+4.0f*C*A; + GLfloat box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with */ + GLfloat box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */ - GLint u0 = floor(tex_u - box_u); - GLint u1 = ceil (tex_u + box_u); - GLint v0 = floor(tex_v - box_v); - GLint v1 = ceil (tex_v + box_v); + GLint u0 = (GLint) floorf(tex_u - box_u); + GLint u1 = (GLint) ceilf (tex_u + box_u); + GLint v0 = (GLint) floorf(tex_v - box_v); + GLint v1 = (GLint) ceilf (tex_v + box_v); GLfloat num[4] = {0.0F, 0.0F, 0.0F, 0.0F}; GLfloat newCoord[2]; @@ -1692,7 +1692,7 @@ sample_2d_ewa(struct gl_context *ctx, /* Scale ellipse formula to directly index the Filter Lookup Table. * i.e. scale so that F = WEIGHT_LUT_SIZE-1 */ - double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F; + GLfloat formScale = (GLfloat) (WEIGHT_LUT_SIZE - 1) / F; A *= formScale; B *= formScale; C *= formScale; @@ -1715,7 +1715,7 @@ sample_2d_ewa(struct gl_context *ctx, /* as a LUT is used, q must never be negative; * should not happen, though */ - const GLint qClamped = q >= 0.0F ? q : 0; + const GLint qClamped = q >= 0.0F ? (GLint) q : 0; GLfloat weight = weightLut[qClamped]; newCoord[0] = u / ((GLfloat) img->Width2); @@ -1795,19 +1795,19 @@ sample_2d_footprint(struct gl_context *ctx, /* Calculate the per anisotropic sample offsets in s,t space. */ if (Px2 > Py2) { - numSamples = ceil(sqrtf(Px2)); + numSamples = (GLint) ceilf(sqrtf(Px2)); ds = ux / ((GLfloat) img->Width2); dt = vx / ((GLfloat) img->Height2); } else { - numSamples = ceil(sqrtf(Py2)); + numSamples = (GLint) ceilf(sqrtf(Py2)); ds = uy / ((GLfloat) img->Width2); dt = vy / ((GLfloat) img->Height2); } for (s = 0; svtx.buffer_ptr) { + /* probably ran out of memory earlier when allocating the VBO */ + return; + } + /* Copy stored stored vertices to start of new list. */ assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr); diff --git a/mkfontscale/configure.ac b/mkfontscale/configure.ac index 4340f99e1..4c7e599d5 100644 --- a/mkfontscale/configure.ac +++ b/mkfontscale/configure.ac @@ -27,6 +27,7 @@ AC_INIT([mkfontscale], [1.1.0], [mkfontscale]) AC_CONFIG_SRCDIR([Makefile.am]) AC_CONFIG_HEADERS([config.h]) +AC_USE_SYSTEM_EXTENSIONS # Initialize Automake AM_INIT_AUTOMAKE([foreign dist-bzip2]) diff --git a/mkfontscale/hash.c b/mkfontscale/hash.c index c2cf9caa3..3adfb6861 100644 --- a/mkfontscale/hash.c +++ b/mkfontscale/hash.c @@ -20,6 +20,8 @@ THE SOFTWARE. */ +#include "config.h" + #include #include #include @@ -41,14 +43,11 @@ hash(const char *string) } static void -strcpy_lwr(char *dst, const char *src) +str_tolower(char *s) { - while(1) { - *dst = tolower(*src); - if(*src == '\0') - break; - src++; - dst++; + while(*s != '\0') { + *s = tolower(*s); + s++; } } @@ -97,12 +96,11 @@ putHash(HashTablePtr table, char *key, char *value, int prio) for(bp = table[i]; bp; bp = bp->next) { if(strcasecmp(bp->key, key) == 0) { if(prio > bp->prio) { - keycopy = malloc(strlen(key) + 1); + keycopy = strdup(key); if(keycopy == NULL) goto fail; - strcpy_lwr(keycopy, key); - valuecopy = malloc(strlen(value) + 1); + str_tolower(keycopy); + valuecopy = strdup(value); if(valuecopy == NULL) goto fail; - strcpy(valuecopy, value); free(bp->key); free(bp->value); bp->key = keycopy; @@ -111,14 +109,13 @@ putHash(HashTablePtr table, char *key, char *value, int prio) return 1; } } - keycopy = malloc(strlen(key) + 1); + keycopy = strdup(key); if(keycopy == NULL) goto fail; - strcpy_lwr(keycopy, key); - valuecopy = malloc(strlen(value) + 1); + str_tolower(keycopy); + valuecopy = strdup(value); if(valuecopy == NULL) goto fail; - strcpy(valuecopy, value); bp = malloc(sizeof(HashBucketRec)); if(bp == NULL) goto fail; diff --git a/mkfontscale/ident.c b/mkfontscale/ident.c index bf544832c..41212575e 100644 --- a/mkfontscale/ident.c +++ b/mkfontscale/ident.c @@ -315,10 +315,9 @@ pcfIdentify(fontFile *f, char **name) if(i >= nprops) goto fail; - s = malloc(strlen(strings + props[i].value) + 1); + s = strdup(strings + props[i].value); if(s == NULL) goto fail; - strcpy(s, strings + props[i].value); *name = s; free(strings); free(props); diff --git a/mkfontscale/mkfontscale.c b/mkfontscale/mkfontscale.c index 5cf5cb9af..a67f28338 100644 --- a/mkfontscale/mkfontscale.c +++ b/mkfontscale/mkfontscale.c @@ -20,6 +20,8 @@ THE SOFTWARE. */ +#include "config.h" + #include #include #include @@ -896,10 +898,9 @@ doDirectory(const char *dirname_given, int numEncodings, ListPtr encodingsToDo) BDF_PropertyRec prop; rc = FT_Get_BDF_Property(face, "FONT", &prop); if(rc == 0 && prop.type == BDF_PROPERTY_TYPE_ATOM) { - xlfd_name = malloc(strlen(prop.u.atom) + 1); + xlfd_name = strdup(prop.u.atom); if(xlfd_name == NULL) goto done; - strcpy(xlfd_name, prop.u.atom); } } } diff --git a/pixman/configure.ac b/pixman/configure.ac index 515e31218..a93e2905b 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -968,6 +968,22 @@ fi AC_MSG_RESULT($support_for_attribute_constructor) AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR) +dnl ===================================== +dnl __float128 + +support_for_float128=no + +AC_MSG_CHECKING(for __float128) +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; } +]])], support_for_float128=yes) + +if test x$support_for_float128 = xyes; then + AC_DEFINE([HAVE_FLOAT128], [], [Whether the tool chain supports __float128]) +fi + +AC_MSG_RESULT($support_for_float128) + dnl ================== dnl libpng diff --git a/pixman/demos/scale.c b/pixman/demos/scale.c index 9100ff72a..869ada12b 100644 --- a/pixman/demos/scale.c +++ b/pixman/demos/scale.c @@ -39,6 +39,7 @@ typedef struct GtkAdjustment * scale_x_adjustment; GtkAdjustment * scale_y_adjustment; GtkAdjustment * rotate_adjustment; + GtkAdjustment * subsample_adjustment; int scaled_width; int scaled_height; } app_t; @@ -236,7 +237,8 @@ rescale (GtkWidget *may_be_null, app_t *app) get_value (app, filters, "reconstruct_y_combo_box"), get_value (app, filters, "sample_x_combo_box"), get_value (app, filters, "sample_y_combo_box"), - 4, 4); + gtk_adjustment_get_value (app->subsample_adjustment), + gtk_adjustment_get_value (app->subsample_adjustment)); pixman_image_set_filter (app->original, PIXMAN_FILTER_SEPARABLE_CONVOLUTION, params, n_params); @@ -360,10 +362,13 @@ app_new (pixman_image_t *original) GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_y_adjustment")); app->rotate_adjustment = GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "rotate_adjustment")); + app->subsample_adjustment = + GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "subsample_adjustment")); g_signal_connect (app->scale_x_adjustment, "value_changed", G_CALLBACK (rescale), app); g_signal_connect (app->scale_y_adjustment, "value_changed", G_CALLBACK (rescale), app); g_signal_connect (app->rotate_adjustment, "value_changed", G_CALLBACK (rescale), app); + g_signal_connect (app->subsample_adjustment, "value_changed", G_CALLBACK (rescale), app); widget = get_widget (app, "scale_x_scale"); gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL); diff --git a/pixman/demos/scale.ui b/pixman/demos/scale.ui index f7c0c805f..b3450d34d 100644 --- a/pixman/demos/scale.ui +++ b/pixman/demos/scale.ui @@ -23,6 +23,14 @@ 10 10 + + 1 + 12 + 1 + 1 + 0 + 4 + @@ -51,6 +59,7 @@ True + 12 True @@ -233,6 +242,17 @@ 4 + + + True + 1 + <b>Subsample:</b> + True + + + 5 + + True @@ -277,6 +297,16 @@ 4 + + + True + subsample_adjustment + + + 1 + 5 + + False diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index c625e0c4a..247aea645 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -738,36 +738,6 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp, } } -static void -fast_composite_src_x888_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - *dst = convert_8888_to_0565 (s); - dst++; - } - } -} - static void fast_composite_add_8_8 (pixman_implementation_t *imp, pixman_composite_info_t *info) @@ -1243,6 +1213,18 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, pixman_composite_func_t func; pixman_format_code_t mask_format; uint32_t src_flags, mask_flags; + int32_t sx, sy; + int32_t width_remain; + int32_t num_pixels; + int32_t src_width; + int32_t i, j; + pixman_image_t extended_src_image; + uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; + pixman_bool_t need_src_extension; + uint32_t *src_line; + int32_t src_stride; + int32_t src_bpp; + pixman_composite_info_t info2 = *info; src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; @@ -1258,149 +1240,131 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, mask_flags = FAST_PATH_IS_OPAQUE; } - if (_pixman_implementation_lookup_composite ( - imp->toplevel, info->op, - src_image->common.extended_format_code, src_flags, - mask_format, mask_flags, - dest_image->common.extended_format_code, info->dest_flags, - &imp, &func)) + _pixman_implementation_lookup_composite ( + imp->toplevel, info->op, + src_image->common.extended_format_code, src_flags, + mask_format, mask_flags, + dest_image->common.extended_format_code, info->dest_flags, + &imp, &func); + + src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); + + if (src_image->bits.width < REPEAT_MIN_WIDTH && + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && + !src_image->bits.indexed) { - int32_t sx, sy; - int32_t width_remain; - int32_t num_pixels; - int32_t src_width; - int32_t i, j; - pixman_image_t extended_src_image; - uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; - pixman_bool_t need_src_extension; - uint32_t *src_line; - int32_t src_stride; - int32_t src_bpp; - pixman_composite_info_t info2 = *info; - - src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); - - if (src_image->bits.width < REPEAT_MIN_WIDTH && - (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && - !src_image->bits.indexed) - { - sx = src_x; - sx = MOD (sx, src_image->bits.width); - sx += width; - src_width = 0; + sx = src_x; + sx = MOD (sx, src_image->bits.width); + sx += width; + src_width = 0; - while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) - src_width += src_image->bits.width; + while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) + src_width += src_image->bits.width; - src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); + src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); - /* Initialize/validate stack-allocated temporary image */ - _pixman_bits_image_init (&extended_src_image, src_image->bits.format, - src_width, 1, &extended_src[0], src_stride, - FALSE); - _pixman_image_validate (&extended_src_image); + /* Initialize/validate stack-allocated temporary image */ + _pixman_bits_image_init (&extended_src_image, src_image->bits.format, + src_width, 1, &extended_src[0], src_stride, + FALSE); + _pixman_image_validate (&extended_src_image); - info2.src_image = &extended_src_image; - need_src_extension = TRUE; - } - else - { - src_width = src_image->bits.width; - need_src_extension = FALSE; - } + info2.src_image = &extended_src_image; + need_src_extension = TRUE; + } + else + { + src_width = src_image->bits.width; + need_src_extension = FALSE; + } - sx = src_x; - sy = src_y; + sx = src_x; + sy = src_y; - while (--height >= 0) - { - sx = MOD (sx, src_width); - sy = MOD (sy, src_image->bits.height); + while (--height >= 0) + { + sx = MOD (sx, src_width); + sy = MOD (sy, src_image->bits.height); - if (need_src_extension) + if (need_src_extension) + { + if (src_bpp == 32) { - if (src_bpp == 32) - { - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - extended_src[i] = src_line[j]; - } - } - else if (src_bpp == 16) + for (i = 0; i < src_width; ) { - uint16_t *src_line_16; - - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, - src_line_16, 1); - src_line = (uint32_t*)src_line_16; - - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; - } + for (j = 0; j < src_image->bits.width; j++, i++) + extended_src[i] = src_line[j]; } - else if (src_bpp == 8) - { - uint8_t *src_line_8; + } + else if (src_bpp == 16) + { + uint16_t *src_line_16; - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, - src_line_8, 1); - src_line = (uint32_t*)src_line_8; + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, + src_line_16, 1); + src_line = (uint32_t*)src_line_16; - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; - } + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; } - - info2.src_y = 0; } - else + else if (src_bpp == 8) { - info2.src_y = sy; + uint8_t *src_line_8; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, + src_line_8, 1); + src_line = (uint32_t*)src_line_8; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; + } } - width_remain = width; + info2.src_y = 0; + } + else + { + info2.src_y = sy; + } - while (width_remain > 0) - { - num_pixels = src_width - sx; + width_remain = width; - if (num_pixels > width_remain) - num_pixels = width_remain; + while (width_remain > 0) + { + num_pixels = src_width - sx; - info2.src_x = sx; - info2.width = num_pixels; - info2.height = 1; + if (num_pixels > width_remain) + num_pixels = width_remain; - func (imp, &info2); + info2.src_x = sx; + info2.width = num_pixels; + info2.height = 1; - width_remain -= num_pixels; - info2.mask_x += num_pixels; - info2.dest_x += num_pixels; - sx = 0; - } + func (imp, &info2); - sx = src_x; - sy++; - info2.mask_x = info->mask_x; - info2.mask_y++; - info2.dest_x = info->dest_x; - info2.dest_y++; + width_remain -= num_pixels; + info2.mask_x += num_pixels; + info2.dest_x += num_pixels; + sx = 0; } - if (need_src_extension) - _pixman_image_fini (&extended_src_image); - } - else - { - _pixman_log_error (FUNC, "Didn't find a suitable function "); + sx = src_x; + sy++; + info2.mask_x = info->mask_x; + info2.mask_y++; + info2.dest_x = info->dest_x; + info2.dest_y++; } + + if (need_src_extension) + _pixman_image_fini (&extended_src_image); } /* Use more unrolling for src_0565_0565 because it is typically CPU bound */ @@ -1913,10 +1877,6 @@ static const pixman_fast_path_t c_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), @@ -2199,12 +2159,200 @@ fast_path_fill (pixman_implementation_t *imp, return TRUE; } +/*****************************************************************************/ + +static uint32_t * +fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ + int32_t w = iter->width; + uint32_t *dst = iter->buffer; + const uint16_t *src = (const uint16_t *)iter->bits; + + iter->bits += iter->stride; + + /* Align the source buffer at 4 bytes boundary */ + if (w > 0 && ((uintptr_t)src & 3)) + { + *dst++ = convert_0565_to_8888 (*src++); + w--; + } + /* Process two pixels per iteration */ + while ((w -= 2) >= 0) + { + uint32_t sr, sb, sg, t0, t1; + uint32_t s = *(const uint32_t *)src; + src += 2; + sr = (s >> 8) & 0x00F800F8; + sb = (s << 3) & 0x00F800F8; + sg = (s >> 3) & 0x00FC00FC; + sr |= sr >> 5; + sb |= sb >> 5; + sg |= sg >> 6; + t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) | + (sb & 0xFF) | 0xFF000000; + t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) | + (sb >> 16) | 0xFF000000; +#ifdef WORDS_BIGENDIAN + *dst++ = t1; + *dst++ = t0; +#else + *dst++ = t0; + *dst++ = t1; +#endif + } + if (w & 1) + { + *dst = convert_0565_to_8888 (*src); + } + + return iter->buffer; +} + +static uint32_t * +fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) +{ + iter->bits += iter->stride; + return iter->buffer; +} + +/* Helper function for a workaround, which tries to ensure that 0x1F001F + * constant is always allocated in a register on RISC architectures. + */ +static force_inline uint32_t +convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F) +{ + uint32_t a, b; + a = (s >> 3) & x1F001F; + b = s & 0xFC00; + a |= a >> 5; + a |= b >> 5; + return a; +} + +static void +fast_write_back_r5g6b5 (pixman_iter_t *iter) +{ + int32_t w = iter->width; + uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); + const uint32_t *src = iter->buffer; + /* Workaround to ensure that x1F001F variable is allocated in a register */ + static volatile uint32_t volatile_x1F001F = 0x1F001F; + uint32_t x1F001F = volatile_x1F001F; + + while ((w -= 4) >= 0) + { + uint32_t s1 = *src++; + uint32_t s2 = *src++; + uint32_t s3 = *src++; + uint32_t s4 = *src++; + *dst++ = convert_8888_to_0565_workaround (s1, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s2, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s3, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s4, x1F001F); + } + if (w & 2) + { + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + } + if (w & 1) + { + *dst = convert_8888_to_0565_workaround (*src, x1F001F); + } +} + +typedef struct +{ + pixman_format_code_t format; + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; +} fetcher_info_t; + +static const fetcher_info_t fetchers[] = +{ + { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + { PIXMAN_null } +}; + +static pixman_bool_t +fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + +#define FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FLAGS) == FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + iter->get_scanline = f->get_scanline; + return TRUE; + } + } + } + + return FALSE; +} + +static pixman_bool_t +fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) + { + iter->get_scanline = fast_dest_fetch_noop; + } + else + { + iter->get_scanline = f->get_scanline; + } + iter->write_back = f->write_back; + return TRUE; + } + } + } + return FALSE; +} + + pixman_implementation_t * _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) { pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); imp->fill = fast_path_fill; + imp->src_iter_init = fast_src_iter_init; + imp->dest_iter_init = fast_dest_iter_init; return imp; } diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c index f175d771e..93a1b9acf 100644 --- a/pixman/pixman/pixman-general.c +++ b/pixman/pixman/pixman-general.c @@ -188,9 +188,6 @@ general_composite_rect (pixman_implementation_t *imp, compose = _pixman_implementation_lookup_combiner ( imp->toplevel, op, component_alpha, narrow); - if (!compose) - return; - for (i = 0; i < height; ++i) { uint32_t *s, *m, *d; diff --git a/pixman/pixman/pixman-glyph.c b/pixman/pixman/pixman-glyph.c index 6d2c8bbb7..5a271b64b 100644 --- a/pixman/pixman/pixman-glyph.c +++ b/pixman/pixman/pixman-glyph.c @@ -463,16 +463,13 @@ pixman_composite_glyphs_no_mask (pixman_op_t op, { glyph_format = glyph_img->common.extended_format_code; glyph_flags = glyph_img->common.flags; - + _pixman_implementation_lookup_composite ( get_implementation(), op, src->common.extended_format_code, src->common.flags, glyph_format, glyph_flags | extra, dest_format, dest_flags, &implementation, &func); - - if (!func) - goto out; } info.src_x = src_x + composite_box.x1 - dest_x; @@ -582,9 +579,6 @@ add_glyphs (pixman_glyph_cache_t *cache, mask_format, info.mask_flags, dest_format, dest_flags, &implementation, &func); - - if (!func) - goto out; } glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x; diff --git a/pixman/pixman/pixman-implementation.c b/pixman/pixman/pixman-implementation.c index ec467a619..c0a643633 100644 --- a/pixman/pixman/pixman-implementation.c +++ b/pixman/pixman/pixman-implementation.c @@ -65,7 +65,13 @@ typedef struct PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); -pixman_bool_t +static void +dummy_composite_rect (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ +} + +void _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, pixman_op_t op, pixman_format_code_t src_format, @@ -142,7 +148,11 @@ _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, ++info; } } - return FALSE; + + /* We should never reach this point */ + _pixman_log_error (FUNC, "No known composite function\n"); + *out_imp = NULL; + *out_func = dummy_composite_rect; update_cache: if (i) @@ -160,8 +170,16 @@ update_cache: cache->cache[0].fast_path.dest_flags = dest_flags; cache->cache[0].fast_path.func = *out_func; } +} - return TRUE; +static void +dummy_combine (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ } pixman_combine_32_func_t @@ -199,7 +217,9 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp, imp = imp->fallback; } - return NULL; + /* We should never reach this point */ + _pixman_log_error (FUNC, "No known combine function\n"); + return dummy_combine; } pixman_bool_t diff --git a/pixman/pixman/pixman-inlines.h b/pixman/pixman/pixman-inlines.h index ab4def0dc..dd1c2f17f 100644 --- a/pixman/pixman/pixman-inlines.h +++ b/pixman/pixman/pixman-inlines.h @@ -88,6 +88,42 @@ pixman_fixed_to_bilinear_weight (pixman_fixed_t x) ((1 << BILINEAR_INTERPOLATION_BITS) - 1); } +#if BILINEAR_INTERPOLATION_BITS <= 4 +/* Inspired by Filter_32_opaque from Skia */ +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t lo, hi; + + distx <<= (4 - BILINEAR_INTERPOLATION_BITS); + disty <<= (4 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ + distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ + distixiy = + 16 * 16 - (disty << 4) - + (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ + + lo = (tl & 0xff00ff) * distixiy; + hi = ((tl >> 8) & 0xff00ff) * distixiy; + + lo += (tr & 0xff00ff) * distxiy; + hi += ((tr >> 8) & 0xff00ff) * distxiy; + + lo += (bl & 0xff00ff) * distixy; + hi += ((bl >> 8) & 0xff00ff) * distixy; + + lo += (br & 0xff00ff) * distxy; + hi += ((br >> 8) & 0xff00ff) * distxy; + + return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); +} + +#else #if SIZEOF_LONG > 4 static force_inline uint32_t @@ -184,6 +220,7 @@ bilinear_interpolation (uint32_t tl, uint32_t tr, } #endif +#endif // BILINEAR_INTERPOLATION_BITS <= 4 /* * For each scanline fetched from source image with PAD repeat: diff --git a/pixman/pixman/pixman-matrix.c b/pixman/pixman/pixman-matrix.c index cd2f1b5b8..89b96826b 100644 --- a/pixman/pixman/pixman-matrix.c +++ b/pixman/pixman/pixman-matrix.c @@ -34,6 +34,338 @@ #define F(x) pixman_int_to_fixed (x) +static force_inline int +count_leading_zeros (uint32_t x) +{ +#ifdef __GNUC__ + return __builtin_clz (x); +#else + int n = 0; + while (x) + { + n++; + x >>= 1; + } + return 32 - n; +#endif +} + +/* + * Large signed/unsigned integer division with rounding for the platforms with + * only 64-bit integer data type supported (no 128-bit data type). + * + * Arguments: + * hi, lo - high and low 64-bit parts of the dividend + * div - 48-bit divisor + * + * Returns: lowest 64 bits of the result as a return value and highest 64 + * bits of the result to "result_hi" pointer + */ + +/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */ +static force_inline uint64_t +rounded_udiv_128_by_48 (uint64_t hi, + uint64_t lo, + uint64_t div, + uint64_t *result_hi) +{ + uint64_t tmp, remainder, result_lo; + assert(div < ((uint64_t)1 << 48)); + + remainder = hi % div; + *result_hi = hi / div; + + tmp = (remainder << 16) + (lo >> 48); + result_lo = tmp / div; + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + (lo & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + /* round to nearest */ + if (remainder * 2 >= div && ++result_lo == 0) + *result_hi += 1; + + return result_lo; +} + +/* signed division (128-bit by 49-bit) with rounding to nearest */ +static inline int64_t +rounded_sdiv_128_by_49 (int64_t hi, + uint64_t lo, + int64_t div, + int64_t *signed_result_hi) +{ + uint64_t result_lo, result_hi; + int sign = 0; + if (div < 0) + { + div = -div; + sign ^= 1; + } + if (hi < 0) + { + if (lo != 0) + hi++; + hi = -hi; + lo = -lo; + sign ^= 1; + } + result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi); + if (sign) + { + if (result_lo != 0) + result_hi++; + result_hi = -result_hi; + result_lo = -result_lo; + } + if (signed_result_hi) + { + *signed_result_hi = result_hi; + } + return result_lo; +} + +/* + * Multiply 64.16 fixed point value by (2^scalebits) and convert + * to 128-bit integer. + */ +static force_inline void +fixed_64_16_to_int128 (int64_t hi, + int64_t lo, + int64_t *rhi, + int64_t *rlo, + int scalebits) +{ + /* separate integer and fractional parts */ + hi += lo >> 16; + lo &= 0xFFFF; + + if (scalebits <= 0) + { + *rlo = hi >> (-scalebits); + *rhi = *rlo >> 63; + } + else + { + *rhi = hi >> (64 - scalebits); + *rlo = (uint64_t)hi << scalebits; + if (scalebits < 16) + *rlo += lo >> (16 - scalebits); + else + *rlo += lo << (scalebits - 16); + } +} + +/* + * Convert 112.16 fixed point value to 48.16 with clamping for the out + * of range values. + */ +static force_inline pixman_fixed_48_16_t +fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag) +{ + if ((lo >> 63) != hi) + { + *clampflag = TRUE; + return hi >= 0 ? INT64_MAX : INT64_MIN; + } + else + { + return lo; + } +} + +/* + * Transform a point with 31.16 fixed point coordinates from the destination + * space to a point with 48.16 fixed point coordinates in the source space. + * No overflows are possible for affine transformations and the results are + * accurate including the least significant bit. Projective transformations + * may overflow, in this case the results are just clamped to return maximum + * or minimum 48.16 values (so that the caller can at least handle the NONE + * and PAD repeats correctly) and the return value is FALSE to indicate that + * such clamping has happened. + */ +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + pixman_bool_t clampflag = FALSE; + int i; + int64_t tmp[3][2], divint; + uint16_t divfrac; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + /* + * separate 64-bit integer and 16-bit fractional parts for the divisor, + * which is also scaled by 65536 after fixed point multiplication. + */ + divint = tmp[2][0] + (tmp[2][1] >> 16); + divfrac = tmp[2][1] & 0xFFFF; + + if (divint == pixman_fixed_1 && divfrac == 0) + { + /* + * this is a simple affine transformation + */ + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; + } + else if (divint == 0 && divfrac == 0) + { + /* + * handle zero divisor (if the values are non-zero, set the + * results to maximum positive or minimum negative) + */ + clampflag = TRUE; + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + + if (result->v[0] > 0) + result->v[0] = INT64_MAX; + else if (result->v[0] < 0) + result->v[0] = INT64_MIN; + + if (result->v[1] > 0) + result->v[1] = INT64_MAX; + else if (result->v[1] < 0) + result->v[1] = INT64_MIN; + } + else + { + /* + * projective transformation, analyze the top 32 bits of the divisor + */ + int32_t hi32divbits = divint >> 32; + if (hi32divbits < 0) + hi32divbits = ~hi32divbits; + + if (hi32divbits == 0) + { + /* the divisor is small, we can actually keep all the bits */ + int64_t hi, rhi, lo, rlo; + int64_t div = (divint << 16) + divfrac; + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + else + { + /* the divisor needs to be reduced to 48 bits */ + int64_t hi, rhi, lo, rlo, div; + int shift = 32 - count_leading_zeros (hi32divbits); + fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift); + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + } + result->v[2] = pixman_fixed_1; + return !clampflag; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int64_t hi0, lo0, hi1, lo1; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16); + lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16); + lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][2]; + + hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16); + lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16); + lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][2]; + + result->v[0] = hi0 + ((lo0 + 0x8000) >> 16); + result->v[1] = hi1 + ((lo1 + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int i; + int64_t tmp[3][2]; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16); +} + PIXMAN_EXPORT void pixman_transform_init_identity (struct pixman_transform *matrix) { @@ -50,69 +382,41 @@ PIXMAN_EXPORT pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform, struct pixman_vector * vector) { - struct pixman_vector result; - pixman_fixed_32_32_t partial; - pixman_fixed_48_16_t v; - int i, j; + pixman_vector_48_16_t tmp; + tmp.v[0] = vector->vector[0]; + tmp.v[1] = vector->vector[1]; + tmp.v[2] = vector->vector[2]; - for (j = 0; j < 3; j++) - { - v = 0; - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] * - (pixman_fixed_48_16_t) vector->vector[i]); - v += (partial + 0x8000) >> 16; - } - - if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) - return FALSE; - - result.vector[j] = (pixman_fixed_t) v; - } - - *vector = result; + pixman_transform_point_31_16_3d (transform, &tmp, &tmp); - if (!result.vector[2]) - return FALSE; + vector->vector[0] = tmp.v[0]; + vector->vector[1] = tmp.v[1]; + vector->vector[2] = tmp.v[2]; - return TRUE; + return vector->vector[0] == tmp.v[0] && + vector->vector[1] == tmp.v[1] && + vector->vector[2] == tmp.v[2]; } PIXMAN_EXPORT pixman_bool_t pixman_transform_point (const struct pixman_transform *transform, struct pixman_vector * vector) { - pixman_fixed_32_32_t partial; - pixman_fixed_34_30_t v[3]; - pixman_fixed_48_16_t quo; - int i, j; + pixman_vector_48_16_t tmp; + tmp.v[0] = vector->vector[0]; + tmp.v[1] = vector->vector[1]; + tmp.v[2] = vector->vector[2]; - for (j = 0; j < 3; j++) - { - v[j] = 0; - - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] * - (pixman_fixed_32_32_t) vector->vector[i]); - v[j] += (partial + 2) >> 2; - } - } - - if (!((v[2] + 0x8000) >> 16)) - return FALSE; + if (!pixman_transform_point_31_16 (transform, &tmp, &tmp)) + return FALSE; - for (j = 0; j < 2; j++) - { - quo = v[j] / ((v[2] + 0x8000) >> 16); - if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16) - return FALSE; - vector->vector[j] = (pixman_fixed_t) quo; - } - - vector->vector[2] = pixman_fixed_1; - return TRUE; + vector->vector[0] = tmp.v[0]; + vector->vector[1] = tmp.v[1]; + vector->vector[2] = tmp.v[2]; + + return vector->vector[0] == tmp.v[0] && + vector->vector[1] == tmp.v[1] && + vector->vector[2] == tmp.v[2]; } PIXMAN_EXPORT pixman_bool_t diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index e5ab873ed..cb78a2ed8 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -497,7 +497,7 @@ pixman_implementation_t * _pixman_implementation_create (pixman_implementation_t *fallback, const pixman_fast_path_t *fast_paths); -pixman_bool_t +void _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, pixman_op_t op, pixman_format_code_t src_format, @@ -1052,7 +1052,7 @@ _pixman_log_error (const char *function, const char *message); #else -#define _pixman_log_error(f,m) do { } while (0) \ +#define _pixman_log_error(f,m) do { } while (0) #define return_if_fail(expr) \ do \ @@ -1077,6 +1077,27 @@ _pixman_log_error (const char *function, const char *message); while (0) #endif +/* + * Matrix + */ + +typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t; + +pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + +void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + +void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + /* * Timers */ diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index 5a0e0626a..fc873cc96 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -4523,7 +4523,163 @@ sse2_composite_add_8888_8888 (pixman_implementation_t *imp, sse2_combine_add_u (imp, op, dst, src, NULL, width); } +} + +static void +sse2_composite_add_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst, src; + int dst_stride; + + __m128i xmm_src; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + + if (src == ~0) + { + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, + dest_x, dest_y, width, height, ~0); + + return; + } + + xmm_src = _mm_set_epi32 (src, src, src, src); + while (height--) + { + int w = width; + uint32_t d; + + dst = dst_line; + dst_line += dst_stride; + + while (w && (unsigned long)dst & 15) + { + d = *dst; + *dst++ = + _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d))); + w--; + } + + while (w >= 4) + { + save_128_aligned + ((__m128i*)dst, + _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); + + dst += 4; + w -= 4; + } + + while (w--) + { + d = *dst; + *dst++ = + _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src, + _mm_cvtsi32_si128 (d))); + } + } +} + +static void +sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + + __m128i xmm_src; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + xmm_src = expand_pixel_32_1x128 (src); + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && ((unsigned long)dst & 15)) + { + uint8_t m = *mask++; + if (m) + { + *dst = pack_1x128_32 + (_mm_adds_epu16 + (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), + unpack_32_1x128 (*dst))); + } + dst++; + w--; + } + + while (w >= 4) + { + uint32_t m = *(uint32_t*)mask; + if (m) + { + __m128i xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + __m128i xmm_dst = load_128_aligned ((__m128i*)dst); + __m128i xmm_mask = + _mm_unpacklo_epi8 (unpack_32_1x128(m), + _mm_setzero_si128 ()); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); + xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + if (m) + { + *dst = pack_1x128_32 + (_mm_adds_epu16 + (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), + unpack_32_1x128 (*dst))); + } + dst++; + w--; + } + } } static pixman_bool_t @@ -5786,6 +5942,121 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER, uint32_t, uint8_t, uint32_t, NORMAL, FLAG_HAVE_NON_SOLID_MASK) +static force_inline void +scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2, pix3, pix4; + __m128i xmm_mask; + + if (zero_src || (*mask >> 24) == 0) + return; + + xmm_mask = create_mask_16_128 (*mask >> 24); + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + if (pix1) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (pix1); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 + (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + + dst++; + w--; + } + + while (w >= 4) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + + if (pix1 | pix2 | pix3 | pix4) + { + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned + ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + w -= 4; + } + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + if (pix1) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (pix1); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 + (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + + dst++; + w--; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_HAVE_SOLID_MASK) + static const pixman_fast_path_t sse2_fast_paths[] = { /* PIXMAN_OP_OVER */ @@ -5848,6 +6119,14 @@ static const pixman_fast_path_t sse2_fast_paths[] = PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8), PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8), + PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888), /* PIXMAN_OP_SRC */ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888), @@ -5912,6 +6191,11 @@ static const pixman_fast_path_t sse2_fast_paths[] = SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888), diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c index 3fabed161..184f0c4e6 100644 --- a/pixman/pixman/pixman.c +++ b/pixman/pixman/pixman.c @@ -581,11 +581,13 @@ pixman_image_composite32 (pixman_op_t op, int32_t height) { pixman_format_code_t src_format, mask_format, dest_format; - uint32_t src_flags, mask_flags, dest_flags; pixman_region32_t region; pixman_box32_t extents; pixman_implementation_t *imp; pixman_composite_func_t func; + pixman_composite_info_t info; + const pixman_box32_t *pbox; + int n; _pixman_image_validate (src); if (mask) @@ -593,27 +595,27 @@ pixman_image_composite32 (pixman_op_t op, _pixman_image_validate (dest); src_format = src->common.extended_format_code; - src_flags = src->common.flags; + info.src_flags = src->common.flags; if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE)) { mask_format = mask->common.extended_format_code; - mask_flags = mask->common.flags; + info.mask_flags = mask->common.flags; } else { mask_format = PIXMAN_null; - mask_flags = FAST_PATH_IS_OPAQUE; + info.mask_flags = FAST_PATH_IS_OPAQUE; } dest_format = dest->common.extended_format_code; - dest_flags = dest->common.flags; + info.dest_flags = dest->common.flags; /* Check for pixbufs */ if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) && (src->type == BITS && src->bits.bits == mask->bits.bits) && (src->common.repeat == mask->common.repeat) && - (src_flags & mask_flags & FAST_PATH_ID_TRANSFORM) && + (info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM) && (src_x == mask_x && src_y == mask_y)) { if (src_format == PIXMAN_x8b8g8r8) @@ -638,7 +640,7 @@ pixman_image_composite32 (pixman_op_t op, extents.x2 -= dest_x - src_x; extents.y2 -= dest_y - src_y; - if (!analyze_extent (src, &extents, &src_flags)) + if (!analyze_extent (src, &extents, &info.src_flags)) goto out; extents.x1 -= src_x - mask_x; @@ -646,7 +648,7 @@ pixman_image_composite32 (pixman_op_t op, extents.x2 -= src_x - mask_x; extents.y2 -= src_y - mask_y; - if (!analyze_extent (mask, &extents, &mask_flags)) + if (!analyze_extent (mask, &extents, &info.mask_flags)) goto out; /* If the clip is within the source samples, and the samples are @@ -659,16 +661,16 @@ pixman_image_composite32 (pixman_op_t op, FAST_PATH_BILINEAR_FILTER | \ FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR) - if ((src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || - (src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) + if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || + (info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) { - src_flags |= FAST_PATH_IS_OPAQUE; + info.src_flags |= FAST_PATH_IS_OPAQUE; } - if ((mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || - (mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) + if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || + (info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) { - mask_flags |= FAST_PATH_IS_OPAQUE; + info.mask_flags |= FAST_PATH_IS_OPAQUE; } /* @@ -676,42 +678,35 @@ pixman_image_composite32 (pixman_op_t op, * if the src or dest are opaque. The output operator should be * mathematically equivalent to the source. */ - op = optimize_operator (op, src_flags, mask_flags, dest_flags); + info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags); - if (_pixman_implementation_lookup_composite ( - get_implementation (), op, - src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags, - &imp, &func)) - { - pixman_composite_info_t info; - const pixman_box32_t *pbox; - int n; + _pixman_implementation_lookup_composite ( + get_implementation (), info.op, + src_format, info.src_flags, + mask_format, info.mask_flags, + dest_format, info.dest_flags, + &imp, &func); - info.op = op; - info.src_image = src; - info.mask_image = mask; - info.dest_image = dest; - info.src_flags = src_flags; - info.mask_flags = mask_flags; - info.dest_flags = dest_flags; + info.src_image = src; + info.mask_image = mask; + info.dest_image = dest; - pbox = pixman_region32_rectangles (®ion, &n); + pbox = pixman_region32_rectangles (®ion, &n); - while (n--) - { - info.src_x = pbox->x1 + src_x - dest_x; - info.src_y = pbox->y1 + src_y - dest_y; - info.mask_x = pbox->x1 + mask_x - dest_x; - info.mask_y = pbox->y1 + mask_y - dest_y; - info.dest_x = pbox->x1; - info.dest_y = pbox->y1; - info.width = pbox->x2 - pbox->x1; - info.height = pbox->y2 - pbox->y1; - - func (imp, &info); - - pbox++; - } + while (n--) + { + info.src_x = pbox->x1 + src_x - dest_x; + info.src_y = pbox->y1 + src_y - dest_y; + info.mask_x = pbox->x1 + mask_x - dest_x; + info.mask_y = pbox->y1 + mask_y - dest_y; + info.dest_x = pbox->x1; + info.dest_y = pbox->y1; + info.width = pbox->x2 - pbox->x1; + info.height = pbox->y2 - pbox->y1; + + func (imp, &info); + + pbox++; } out: diff --git a/pixman/test/Makefile.sources b/pixman/test/Makefile.sources index 8c0b505df..e323a8e8c 100644 --- a/pixman/test/Makefile.sources +++ b/pixman/test/Makefile.sources @@ -17,6 +17,7 @@ TESTPROGRAMS = \ gradient-crash-test \ region-contains-test \ alphamap \ + matrix-test \ stress-test \ composite-traps-test \ blitters-test \ diff --git a/pixman/test/affine-test.c b/pixman/test/affine-test.c index 678fbe844..2506250db 100644 --- a/pixman/test/affine-test.c +++ b/pixman/test/affine-test.c @@ -307,11 +307,11 @@ test_composite (int testnum, } #if BILINEAR_INTERPOLATION_BITS == 8 -#define CHECKSUM 0x97097336 +#define CHECKSUM 0x2CDF1F07 #elif BILINEAR_INTERPOLATION_BITS == 7 -#define CHECKSUM 0x31D2DC21 +#define CHECKSUM 0xBC00B1DF #elif BILINEAR_INTERPOLATION_BITS == 4 -#define CHECKSUM 0x8B925154 +#define CHECKSUM 0xA227306B #else #define CHECKSUM 0x00000000 #endif diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c index 2f97b7b24..7336fa0d5 100644 --- a/pixman/test/lowlevel-blt-bench.c +++ b/pixman/test/lowlevel-blt-bench.c @@ -630,6 +630,8 @@ tests_tbl[] = { "src_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_1555_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_0565_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, + { "src_8_8", PIXMAN_a8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 }, + { "src_n_8", PIXMAN_a8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 }, { "src_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, { "src_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, { "src_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 }, @@ -772,7 +774,7 @@ main (int argc, char *argv[]) for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++) { - if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern)) + if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0) { bench_composite (tests_tbl[i].testname, tests_tbl[i].src_fmt, diff --git a/pixman/test/matrix-test.c b/pixman/test/matrix-test.c new file mode 100644 index 000000000..8437dd291 --- /dev/null +++ b/pixman/test/matrix-test.c @@ -0,0 +1,186 @@ +/* + * Copyright © 2012 Siarhei Siamashka + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "utils.h" +#include +#include +#include +#include + +#ifdef HAVE_FLOAT128 + +#define pixman_fixed_to_float128(x) (((__float128)(x)) / 65536.0Q) + +typedef struct { __float128 v[3]; } pixman_vector_f128_t; +typedef struct { __float128 m[3][3]; } pixman_transform_f128_t; + +pixman_bool_t +pixman_transform_point_f128 (const pixman_transform_f128_t *t, + const pixman_vector_f128_t *v, + pixman_vector_f128_t *result) +{ + int i; + for (i = 0; i < 3; i++) + { + result->v[i] = t->m[i][0] * v->v[0] + + t->m[i][1] * v->v[1] + + t->m[i][2] * v->v[2]; + } + if (result->v[2] != 0) + { + result->v[0] /= result->v[2]; + result->v[1] /= result->v[2]; + result->v[2] = 1; + return TRUE; + } + else + { + return FALSE; + } +} + +pixman_bool_t does_it_fit_fixed_48_16 (__float128 x) +{ + if (x >= 65536.0Q * 65536.0Q * 32768.0Q) + return FALSE; + if (x <= -65536.0Q * 65536.0Q * 32768.0Q) + return FALSE; + return TRUE; +} + +#endif + +uint32_t +test_matrix (int testnum, int verbose) +{ + uint32_t crc32 = 0; + int i, j, k; + pixman_bool_t is_affine; + + prng_srand (testnum); + + for (i = 0; i < 100; i++) + { + pixman_bool_t transform_ok; + pixman_transform_t ti; + pixman_vector_48_16_t vi, result_i; +#ifdef HAVE_FLOAT128 + pixman_transform_f128_t tf; + pixman_vector_f128_t vf, result_f; +#endif + prng_randmemset (&ti, sizeof(ti), 0); + prng_randmemset (&vi, sizeof(vi), 0); + + for (j = 0; j < 3; j++) + { + /* make sure that "vi" contains 31.16 fixed point data */ + vi.v[j] >>= 17; + /* and apply random shift */ + if (prng_rand_n (3) == 0) + vi.v[j] >>= prng_rand_n (46); + } + + if (prng_rand_n (2)) + { + /* random shift for the matrix */ + for (j = 0; j < 3; j++) + for (k = 0; k < 3; k++) + ti.matrix[j][k] >>= prng_rand_n (30); + } + + if (prng_rand_n (2)) + { + /* affine matrix */ + ti.matrix[2][0] = 0; + ti.matrix[2][1] = 0; + ti.matrix[2][2] = pixman_fixed_1; + } + + if (prng_rand_n (2)) + { + /* cartesian coordinates */ + vi.v[2] = pixman_fixed_1; + } + + is_affine = (ti.matrix[2][0] == 0 && ti.matrix[2][1] == 0 && + ti.matrix[2][2] == pixman_fixed_1 && + vi.v[2] == pixman_fixed_1); + + transform_ok = TRUE; + if (is_affine && prng_rand_n (2)) + pixman_transform_point_31_16_affine (&ti, &vi, &result_i); + else + transform_ok = pixman_transform_point_31_16 (&ti, &vi, &result_i); + + crc32 = compute_crc32 (crc32, &result_i, sizeof(result_i)); + +#ifdef HAVE_FLOAT128 + /* compare with a reference 128-bit floating point implementation */ + for (j = 0; j < 3; j++) + { + vf.v[j] = pixman_fixed_to_float128 (vi.v[j]); + for (k = 0; k < 3; k++) + { + tf.m[j][k] = pixman_fixed_to_float128 (ti.matrix[j][k]); + } + } + + if (pixman_transform_point_f128 (&tf, &vf, &result_f)) + { + if (transform_ok || + (does_it_fit_fixed_48_16 (result_f.v[0]) && + does_it_fit_fixed_48_16 (result_f.v[1]) && + does_it_fit_fixed_48_16 (result_f.v[2]))) + { + for (j = 0; j < 3; j++) + { + double diff = fabs (result_f.v[j] - + pixman_fixed_to_float128 (result_i.v[j])); + + if (is_affine && diff > (0.51 / 65536.0)) + { + printf ("%d:%d: bad precision for affine (%.12f)\n", + testnum, i, diff); + abort (); + } + else if (diff > (0.71 / 65536.0)) + { + printf ("%d:%d: bad precision for projective (%.12f)\n", + testnum, i, diff); + abort (); + } + } + } + } +#endif + } + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main ("matrix", 20000, + 0xBEBF98C3, + test_matrix, argc, argv); +} -- cgit v1.2.3