diff options
112 files changed, 5425 insertions, 991 deletions
| diff --git a/mesalib/configure.ac b/mesalib/configure.ac index e769edadb..9cc5c4ae5 100644 --- a/mesalib/configure.ac +++ b/mesalib/configure.ac @@ -608,8 +608,10 @@ AC_ARG_ENABLE([vdpau],     [enable_vdpau=auto])  AC_ARG_ENABLE([opencl],     [AS_HELP_STRING([--enable-opencl], -         [enable OpenCL library @<:@default=no@:>@])], -   [enable_opencl="$enableval"], +         [enable OpenCL library NOTE: Enabling this option will also enable +          --with-llvm-shared-libs +          @<:@default=no@:>@])], +   [enable_opencl="$enableval" with_llvm_shared_libs="$enableval"],     [enable_opencl=no])  AC_ARG_ENABLE([xlib_glx],      [AS_HELP_STRING([--enable-xlib-glx], @@ -1660,10 +1662,7 @@ if test "x$enable_gallium_llvm" = xyes; then      if test "x$LLVM_CONFIG" != xno; then  	LLVM_VERSION=`$LLVM_CONFIG --version | sed 's/svn.*//g'`  	LLVM_VERSION_INT=`echo $LLVM_VERSION | sed -e 's/\([[0-9]]\)\.\([[0-9]]\)/\10\2/g'` -        if test "x$with_llvm_shared_libs" = xyes; then -	    dnl We can't use $LLVM_VERSION because it has 'svn' stripped out, -	    LLVM_LIBS="-lLLVM-`$LLVM_CONFIG --version`" -	else +        if test "x$with_llvm_shared_libs" != xyes; then              LLVM_COMPONENTS="engine bitwriter"              if $LLVM_CONFIG --components | grep -q '\<mcjit\>'; then                  LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit" @@ -1672,7 +1671,6 @@ if test "x$enable_gallium_llvm" = xyes; then              if test "x$enable_opencl" = xyes; then                  LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation"              fi -            LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"  	fi  	LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags`  	LLVM_BINDIR=`$LLVM_CONFIG --bindir` @@ -1797,7 +1795,7 @@ radeon_llvm_check() {                        configure flag])      fi      AC_MSG_WARN([Please ensure you use the latest llvm tree from git://people.freedesktop.org/~tstellar/llvm master before submitting a bug]) -    LLVM_LIBS="$LLVM_LIBS `$LLVM_CONFIG 
--libs r600`" +    LLVM_COMPONENTS="${LLVM_COMPONENTS} r600"  }  dnl Gallium drivers @@ -1836,12 +1834,13 @@ if test "x$with_gallium_drivers" != x; then              if test "x$enable_r600_llvm" = xyes -o "x$enable_opencl" = xyes; then                  radeon_llvm_check                  NEED_RADEON_GALLIUM=yes; +                LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo"              fi              if test "x$enable_r600_llvm" = xyes; then                  USE_R600_LLVM_COMPILER=yes;              fi              if test "x$enable_opencl" = xyes -a "x$with_llvm_shared_libs" = xno; then -                LLVM_LIBS="${LLVM_LIBS} `$LLVM_CONFIG --libs bitreader asmparser`" +                LLVM_COMPONENTS="${LLVM_COMPONENTS} bitreader asmparser"              fi              gallium_check_st "radeon/drm" "dri-r600" "xorg-r600" "" "xvmc-r600" "vdpau-r600"              ;; @@ -1891,6 +1890,50 @@ if test "x$with_gallium_drivers" != x; then          esac      done  fi + +dnl Set LLVM_LIBS - This is done after the driver configuration so +dnl that drivers can add additonal components to LLVM_COMPONENTS. +dnl Previously, gallium drivers were updating LLVM_LIBS directly +dnl by calling llvm-config --libs ${DRIVER_LLVM_COMPONENTS}, but +dnl this was causing the same libraries to be appear multiple times +dnl in LLVM_LIBS. + +if test "x$MESA_LLVM" != x0; then + +    LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`" + +    if test "x$with_llvm_shared_libs" = xyes; then +        dnl We can't use $LLVM_VERSION because it has 'svn' stripped out, +        LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version` +        AC_CHECK_FILE("$LLVM_LIBDIR/lib$LLVM_SO_NAME.so", llvm_have_one_so=yes,) + +        if test "x$llvm_have_one_so" = xyes; then +            dnl LLVM was built using auto*, so there is only one shared object. +            LLVM_LIBS="-l$LLVM_SO_NAME" +        else +            dnl If LLVM was built with CMake, there will be one shared object per +            dnl component. 
+            AC_CHECK_FILE("$LLVM_LIBDIR/libLLVMTarget.so",, +                    AC_MSG_ERROR([Could not find llvm shared libraries: +	Please make sure you have built llvm with the --enable-shared option +	and that your llvm libraries are installed in $LLVM_LIBDIR +	If you have installed your llvm libraries to a different directory you +	can use the --with-llvm-prefix= configure flag to specify this directory. +	NOTE: Mesa is attempting to use llvm shared libraries because you have +	passed one of the following options to configure: +		--with-llvm-shared-libs +		--enable-opencl +	If you do not want to build with llvm shared libraries and instead want to +	use llvm static libraries then remove these options from your configure +	invocation and reconfigure.])) + +           dnl We don't need to update LLVM_LIBS in this case because the LLVM +           dnl install uses a shared object for each compoenent and we have +           dnl already added all of these objects to LLVM_LIBS. +        fi +    fi +fi +  AM_CONDITIONAL(HAVE_GALLIUM_SVGA, test "x$HAVE_GALLIUM_SVGA" = xyes)  AM_CONDITIONAL(HAVE_GALLIUM_I915, test "x$HAVE_GALLIUM_I915" = xyes)  AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes) diff --git a/mesalib/include/GLES3/gl3.h b/mesalib/include/GLES3/gl3.h index b9399e994..09f2b5333 100644 --- a/mesalib/include/GLES3/gl3.h +++ b/mesalib/include/GLES3/gl3.h @@ -2,7 +2,7 @@  #define __gl3_h_  /*  - * gl3.h last updated on $Date: 2012-09-12 10:13:02 -0700 (Wed, 12 Sep 2012) $ + * gl3.h last updated on $Date: 2012-10-03 07:52:40 -0700 (Wed, 03 Oct 2012) $   */  #include <GLES3/gl3platform.h> @@ -796,7 +796,7 @@ typedef struct __GLsync *GLsync;  #define GL_TEXTURE_IMMUTABLE_FORMAT                      0x912F  #define GL_MAX_ELEMENT_INDEX                             0x8D6B  #define GL_NUM_SAMPLE_COUNTS                             0x9380 -#define GL_TEXTURE_IMMUTABLE_LEVELS                      0x8D63 +#define GL_TEXTURE_IMMUTABLE_LEVELS            
          0x82DF  /*-------------------------------------------------------------------------   * Entrypoint definitions diff --git a/mesalib/src/gallium/auxiliary/Makefile.am b/mesalib/src/gallium/auxiliary/Makefile.am index 49792930a..a4eee4773 100644 --- a/mesalib/src/gallium/auxiliary/Makefile.am +++ b/mesalib/src/gallium/auxiliary/Makefile.am @@ -45,9 +45,3 @@ util/u_format_srgb.c: $(srcdir)/util/u_format_srgb.py  util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv  	$(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@ - -# XXX: As a work around for https://bugs.freedesktop.org/show_bug.cgi?id=59334 -# clover needs to link against libgallium.a. Delete this once we have a real -# fix for this bug. -all-local: libgallium.la -	ln -f $(builddir)/.libs/libgallium.a $(builddir)/libgallium.a diff --git a/mesalib/src/gallium/auxiliary/util/u_debug.c b/mesalib/src/gallium/auxiliary/util/u_debug.c index 6e8c5b993..f4670f28c 100644 --- a/mesalib/src/gallium/auxiliary/util/u_debug.c +++ b/mesalib/src/gallium/auxiliary/util/u_debug.c @@ -232,7 +232,7 @@ debug_get_flags_option(const char *name,     unsigned long result;     const char *str;     const struct debug_named_value *orig = flags; -   int namealign = 0; +   unsigned namealign = 0;     str = os_get_option(name);     if(!str) diff --git a/mesalib/src/gallium/auxiliary/util/u_tile.c b/mesalib/src/gallium/auxiliary/util/u_tile.c index 6c618a674..62298cdab 100644 --- a/mesalib/src/gallium/auxiliary/util/u_tile.c +++ b/mesalib/src/gallium/auxiliary/util/u_tile.c @@ -806,7 +806,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,              for (j = 0; j < w; j++) {                 /* convert 32-bit integer Z to float Z */                 const double scale = 1.0 / 0xffffffffU; -               pDest[j] = ptrc[j] * scale; +               pDest[j] = (float) (ptrc[j] * scale);              }        
      pDest += pt->stride/4;              ptrc += srcStride; @@ -820,7 +820,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,              for (j = 0; j < w; j++) {                 /* convert 32-bit integer Z to float Z */                 const double scale = 1.0 / 0xffffffffU; -               pDest[j*2] = ptrc[j] * scale; +               pDest[j*2] = (float) (ptrc[j] * scale);              }              pDest += pt->stride/4;              ptrc += srcStride; diff --git a/mesalib/src/gallium/auxiliary/util/u_tile.h b/mesalib/src/gallium/auxiliary/util/u_tile.h index abcd402c8..9e8194459 100644 --- a/mesalib/src/gallium/auxiliary/util/u_tile.h +++ b/mesalib/src/gallium/auxiliary/util/u_tile.h @@ -45,13 +45,13 @@ struct pipe_transfer;  static INLINE boolean  u_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_box *box)  { -   if (x >= box->width) +   if ((int) x >= box->width)        return TRUE; -   if (y >= box->height) +   if ((int) y >= box->height)        return TRUE; -   if (x + *w > box->width) +   if ((int) (x + *w) > box->width)        *w = box->width - x; -   if (y + *h > box->height) +   if ((int) (y + *h) > box->height)        *h = box->height - y;     return FALSE;  } diff --git a/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c b/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c index ee1c6881e..6859751c5 100644 --- a/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/mesalib/src/gallium/auxiliary/util/u_upload_mgr.c @@ -163,6 +163,13 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,     unsigned alloc_offset = align(min_out_offset, upload->alignment);     unsigned offset; +   /* Init these return values here in case we fail below to make +    * sure the caller doesn't get garbage values. +    */ +   *out_offset = ~0; +   pipe_resource_reference(outbuf, NULL); +   *ptr = NULL; +     /* Make sure we have enough space in the upload buffer      * for the sub-allocation. 
*/     if (MAX2(upload->offset, alloc_offset) + alloc_size > upload->size) { @@ -182,8 +189,6 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,  					  PIPE_TRANSFER_UNSYNCHRONIZED,  					  &upload->transfer);        if (!upload->map) { -         pipe_resource_reference(outbuf, NULL); -         *ptr = NULL;           upload->transfer = NULL;           return PIPE_ERROR_OUT_OF_MEMORY;        } diff --git a/mesalib/src/gallium/auxiliary/util/u_vbuf.c b/mesalib/src/gallium/auxiliary/util/u_vbuf.c index b712b52de..244b04d2a 100644 --- a/mesalib/src/gallium/auxiliary/util/u_vbuf.c +++ b/mesalib/src/gallium/auxiliary/util/u_vbuf.c @@ -323,7 +323,7 @@ void u_vbuf_destroy(struct u_vbuf *mgr)     FREE(mgr);  } -static void +static enum pipe_error  u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,                           unsigned vb_mask, unsigned out_vb,                           int start_vertex, unsigned num_vertices, @@ -335,6 +335,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,     struct pipe_resource *out_buffer = NULL;     uint8_t *out_map;     unsigned out_offset, mask; +   enum pipe_error err;     /* Get a translate object. */     tr = translate_cache_find(mgr->translate_cache, key); @@ -381,6 +382,14 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,        assert((ib->buffer || ib->user_buffer) && ib->index_size); +      /* Create and map the output buffer. 
*/ +      err = u_upload_alloc(mgr->uploader, 0, +                           key->output_stride * num_indices, +                           &out_offset, &out_buffer, +                           (void**)&out_map); +      if (err != PIPE_OK) +         return err; +        if (ib->user_buffer) {           map = (uint8_t*)ib->user_buffer + offset;        } else { @@ -389,12 +398,6 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,                                       PIPE_TRANSFER_READ, &transfer);        } -      /* Create and map the output buffer. */ -      u_upload_alloc(mgr->uploader, 0, -                     key->output_stride * num_indices, -                     &out_offset, &out_buffer, -                     (void**)&out_map); -        switch (ib->index_size) {        case 4:           tr->run_elts(tr, (unsigned*)map, num_indices, 0, out_map); @@ -412,11 +415,13 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,        }     } else {        /* Create and map the output buffer. 
*/ -      u_upload_alloc(mgr->uploader, -                     key->output_stride * start_vertex, -                     key->output_stride * num_vertices, -                     &out_offset, &out_buffer, -                     (void**)&out_map); +      err = u_upload_alloc(mgr->uploader, +                           key->output_stride * start_vertex, +                           key->output_stride * num_vertices, +                           &out_offset, &out_buffer, +                           (void**)&out_map); +      if (err != PIPE_OK) +         return err;        out_offset -= key->output_stride * start_vertex; @@ -441,6 +446,8 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,     pipe_resource_reference(        &mgr->real_vertex_buffer[out_vb].buffer, NULL);     mgr->real_vertex_buffer[out_vb].buffer = out_buffer; + +   return PIPE_OK;  }  static boolean @@ -588,11 +595,14 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,     /* Translate buffers. */     for (type = 0; type < VB_NUM; type++) {        if (key[type].nr_elements) { -         u_vbuf_translate_buffers(mgr, &key[type], mask[type], -                                  mgr->fallback_vbs[type], -                                  start[type], num[type], -                                  start_index, num_indices, min_index, -                                  unroll_indices && type == VB_VERTEX); +         enum pipe_error err; +         err = u_vbuf_translate_buffers(mgr, &key[type], mask[type], +                                        mgr->fallback_vbs[type], +                                        start[type], num[type], +                                        start_index, num_indices, min_index, +                                        unroll_indices && type == VB_VERTEX); +         if (err != PIPE_OK) +            return FALSE;           /* Fixup the stride for constant attribs. 
*/           if (type == VB_CONST) { @@ -884,7 +894,7 @@ void u_vbuf_set_index_buffer(struct u_vbuf *mgr,     pipe->set_index_buffer(pipe, ib);  } -static void +static enum pipe_error  u_vbuf_upload_buffers(struct u_vbuf *mgr,                        int start_vertex, unsigned num_vertices,                        int start_instance, unsigned num_instances) @@ -953,6 +963,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,        unsigned start, end;        struct pipe_vertex_buffer *real_vb;        const uint8_t *ptr; +      enum pipe_error err;        i = u_bit_scan(&buffer_mask); @@ -963,11 +974,15 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,        real_vb = &mgr->real_vertex_buffer[i];        ptr = mgr->vertex_buffer[i].user_buffer; -      u_upload_data(mgr->uploader, start, end - start, ptr + start, -                    &real_vb->buffer_offset, &real_vb->buffer); +      err = u_upload_data(mgr->uploader, start, end - start, ptr + start, +                          &real_vb->buffer_offset, &real_vb->buffer); +      if (err != PIPE_OK) +         return err;        real_vb->buffer_offset -= start;     } + +   return PIPE_OK;  }  static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr) @@ -1176,11 +1191,13 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)     if (unroll_indices ||         incompatible_vb_mask ||         mgr->ve->incompatible_elem_mask) { -      /* XXX check the return value */ -      u_vbuf_translate_begin(mgr, start_vertex, num_vertices, -                             info->start_instance, info->instance_count, -                             info->start, info->count, min_index, -                             unroll_indices); +      if (!u_vbuf_translate_begin(mgr, start_vertex, num_vertices, +                                  info->start_instance, info->instance_count, +                                  info->start, info->count, min_index, +                                  unroll_indices)) { +         
debug_warn_once("u_vbuf_translate_begin() failed"); +         return; +      }        user_vb_mask &= ~(incompatible_vb_mask |                          mgr->ve->incompatible_vb_mask_all); @@ -1188,8 +1205,13 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)     /* Upload user buffers. */     if (user_vb_mask) { -      u_vbuf_upload_buffers(mgr, start_vertex, num_vertices, -                            info->start_instance, info->instance_count); +      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices, +                                info->start_instance, +                                info->instance_count) != PIPE_OK) { +         debug_warn_once("u_vbuf_upload_buffers() failed"); +         return; +      } +        mgr->dirty_real_vb_mask |= user_vb_mask;     } diff --git a/mesalib/src/glsl/Makefile.am b/mesalib/src/glsl/Makefile.am index 058d8aed3..d0e5cd1d0 100644 --- a/mesalib/src/glsl/Makefile.am +++ b/mesalib/src/glsl/Makefile.am @@ -52,6 +52,7 @@ check_PROGRAMS =					\  tests_uniform_initializer_test_SOURCES =		\  	$(top_srcdir)/src/mesa/main/hash_table.c	\ +	$(top_srcdir)/src/mesa/main/imports.c		\  	$(top_srcdir)/src/mesa/program/prog_hash_table.c\  	$(top_srcdir)/src/mesa/program/symbol_table.c	\  	tests/copy_constant_to_storage_tests.cpp	\ @@ -100,6 +101,7 @@ endif  glsl_test_SOURCES = \  	$(top_srcdir)/src/mesa/main/hash_table.c \ +	$(top_srcdir)/src/mesa/main/imports.c \  	$(top_srcdir)/src/mesa/program/prog_hash_table.c \  	$(top_srcdir)/src/mesa/program/symbol_table.c \  	$(GLSL_SRCDIR)/standalone_scaffolding.cpp \ diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources index de63c3246..c294aa429 100644 --- a/mesalib/src/glsl/Makefile.sources +++ b/mesalib/src/glsl/Makefile.sources @@ -47,6 +47,8 @@ LIBGLSL_FILES = \  	$(GLSL_SRCDIR)/link_functions.cpp \  	$(GLSL_SRCDIR)/link_uniforms.cpp \  	$(GLSL_SRCDIR)/link_uniform_initializers.cpp \ +	
$(GLSL_SRCDIR)/link_uniform_block_active_visitor.cpp \ +	$(GLSL_SRCDIR)/link_uniform_blocks.cpp \  	$(GLSL_SRCDIR)/link_varyings.cpp \  	$(GLSL_SRCDIR)/loop_analysis.cpp \  	$(GLSL_SRCDIR)/loop_controls.cpp \ @@ -60,6 +62,7 @@ LIBGLSL_FILES = \  	$(GLSL_SRCDIR)/lower_mat_op_to_vec.cpp \  	$(GLSL_SRCDIR)/lower_noise.cpp \  	$(GLSL_SRCDIR)/lower_packed_varyings.cpp \ +	$(GLSL_SRCDIR)/lower_packing_builtins.cpp \  	$(GLSL_SRCDIR)/lower_texture_projection.cpp \  	$(GLSL_SRCDIR)/lower_variable_index_to_cond_assign.cpp \  	$(GLSL_SRCDIR)/lower_vec_index_to_cond_assign.cpp \ diff --git a/mesalib/src/glsl/SConscript b/mesalib/src/glsl/SConscript index 6981f041b..c4ab97c1e 100644 --- a/mesalib/src/glsl/SConscript +++ b/mesalib/src/glsl/SConscript @@ -59,6 +59,7 @@ else:      # Copy these files to avoid generation object files into src/mesa/program      env.Prepend(CPPPATH = ['#src/mesa/main'])      env.Command('hash_table.c', '#src/mesa/main/hash_table.c', Copy('$TARGET', '$SOURCE')) +    env.Command('imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', '$SOURCE'))      # Copy these files to avoid generation object files into src/mesa/program      env.Prepend(CPPPATH = ['#src/mesa/program'])      env.Command('prog_hash_table.c', '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE')) @@ -68,6 +69,7 @@ else:      mesa_objs = env.StaticObject([          'hash_table.c', +        'imports.c',          'prog_hash_table.c',          'symbol_table.c',      ]) diff --git a/mesalib/src/glsl/ast.h b/mesalib/src/glsl/ast.h index 50747822d..1a28963c4 100644 --- a/mesalib/src/glsl/ast.h +++ b/mesalib/src/glsl/ast.h @@ -804,11 +804,12 @@ public:  class ast_uniform_block : public ast_node {  public:     ast_uniform_block(ast_type_qualifier layout, -		     const char *block_name, -		     ast_declarator_list *member_list) -   : layout(layout), block_name(block_name) +                     const char *instance_name, +		     ast_expression *array_size) +   : layout(layout), 
block_name(NULL), instance_name(instance_name), +     array_size(array_size)     { -      declarations.push_degenerate_list_at_head(&member_list->link); +      /* empty */     }     virtual ir_rvalue *hir(exec_list *instructions, @@ -816,8 +817,28 @@ public:     ast_type_qualifier layout;     const char *block_name; + +   /** +    * Declared name of the block instance, if specified. +    * +    * If the block does not have an instance name, this field will be +    * \c NULL. +    */ +   const char *instance_name; +     /** List of ast_declarator_list * */     exec_list declarations; + +   /** +    * Declared array size of the block instance +    * +    * If the block is not declared as an array, this field will be \c NULL. +    * +    * \note +    * A block can only be an array if it also has an instance name.  If this +    * field is not \c NULL, ::instance_name must also not be \c NULL. +    */ +   ast_expression *array_size;  };  /*@}*/ diff --git a/mesalib/src/glsl/ast_function.cpp b/mesalib/src/glsl/ast_function.cpp index dc7a58bf2..26f72cf8e 100644 --- a/mesalib/src/glsl/ast_function.cpp +++ b/mesalib/src/glsl/ast_function.cpp @@ -132,12 +132,13 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,        }        /* Verify that 'out' and 'inout' actual parameters are lvalues. */ -      if (formal->mode == ir_var_out || formal->mode == ir_var_inout) { +      if (formal->mode == ir_var_function_out +          || formal->mode == ir_var_function_inout) {  	 const char *mode = NULL;  	 switch (formal->mode) { -	 case ir_var_out:   mode = "out";   break; -	 case ir_var_inout: mode = "inout"; break; -	 default:           assert(false);  break; +	 case ir_var_function_out:   mode = "out";   break; +	 case ir_var_function_inout: mode = "inout"; break; +	 default:                    assert(false);  break;  	 }  	 /* This AST-based check catches errors like f(i++).  
The IR-based @@ -210,13 +211,13 @@ generate_call(exec_list *instructions, ir_function_signature *sig,        if (formal->type->is_numeric() || formal->type->is_boolean()) {  	 switch (formal->mode) {  	 case ir_var_const_in: -	 case ir_var_in: { +	 case ir_var_function_in: {  	    ir_rvalue *converted  	       = convert_component(actual, formal->type);  	    actual->replace_with(converted);  	    break;  	 } -	 case ir_var_out: +	 case ir_var_function_out:  	    if (actual->type != formal->type) {  	       /* To convert an out parameter, we need to create a  		* temporary variable to hold the value before conversion, @@ -254,7 +255,7 @@ generate_call(exec_list *instructions, ir_function_signature *sig,  	       actual->replace_with(deref_tmp_2);  	    }  	    break; -	 case ir_var_inout: +	 case ir_var_function_inout:  	    /* Inout parameters should never require conversion, since that  	     * would require an implicit conversion to exist both to and  	     * from the formal parameter type, and there are no diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp index de3ce902e..49093d88f 100644 --- a/mesalib/src/glsl/ast_to_hir.cpp +++ b/mesalib/src/glsl/ast_to_hir.cpp @@ -857,14 +857,11 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)     case GLSL_TYPE_ERROR:     case GLSL_TYPE_VOID:     case GLSL_TYPE_SAMPLER: +   case GLSL_TYPE_INTERFACE:        /* I assume a comparison of a struct containing a sampler just         * ignores the sampler present in the type.         
*/        break; - -   default: -      assert(!"Should not get here."); -      break;     }     if (cmp == NULL) @@ -1625,6 +1622,15 @@ ast_expression::hir(exec_list *instructions,  	 }        } else if (array->type->array_size() == 0) {  	 _mesa_glsl_error(&loc, state, "unsized array index must be constant"); +      } else if (array->type->is_array() +                 && array->type->fields.array->is_interface()) { +         /* Page 46 in section 4.3.7 of the OpenGL ES 3.00 spec says: +          * +          *     "All indexes used to index a uniform block array must be +          *     constant integral expressions." +          */ +         _mesa_glsl_error(&loc, state, +                          "uniform block array index must be constant");        } else {  	 if (array->type->is_array()) {  	    /* whole_variable_referenced can return NULL if the array is a @@ -1924,11 +1930,11 @@ is_varying_var(ir_variable *var, _mesa_glsl_parser_targets target)  {     switch (target) {     case vertex_shader: -      return var->mode == ir_var_out; +      return var->mode == ir_var_shader_out;     case fragment_shader: -      return var->mode == ir_var_in; +      return var->mode == ir_var_shader_in;     default: -      return var->mode == ir_var_out || var->mode == ir_var_in; +      return var->mode == ir_var_shader_out || var->mode == ir_var_shader_in;     }  } @@ -1997,13 +2003,16 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,      * the setting alone.      */     if (qual->flags.q.in && qual->flags.q.out) -      var->mode = ir_var_inout; -   else if (qual->flags.q.attribute || qual->flags.q.in +      var->mode = ir_var_function_inout; +   else if (qual->flags.q.in) +      var->mode = is_parameter ? 
ir_var_function_in : ir_var_shader_in; +   else if (qual->flags.q.attribute  	    || (qual->flags.q.varying && (state->target == fragment_shader))) -      var->mode = ir_var_in; -   else if (qual->flags.q.out -	    || (qual->flags.q.varying && (state->target == vertex_shader))) -      var->mode = ir_var_out; +      var->mode = ir_var_shader_in; +   else if (qual->flags.q.out) +      var->mode = is_parameter ? ir_var_function_out : ir_var_shader_out; +   else if (qual->flags.q.varying && (state->target == vertex_shader)) +      var->mode = ir_var_shader_out;     else if (qual->flags.q.uniform)        var->mode = ir_var_uniform; @@ -2028,10 +2037,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,         * Similar text exists in the section on vertex shader outputs.         *         * Similar text exists in the GLSL ES 3.00 spec, except that the GLSL ES -       * 3.00 spec claims to allow structs as well.  However, this is likely -       * an error, since section 11 of the spec ("Counting of Inputs and -       * Outputs") enumerates all possible types of interstage linkage -       * variables, and it does not mention structs. +       * 3.00 spec allows structs as well.  Varying structs are also allowed +       * in GLSL 1.50.         
*/        switch (var->type->get_scalar_type()->base_type) {        case GLSL_TYPE_FLOAT: @@ -2046,6 +2053,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,                            state->get_version_string());           break;        case GLSL_TYPE_STRUCT: +         if (state->is_version(150, 300)) +            break;           _mesa_glsl_error(loc, state,                            "varying variables may not be of type struct");           break; @@ -2058,15 +2067,16 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,     if (state->all_invariant && (state->current_function == NULL)) {        switch (state->target) {        case vertex_shader: -	 if (var->mode == ir_var_out) +	 if (var->mode == ir_var_shader_out)  	    var->invariant = true;  	 break;        case geometry_shader: -	 if ((var->mode == ir_var_in) || (var->mode == ir_var_out)) +	 if ((var->mode == ir_var_shader_in) +             || (var->mode == ir_var_shader_out))  	    var->invariant = true;  	 break;        case fragment_shader: -	 if (var->mode == ir_var_in) +	 if (var->mode == ir_var_shader_in)  	    var->invariant = true;  	 break;        } @@ -2082,8 +2092,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,        var->interpolation = INTERP_QUALIFIER_NONE;     if (var->interpolation != INTERP_QUALIFIER_NONE && -       !(state->target == vertex_shader && var->mode == ir_var_out) && -       !(state->target == fragment_shader && var->mode == ir_var_in)) { +       !(state->target == vertex_shader && var->mode == ir_var_shader_out) && +       !(state->target == fragment_shader && var->mode == ir_var_shader_in)) {        _mesa_glsl_error(loc, state,  		       "interpolation qualifier `%s' can only be applied to "  		       "vertex shader outputs and fragment shader inputs.", @@ -2116,7 +2126,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,         */        switch (state->target) {        case 
vertex_shader: -	 if (!global_scope || (var->mode != ir_var_in)) { +	 if (!global_scope || (var->mode != ir_var_shader_in)) {  	    fail = true;  	    string = "input";  	 } @@ -2129,7 +2139,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,  	 break;        case fragment_shader: -	 if (!global_scope || (var->mode != ir_var_out)) { +	 if (!global_scope || (var->mode != ir_var_shader_out)) {  	    fail = true;  	    string = "output";  	 } @@ -2440,7 +2450,7 @@ process_initializer(ir_variable *var, ast_declaration *decl,  		       "cannot initialize samplers");     } -   if ((var->mode == ir_var_in) && (state->current_function == NULL)) { +   if ((var->mode == ir_var_shader_in) && (state->current_function == NULL)) {        _mesa_glsl_error(& initializer_loc, state,  		       "cannot initialize %s shader input / %s",  		       _mesa_glsl_shader_target_name(state->target), @@ -2579,12 +2589,12 @@ ast_declarator_list::hir(exec_list *instructions,  			     "Undeclared variable `%s' cannot be marked "  			     "invariant\n", decl->identifier);  	 } else if ((state->target == vertex_shader) -	       && (earlier->mode != ir_var_out)) { +	       && (earlier->mode != ir_var_shader_out)) {  	    _mesa_glsl_error(& loc, state,  			     "`%s' cannot be marked invariant, vertex shader "  			     "outputs only\n", decl->identifier);  	 } else if ((state->target == fragment_shader) -	       && (earlier->mode != ir_var_in)) { +	       && (earlier->mode != ir_var_shader_in)) {  	    _mesa_glsl_error(& loc, state,  			     "`%s' cannot be marked invariant, fragment shader "  			     "inputs only\n", decl->identifier); @@ -2707,16 +2717,13 @@ ast_declarator_list::hir(exec_list *instructions,  				       & loc, this->ubo_qualifiers_valid, false);        if (this->type->qualifier.flags.q.invariant) { -	 if ((state->target == vertex_shader) && !(var->mode == ir_var_out || -						   var->mode == ir_var_inout)) { -	    /* FINISHME: Note that this doesn't work for 
invariant on -	     * a function signature outval -	     */ +	 if ((state->target == vertex_shader) && +             var->mode != ir_var_shader_out) {  	    _mesa_glsl_error(& loc, state,  			     "`%s' cannot be marked invariant, vertex shader "  			     "outputs only\n", var->name);  	 } else if ((state->target == fragment_shader) && -		    !(var->mode == ir_var_in || var->mode == ir_var_inout)) { +		    var->mode != ir_var_shader_in) {  	    /* FINISHME: Note that this doesn't work for invariant on  	     * a function signature inval  	     */ @@ -2753,7 +2760,7 @@ ast_declarator_list::hir(exec_list *instructions,  			     "global scope%s",  			     mode, var->name, extra);  	 } -      } else if (var->mode == ir_var_in) { +      } else if (var->mode == ir_var_shader_in) {           var->read_only = true;  	 if (state->target == vertex_shader) { @@ -2833,7 +2840,7 @@ ast_declarator_list::hir(exec_list *instructions,            && state->target == vertex_shader            && state->current_function == NULL            && var->type->is_integer() -          && var->mode == ir_var_out +          && var->mode == ir_var_shader_out            && var->interpolation != INTERP_QUALIFIER_FLAT) {           _mesa_glsl_error(&loc, state, "If a vertex output is an integer, " @@ -3137,7 +3144,8 @@ ast_parameter_declarator::hir(exec_list *instructions,     }     is_void = false; -   ir_variable *var = new(ctx) ir_variable(type, this->identifier, ir_var_in); +   ir_variable *var = new(ctx) +      ir_variable(type, this->identifier, ir_var_function_in);     /* Apply any specified qualifiers to the parameter declaration.  Note that      * for function parameters the default mode is 'in'. @@ -3151,7 +3159,7 @@ ast_parameter_declarator::hir(exec_list *instructions,      *    as out or inout function parameters, nor can they be assigned      *    into."      
*/ -   if ((var->mode == ir_var_inout || var->mode == ir_var_out) +   if ((var->mode == ir_var_function_inout || var->mode == ir_var_function_out)         && type->contains_sampler()) {        _mesa_glsl_error(&loc, state, "out and inout parameters cannot contain samplers");        type = glsl_type::error_type; @@ -3171,7 +3179,7 @@ ast_parameter_declarator::hir(exec_list *instructions,      * So for GLSL 1.10, passing an array as an out or inout parameter is not      * allowed.  This restriction is removed in GLSL 1.20, and in GLSL ES.      */ -   if ((var->mode == ir_var_inout || var->mode == ir_var_out) +   if ((var->mode == ir_var_function_inout || var->mode == ir_var_function_out)         && type->is_array()         && !state->check_version(120, 100, &loc,                                  "Arrays cannot be out or inout parameters")) { @@ -4018,35 +4026,50 @@ ast_type_specifier::hir(exec_list *instructions,  } -ir_rvalue * -ast_struct_specifier::hir(exec_list *instructions, -			  struct _mesa_glsl_parse_state *state) +/** + * Process a structure or interface block tree into an array of structure fields + * + * After parsing, where there are some syntax differnces, structures and + * interface blocks are almost identical.  They are similar enough that the + * AST for each can be processed the same way into a set of + * \c glsl_struct_field to describe the members. + * + * \return + * The number of fields processed.  A pointer to the array structure fields is + * stored in \c *fields_ret. 
+ */ +unsigned +ast_process_structure_or_interface_block(exec_list *instructions, +					 struct _mesa_glsl_parse_state *state, +					 exec_list *declarations, +					 YYLTYPE &loc, +					 glsl_struct_field **fields_ret, +                                         bool is_interface, +                                         bool block_row_major)  {     unsigned decl_count = 0; -   /* Make an initial pass over the list of structure fields to determine how +   /* Make an initial pass over the list of fields to determine how      * many there are.  Each element in this list is an ast_declarator_list.      * This means that we actually need to count the number of elements in the      * 'declarations' list in each of the elements.      */ -   foreach_list_typed (ast_declarator_list, decl_list, link, -		       &this->declarations) { +   foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {        foreach_list_const (decl_ptr, & decl_list->declarations) {  	 decl_count++;        }     } -   /* Allocate storage for the structure fields and process the field +   /* Allocate storage for the fields and process the field      * declarations.  As the declarations are processed, try to also convert      * the types to HIR.  This ensures that structure definitions embedded in -    * other structure definitions are processed. +    * other structure definitions or in interface blocks are processed.      */     glsl_struct_field *const fields = ralloc_array(state, glsl_struct_field,  						  decl_count);     unsigned i = 0; -   foreach_list_typed (ast_declarator_list, decl_list, link, -		       &this->declarations) { +   foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {        const char *type_name;        decl_list->type->specifier->hir(instructions, state); @@ -4055,7 +4078,6 @@ ast_struct_specifier::hir(exec_list *instructions,         * embedded structure definitions have been removed from the language.         
*/        if (state->es_shader && decl_list->type->specifier->structure != NULL) { -	 YYLTYPE loc = this->get_location();  	 _mesa_glsl_error(&loc, state, "Embedded structure definitions are "  			  "not allowed in GLSL ES 1.00.");        } @@ -4065,25 +4087,88 @@ ast_struct_specifier::hir(exec_list *instructions,        foreach_list_typed (ast_declaration, decl, link,  			  &decl_list->declarations) { -	 const struct glsl_type *field_type = decl_type; +         /* From the GL_ARB_uniform_buffer_object spec: +          * +          *     "Sampler types are not allowed inside of uniform +          *      blocks. All other types, arrays, and structures +          *      allowed for uniforms are allowed within a uniform +          *      block." +          */ +         const struct glsl_type *field_type = decl_type; + +         if (is_interface && field_type->contains_sampler()) { +            YYLTYPE loc = decl_list->get_location(); +            _mesa_glsl_error(&loc, state, +                             "Uniform in non-default uniform block contains sampler\n"); +         } + +         const struct ast_type_qualifier *const qual = +            & decl_list->type->qualifier; +         if (qual->flags.q.std140 || +             qual->flags.q.packed || +             qual->flags.q.shared) { +            _mesa_glsl_error(&loc, state, +                             "uniform block layout qualifiers std140, packed, and " +                             "shared can only be applied to uniform blocks, not " +                             "members"); +         } +  	 if (decl->is_array) { -	    YYLTYPE loc = decl->get_location();  	    field_type = process_array_type(&loc, decl_type, decl->array_size,  					    state);  	 }  	 fields[i].type = (field_type != NULL)  	    ? 
field_type : glsl_type::error_type;  	 fields[i].name = decl->identifier; + +         if (qual->flags.q.row_major || qual->flags.q.column_major) { +            if (!field_type->is_matrix() && !field_type->is_record()) { +               _mesa_glsl_error(&loc, state, +                                "uniform block layout qualifiers row_major and " +                                "column_major can only be applied to matrix and " +                                "structure types"); +            } else +               validate_matrix_layout_for_type(state, &loc, field_type); +         } + +         if (field_type->is_matrix() || +             (field_type->is_array() && field_type->fields.array->is_matrix())) { +            fields[i].row_major = block_row_major; +            if (qual->flags.q.row_major) +               fields[i].row_major = true; +            else if (qual->flags.q.column_major) +               fields[i].row_major = false; +         } +  	 i++;        }     }     assert(i == decl_count); +   *fields_ret = fields; +   return decl_count; +} + + +ir_rvalue * +ast_struct_specifier::hir(exec_list *instructions, +			  struct _mesa_glsl_parse_state *state) +{ +   YYLTYPE loc = this->get_location(); +   glsl_struct_field *fields; +   unsigned decl_count = +      ast_process_structure_or_interface_block(instructions, +					       state, +					       &this->declarations, +					       loc, +					       &fields, +                                               false, +                                               false); +     const glsl_type *t =        glsl_type::get_record_instance(fields, decl_count, this->name); -   YYLTYPE loc = this->get_location();     if (!state->symbols->add_type(name, t)) {        _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name);     } else { @@ -4102,96 +4187,98 @@ ast_struct_specifier::hir(exec_list *instructions,     return NULL;  } -static struct gl_uniform_block * -get_next_uniform_block(struct 
_mesa_glsl_parse_state *state) -{ -   if (state->num_uniform_blocks >= state->uniform_block_array_size) { -      state->uniform_block_array_size *= 2; -      if (state->uniform_block_array_size <= 4) -	 state->uniform_block_array_size = 4; - -      state->uniform_blocks = reralloc(state, -				       state->uniform_blocks, -				       struct gl_uniform_block, -				       state->uniform_block_array_size); -   } - -   memset(&state->uniform_blocks[state->num_uniform_blocks], -	  0, sizeof(*state->uniform_blocks)); -   return &state->uniform_blocks[state->num_uniform_blocks++]; -} -  ir_rvalue *  ast_uniform_block::hir(exec_list *instructions,  		       struct _mesa_glsl_parse_state *state)  { +   YYLTYPE loc = this->get_location(); +     /* The ast_uniform_block has a list of ast_declarator_lists.  We      * need to turn those into ir_variables with an association      * with this uniform block.      */ -   struct gl_uniform_block *ubo = get_next_uniform_block(state); -   ubo->Name = ralloc_strdup(state->uniform_blocks, this->block_name); +   enum glsl_interface_packing packing; +   if (this->layout.flags.q.shared) { +      packing = GLSL_INTERFACE_PACKING_SHARED; +   } else if (this->layout.flags.q.packed) { +      packing = GLSL_INTERFACE_PACKING_PACKED; +   } else { +      /* The default layout is std140. 
+       */ +      packing = GLSL_INTERFACE_PACKING_STD140; +   } -   if (!state->symbols->add_uniform_block(ubo)) { +   bool block_row_major = this->layout.flags.q.row_major; +   exec_list declared_variables; +   glsl_struct_field *fields; +   unsigned int num_variables = +      ast_process_structure_or_interface_block(&declared_variables, +                                               state, +                                               &this->declarations, +                                               loc, +                                               &fields, +                                               true, +                                               block_row_major); + +   const glsl_type *block_type = +      glsl_type::get_interface_instance(fields, +                                        num_variables, +                                        packing, +                                        this->block_name); + +   if (!state->symbols->add_type(block_type->name, block_type)) {        YYLTYPE loc = this->get_location();        _mesa_glsl_error(&loc, state, "Uniform block name `%s' already taken in " -                       "the current scope.\n", ubo->Name); +                       "the current scope.\n", this->block_name);     } -   unsigned int num_variables = 0; -   foreach_list_typed(ast_declarator_list, decl_list, link, &declarations) { -      foreach_list_const(node, &decl_list->declarations) { -	 num_variables++; -      } -   } - -   bool block_row_major = this->layout.flags.q.row_major; - -   ubo->Uniforms = rzalloc_array(state->uniform_blocks, -				 struct gl_uniform_buffer_variable, -				 num_variables); - -   foreach_list_typed(ast_declarator_list, decl_list, link, &declarations) { -      exec_list declared_variables; - -      decl_list->hir(&declared_variables, state); +   /* Since interface blocks cannot contain statements, it should be +    * impossible for the block to generate any instructions. 
+    */ +   assert(declared_variables.is_empty()); -      foreach_list_const(node, &declared_variables) { -	 ir_variable *var = (ir_variable *)node; +   /* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec +    * says: +    * +    *     "If an instance name (instance-name) is used, then it puts all the +    *     members inside a scope within its own name space, accessed with the +    *     field selector ( . ) operator (analogously to structures)." +    */ +   if (this->instance_name) { +      ir_variable *var; -	 struct gl_uniform_buffer_variable *ubo_var = -	    &ubo->Uniforms[ubo->NumUniforms++]; +      if (this->array_size != NULL) { +         const glsl_type *block_array_type = +            process_array_type(&loc, block_type, this->array_size, state); -	 var->uniform_block = ubo - state->uniform_blocks; +         var = new(state) ir_variable(block_array_type, +                                      this->instance_name, +                                      ir_var_uniform); +      } else { +         var = new(state) ir_variable(block_type, +                                      this->instance_name, +                                      ir_var_uniform); +      } -	 ubo_var->Name = ralloc_strdup(state->uniform_blocks, var->name); -	 ubo_var->Type = var->type; -	 ubo_var->Offset = 0; /* Assigned at link time. */ +      var->interface_type = block_type; +      state->symbols->add_variable(var); +      instructions->push_tail(var); +   } else { +      /* In order to have an array size, the block must also be declared with +       * an instane name. 
+       */ +      assert(this->array_size == NULL); -	 if (var->type->is_matrix() || -	     (var->type->is_array() && var->type->fields.array->is_matrix())) { -	    ubo_var->RowMajor = block_row_major; -	    if (decl_list->type->qualifier.flags.q.row_major) -	       ubo_var->RowMajor = true; -	    else if (decl_list->type->qualifier.flags.q.column_major) -	       ubo_var->RowMajor = false; -	 } +      for (unsigned i = 0; i < num_variables; i++) { +         ir_variable *var = +            new(state) ir_variable(fields[i].type, +                                   ralloc_strdup(state, fields[i].name), +                                   ir_var_uniform); +         var->interface_type = block_type; -	 /* From the GL_ARB_uniform_buffer_object spec: -	  * -	  *     "Sampler types are not allowed inside of uniform -	  *      blocks. All other types, arrays, and structures -	  *      allowed for uniforms are allowed within a uniform -	  *      block." -	  */ -	 if (var->type->contains_sampler()) { -	    YYLTYPE loc = decl_list->get_location(); -	    _mesa_glsl_error(&loc, state, -			     "Uniform in non-default uniform block contains sampler\n"); -	 } +         state->symbols->add_variable(var); +         instructions->push_tail(var);        } - -      instructions->append_list(&declared_variables);     }     return NULL; @@ -4222,7 +4309,7 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,  	 gl_FragData_assigned = true;        else if (strncmp(var->name, "gl_", 3) != 0) {  	 if (state->target == fragment_shader && -	     (var->mode == ir_var_out || var->mode == ir_var_inout)) { +	     var->mode == ir_var_shader_out) {  	    user_defined_fs_output_assigned = true;  	    user_defined_fs_output = var;  	 } diff --git a/mesalib/src/glsl/builtin_compiler/Makefile.am b/mesalib/src/glsl/builtin_compiler/Makefile.am index 1a863b228..976640822 100644 --- a/mesalib/src/glsl/builtin_compiler/Makefile.am +++ b/mesalib/src/glsl/builtin_compiler/Makefile.am @@ 
-55,6 +55,7 @@ libglslcore_la_SOURCES =				\  builtin_compiler_SOURCES = \  	$(top_srcdir)/src/mesa/main/hash_table.c	\ +	$(top_srcdir)/src/mesa/main/imports.c		\  	$(top_srcdir)/src/mesa/program/prog_hash_table.c\  	$(top_srcdir)/src/mesa/program/symbol_table.c	\  	$(BUILTIN_COMPILER_CXX_FILES)			\ diff --git a/mesalib/src/glsl/builtin_types.h b/mesalib/src/glsl/builtin_types.h index a4c995fd1..c78c2d270 100644 --- a/mesalib/src/glsl/builtin_types.h +++ b/mesalib/src/glsl/builtin_types.h @@ -89,9 +89,9 @@ const glsl_type *const glsl_type::mat4_type = & builtin_core_types[14];  /*@{*/  static const struct glsl_struct_field gl_DepthRangeParameters_fields[] = { -   { glsl_type::float_type, "near" }, -   { glsl_type::float_type, "far" }, -   { glsl_type::float_type, "diff" }, +   { glsl_type::float_type, "near", false }, +   { glsl_type::float_type, "far", false }, +   { glsl_type::float_type, "diff", false },  };  const glsl_type glsl_type::builtin_structure_types[] = { @@ -106,58 +106,58 @@ const glsl_type glsl_type::builtin_structure_types[] = {  /*@{*/  static const struct glsl_struct_field gl_PointParameters_fields[] = { -   { glsl_type::float_type, "size" }, -   { glsl_type::float_type, "sizeMin" }, -   { glsl_type::float_type, "sizeMax" }, -   { glsl_type::float_type, "fadeThresholdSize" }, -   { glsl_type::float_type, "distanceConstantAttenuation" }, -   { glsl_type::float_type, "distanceLinearAttenuation" }, -   { glsl_type::float_type, "distanceQuadraticAttenuation" }, +   { glsl_type::float_type, "size", false }, +   { glsl_type::float_type, "sizeMin", false }, +   { glsl_type::float_type, "sizeMax", false }, +   { glsl_type::float_type, "fadeThresholdSize", false }, +   { glsl_type::float_type, "distanceConstantAttenuation", false }, +   { glsl_type::float_type, "distanceLinearAttenuation", false }, +   { glsl_type::float_type, "distanceQuadraticAttenuation", false },  };  static const struct glsl_struct_field gl_MaterialParameters_fields[] = { -   { 
glsl_type::vec4_type, "emission" }, -   { glsl_type::vec4_type, "ambient" }, -   { glsl_type::vec4_type, "diffuse" }, -   { glsl_type::vec4_type, "specular" }, -   { glsl_type::float_type, "shininess" }, +   { glsl_type::vec4_type, "emission", false }, +   { glsl_type::vec4_type, "ambient", false }, +   { glsl_type::vec4_type, "diffuse", false }, +   { glsl_type::vec4_type, "specular", false }, +   { glsl_type::float_type, "shininess", false },  };  static const struct glsl_struct_field gl_LightSourceParameters_fields[] = { -   { glsl_type::vec4_type, "ambient" }, -   { glsl_type::vec4_type, "diffuse" }, -   { glsl_type::vec4_type, "specular" }, -   { glsl_type::vec4_type, "position" }, -   { glsl_type::vec4_type, "halfVector" }, -   { glsl_type::vec3_type, "spotDirection" }, -   { glsl_type::float_type, "spotExponent" }, -   { glsl_type::float_type, "spotCutoff" }, -   { glsl_type::float_type, "spotCosCutoff" }, -   { glsl_type::float_type, "constantAttenuation" }, -   { glsl_type::float_type, "linearAttenuation" }, -   { glsl_type::float_type, "quadraticAttenuation" }, +   { glsl_type::vec4_type, "ambient", false }, +   { glsl_type::vec4_type, "diffuse", false }, +   { glsl_type::vec4_type, "specular", false }, +   { glsl_type::vec4_type, "position", false }, +   { glsl_type::vec4_type, "halfVector", false }, +   { glsl_type::vec3_type, "spotDirection", false }, +   { glsl_type::float_type, "spotExponent", false }, +   { glsl_type::float_type, "spotCutoff", false }, +   { glsl_type::float_type, "spotCosCutoff", false }, +   { glsl_type::float_type, "constantAttenuation", false }, +   { glsl_type::float_type, "linearAttenuation", false }, +   { glsl_type::float_type, "quadraticAttenuation", false },  };  static const struct glsl_struct_field gl_LightModelParameters_fields[] = { -   { glsl_type::vec4_type, "ambient" }, +   { glsl_type::vec4_type, "ambient", false },  };  static const struct glsl_struct_field gl_LightModelProducts_fields[] = { -   { 
glsl_type::vec4_type, "sceneColor" }, +   { glsl_type::vec4_type, "sceneColor", false },  };  static const struct glsl_struct_field gl_LightProducts_fields[] = { -   { glsl_type::vec4_type, "ambient" }, -   { glsl_type::vec4_type, "diffuse" }, -   { glsl_type::vec4_type, "specular" }, +   { glsl_type::vec4_type, "ambient", false }, +   { glsl_type::vec4_type, "diffuse", false }, +   { glsl_type::vec4_type, "specular", false },  };  static const struct glsl_struct_field gl_FogParameters_fields[] = { -   { glsl_type::vec4_type, "color" }, -   { glsl_type::float_type, "density" }, -   { glsl_type::float_type, "start" }, -   { glsl_type::float_type, "end" }, -   { glsl_type::float_type, "scale" }, +   { glsl_type::vec4_type, "color", false }, +   { glsl_type::float_type, "density", false }, +   { glsl_type::float_type, "start", false }, +   { glsl_type::float_type, "end", false }, +   { glsl_type::float_type, "scale", false },  };  const glsl_type glsl_type::builtin_110_deprecated_structure_types[] = { diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp index e7769419f..ccee7746e 100644 --- a/mesalib/src/glsl/builtin_variables.cpp +++ b/mesalib/src/glsl/builtin_variables.cpp @@ -47,18 +47,18 @@ struct builtin_variable {  };  static const builtin_variable builtin_core_vs_variables[] = { -   { ir_var_out, VERT_RESULT_HPOS, "vec4",  "gl_Position" }, -   { ir_var_out, VERT_RESULT_PSIZ, "float", "gl_PointSize" }, +   { ir_var_shader_out, VERT_RESULT_HPOS, "vec4",  "gl_Position" }, +   { ir_var_shader_out, VERT_RESULT_PSIZ, "float", "gl_PointSize" },  };  static const builtin_variable builtin_core_fs_variables[] = { -   { ir_var_in,  FRAG_ATTRIB_WPOS,  "vec4",  "gl_FragCoord" }, -   { ir_var_in,  FRAG_ATTRIB_FACE,  "bool",  "gl_FrontFacing" }, -   { ir_var_out, FRAG_RESULT_COLOR, "vec4",  "gl_FragColor" }, +   { ir_var_shader_in,  FRAG_ATTRIB_WPOS,  "vec4",  "gl_FragCoord" }, +   { ir_var_shader_in,  FRAG_ATTRIB_FACE,  "bool",  
"gl_FrontFacing" }, +   { ir_var_shader_out, FRAG_RESULT_COLOR, "vec4",  "gl_FragColor" },  };  static const builtin_variable builtin_100ES_fs_variables[] = { -   { ir_var_in,  FRAG_ATTRIB_PNTC,   "vec2",   "gl_PointCoord" }, +   { ir_var_shader_in,  FRAG_ATTRIB_PNTC,   "vec2",   "gl_PointCoord" },  };  static const builtin_variable builtin_300ES_vs_variables[] = { @@ -66,46 +66,46 @@ static const builtin_variable builtin_300ES_vs_variables[] = {  };  static const builtin_variable builtin_300ES_fs_variables[] = { -   { ir_var_in,  FRAG_ATTRIB_WPOS,  "vec4",  "gl_FragCoord" }, -   { ir_var_in,  FRAG_ATTRIB_FACE,  "bool",  "gl_FrontFacing" }, -   { ir_var_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" }, -   { ir_var_in,  FRAG_ATTRIB_PNTC,   "vec2",   "gl_PointCoord" }, +   { ir_var_shader_in,  FRAG_ATTRIB_WPOS,  "vec4",  "gl_FragCoord" }, +   { ir_var_shader_in,  FRAG_ATTRIB_FACE,  "bool",  "gl_FrontFacing" }, +   { ir_var_shader_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" }, +   { ir_var_shader_in,  FRAG_ATTRIB_PNTC,   "vec2",   "gl_PointCoord" },  };  static const builtin_variable builtin_110_fs_variables[] = { -   { ir_var_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" }, +   { ir_var_shader_out, FRAG_RESULT_DEPTH, "float", "gl_FragDepth" },  };  static const builtin_variable builtin_110_deprecated_fs_variables[] = { -   { ir_var_in,  FRAG_ATTRIB_COL0,  "vec4",  "gl_Color" }, -   { ir_var_in,  FRAG_ATTRIB_COL1,  "vec4",  "gl_SecondaryColor" }, -   { ir_var_in,  FRAG_ATTRIB_FOGC,  "float", "gl_FogFragCoord" }, +   { ir_var_shader_in,  FRAG_ATTRIB_COL0,  "vec4",  "gl_Color" }, +   { ir_var_shader_in,  FRAG_ATTRIB_COL1,  "vec4",  "gl_SecondaryColor" }, +   { ir_var_shader_in,  FRAG_ATTRIB_FOGC,  "float", "gl_FogFragCoord" },  };  static const builtin_variable builtin_110_deprecated_vs_variables[] = { -   { ir_var_in,  VERT_ATTRIB_POS,         "vec4",  "gl_Vertex" }, -   { ir_var_in,  VERT_ATTRIB_NORMAL,      "vec3",  "gl_Normal" }, -   { ir_var_in,  
VERT_ATTRIB_COLOR0,      "vec4",  "gl_Color" }, -   { ir_var_in,  VERT_ATTRIB_COLOR1,      "vec4",  "gl_SecondaryColor" }, -   { ir_var_in,  VERT_ATTRIB_TEX0,        "vec4",  "gl_MultiTexCoord0" }, -   { ir_var_in,  VERT_ATTRIB_TEX1,        "vec4",  "gl_MultiTexCoord1" }, -   { ir_var_in,  VERT_ATTRIB_TEX2,        "vec4",  "gl_MultiTexCoord2" }, -   { ir_var_in,  VERT_ATTRIB_TEX3,        "vec4",  "gl_MultiTexCoord3" }, -   { ir_var_in,  VERT_ATTRIB_TEX4,        "vec4",  "gl_MultiTexCoord4" }, -   { ir_var_in,  VERT_ATTRIB_TEX5,        "vec4",  "gl_MultiTexCoord5" }, -   { ir_var_in,  VERT_ATTRIB_TEX6,        "vec4",  "gl_MultiTexCoord6" }, -   { ir_var_in,  VERT_ATTRIB_TEX7,        "vec4",  "gl_MultiTexCoord7" }, -   { ir_var_in,  VERT_ATTRIB_FOG,         "float", "gl_FogCoord" }, -   { ir_var_out, VERT_RESULT_CLIP_VERTEX, "vec4",  "gl_ClipVertex" }, -   { ir_var_out, VERT_RESULT_COL0,        "vec4",  "gl_FrontColor" }, -   { ir_var_out, VERT_RESULT_BFC0,        "vec4",  "gl_BackColor" }, -   { ir_var_out, VERT_RESULT_COL1,        "vec4",  "gl_FrontSecondaryColor" }, -   { ir_var_out, VERT_RESULT_BFC1,        "vec4",  "gl_BackSecondaryColor" }, -   { ir_var_out, VERT_RESULT_FOGC,        "float", "gl_FogFragCoord" }, +   { ir_var_shader_in,  VERT_ATTRIB_POS,         "vec4",  "gl_Vertex" }, +   { ir_var_shader_in,  VERT_ATTRIB_NORMAL,      "vec3",  "gl_Normal" }, +   { ir_var_shader_in,  VERT_ATTRIB_COLOR0,      "vec4",  "gl_Color" }, +   { ir_var_shader_in,  VERT_ATTRIB_COLOR1,      "vec4",  "gl_SecondaryColor" }, +   { ir_var_shader_in,  VERT_ATTRIB_TEX0,        "vec4",  "gl_MultiTexCoord0" }, +   { ir_var_shader_in,  VERT_ATTRIB_TEX1,        "vec4",  "gl_MultiTexCoord1" }, +   { ir_var_shader_in,  VERT_ATTRIB_TEX2,        "vec4",  "gl_MultiTexCoord2" }, +   { ir_var_shader_in,  VERT_ATTRIB_TEX3,        "vec4",  "gl_MultiTexCoord3" }, +   { ir_var_shader_in,  VERT_ATTRIB_TEX4,        "vec4",  "gl_MultiTexCoord4" }, +   { ir_var_shader_in,  VERT_ATTRIB_TEX5,        
"vec4",  "gl_MultiTexCoord5" }, +   { ir_var_shader_in,  VERT_ATTRIB_TEX6,        "vec4",  "gl_MultiTexCoord6" }, +   { ir_var_shader_in,  VERT_ATTRIB_TEX7,        "vec4",  "gl_MultiTexCoord7" }, +   { ir_var_shader_in,  VERT_ATTRIB_FOG,         "float", "gl_FogCoord" }, +   { ir_var_shader_out, VERT_RESULT_CLIP_VERTEX, "vec4",  "gl_ClipVertex" }, +   { ir_var_shader_out, VERT_RESULT_COL0,        "vec4",  "gl_FrontColor" }, +   { ir_var_shader_out, VERT_RESULT_BFC0,        "vec4",  "gl_BackColor" }, +   { ir_var_shader_out, VERT_RESULT_COL1,        "vec4",  "gl_FrontSecondaryColor" }, +   { ir_var_shader_out, VERT_RESULT_BFC1,        "vec4",  "gl_BackSecondaryColor" }, +   { ir_var_shader_out, VERT_RESULT_FOGC,        "float", "gl_FogFragCoord" },  };  static const builtin_variable builtin_120_fs_variables[] = { -   { ir_var_in,  FRAG_ATTRIB_PNTC,   "vec2",   "gl_PointCoord" }, +   { ir_var_shader_in,  FRAG_ATTRIB_PNTC,   "vec2",   "gl_PointCoord" },  };  static const builtin_variable builtin_130_vs_variables[] = { @@ -403,16 +403,18 @@ add_variable(exec_list *instructions, glsl_symbol_table *symtab,     switch (var->mode) {     case ir_var_auto: -   case ir_var_in: -   case ir_var_const_in: +   case ir_var_shader_in:     case ir_var_uniform:     case ir_var_system_value:        var->read_only = true;        break; -   case ir_var_inout: -   case ir_var_out: +   case ir_var_shader_out:        break;     default: +      /* The only variables that are added using this function should be +       * uniforms, shader inputs, and shader outputs, constants (which use +       * ir_var_auto), and system values. 
+       */        assert(0);        break;     } @@ -752,7 +754,8 @@ generate_110_vs_variables(exec_list *instructions,        glsl_type::get_array_instance(glsl_type::vec4_type, 0);     add_variable(instructions, state->symbols, -		"gl_TexCoord", vec4_array_type, ir_var_out, VERT_RESULT_TEX0); +		"gl_TexCoord", vec4_array_type, ir_var_shader_out, +                VERT_RESULT_TEX0);     generate_ARB_draw_buffers_variables(instructions, state, false,  				       vertex_shader); @@ -812,7 +815,7 @@ generate_130_vs_variables(exec_list *instructions,        glsl_type::get_array_instance(glsl_type::float_type, 0);     add_variable(instructions, state->symbols, -		"gl_ClipDistance", clip_distance_array_type, ir_var_out, +		"gl_ClipDistance", clip_distance_array_type, ir_var_shader_out,                  VERT_RESULT_CLIP_DIST0);  } @@ -937,7 +940,8 @@ generate_110_fs_variables(exec_list *instructions,        glsl_type::get_array_instance(glsl_type::vec4_type, 0);     add_variable(instructions, state->symbols, -		"gl_TexCoord", vec4_array_type, ir_var_in, FRAG_ATTRIB_TEX0); +		"gl_TexCoord", vec4_array_type, ir_var_shader_in, +                FRAG_ATTRIB_TEX0);     generate_ARB_draw_buffers_variables(instructions, state, false,  				       fragment_shader); @@ -969,7 +973,7 @@ generate_ARB_draw_buffers_variables(exec_list *instructions,        ir_variable *const fd =  	 add_variable(instructions, state->symbols,  		      "gl_FragData", vec4_array_type, -		      ir_var_out, FRAG_RESULT_DATA0); +		      ir_var_shader_out, FRAG_RESULT_DATA0);        if (warn)  	 fd->warn_extension = "GL_ARB_draw_buffers"; @@ -1026,7 +1030,7 @@ generate_ARB_shader_stencil_export_variables(exec_list *instructions,     ir_variable *const fd =        add_variable(instructions, state->symbols,  		   "gl_FragStencilRefARB", glsl_type::int_type, -		   ir_var_out, FRAG_RESULT_STENCIL); +		   ir_var_shader_out, FRAG_RESULT_STENCIL);     if (warn)        fd->warn_extension = 
"GL_ARB_shader_stencil_export"; @@ -1042,7 +1046,7 @@ generate_AMD_shader_stencil_export_variables(exec_list *instructions,     ir_variable *const fd =        add_variable(instructions, state->symbols,  		   "gl_FragStencilRefAMD", glsl_type::int_type, -		   ir_var_out, FRAG_RESULT_STENCIL); +		   ir_var_shader_out, FRAG_RESULT_STENCIL);     if (warn)        fd->warn_extension = "GL_AMD_shader_stencil_export"; @@ -1083,7 +1087,7 @@ generate_fs_clipdistance(exec_list *instructions,        glsl_type::get_array_instance(glsl_type::float_type, 0);     add_variable(instructions, state->symbols, -		"gl_ClipDistance", clip_distance_array_type, ir_var_in, +		"gl_ClipDistance", clip_distance_array_type, ir_var_shader_in,                  FRAG_ATTRIB_CLIP_DIST0);  } diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y index 8fba923a2..e927c7cb7 100644 --- a/mesalib/src/glsl/glcpp/glcpp-parse.y +++ b/mesalib/src/glsl/glcpp/glcpp-parse.y @@ -1227,6 +1227,9 @@ glcpp_parser_create (const struct gl_extensions *extensions, int api)  	      if (extensions->ARB_texture_cube_map_array)  	         add_builtin_define(parser, "GL_ARB_texture_cube_map_array", 1); + +	      if (extensions->ARB_shading_language_packing) +	         add_builtin_define(parser, "GL_ARB_shading_language_packing", 1);  	   }  	} diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll index 2f66c5828..ddc9f8073 100644 --- a/mesalib/src/glsl/glsl_lexer.ll +++ b/mesalib/src/glsl/glsl_lexer.ll @@ -399,23 +399,23 @@ layout		{  			}  [0-9]+\.[0-9]+([eE][+-]?[0-9]+)?[fF]?	{ -			    yylval->real = glsl_strtod(yytext, NULL); +			    yylval->real = glsl_strtof(yytext, NULL);  			    return FLOATCONSTANT;  			}  \.[0-9]+([eE][+-]?[0-9]+)?[fF]?		{ -			    yylval->real = glsl_strtod(yytext, NULL); +			    yylval->real = glsl_strtof(yytext, NULL);  			    return FLOATCONSTANT;  			}  [0-9]+\.([eE][+-]?[0-9]+)?[fF]?		
{ -			    yylval->real = glsl_strtod(yytext, NULL); +			    yylval->real = glsl_strtof(yytext, NULL);  			    return FLOATCONSTANT;  			}  [0-9]+[eE][+-]?[0-9]+[fF]?		{ -			    yylval->real = glsl_strtod(yytext, NULL); +			    yylval->real = glsl_strtof(yytext, NULL);  			    return FLOATCONSTANT;  			}  [0-9]+[fF]		{ -			    yylval->real = glsl_strtod(yytext, NULL); +			    yylval->real = glsl_strtof(yytext, NULL);  			    return FLOATCONSTANT;  			} diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy index 88aae64d4..154ce2d09 100644 --- a/mesalib/src/glsl/glsl_parser.yy +++ b/mesalib/src/glsl/glsl_parser.yy @@ -79,6 +79,7 @@ static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg)     ast_case_label_list *case_label_list;     ast_case_statement *case_statement;     ast_case_statement_list *case_statement_list; +   ast_uniform_block *uniform_block;     struct {        ast_node *cond; @@ -112,6 +113,7 @@ static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg)  %token STRUCT VOID_TOK WHILE  %token <identifier> IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER  %type <identifier> any_identifier +%type <uniform_block> instance_name_opt  %token <real> FLOATCONSTANT  %token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT  %token <identifier> FIELD_SELECTION @@ -221,6 +223,7 @@ static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg)  %type <node> declaration_statement  %type <node> jump_statement  %type <node> uniform_block +%type <uniform_block> basic_uniform_block  %type <struct_specifier> struct_specifier  %type <declarator_list> struct_declaration_list  %type <declarator_list> struct_declaration @@ -1884,31 +1887,27 @@ function_definition:  /* layout_qualifieropt is packed into this rule */  uniform_block: -	UNIFORM NEW_IDENTIFIER '{' member_list '}' ';' +	basic_uniform_block  	{ -	   void *ctx = state; -	   $$ = new(ctx) ast_uniform_block(*state->default_uniform_qualifier, -					   $2, 
$4); - -	   if (!state->ARB_uniform_buffer_object_enable) { -	      _mesa_glsl_error(& @1, state, -			       "#version 140 / GL_ARB_uniform_buffer_object " -			       "required for defining uniform blocks\n"); -	   } else if (state->ARB_uniform_buffer_object_warn) { -	      _mesa_glsl_warning(& @1, state, -				 "#version 140 / GL_ARB_uniform_buffer_object " -				 "required for defining uniform blocks\n"); -	   } +	   $$ = $1;  	} -	| layout_qualifier UNIFORM NEW_IDENTIFIER '{' member_list '}' ';' +	| layout_qualifier basic_uniform_block  	{ -	   void *ctx = state; - -	   ast_type_qualifier qual = *state->default_uniform_qualifier; -	   if (!qual.merge_qualifier(& @1, state, $1)) { +	   ast_uniform_block *block = $2; +	   if (!block->layout.merge_qualifier(& @1, state, $1)) {  	      YYERROR;  	   } -	   $$ = new(ctx) ast_uniform_block(qual, $3, $5); +	   $$ = block; +	} +	; + +basic_uniform_block: +	UNIFORM NEW_IDENTIFIER '{' member_list '}' instance_name_opt ';' +	{ +	   ast_uniform_block *const block = $6; + +	   block->block_name = $2; +	   block->declarations.push_degenerate_list_at_head(& $4->link);  	   if (!state->ARB_uniform_buffer_object_enable) {  	      _mesa_glsl_error(& @1, state, @@ -1919,6 +1918,49 @@ uniform_block:  				 "#version 140 / GL_ARB_uniform_buffer_object "  				 "required for defining uniform blocks\n");  	   } + +	   /* Since block arrays require names, and both features are added in +	    * the same language versions, we don't have to explicitly +	    * version-check both things. 
+	    */ +	   if (block->instance_name != NULL +	       && !(state->language_version == 300 && state->es_shader)) { +	      _mesa_glsl_error(& @1, state, +			       "#version 300 es required for using uniform " +			       "blocks with an instance name\n"); +	   } + +	   $$ = block; +	} +	; + +instance_name_opt: +	/* empty */ +	{ +	   $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier, +					     NULL, +					     NULL); +	} +	| NEW_IDENTIFIER +	{ +	   $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier, +					     $1, +					     NULL); +	} +	| NEW_IDENTIFIER '[' constant_expression ']' +	{ +	   $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier, +					     $1, +					     $3); +	} +	| NEW_IDENTIFIER '[' ']' +	{ +	   _mesa_glsl_error(& @1, state, +			    "instance block arrays must be explicitly sized\n"); + +	   $$ = new(state) ast_uniform_block(*state->default_uniform_qualifier, +					     $1, +					     NULL);  	}  	; diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp index b460c8619..c8dbc89ff 100644 --- a/mesalib/src/glsl/glsl_parser_extras.cpp +++ b/mesalib/src/glsl/glsl_parser_extras.cpp @@ -462,6 +462,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {     EXT(ARB_uniform_buffer_object,      true,  false, true,  true,  false,     ARB_uniform_buffer_object),     EXT(OES_standard_derivatives,       false, false, true,  false,  true,     OES_standard_derivatives),     EXT(ARB_texture_cube_map_array,     true,  false, true,  true,  false,     ARB_texture_cube_map_array), +   EXT(ARB_shading_language_packing,   true,  false, true,  true,  false,     ARB_shading_language_packing),  };  #undef EXT diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h index 2e6bb0b0a..53df149d8 100644 --- a/mesalib/src/glsl/glsl_parser_extras.h +++ b/mesalib/src/glsl/glsl_parser_extras.h @@ -272,6 +272,8 @@ struct 
_mesa_glsl_parse_state {     bool OES_standard_derivatives_warn;     bool ARB_texture_cube_map_array_enable;     bool ARB_texture_cube_map_array_warn; +   bool ARB_shading_language_packing_enable; +   bool ARB_shading_language_packing_warn;     /*@}*/     /** Extensions supported by the OpenGL implementation. */ diff --git a/mesalib/src/glsl/glsl_symbol_table.cpp b/mesalib/src/glsl/glsl_symbol_table.cpp index eb275b12e..8d34547c6 100644 --- a/mesalib/src/glsl/glsl_symbol_table.cpp +++ b/mesalib/src/glsl/glsl_symbol_table.cpp @@ -41,15 +41,13 @@ public:        ralloc_free(entry);     } -   symbol_table_entry(ir_variable *v)               : v(v), f(0), t(0), u(0) {} -   symbol_table_entry(ir_function *f)               : v(0), f(f), t(0), u(0) {} -   symbol_table_entry(const glsl_type *t)           : v(0), f(0), t(t), u(0) {} -   symbol_table_entry(struct gl_uniform_block *u)   : v(0), f(0), t(0), u(u) {} +   symbol_table_entry(ir_variable *v)               : v(v), f(0), t(0) {} +   symbol_table_entry(ir_function *f)               : v(0), f(f), t(0) {} +   symbol_table_entry(const glsl_type *t)           : v(0), f(0), t(t) {}     ir_variable *v;     ir_function *f;     const glsl_type *t; -   struct gl_uniform_block *u;  };  glsl_symbol_table::glsl_symbol_table() @@ -134,12 +132,6 @@ bool glsl_symbol_table::add_function(ir_function *f)     return _mesa_symbol_table_add_symbol(table, -1, f->name, entry) == 0;  } -bool glsl_symbol_table::add_uniform_block(struct gl_uniform_block *u) -{ -   symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(u); -   return _mesa_symbol_table_add_symbol(table, -1, u->Name, entry) == 0; -} -  void glsl_symbol_table::add_global_function(ir_function *f)  {     symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f); diff --git a/mesalib/src/glsl/glsl_symbol_table.h b/mesalib/src/glsl/glsl_symbol_table.h index f95fb8a01..9f5602787 100644 --- a/mesalib/src/glsl/glsl_symbol_table.h +++ b/mesalib/src/glsl/glsl_symbol_table.h @@ 
-99,7 +99,6 @@ public:     bool add_variable(ir_variable *v);     bool add_type(const char *name, const glsl_type *t);     bool add_function(ir_function *f); -   bool add_uniform_block(struct gl_uniform_block *u);     /*@}*/     /** diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp index 71b185027..4a2c87907 100644 --- a/mesalib/src/glsl/glsl_types.cpp +++ b/mesalib/src/glsl/glsl_types.cpp @@ -34,6 +34,7 @@ extern "C" {  hash_table *glsl_type::array_types = NULL;  hash_table *glsl_type::record_types = NULL; +hash_table *glsl_type::interface_types = NULL;  void *glsl_type::mem_ctx = NULL;  void @@ -51,7 +52,7 @@ glsl_type::glsl_type(GLenum gl_type,     gl_type(gl_type),     base_type(base_type),     sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), -   sampler_type(0), +   sampler_type(0), interface_packing(0),     vector_elements(vector_elements), matrix_columns(matrix_columns),     length(0)  { @@ -69,7 +70,7 @@ glsl_type::glsl_type(GLenum gl_type,     gl_type(gl_type),     base_type(GLSL_TYPE_SAMPLER),     sampler_dimensionality(dim), sampler_shadow(shadow), -   sampler_array(array), sampler_type(type), +   sampler_array(array), sampler_type(type), interface_packing(0),     vector_elements(0), matrix_columns(0),     length(0)  { @@ -82,7 +83,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,  		     const char *name) :     base_type(GLSL_TYPE_STRUCT),     sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), -   sampler_type(0), +   sampler_type(0), interface_packing(0),     vector_elements(0), matrix_columns(0),     length(num_fields)  { @@ -96,6 +97,29 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,        this->fields.structure[i].type = fields[i].type;        this->fields.structure[i].name = ralloc_strdup(this->fields.structure,  						     fields[i].name); +      this->fields.structure[i].row_major = fields[i].row_major; +   } +} + 
+glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, +		     enum glsl_interface_packing packing, const char *name) : +   base_type(GLSL_TYPE_INTERFACE), +   sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), +   sampler_type(0), interface_packing((unsigned) packing), +   vector_elements(0), matrix_columns(0), +   length(num_fields) +{ +   unsigned int i; + +   init_ralloc_type_ctx(); +   this->name = ralloc_strdup(this->mem_ctx, name); +   this->fields.structure = ralloc_array(this->mem_ctx, +					 glsl_struct_field, length); +   for (i = 0; i < length; i++) { +      this->fields.structure[i].type = fields[i].type; +      this->fields.structure[i].name = ralloc_strdup(this->fields.structure, +						     fields[i].name); +      this->fields.structure[i].row_major = fields[i].row_major;     }  } @@ -429,7 +453,7 @@ _mesa_glsl_release_types(void)  glsl_type::glsl_type(const glsl_type *array, unsigned length) :     base_type(GLSL_TYPE_ARRAY),     sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), -   sampler_type(0), +   sampler_type(0), interface_packing(0),     vector_elements(0), matrix_columns(0),     name(NULL), length(length)  { @@ -561,12 +585,18 @@ glsl_type::record_key_compare(const void *a, const void *b)     if (key1->length != key2->length)        return 1; +   if (key1->interface_packing != key2->interface_packing) +      return 1; +     for (unsigned i = 0; i < key1->length; i++) {        if (key1->fields.structure[i].type != key2->fields.structure[i].type)  	 return 1;        if (strcmp(key1->fields.structure[i].name,  		 key2->fields.structure[i].name) != 0)  	 return 1; +      if (key1->fields.structure[i].row_major +         != key2->fields.structure[i].row_major) +        return 1;     }     return 0; @@ -621,9 +651,37 @@ glsl_type::get_record_instance(const glsl_struct_field *fields,  const glsl_type * +glsl_type::get_interface_instance(const glsl_struct_field *fields, +				  unsigned num_fields, +			
	  enum glsl_interface_packing packing, +				  const char *name) +{ +   const glsl_type key(fields, num_fields, packing, name); + +   if (interface_types == NULL) { +      interface_types = hash_table_ctor(64, record_key_hash, record_key_compare); +   } + +   const glsl_type *t = (glsl_type *) hash_table_find(interface_types, & key); +   if (t == NULL) { +      t = new glsl_type(fields, num_fields, packing, name); + +      hash_table_insert(interface_types, (void *) t, t); +   } + +   assert(t->base_type == GLSL_TYPE_INTERFACE); +   assert(t->length == num_fields); +   assert(strcmp(t->name, name) == 0); + +   return t; +} + + +const glsl_type *  glsl_type::field_type(const char *name) const  { -   if (this->base_type != GLSL_TYPE_STRUCT) +   if (this->base_type != GLSL_TYPE_STRUCT +       && this->base_type != GLSL_TYPE_INTERFACE)        return error_type;     for (unsigned i = 0; i < this->length; i++) { @@ -638,7 +696,8 @@ glsl_type::field_type(const char *name) const  int  glsl_type::field_index(const char *name) const  { -   if (this->base_type != GLSL_TYPE_STRUCT) +   if (this->base_type != GLSL_TYPE_STRUCT +       && this->base_type != GLSL_TYPE_INTERFACE)        return -1;     for (unsigned i = 0; i < this->length; i++) { @@ -660,7 +719,8 @@ glsl_type::component_slots() const     case GLSL_TYPE_BOOL:        return this->components(); -   case GLSL_TYPE_STRUCT: { +   case GLSL_TYPE_STRUCT: +   case GLSL_TYPE_INTERFACE: {        unsigned size = 0;        for (unsigned i = 0; i < this->length; i++) @@ -672,9 +732,13 @@ glsl_type::component_slots() const     case GLSL_TYPE_ARRAY:        return this->length * this->fields.array->component_slots(); -   default: -      return 0; +   case GLSL_TYPE_SAMPLER: +   case GLSL_TYPE_VOID: +   case GLSL_TYPE_ERROR: +      break;     } + +   return 0;  }  bool @@ -799,12 +863,6 @@ glsl_type::std140_base_alignment(bool row_major) const     return -1;  } -static unsigned -align(unsigned val, unsigned align) -{ -   return (val 
+ align - 1) / align * align; -} -  unsigned  glsl_type::std140_size(bool row_major) const  { @@ -906,11 +964,11 @@ glsl_type::std140_size(bool row_major) const        for (unsigned i = 0; i < this->length; i++) {  	 const struct glsl_type *field_type = this->fields.structure[i].type;  	 unsigned align = field_type->std140_base_alignment(row_major); -	 size = (size + align - 1) / align * align; +	 size = glsl_align(size, align);  	 size += field_type->std140_size(row_major);        } -      size = align(size, -		   this->fields.structure[0].type->std140_base_alignment(row_major)); +      size = glsl_align(size, +			this->fields.structure[0].type->std140_base_alignment(row_major));        return size;     } diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h index d6f5c105e..b0db2bf11 100644 --- a/mesalib/src/glsl/glsl_types.h +++ b/mesalib/src/glsl/glsl_types.h @@ -54,6 +54,7 @@ enum glsl_base_type {     GLSL_TYPE_BOOL,     GLSL_TYPE_SAMPLER,     GLSL_TYPE_STRUCT, +   GLSL_TYPE_INTERFACE,     GLSL_TYPE_ARRAY,     GLSL_TYPE_VOID,     GLSL_TYPE_ERROR @@ -69,6 +70,12 @@ enum glsl_sampler_dim {     GLSL_SAMPLER_DIM_EXTERNAL  }; +enum glsl_interface_packing { +   GLSL_INTERFACE_PACKING_STD140, +   GLSL_INTERFACE_PACKING_SHARED, +   GLSL_INTERFACE_PACKING_PACKED +}; +  #ifdef __cplusplus  #include "GL/gl.h"  #include "ralloc.h" @@ -84,6 +91,7 @@ struct glsl_type {  				* only \c GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT,  				* and \c GLSL_TYPE_UINT are valid.  				*/ +   unsigned interface_packing:2;     /* Callers of this ralloc-based new need not call delete. It's      * easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */ @@ -130,8 +138,9 @@ struct glsl_type {     /**      * For \c GLSL_TYPE_ARRAY, this is the length of the array.  For -    * \c GLSL_TYPE_STRUCT, it is the number of elements in the structure and -    * the number of values pointed to by \c fields.structure (below). 
+    * \c GLSL_TYPE_STRUCT or \c GLSL_TYPE_INTERFACE, it is the number of +    * elements in the structure and the number of values pointed to by +    * \c fields.structure (below).      */     unsigned length; @@ -232,6 +241,14 @@ struct glsl_type {  					       const char *name);     /** +    * Get the instance of an interface block type +    */ +   static const glsl_type *get_interface_instance(const glsl_struct_field *fields, +						  unsigned num_fields, +						  enum glsl_interface_packing packing, +						  const char *name); + +   /**      * Query the total number of scalars that make up a scalar, vector or matrix      */     unsigned components() const @@ -394,6 +411,14 @@ struct glsl_type {     }     /** +    * Query whether or not a type is an interface +    */ +   bool is_interface() const +   { +      return base_type == GLSL_TYPE_INTERFACE; +   } + +   /**      * Query whether or not a type is the void type singleton.      */     bool is_void() const @@ -491,6 +516,10 @@ private:     glsl_type(const glsl_struct_field *fields, unsigned num_fields,  	     const char *name); +   /** Constructor for interface types */ +   glsl_type(const glsl_struct_field *fields, unsigned num_fields, +	     enum glsl_interface_packing packing, const char *name); +     /** Constructor for array types */     glsl_type(const glsl_type *array, unsigned length); @@ -500,6 +529,9 @@ private:     /** Hash table containing the known record types. */     static struct hash_table *record_types; +   /** Hash table containing the known interface types. 
*/ +   static struct hash_table *interface_types; +     static int record_key_compare(const void *a, const void *b);     static unsigned record_key_hash(const void *key); @@ -566,8 +598,15 @@ private:  struct glsl_struct_field {     const struct glsl_type *type;     const char *name; +   bool row_major;  }; +static inline unsigned int +glsl_align(unsigned int a, unsigned int align) +{ +   return (a + align - 1) / align * align; +} +  #endif /* __cplusplus */  #endif /* GLSL_TYPES_H */ diff --git a/mesalib/src/glsl/hir_field_selection.cpp b/mesalib/src/glsl/hir_field_selection.cpp index ac416d5da..0035a5f81 100644 --- a/mesalib/src/glsl/hir_field_selection.cpp +++ b/mesalib/src/glsl/hir_field_selection.cpp @@ -61,7 +61,8 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr,  	 _mesa_glsl_error(& loc, state, "Invalid swizzle / mask `%s'",  			  expr->primary_expression.identifier);        } -   } else if (op->type->base_type == GLSL_TYPE_STRUCT) { +   } else if (op->type->base_type == GLSL_TYPE_STRUCT +              || op->type->base_type == GLSL_TYPE_INTERFACE) {        result = new(ctx) ir_dereference_record(op,  					      expr->primary_expression.identifier); diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp index 703f5ec58..954995db3 100644 --- a/mesalib/src/glsl/ir.cpp +++ b/mesalib/src/glsl/ir.cpp @@ -306,6 +306,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)        break;     case ir_unop_noise: +   case ir_unop_unpack_half_2x16_split_x: +   case ir_unop_unpack_half_2x16_split_y:        this->type = glsl_type::float_type;        break; @@ -313,6 +315,25 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)        this->type = glsl_type::bool_type;        break; +   case ir_unop_pack_snorm_2x16: +   case ir_unop_pack_snorm_4x8: +   case ir_unop_pack_unorm_2x16: +   case ir_unop_pack_unorm_4x8: +   case ir_unop_pack_half_2x16: +      this->type = glsl_type::uint_type; +      break; + +   case ir_unop_unpack_snorm_2x16: +   
case ir_unop_unpack_unorm_2x16: +   case ir_unop_unpack_half_2x16: +      this->type = glsl_type::vec2_type; +      break; + +   case ir_unop_unpack_snorm_4x8: +   case ir_unop_unpack_unorm_4x8: +      this->type = glsl_type::vec4_type; +      break; +     default:        assert(!"not reached: missing automatic type setup for ir_expression");        this->type = op0->type; @@ -364,10 +385,15 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)     case ir_binop_bit_and:     case ir_binop_bit_xor:     case ir_binop_bit_or: +       assert(!op0->type->is_matrix()); +       assert(!op1->type->is_matrix());        if (op0->type->is_scalar()) { -	 this->type = op1->type; +         this->type = op1->type;        } else if (op1->type->is_scalar()) { -	 this->type = op0->type; +         this->type = op0->type; +      } else { +          assert(op0->type->vector_elements == op1->type->vector_elements); +          this->type = op0->type;        }        break; @@ -386,6 +412,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)        this->type = glsl_type::float_type;        break; +   case ir_binop_pack_half_2x16_split: +      this->type = glsl_type::uint_type; +      break; +     case ir_binop_lshift:     case ir_binop_rshift:        this->type = op0->type; @@ -454,6 +484,18 @@ static const char *const operator_strs[] = {     "cos_reduced",     "dFdx",     "dFdy", +   "packSnorm2x16", +   "packSnorm4x8", +   "packUnorm2x16", +   "packUnorm4x8", +   "packHalf2x16", +   "unpackSnorm2x16", +   "unpackSnorm4x8", +   "unpackUnorm2x16", +   "unpackUnorm4x8", +   "unpackHalf2x16", +   "unpackHalf2x16_split_x", +   "unpackHalf2x16_split_y",     "noise",     "+",     "-", @@ -480,6 +522,7 @@ static const char *const operator_strs[] = {     "min",     "max",     "pow", +   "packHalf2x16_split",     "ubo_load",     "vector",  }; @@ -1493,7 +1536,6 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,     
this->has_initializer = false;     this->location = -1;     this->location_frac = 0; -   this->uniform_block = -1;     this->warn_extension = NULL;     this->constant_value = NULL;     this->constant_initializer = NULL; @@ -1553,8 +1595,8 @@ modes_match(unsigned a, unsigned b)        return true;     /* Accept "in" vs. "const in" */ -   if ((a == ir_var_const_in && b == ir_var_in) || -       (b == ir_var_const_in && a == ir_var_in)) +   if ((a == ir_var_const_in && b == ir_var_function_in) || +       (b == ir_var_const_in && a == ir_var_function_in))        return true;     return false; diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h index 85fc5ce95..efd80dad8 100644 --- a/mesalib/src/glsl/ir.h +++ b/mesalib/src/glsl/ir.h @@ -265,9 +265,11 @@ protected:  enum ir_variable_mode {     ir_var_auto = 0,     /**< Function local variables and globals. */     ir_var_uniform,      /**< Variable declared as a uniform. */ -   ir_var_in, -   ir_var_out, -   ir_var_inout, +   ir_var_shader_in, +   ir_var_shader_out, +   ir_var_function_in, +   ir_var_function_out, +   ir_var_function_inout,     ir_var_const_in,	/**< "in" param that must be a constant expression */     ir_var_system_value, /**< Ex: front-face, instance-id, etc. */     ir_var_temporary	/**< Temporary variable generated during compilation. */ @@ -348,6 +350,41 @@ public:     glsl_interp_qualifier determine_interpolation_mode(bool flat_shade);     /** +    * Determine whether or not a variable is part of a uniform block. +    */ +   inline bool is_in_uniform_block() const +   { +      return this->mode == ir_var_uniform && this->interface_type != NULL; +   } + +   /** +    * Determine whether or not a variable is the declaration of an interface +    * block +    * +    * For the first declaration below, there will be an \c ir_variable named +    * "instance" whose type and whose instance_type will be the same +    *  \cglsl_type.  
For the second declaration, there will be an \c ir_variable +    * named "f" whose type is float and whose instance_type is B2. +    * +    * "instance" is an interface instance variable, but "f" is not. +    * +    * uniform B1 { +    *     float f; +    * } instance; +    * +    * uniform B2 { +    *     float f; +    * }; +    */ +   inline bool is_interface_instance() const +   { +      const glsl_type *const t = this->type; + +      return (t == this->interface_type) +         || (t->is_array() && t->fields.array == this->interface_type); +    } + +   /**      * Declared type of the variable      */     const struct glsl_type *type; @@ -401,7 +438,7 @@ public:      *      * \sa ir_variable_mode      */ -   unsigned mode:3; +   unsigned mode:4;     /**      * Interpolation mode for shader inputs / outputs @@ -481,16 +518,6 @@ public:     int location;     /** -    * Uniform block number for uniforms. -    * -    * This index is into the shader's list of uniform blocks, not the -    * linked program's merged list. -    * -    * If the variable is not in a uniform block, the value will be -1. -    */ -   int uniform_block; - -   /**      * output index for dual source blending.      */     int index; @@ -530,6 +557,14 @@ public:      * objects.      */     ir_constant *constant_initializer; + +   /** +    * For variables that are in an interface block or are an instance of an +    * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. +    * +    * \sa ir_variable::location +    */ +   const glsl_type *interface_type;  }; @@ -908,7 +943,7 @@ public:     unsigned write_mask:4;  }; -/* Update ir_expression::num_operands() and operator_strs when +/* Update ir_expression::get_num_operands() and operator_strs when   * updating this list.   */  enum ir_expression_operation { @@ -969,6 +1004,32 @@ enum ir_expression_operation {     ir_unop_dFdy,     /*@}*/ +   /** +    * \name Floating point pack and unpack operations. 
+    */ +   /*@{*/ +   ir_unop_pack_snorm_2x16, +   ir_unop_pack_snorm_4x8, +   ir_unop_pack_unorm_2x16, +   ir_unop_pack_unorm_4x8, +   ir_unop_pack_half_2x16, +   ir_unop_unpack_snorm_2x16, +   ir_unop_unpack_snorm_4x8, +   ir_unop_unpack_unorm_2x16, +   ir_unop_unpack_unorm_4x8, +   ir_unop_unpack_half_2x16, +   /*@}*/ + +   /** +    * \name Lowered floating point unpacking operations. +    * +    * \see lower_packing_builtins_visitor::split_unpack_half_2x16 +    */ +   /*@{*/ +   ir_unop_unpack_half_2x16_split_x, +   ir_unop_unpack_half_2x16_split_y, +   /*@}*/ +     ir_unop_noise,     /** @@ -1036,6 +1097,15 @@ enum ir_expression_operation {     ir_binop_pow,     /** +    * \name Lowered floating point packing operations. +    * +    * \see lower_packing_builtins_visitor::split_pack_half_2x16 +    */ +   /*@{*/ +   ir_binop_pack_half_2x16_split, +   /*@}*/ + +   /**      * Load a value the size of a given GLSL type from a uniform block.      *      * operand0 is the ir_constant uniform block index in the linked shader. 
diff --git a/mesalib/src/glsl/ir_builder.cpp b/mesalib/src/glsl/ir_builder.cpp index c62f0b115..8fb30a02a 100644 --- a/mesalib/src/glsl/ir_builder.cpp +++ b/mesalib/src/glsl/ir_builder.cpp @@ -188,11 +188,27 @@ ir_expression *mul(operand a, operand b)     return expr(ir_binop_mul, a, b);  } +ir_expression *div(operand a, operand b) +{ +   return expr(ir_binop_div, a, b); +} + +ir_expression *round_even(operand a) +{ +   return expr(ir_unop_round_even, a); +} +  ir_expression *dot(operand a, operand b)  {     return expr(ir_binop_dot, a, b);  } +ir_expression* +clamp(operand a, operand b, operand c) +{ +   return expr(ir_binop_min, expr(ir_binop_max, a, b), c); +} +  ir_expression *  saturate(operand a)  { @@ -203,4 +219,147 @@ saturate(operand a)  	       new(mem_ctx) ir_constant(0.0f));  } +ir_expression* +equal(operand a, operand b) +{ +   return expr(ir_binop_equal, a, b); +} + +ir_expression* +less(operand a, operand b) +{ +   return expr(ir_binop_less, a, b); +} + +ir_expression* +greater(operand a, operand b) +{ +   return expr(ir_binop_greater, a, b); +} + +ir_expression* +lequal(operand a, operand b) +{ +   return expr(ir_binop_lequal, a, b); +} + +ir_expression* +gequal(operand a, operand b) +{ +   return expr(ir_binop_gequal, a, b); +} + +ir_expression* +logic_not(operand a) +{ +   return expr(ir_unop_logic_not, a); +} + +ir_expression* +logic_and(operand a, operand b) +{ +   return expr(ir_binop_logic_and, a, b); +} + +ir_expression* +logic_or(operand a, operand b) +{ +   return expr(ir_binop_logic_or, a, b); +} + +ir_expression* +bit_not(operand a) +{ +   return expr(ir_unop_bit_not, a); +} + +ir_expression* +bit_and(operand a, operand b) +{ +   return expr(ir_binop_bit_and, a, b); +} + +ir_expression* +bit_or(operand a, operand b) +{ +   return expr(ir_binop_bit_or, a, b); +} + +ir_expression* +lshift(operand a, operand b) +{ +   return expr(ir_binop_lshift, a, b); +} + +ir_expression* +rshift(operand a, operand b) +{ +   return expr(ir_binop_rshift, 
a, b); +} + +ir_expression* +f2i(operand a) +{ +   return expr(ir_unop_f2i, a); +} + +ir_expression* +i2f(operand a) +{ +   return expr(ir_unop_i2f, a); +} + +ir_expression* +i2u(operand a) +{ +   return expr(ir_unop_i2u, a); +} + +ir_expression* +u2i(operand a) +{ +   return expr(ir_unop_u2i, a); +} + +ir_expression* +f2u(operand a) +{ +   return expr(ir_unop_f2u, a); +} + +ir_expression* +u2f(operand a) +{ +   return expr(ir_unop_u2f, a); +} + +ir_if* +if_tree(operand condition, +        ir_instruction *then_branch) +{ +   assert(then_branch != NULL); + +   void *mem_ctx = ralloc_parent(condition.val); + +   ir_if *result = new(mem_ctx) ir_if(condition.val); +   result->then_instructions.push_tail(then_branch); +   return result; +} + +ir_if* +if_tree(operand condition, +        ir_instruction *then_branch, +        ir_instruction *else_branch) +{ +   assert(then_branch != NULL); +   assert(else_branch != NULL); + +   void *mem_ctx = ralloc_parent(condition.val); + +   ir_if *result = new(mem_ctx) ir_if(condition.val); +   result->then_instructions.push_tail(then_branch); +   result->else_instructions.push_tail(else_branch); +   return result; +} +  } /* namespace ir_builder */ diff --git a/mesalib/src/glsl/ir_builder.h b/mesalib/src/glsl/ir_builder.h index 067858df4..690ac74eb 100644 --- a/mesalib/src/glsl/ir_builder.h +++ b/mesalib/src/glsl/ir_builder.h @@ -25,6 +25,15 @@  namespace ir_builder { +#ifndef WRITEMASK_X +enum writemask { +   WRITEMASK_X = 0x1, +   WRITEMASK_Y = 0x2, +   WRITEMASK_Z = 0x4, +   WRITEMASK_W = 0x8, +}; +#endif +  /**   * This little class exists to let the helper expression generators   * take either an ir_rvalue * or an ir_variable * to be automatically @@ -73,9 +82,40 @@ public:  class ir_factory {  public: +   ir_factory() +      : instructions(NULL), +        mem_ctx(NULL) +   { +      return; +   } +     void emit(ir_instruction *ir);     ir_variable *make_temp(const glsl_type *type, const char *name); +   ir_constant* +   
constant(float f) +   { +      return new(mem_ctx) ir_constant(f); +   } + +   ir_constant* +   constant(int i) +   { +      return new(mem_ctx) ir_constant(i); +   } + +   ir_constant* +   constant(unsigned u) +   { +      return new(mem_ctx) ir_constant(u); +   } + +   ir_constant* +   constant(bool b) +   { +      return new(mem_ctx) ir_constant(b); +   } +     exec_list *instructions;     void *mem_ctx;  }; @@ -88,9 +128,35 @@ ir_expression *expr(ir_expression_operation op, operand a, operand b);  ir_expression *add(operand a, operand b);  ir_expression *sub(operand a, operand b);  ir_expression *mul(operand a, operand b); +ir_expression *div(operand a, operand b); +ir_expression *round_even(operand a);  ir_expression *dot(operand a, operand b); +ir_expression *clamp(operand a, operand b, operand c);  ir_expression *saturate(operand a); +ir_expression *equal(operand a, operand b); +ir_expression *less(operand a, operand b); +ir_expression *greater(operand a, operand b); +ir_expression *lequal(operand a, operand b); +ir_expression *gequal(operand a, operand b); + +ir_expression *logic_not(operand a); +ir_expression *logic_and(operand a, operand b); +ir_expression *logic_or(operand a, operand b); + +ir_expression *bit_not(operand a); +ir_expression *bit_or(operand a, operand b); +ir_expression *bit_and(operand a, operand b); +ir_expression *lshift(operand a, operand b); +ir_expression *rshift(operand a, operand b); + +ir_expression *f2i(operand a); +ir_expression *i2f(operand a); +ir_expression *f2u(operand a); +ir_expression *u2f(operand a); +ir_expression *i2u(operand a); +ir_expression *u2i(operand a); +  /**   * Swizzle away later components, but preserve the ordering.   
*/ @@ -108,4 +174,10 @@ ir_swizzle *swizzle_xy(operand a);  ir_swizzle *swizzle_xyz(operand a);  ir_swizzle *swizzle_xyzw(operand a); +ir_if *if_tree(operand condition, +               ir_instruction *then_branch); +ir_if *if_tree(operand condition, +               ir_instruction *then_branch, +               ir_instruction *else_branch); +  } /* namespace ir_builder */ diff --git a/mesalib/src/glsl/ir_clone.cpp b/mesalib/src/glsl/ir_clone.cpp index c62c1fc20..b94ff05df 100644 --- a/mesalib/src/glsl/ir_clone.cpp +++ b/mesalib/src/glsl/ir_clone.cpp @@ -50,7 +50,6 @@ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const     var->interpolation = this->interpolation;     var->location = this->location;     var->index = this->index; -   var->uniform_block = this->uniform_block;     var->warn_extension = this->warn_extension;     var->origin_upper_left = this->origin_upper_left;     var->pixel_center_integer = this->pixel_center_integer; @@ -77,6 +76,8 @@ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const        var->constant_initializer =  	 this->constant_initializer->clone(mem_ctx, ht); +   var->interface_type = this->interface_type; +     if (ht) {        hash_table_insert(ht, var, (void *)const_cast<ir_variable *>(this));     } @@ -375,10 +376,15 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const        return c;     } -   default: +   case GLSL_TYPE_SAMPLER: +   case GLSL_TYPE_VOID: +   case GLSL_TYPE_ERROR: +   case GLSL_TYPE_INTERFACE:        assert(!"Should not get here."); -      return NULL; +      break;     } + +   return NULL;  } diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp index 17b54b923..86b863f31 100644 --- a/mesalib/src/glsl/ir_constant_expression.cpp +++ b/mesalib/src/glsl/ir_constant_expression.cpp @@ -40,25 +40,6 @@  #include "glsl_types.h"  #include "program/hash_table.h" -/* Using C99 rounding functions for roundToEven() implementation is - * difficult, 
because round(), rint, and nearbyint() are affected by - * fesetenv(), which the application may have done for its own - * purposes.  Mesa's IROUND macro is close to what we want, but it - * rounds away from 0 on n + 0.5. - */ -static int -round_to_even(float val) -{ -   int rounded = IROUND(val); - -   if (val - floor(val) == 0.5) { -      if (rounded % 2 != 0) -	 rounded += val > 0 ? -1 : 1; -   } - -   return rounded; -} -  static float  dot(ir_constant *op0, ir_constant *op1)  { @@ -94,6 +75,297 @@ bitcast_f2u(float f)     return u;  } +/** + * Evaluate one component of a floating-point 4x8 unpacking function. + */ +typedef uint8_t +(*pack_1x8_func_t)(float); + +/** + * Evaluate one component of a floating-point 2x16 unpacking function. + */ +typedef uint16_t +(*pack_1x16_func_t)(float); + +/** + * Evaluate one component of a floating-point 4x8 unpacking function. + */ +typedef float +(*unpack_1x8_func_t)(uint8_t); + +/** + * Evaluate one component of a floating-point 2x16 unpacking function. + */ +typedef float +(*unpack_1x16_func_t)(uint16_t); + +/** + * Evaluate a 2x16 floating-point packing function. + */ +static uint32_t +pack_2x16(pack_1x16_func_t pack_1x16, +          float x, float y) +{ +   /* From section 8.4 of the GLSL ES 3.00 spec: +    * +    *    packSnorm2x16 +    *    ------------- +    *    The first component of the vector will be written to the least +    *    significant bits of the output; the last component will be written to +    *    the most significant bits. +    * +    * The specifications for the other packing functions contain similar +    * language. +    */ +   uint32_t u = 0; +   u |= ((uint32_t) pack_1x16(x) << 0); +   u |= ((uint32_t) pack_1x16(y) << 16); +   return u; +} + +/** + * Evaluate a 4x8 floating-point packing function. 
+ */ +static uint32_t +pack_4x8(pack_1x8_func_t pack_1x8, +         float x, float y, float z, float w) +{ +   /* From section 8.4 of the GLSL 4.30 spec: +    * +    *    packSnorm4x8 +    *    ------------ +    *    The first component of the vector will be written to the least +    *    significant bits of the output; the last component will be written to +    *    the most significant bits. +    * +    * The specifications for the other packing functions contain similar +    * language. +    */ +   uint32_t u = 0; +   u |= ((uint32_t) pack_1x8(x) << 0); +   u |= ((uint32_t) pack_1x8(y) << 8); +   u |= ((uint32_t) pack_1x8(z) << 16); +   u |= ((uint32_t) pack_1x8(w) << 24); +   return u; +} + +/** + * Evaluate a 2x16 floating-point unpacking function. + */ +static void +unpack_2x16(unpack_1x16_func_t unpack_1x16, +            uint32_t u, +            float *x, float *y) +{ +    /* From section 8.4 of the GLSL ES 3.00 spec: +     * +     *    unpackSnorm2x16 +     *    --------------- +     *    The first component of the returned vector will be extracted from +     *    the least significant bits of the input; the last component will be +     *    extracted from the most significant bits. +     * +     * The specifications for the other unpacking functions contain similar +     * language. +     */ +   *x = unpack_1x16((uint16_t) (u & 0xffff)); +   *y = unpack_1x16((uint16_t) (u >> 16)); +} + +/** + * Evaluate a 4x8 floating-point unpacking function. + */ +static void +unpack_4x8(unpack_1x8_func_t unpack_1x8, uint32_t u, +           float *x, float *y, float *z, float *w) +{ +    /* From section 8.4 of the GLSL 4.30 spec: +     * +     *    unpackSnorm4x8 +     *    -------------- +     *    The first component of the returned vector will be extracted from +     *    the least significant bits of the input; the last component will be +     *    extracted from the most significant bits. 
+     * +     * The specifications for the other unpacking functions contain similar +     * language. +     */ +   *x = unpack_1x8((uint8_t) (u & 0xff)); +   *y = unpack_1x8((uint8_t) (u >> 8)); +   *z = unpack_1x8((uint8_t) (u >> 16)); +   *w = unpack_1x8((uint8_t) (u >> 24)); +} + +/** + * Evaluate one component of packSnorm4x8. + */ +static uint8_t +pack_snorm_1x8(float x) +{ +    /* From section 8.4 of the GLSL 4.30 spec: +     * +     *    packSnorm4x8 +     *    ------------ +     *    The conversion for component c of v to fixed point is done as +     *    follows: +     * +     *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0) +     * +     * We must first cast the float to an int, because casting a negative +     * float to a uint is undefined. +     */ +   return (uint8_t) (int8_t) +          _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 127.0f); +} + +/** + * Evaluate one component of packSnorm2x16. + */ +static uint16_t +pack_snorm_1x16(float x) +{ +    /* From section 8.4 of the GLSL ES 3.00 spec: +     * +     *    packSnorm2x16 +     *    ------------- +     *    The conversion for component c of v to fixed point is done as +     *    follows: +     * +     *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) +     * +     * We must first cast the float to an int, because casting a negative +     * float to a uint is undefined. +     */ +   return (uint16_t) (int16_t) +          _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f); +} + +/** + * Evaluate one component of unpackSnorm4x8. + */ +static float +unpack_snorm_1x8(uint8_t u) +{ +    /* From section 8.4 of the GLSL 4.30 spec: +     * +     *    unpackSnorm4x8 +     *    -------------- +     *    The conversion for unpacked fixed-point value f to floating point is +     *    done as follows: +     * +     *       unpackSnorm4x8: clamp(f / 127.0, -1, +1) +     */ +   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); +} + +/** + * Evaluate one component of unpackSnorm2x16. 
+ */ +static float +unpack_snorm_1x16(uint16_t u) +{ +    /* From section 8.4 of the GLSL ES 3.00 spec: +     * +     *    unpackSnorm2x16 +     *    --------------- +     *    The conversion for unpacked fixed-point value f to floating point is +     *    done as follows: +     * +     *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1) +     */ +   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f); +} + +/** + * Evaluate one component of packUnorm4x8. + */ +static uint8_t +pack_unorm_1x8(float x) +{ +    /* From section 8.4 of the GLSL 4.30 spec: +     * +     *    packUnorm4x8 +     *    ------------ +     *    The conversion for component c of v to fixed point is done as +     *    follows: +     * +     *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0) +     */ +   return (uint8_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 255.0f); +} + +/** + * Evaluate one component of packUnorm2x16. + */ +static uint16_t +pack_unorm_1x16(float x) +{ +    /* From section 8.4 of the GLSL ES 3.00 spec: +     * +     *    packUnorm2x16 +     *    ------------- +     *    The conversion for component c of v to fixed point is done as +     *    follows: +     * +     *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) +     */ +   return (uint16_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f); +} + +/** + * Evaluate one component of unpackUnorm4x8. + */ +static float +unpack_unorm_1x8(uint8_t u) +{ +    /* From section 8.4 of the GLSL 4.30 spec: +     * +     *    unpackUnorm4x8 +     *    -------------- +     *    The conversion for unpacked fixed-point value f to floating point is +     *    done as follows: +     * +     *       unpackUnorm4x8: f / 255.0 +     */ +   return (float) u / 255.0f; +} + +/** + * Evaluate one component of unpackUnorm2x16. 
+ */ +static float +unpack_unorm_1x16(uint16_t u) +{ +    /* From section 8.4 of the GLSL ES 3.00 spec: +     * +     *    unpackUnorm2x16 +     *    --------------- +     *    The conversion for unpacked fixed-point value f to floating point is +     *    done as follows: +     * +     *       unpackUnorm2x16: f / 65535.0 +     */ +   return (float) u / 65535.0f; +} + +/** + * Evaluate one component of packHalf2x16. + */ +static uint16_t +pack_half_1x16(float x) +{ +   return _mesa_float_to_half(x); +} + +/** + * Evaluate one component of unpackHalf2x16. + */ +static float +unpack_half_1x16(uint16_t u) +{ +   return _mesa_half_to_float(u); +} +  ir_constant *  ir_rvalue::constant_expression_value(struct hash_table *variable_context)  { @@ -279,7 +551,7 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)     case ir_unop_round_even:        assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);        for (unsigned c = 0; c < op[0]->type->components(); c++) { -	 data.f[c] = round_to_even(op[0]->value.f[c]); +	 data.f[c] = _mesa_round_to_even(op[0]->value.f[c]);        }        break; @@ -459,6 +731,70 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)        }        break; +   case ir_unop_pack_snorm_2x16: +      assert(op[0]->type == glsl_type::vec2_type); +      data.u[0] = pack_2x16(pack_snorm_1x16, +                            op[0]->value.f[0], +                            op[0]->value.f[1]); +      break; +   case ir_unop_pack_snorm_4x8: +      assert(op[0]->type == glsl_type::vec4_type); +      data.u[0] = pack_4x8(pack_snorm_1x8, +                           op[0]->value.f[0], +                           op[0]->value.f[1], +                           op[0]->value.f[2], +                           op[0]->value.f[3]); +      break; +   case ir_unop_unpack_snorm_2x16: +      assert(op[0]->type == glsl_type::uint_type); +      unpack_2x16(unpack_snorm_1x16, +                  op[0]->value.u[0], +             
     &data.f[0], &data.f[1]); +      break; +   case ir_unop_unpack_snorm_4x8: +      assert(op[0]->type == glsl_type::uint_type); +      unpack_4x8(unpack_snorm_1x8, +                 op[0]->value.u[0], +                 &data.f[0], &data.f[1], &data.f[2], &data.f[3]); +      break; +   case ir_unop_pack_unorm_2x16: +      assert(op[0]->type == glsl_type::vec2_type); +      data.u[0] = pack_2x16(pack_unorm_1x16, +                            op[0]->value.f[0], +                            op[0]->value.f[1]); +      break; +   case ir_unop_pack_unorm_4x8: +      assert(op[0]->type == glsl_type::vec4_type); +      data.u[0] = pack_4x8(pack_unorm_1x8, +                           op[0]->value.f[0], +                           op[0]->value.f[1], +                           op[0]->value.f[2], +                           op[0]->value.f[3]); +      break; +   case ir_unop_unpack_unorm_2x16: +      assert(op[0]->type == glsl_type::uint_type); +      unpack_2x16(unpack_unorm_1x16, +                  op[0]->value.u[0], +                  &data.f[0], &data.f[1]); +      break; +   case ir_unop_unpack_unorm_4x8: +      assert(op[0]->type == glsl_type::uint_type); +      unpack_4x8(unpack_unorm_1x8, +                 op[0]->value.u[0], +                 &data.f[0], &data.f[1], &data.f[2], &data.f[3]); +      break; +   case ir_unop_pack_half_2x16: +      assert(op[0]->type == glsl_type::vec2_type); +      data.u[0] = pack_2x16(pack_half_1x16, +                            op[0]->value.f[0], +                            op[0]->value.f[1]); +      break; +   case ir_unop_unpack_half_2x16: +      assert(op[0]->type == glsl_type::uint_type); +      unpack_2x16(unpack_half_1x16, +                  op[0]->value.u[0], +                  &data.f[0], &data.f[1]); +      break;     case ir_binop_pow:        assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);        for (unsigned c = 0; c < op[0]->type->components(); c++) { diff --git a/mesalib/src/glsl/ir_function.cpp 
b/mesalib/src/glsl/ir_function.cpp index a525693ed..fe4209c77 100644 --- a/mesalib/src/glsl/ir_function.cpp +++ b/mesalib/src/glsl/ir_function.cpp @@ -78,17 +78,17 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)  	 return PARAMETER_LIST_NO_MATCH;        case ir_var_const_in: -      case ir_var_in: +      case ir_var_function_in:  	 if (!actual->type->can_implicitly_convert_to(param->type))  	    return PARAMETER_LIST_NO_MATCH;  	 break; -      case ir_var_out: +      case ir_var_function_out:  	 if (!param->type->can_implicitly_convert_to(actual->type))  	    return PARAMETER_LIST_NO_MATCH;  	 break; -      case ir_var_inout: +      case ir_var_function_inout:  	 /* Since there are no bi-directional automatic conversions (e.g.,  	  * there is int -> float but no float -> int), inout parameters must  	  * be exact matches. diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h index 6b9519174..8f3301840 100644 --- a/mesalib/src/glsl/ir_optimization.h +++ b/mesalib/src/glsl/ir_optimization.h @@ -37,6 +37,31 @@  #define MOD_TO_FRACT       0x20  #define INT_DIV_TO_MUL_RCP 0x40 +/** + * \see class lower_packing_builtins_visitor + */ +enum lower_packing_builtins_op { +   LOWER_PACK_UNPACK_NONE               = 0x0000, + +   LOWER_PACK_SNORM_2x16                = 0x0001, +   LOWER_UNPACK_SNORM_2x16              = 0x0002, + +   LOWER_PACK_UNORM_2x16                = 0x0004, +   LOWER_UNPACK_UNORM_2x16              = 0x0008, + +   LOWER_PACK_HALF_2x16                 = 0x0010, +   LOWER_UNPACK_HALF_2x16               = 0x0020, + +   LOWER_PACK_HALF_2x16_TO_SPLIT        = 0x0040, +   LOWER_UNPACK_HALF_2x16_TO_SPLIT      = 0x0080, + +   LOWER_PACK_SNORM_4x8                 = 0x0100, +   LOWER_UNPACK_SNORM_4x8               = 0x0200, + +   LOWER_PACK_UNORM_4x8                 = 0x0400, +   LOWER_UNPACK_UNORM_4x8               = 0x0800, +}; +  bool do_common_optimization(exec_list *ir, bool linked,  			    bool 
uniform_locations_assigned,  			    unsigned max_unroll_iterations); @@ -74,6 +99,7 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions,  bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);  bool lower_clip_distance(gl_shader *shader);  void lower_output_reads(exec_list *instructions); +bool lower_packing_builtins(exec_list *instructions, int op_mask);  void lower_ubo_reference(struct gl_shader *shader, exec_list *instructions);  void lower_packed_varyings(void *mem_ctx, unsigned location_base,                             unsigned locations_used, ir_variable_mode mode, diff --git a/mesalib/src/glsl/ir_print_visitor.cpp b/mesalib/src/glsl/ir_print_visitor.cpp index 8aa26e5d0..acc92dbf1 100644 --- a/mesalib/src/glsl/ir_print_visitor.cpp +++ b/mesalib/src/glsl/ir_print_visitor.cpp @@ -146,7 +146,8 @@ void ir_print_visitor::visit(ir_variable *ir)     const char *const cent = (ir->centroid) ? "centroid " : "";     const char *const inv = (ir->invariant) ? "invariant " : ""; -   const char *const mode[] = { "", "uniform ", "in ", "out ", "inout ", +   const char *const mode[] = { "", "uniform ", "shader_in ", "shader_out ", +                                "in ", "out ", "inout ",  			        "const_in ", "sys ", "temporary " };     const char *const interp[] = { "", "flat", "noperspective" }; diff --git a/mesalib/src/glsl/ir_reader.cpp b/mesalib/src/glsl/ir_reader.cpp index 03dbb67c3..405e75b64 100644 --- a/mesalib/src/glsl/ir_reader.cpp +++ b/mesalib/src/glsl/ir_reader.cpp @@ -400,13 +400,17 @@ ir_reader::read_declaration(s_expression *expr)        } else if (strcmp(qualifier->value(), "auto") == 0) {  	 var->mode = ir_var_auto;        } else if (strcmp(qualifier->value(), "in") == 0) { -	 var->mode = ir_var_in; +	 var->mode = ir_var_function_in; +      } else if (strcmp(qualifier->value(), "shader_in") == 0) { +         var->mode = ir_var_shader_in;        } else if (strcmp(qualifier->value(), "const_in") == 0) {  	 var->mode = 
ir_var_const_in;        } else if (strcmp(qualifier->value(), "out") == 0) { -	 var->mode = ir_var_out; +	 var->mode = ir_var_function_out; +      } else if (strcmp(qualifier->value(), "shader_out") == 0) { +	 var->mode = ir_var_shader_out;        } else if (strcmp(qualifier->value(), "inout") == 0) { -	 var->mode = ir_var_inout; +	 var->mode = ir_var_function_inout;        } else if (strcmp(qualifier->value(), "temporary") == 0) {  	 var->mode = ir_var_temporary;        } else if (strcmp(qualifier->value(), "smooth") == 0) { diff --git a/mesalib/src/glsl/ir_set_program_inouts.cpp b/mesalib/src/glsl/ir_set_program_inouts.cpp index e5de07e01..1e102bfbb 100644 --- a/mesalib/src/glsl/ir_set_program_inouts.cpp +++ b/mesalib/src/glsl/ir_set_program_inouts.cpp @@ -85,7 +85,7 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len,     for (int i = 0; i < len; i++) {        GLbitfield64 bitfield = BITFIELD64_BIT(var->location + var->index + offset + i); -      if (var->mode == ir_var_in) { +      if (var->mode == ir_var_shader_in) {  	 prog->InputsRead |= bitfield;           if (is_fragment_shader) {              gl_fragment_program *fprog = (gl_fragment_program *) prog; @@ -152,8 +152,8 @@ ir_set_program_inouts_visitor::visit_enter(ir_dereference_array *ir)  ir_visitor_status  ir_set_program_inouts_visitor::visit(ir_variable *ir)  { -   if (ir->mode == ir_var_in || -       ir->mode == ir_var_out || +   if (ir->mode == ir_var_shader_in || +       ir->mode == ir_var_shader_out ||         ir->mode == ir_var_system_value) {        hash_table_insert(this->ht, ir, ir);     } diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp index ad57a3149..d8cafd55f 100644 --- a/mesalib/src/glsl/ir_validate.cpp +++ b/mesalib/src/glsl/ir_validate.cpp @@ -329,6 +329,38 @@ ir_validate::visit_leave(ir_expression *ir)        assert(ir->operands[0]->type == ir->type);        break; +   case ir_unop_pack_snorm_2x16: +   case ir_unop_pack_unorm_2x16: +   
case ir_unop_pack_half_2x16: +      assert(ir->type == glsl_type::uint_type); +      assert(ir->operands[0]->type == glsl_type::vec2_type); +      break; + +   case ir_unop_pack_snorm_4x8: +   case ir_unop_pack_unorm_4x8: +      assert(ir->type == glsl_type::uint_type); +      assert(ir->operands[0]->type == glsl_type::vec4_type); +      break; + +   case ir_unop_unpack_snorm_2x16: +   case ir_unop_unpack_unorm_2x16: +   case ir_unop_unpack_half_2x16: +      assert(ir->type == glsl_type::vec2_type); +      assert(ir->operands[0]->type == glsl_type::uint_type); +      break; + +   case ir_unop_unpack_snorm_4x8: +   case ir_unop_unpack_unorm_4x8: +      assert(ir->type == glsl_type::vec4_type); +      assert(ir->operands[0]->type == glsl_type::uint_type); +      break; + +   case ir_unop_unpack_half_2x16_split_x: +   case ir_unop_unpack_half_2x16_split_y: +      assert(ir->type == glsl_type::float_type); +      assert(ir->operands[0]->type == glsl_type::uint_type); +      break; +     case ir_unop_noise:        /* XXX what can we assert here? 
*/        break; @@ -423,6 +455,12 @@ ir_validate::visit_leave(ir_expression *ir)        assert(ir->operands[0]->type == ir->operands[1]->type);        break; +   case ir_binop_pack_half_2x16_split: +      assert(ir->type == glsl_type::uint_type); +      assert(ir->operands[0]->type == glsl_type::float_type); +      assert(ir->operands[1]->type == glsl_type::float_type); +      break; +     case ir_binop_ubo_load:        assert(ir->operands[0]->as_constant());        assert(ir->operands[0]->type == glsl_type::uint_type); @@ -605,8 +643,8 @@ ir_validate::visit_enter(ir_call *ir)           printf("ir_call parameter type mismatch:\n");           goto dump_ir;        } -      if (formal_param->mode == ir_var_out -          || formal_param->mode == ir_var_inout) { +      if (formal_param->mode == ir_var_function_out +          || formal_param->mode == ir_var_function_inout) {           if (!actual_param->is_lvalue()) {              printf("ir_call out/inout parameters must be lvalues:\n");              goto dump_ir; diff --git a/mesalib/src/glsl/link_uniform_block_active_visitor.cpp b/mesalib/src/glsl/link_uniform_block_active_visitor.cpp new file mode 100644 index 000000000..56a8384e9 --- /dev/null +++ b/mesalib/src/glsl/link_uniform_block_active_visitor.cpp @@ -0,0 +1,162 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "link_uniform_block_active_visitor.h" +#include "program.h" + +link_uniform_block_active * +process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var) +{ +   const uint32_t h = _mesa_hash_string(var->interface_type->name); +   const hash_entry *const existing_block = +      _mesa_hash_table_search(ht, h, var->interface_type->name); + +   const glsl_type *const block_type = var->is_interface_instance() +      ? var->type : var->interface_type; + + +   /* If a block with this block-name has not previously been seen, add it. +    * If a block with this block-name has been seen, it must be identical to +    * the block currently being examined. 
+    */ +   if (existing_block == NULL) { +      link_uniform_block_active *const b = +	 rzalloc(mem_ctx, struct link_uniform_block_active); + +      b->type = block_type; +      b->has_instance_name = var->is_interface_instance(); + +      _mesa_hash_table_insert(ht, h, var->interface_type->name, +			      (void *) b); +      return b; +   } else { +      link_uniform_block_active *const b = +	 (link_uniform_block_active *) existing_block->data; + +      if (b->type != block_type +	  || b->has_instance_name != var->is_interface_instance()) +	 return NULL; +      else +	 return b; +   } + +   assert(!"Should not get here."); +   return NULL; +} + +ir_visitor_status +link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir) +{ +   ir_dereference_variable *const d = ir->array->as_dereference_variable(); +   ir_variable *const var = (d == NULL) ? NULL : d->var; + +   /* If the r-value being dereferenced is not a variable (e.g., a field of a +    * structure) or is not a uniform block instance, continue. +    * +    * WARNING: It is not enough for the variable to be part of uniform block. +    * It must represent the entire block.  Arrays (or matrices) inside blocks +    * that lack an instance name are handled by the ir_dereference_variable +    * function. +    */ +   if (var == NULL +       || !var->is_in_uniform_block() +       || !var->is_interface_instance()) +      return visit_continue; + +   /* Process the block.  Bail if there was an error. +    */ +   link_uniform_block_active *const b = +      process_block(this->mem_ctx, this->ht, var); +   if (b == NULL) { +      linker_error(prog, +		   "uniform block `%s' has mismatching definitions", +		   var->interface_type->name); +      this->success = false; +      return visit_stop; +   } + +   /* Block arrays must be declared with an instance name. 
+    */ +   assert(b->has_instance_name); +   assert((b->num_array_elements == 0) == (b->array_elements == NULL)); +   assert(b->type != NULL); + +   /* Determine whether or not this array index has already been added to the +    * list of active array indices.  At this point all constant folding must +    * have occurred, and the array index must be a constant. +    */ +   ir_constant *c = ir->array_index->as_constant(); +   assert(c != NULL); + +   const unsigned idx = c->get_uint_component(0); + +   unsigned i; +   for (i = 0; i < b->num_array_elements; i++) { +      if (b->array_elements[i] == idx) +	 break; +   } + +   assert(i <= b->num_array_elements); + +   if (i == b->num_array_elements) { +      b->array_elements = reralloc(this->mem_ctx, +				   b->array_elements, +				   unsigned, +				   b->num_array_elements + 1); + +      b->array_elements[b->num_array_elements] = idx; + +      b->num_array_elements++; +   } + +   return visit_continue_with_parent; +} + +ir_visitor_status +link_uniform_block_active_visitor::visit(ir_dereference_variable *ir) +{ +   ir_variable *var = ir->var; + +   if (!var->is_in_uniform_block()) +      return visit_continue; + +   assert(!var->is_interface_instance() || !var->type->is_array()); + +   /* Process the block.  Bail if there was an error. 
+    */ +   link_uniform_block_active *const b = +      process_block(this->mem_ctx, this->ht, var); +   if (b == NULL) { +      linker_error(this->prog, +		   "uniform block `%s' has mismatching definitions", +		   var->interface_type->name); +      this->success = false; +      return visit_stop; +   } + +   assert(b->num_array_elements == 0); +   assert(b->array_elements == NULL); +   assert(b->type != NULL); + +   return visit_continue; +} diff --git a/mesalib/src/glsl/link_uniform_block_active_visitor.h b/mesalib/src/glsl/link_uniform_block_active_visitor.h new file mode 100644 index 000000000..fba628a8f --- /dev/null +++ b/mesalib/src/glsl/link_uniform_block_active_visitor.h @@ -0,0 +1,62 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once +#ifndef LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H +#define LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H + +#include "ir.h" +#include "ir_visitor.h" +#include "glsl_types.h" +#include "main/hash_table.h" + +struct link_uniform_block_active { +   const glsl_type *type; + +   unsigned *array_elements; +   unsigned num_array_elements; + +   bool has_instance_name; +}; + +class link_uniform_block_active_visitor : public ir_hierarchical_visitor { +public: +   link_uniform_block_active_visitor(void *mem_ctx, struct hash_table *ht, +				     struct gl_shader_program *prog) +      : success(true), prog(prog), ht(ht), mem_ctx(mem_ctx) +   { +      /* empty */ +   } + +   virtual ir_visitor_status visit_enter(ir_dereference_array *); +   virtual ir_visitor_status visit(ir_dereference_variable *); + +   bool success; + +private: +   struct gl_shader_program *prog; +   struct hash_table *ht; +   void *mem_ctx; +}; + +#endif /* LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H */ diff --git a/mesalib/src/glsl/link_uniform_blocks.cpp b/mesalib/src/glsl/link_uniform_blocks.cpp new file mode 100644 index 000000000..74fe1e29f --- /dev/null +++ b/mesalib/src/glsl/link_uniform_blocks.cpp @@ -0,0 +1,313 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/core.h" +#include "ir.h" +#include "linker.h" +#include "ir_uniform.h" +#include "link_uniform_block_active_visitor.h" +#include "main/hash_table.h" +#include "program.h" + +class ubo_visitor : public uniform_field_visitor { +public: +   ubo_visitor(void *mem_ctx, gl_uniform_buffer_variable *variables, +               unsigned num_variables) +      : index(0), offset(0), buffer_size(0), variables(variables), +        num_variables(num_variables), mem_ctx(mem_ctx), is_array_instance(false) +   { +      /* empty */ +   } + +   void process(const glsl_type *type, const char *name) +   { +      this->offset = 0; +      this->buffer_size = 0; +      this->is_array_instance = strchr(name, ']') != NULL; +      this->uniform_field_visitor::process(type, name); +   } + +   unsigned index; +   unsigned offset; +   unsigned buffer_size; +   gl_uniform_buffer_variable *variables; +   unsigned num_variables; +   void *mem_ctx; +   bool is_array_instance; + +private: +   virtual void visit_field(const glsl_type *type, const char *name, +                            bool row_major) +   { +      assert(this->index < this->num_variables); + +      gl_uniform_buffer_variable *v = &this->variables[this->index++]; + +      v->Name = ralloc_strdup(mem_ctx, name); +      v->Type = type; +      v->RowMajor = row_major; + +      if (this->is_array_instance) { +         v->IndexName = ralloc_strdup(mem_ctx, name); + +         char *open_bracket = strchr(v->IndexName, '['); +         
assert(open_bracket != NULL); + +         char *close_bracket = strchr(open_bracket, ']'); +         assert(close_bracket != NULL); + +         /* Length of the tail without the ']' but with the NUL. +          */ +         unsigned len = strlen(close_bracket + 1) + 1; + +         memmove(open_bracket, close_bracket + 1, len); +     } else { +         v->IndexName = v->Name; +      } + +      unsigned alignment = type->std140_base_alignment(v->RowMajor); +      unsigned size = type->std140_size(v->RowMajor); + +      this->offset = glsl_align(this->offset, alignment); +      v->Offset = this->offset; +      this->offset += size; + +      /* From the GL_ARB_uniform_buffer_object spec: +       * +       *     "For uniform blocks laid out according to [std140] rules, the +       *      minimum buffer object size returned by the +       *      UNIFORM_BLOCK_DATA_SIZE query is derived by taking the offset of +       *      the last basic machine unit consumed by the last uniform of the +       *      uniform block (including any end-of-array or end-of-structure +       *      padding), adding one, and rounding up to the next multiple of +       *      the base alignment required for a vec4." 
+       */ +      this->buffer_size = glsl_align(this->offset, 16); +   } + +   virtual void visit_field(const glsl_struct_field *field) +   { +      this->offset = glsl_align(this->offset, +                                field->type->std140_base_alignment(false)); +   } +}; + +class count_block_size : public uniform_field_visitor { +public: +   count_block_size() : num_active_uniforms(0) +   { +      /* empty */ +   } + +   unsigned num_active_uniforms; + +private: +   virtual void visit_field(const glsl_type *type, const char *name, +                            bool row_major) +   { +      (void) type; +      (void) name; +      (void) row_major; +      this->num_active_uniforms++; +   } +}; + +struct block { +   const glsl_type *type; +   bool has_instance_name; +}; + +int +link_uniform_blocks(void *mem_ctx, +                    struct gl_shader_program *prog, +                    struct gl_shader **shader_list, +                    unsigned num_shaders, +                    struct gl_uniform_block **blocks_ret) +{ +   /* This hash table will track all of the uniform blocks that have been +    * encountered.  Since blocks with the same block-name must be the same, +    * the hash is organized by block-name. +    */ +   struct hash_table *block_hash = +      _mesa_hash_table_create(mem_ctx, _mesa_key_string_equal); + +   /* Determine which uniform blocks are active. +    */ +   link_uniform_block_active_visitor v(mem_ctx, block_hash, prog); +   for (unsigned i = 0; i < num_shaders; i++) { +      visit_list_elements(&v, shader_list[i]->ir); +   } + +   /* Count the number of active uniform blocks.  Count the total number of +    * active slots in those uniform blocks. 
+    */ +   unsigned num_blocks = 0; +   unsigned num_variables = 0; +   count_block_size block_size; +   struct hash_entry *entry; + +   hash_table_foreach (block_hash, entry) { +      const struct link_uniform_block_active *const b = +         (const struct link_uniform_block_active *) entry->data; + +      const glsl_type *const block_type = +         b->type->is_array() ? b->type->fields.array : b->type; + +      assert((b->num_array_elements > 0) == b->type->is_array()); + +      block_size.num_active_uniforms = 0; +      block_size.process(block_type, ""); + +      if (b->num_array_elements > 0) { +         num_blocks += b->num_array_elements; +         num_variables += b->num_array_elements +            * block_size.num_active_uniforms; +      } else { +         num_blocks++; +         num_variables += block_size.num_active_uniforms; +      } + +   } + +   if (num_blocks == 0) { +      assert(num_variables == 0); +      _mesa_hash_table_destroy(block_hash, NULL); +      return 0; +   } + +   assert(num_variables != 0); + +   /* Allocate storage to hold all of the information related to uniform +    * blocks that can be queried through the API. +    */ +   gl_uniform_block *blocks = +      ralloc_array(mem_ctx, gl_uniform_block, num_blocks); +   gl_uniform_buffer_variable *variables = +      ralloc_array(blocks, gl_uniform_buffer_variable, num_variables); + +   /* Add each variable from each uniform block to the API tracking +    * structures. 
+    */ +   unsigned i = 0; +   ubo_visitor parcel(blocks, variables, num_variables); + +   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140) +                 == unsigned(ubo_packing_std140)); +   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED) +                 == unsigned(ubo_packing_shared)); +   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED) +                 == unsigned(ubo_packing_packed)); + + +   hash_table_foreach (block_hash, entry) { +      const struct link_uniform_block_active *const b = +         (const struct link_uniform_block_active *) entry->data; +      const glsl_type *block_type = b->type; + +      if (b->num_array_elements > 0) { +         const char *const name = block_type->fields.array->name; + +         assert(b->has_instance_name); +         for (unsigned j = 0; j < b->num_array_elements; j++) { +            blocks[i].Name = ralloc_asprintf(blocks, "%s[%u]", name, +                                             b->array_elements[j]); +            blocks[i].Uniforms = &variables[parcel.index]; +            blocks[i].Binding = 0; +            blocks[i].UniformBufferSize = 0; +            blocks[i]._Packing = +               gl_uniform_block_packing(block_type->interface_packing); + +            parcel.process(block_type->fields.array, +                           blocks[i].Name); + +            blocks[i].UniformBufferSize = parcel.buffer_size; + +            blocks[i].NumUniforms = +               (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); + +            i++; +         } +      } else { +         blocks[i].Name = ralloc_strdup(blocks, block_type->name); +         blocks[i].Uniforms = &variables[parcel.index]; +         blocks[i].Binding = 0; +         blocks[i].UniformBufferSize = 0; +         blocks[i]._Packing = +            gl_uniform_block_packing(block_type->interface_packing); + +         parcel.process(block_type, +                        b->has_instance_name ? 
block_type->name : ""); + +         blocks[i].UniformBufferSize = parcel.buffer_size; + +         blocks[i].NumUniforms = +            (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); + +         i++; +      } +   } + +   assert(parcel.index == num_variables); + +   _mesa_hash_table_destroy(block_hash, NULL); + +   *blocks_ret = blocks; +   return num_blocks; +} + +bool +link_uniform_blocks_are_compatible(const gl_uniform_block *a, +				   const gl_uniform_block *b) +{ +   assert(strcmp(a->Name, b->Name) == 0); + +   /* Page 35 (page 42 of the PDF) in section 4.3.7 of the GLSL 1.50 spec says: +    * +    *     "Matched block names within an interface (as defined above) must +    *     match in terms of having the same number of declarations with the +    *     same sequence of types and the same sequence of member names, as +    *     well as having the same member-wise layout qualification....if a +    *     matching block is declared as an array, then the array sizes must +    *     also match... Any mismatch will generate a link error." +    * +    * Arrays are not yet supported, so there is no check for that. 
+    */ +   if (a->NumUniforms != b->NumUniforms) +      return false; + +   if (a->_Packing != b->_Packing) +      return false; + +   for (unsigned i = 0; i < a->NumUniforms; i++) { +      if (strcmp(a->Uniforms[i].Name, b->Uniforms[i].Name) != 0) +	 return false; + +      if (a->Uniforms[i].Type != b->Uniforms[i].Type) +	 return false; + +      if (a->Uniforms[i].RowMajor != b->Uniforms[i].RowMajor) +	 return false; +   } + +   return true; +} diff --git a/mesalib/src/glsl/link_uniform_initializers.cpp b/mesalib/src/glsl/link_uniform_initializers.cpp index 849e08097..836a360fa 100644 --- a/mesalib/src/glsl/link_uniform_initializers.cpp +++ b/mesalib/src/glsl/link_uniform_initializers.cpp @@ -67,7 +67,11 @@ copy_constant_to_storage(union gl_constant_value *storage,        case GLSL_TYPE_BOOL:  	 storage[i].b = int(val->value.b[i]);  	 break; -      default: +      case GLSL_TYPE_ARRAY: +      case GLSL_TYPE_STRUCT: +      case GLSL_TYPE_INTERFACE: +      case GLSL_TYPE_VOID: +      case GLSL_TYPE_ERROR:  	 /* All other types should have already been filtered by other  	  * paths in the caller.  	  */ diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp index 07d9c18de..f1284adb2 100644 --- a/mesalib/src/glsl/link_uniforms.cpp +++ b/mesalib/src/glsl/link_uniforms.cpp @@ -29,12 +29,6 @@  #include "program/hash_table.h"  #include "program.h" -static inline unsigned int -align(unsigned int a, unsigned int align) -{ -   return (a + align - 1) / align * align; -} -  /**   * \file link_uniforms.cpp   * Assign locations for GLSL uniforms. 
@@ -58,23 +52,49 @@ values_for_type(const glsl_type *type)  }  void +uniform_field_visitor::process(const glsl_type *type, const char *name) +{ +   assert(type->is_record() +          || (type->is_array() && type->fields.array->is_record()) +          || type->is_interface() +          || (type->is_array() && type->fields.array->is_interface())); + +   char *name_copy = ralloc_strdup(NULL, name); +   recursion(type, &name_copy, strlen(name), false); +   ralloc_free(name_copy); +} + +void  uniform_field_visitor::process(ir_variable *var)  {     const glsl_type *t = var->type; +   /* false is always passed for the row_major parameter to the other +    * processing functions because no information is available to do +    * otherwise.  See the warning in linker.h. +    */ +     /* Only strdup the name if we actually will need to modify it. */     if (t->is_record() || (t->is_array() && t->fields.array->is_record())) {        char *name = ralloc_strdup(NULL, var->name); -      recursion(var->type, &name, strlen(name)); +      recursion(var->type, &name, strlen(name), false); +      ralloc_free(name); +   } else if (t->is_interface()) { +      char *name = ralloc_strdup(NULL, var->type->name); +      recursion(var->type, &name, strlen(name), false); +      ralloc_free(name); +   } else if (t->is_array() && t->fields.array->is_interface()) { +      char *name = ralloc_strdup(NULL, var->type->fields.array->name); +      recursion(var->type, &name, strlen(name), false);        ralloc_free(name);     } else { -      this->visit_field(t, var->name); +      this->visit_field(t, var->name, false);     }  }  void  uniform_field_visitor::recursion(const glsl_type *t, char **name, -				 size_t name_length) +                                 size_t name_length, bool row_major)  {     /* Records need to have each field processed individually.      
* @@ -82,30 +102,47 @@ uniform_field_visitor::recursion(const glsl_type *t, char **name,      * individually, then each field of the resulting array elements processed      * individually.      */ -   if (t->is_record()) { +   if (t->is_record() || t->is_interface()) {        for (unsigned i = 0; i < t->length; i++) {  	 const char *field = t->fields.structure[i].name;  	 size_t new_length = name_length; -	 /* Append '.field' to the current uniform name. */ -	 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); +         if (t->fields.structure[i].type->is_record()) +            this->visit_field(&t->fields.structure[i]); + +         /* Append '.field' to the current uniform name. */ +         if (name_length == 0) { +            ralloc_asprintf_rewrite_tail(name, &new_length, "%s", field); +         } else { +            ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); +         } -	 recursion(t->fields.structure[i].type, name, new_length); +         recursion(t->fields.structure[i].type, name, new_length, +                   t->fields.structure[i].row_major);        } -   } else if (t->is_array() && t->fields.array->is_record()) { +   } else if (t->is_array() && (t->fields.array->is_record() +                                || t->fields.array->is_interface())) {        for (unsigned i = 0; i < t->length; i++) {  	 size_t new_length = name_length;  	 /* Append the subscript to the current uniform name */  	 ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); -	 recursion(t->fields.array, name, new_length); +         recursion(t->fields.array, name, new_length, +                   t->fields.structure[i].row_major);        }     } else { -      this->visit_field(t, *name); +      this->visit_field(t, *name, row_major);     }  } +void +uniform_field_visitor::visit_field(const glsl_struct_field *field) +{ +   (void) field; +   /* empty */ +} +  /**   * Class to help calculate the storage requirements for a set of uniforms   * @@ -131,6 
+168,15 @@ public:        this->num_shader_uniform_components = 0;     } +   void process(ir_variable *var) +   { +      if (var->is_interface_instance()) +         uniform_field_visitor::process(var->interface_type, +                                        var->interface_type->name); +      else +         uniform_field_visitor::process(var); +   } +     /**      * Total number of active uniforms counted      */ @@ -152,10 +198,15 @@ public:     unsigned num_shader_uniform_components;  private: -   virtual void visit_field(const glsl_type *type, const char *name) +   virtual void visit_field(const glsl_type *type, const char *name, +                            bool row_major)     {        assert(!type->is_record());        assert(!(type->is_array() && type->fields.array->is_record())); +      assert(!type->is_interface()); +      assert(!(type->is_array() && type->fields.array->is_interface())); + +      (void) row_major;        /* Count the number of samplers regardless of whether the uniform is         * already in the hash table.  
The hash table prevents adding the same @@ -224,42 +275,77 @@ public:     }     void set_and_process(struct gl_shader_program *prog, -			struct gl_shader *shader,  			ir_variable *var)     { -      ubo_var = NULL; -      if (var->uniform_block != -1) { -	 struct gl_uniform_block *block = -	    &shader->UniformBlocks[var->uniform_block]; - -	 ubo_block_index = -1; -	 for (unsigned i = 0; i < prog->NumUniformBlocks; i++) { -	    if (!strcmp(prog->UniformBlocks[i].Name, -			shader->UniformBlocks[var->uniform_block].Name)) { -	       ubo_block_index = i; -	       break; +      ubo_block_index = -1; +      if (var->is_in_uniform_block()) { +         if (var->is_interface_instance() && var->type->is_array()) { +            unsigned l = strlen(var->interface_type->name); + +            for (unsigned i = 0; i < prog->NumUniformBlocks; i++) { +               if (strncmp(var->interface_type->name, +                           prog->UniformBlocks[i].Name, +                           l) == 0 +                   && prog->UniformBlocks[i].Name[l] == '[') { +                  ubo_block_index = i; +                  break; +               } +            } +         } else { +            for (unsigned i = 0; i < prog->NumUniformBlocks; i++) { +               if (strcmp(var->interface_type->name, +                          prog->UniformBlocks[i].Name) == 0) { +                  ubo_block_index = i; +                  break; +               }  	    }  	 }  	 assert(ubo_block_index != -1); -	 ubo_var_index = var->location; -	 ubo_var = &block->Uniforms[var->location]; -	 ubo_byte_offset = ubo_var->Offset; -      } - -      process(var); +         /* Uniform blocks that were specified with an instance name must be +          * handled a little bit differently.  The name of the variable is the +          * name used to reference the uniform block instead of being the name +          * of a variable within the block.  
Therefore, searching for the name +          * within the block will fail. +          */ +         if (var->is_interface_instance()) { +            ubo_byte_offset = 0; +            ubo_row_major = false; +         } else { +            const struct gl_uniform_block *const block = +               &prog->UniformBlocks[ubo_block_index]; + +            assert(var->location != -1); + +            const struct gl_uniform_buffer_variable *const ubo_var = +               &block->Uniforms[var->location]; + +            ubo_row_major = ubo_var->RowMajor; +            ubo_byte_offset = ubo_var->Offset; +         } + +         if (var->is_interface_instance()) +            process(var->interface_type, var->interface_type->name); +         else +            process(var); +      } else +         process(var);     } -   struct gl_uniform_buffer_variable *ubo_var;     int ubo_block_index; -   int ubo_var_index;     int ubo_byte_offset; +   bool ubo_row_major;  private: -   virtual void visit_field(const glsl_type *type, const char *name) +   virtual void visit_field(const glsl_type *type, const char *name, +                            bool row_major)     {        assert(!type->is_record());        assert(!(type->is_array() && type->fields.array->is_record())); +      assert(!type->is_interface()); +      assert(!(type->is_array() && type->fields.array->is_interface())); + +      (void) row_major;        unsigned id;        bool found = this->map->get(id, name); @@ -330,17 +416,17 @@ private:        this->uniforms[id].num_driver_storage = 0;        this->uniforms[id].driver_storage = NULL;        this->uniforms[id].storage = this->values; -      if (this->ubo_var) { +      if (this->ubo_block_index != -1) {  	 this->uniforms[id].block_index = this->ubo_block_index; -	 unsigned alignment = type->std140_base_alignment(ubo_var->RowMajor); -	 this->ubo_byte_offset = align(this->ubo_byte_offset, alignment); +	 unsigned alignment = type->std140_base_alignment(ubo_row_major); +	 
this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);  	 this->uniforms[id].offset = this->ubo_byte_offset; -	 this->ubo_byte_offset += type->std140_size(ubo_var->RowMajor); +	 this->ubo_byte_offset += type->std140_size(ubo_row_major);  	 if (type->is_array()) {  	    this->uniforms[id].array_stride = -	       align(type->fields.array->std140_size(ubo_var->RowMajor), 16); +	       glsl_align(type->fields.array->std140_size(ubo_row_major), 16);  	 } else {  	    this->uniforms[id].array_stride = 0;  	 } @@ -348,7 +434,7 @@ private:  	 if (type->is_matrix() ||  	     (type->is_array() && type->fields.array->is_matrix())) {  	    this->uniforms[id].matrix_stride = 16; -	    this->uniforms[id].row_major = ubo_var->RowMajor; +	    this->uniforms[id].row_major = ubo_row_major;  	 } else {  	    this->uniforms[id].matrix_stride = 0;  	    this->uniforms[id].row_major = false; @@ -399,26 +485,10 @@ link_cross_validate_uniform_block(void *mem_ctx,  {     for (unsigned int i = 0; i < *num_linked_blocks; i++) {        struct gl_uniform_block *old_block = &(*linked_blocks)[i]; -      if (strcmp(old_block->Name, new_block->Name) == 0) { -	 if (old_block->NumUniforms != new_block->NumUniforms) { -	    return -1; -	 } -	 for (unsigned j = 0; j < old_block->NumUniforms; j++) { -	    if (strcmp(old_block->Uniforms[j].Name, -		       new_block->Uniforms[j].Name) != 0) -	       return -1; - -	    if (old_block->Uniforms[j].Offset != -		new_block->Uniforms[j].Offset) -	       return -1; - -	    if (old_block->Uniforms[j].RowMajor != -		new_block->Uniforms[j].RowMajor) -	       return -1; -	 } -	 return i; -      } +      if (strcmp(old_block->Name, new_block->Name) == 0) +	 return link_uniform_blocks_are_compatible(old_block, new_block) +	    ? 
i : -1;     }     *linked_blocks = reralloc(mem_ctx, *linked_blocks, @@ -440,7 +510,13 @@ link_cross_validate_uniform_block(void *mem_ctx,        struct gl_uniform_buffer_variable *ubo_var =  	 &linked_block->Uniforms[i]; -      ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name); +      if (ubo_var->Name == ubo_var->IndexName) { +         ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name); +         ubo_var->IndexName = ubo_var->Name; +      } else { +         ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name); +         ubo_var->IndexName = ralloc_strdup(*linked_blocks, ubo_var->IndexName); +      }     }     return linked_block_index; @@ -458,17 +534,47 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)     foreach_list(node, shader->ir) {        ir_variable *const var = ((ir_instruction *) node)->as_variable(); -      if ((var == NULL) || (var->uniform_block == -1)) +      if ((var == NULL) || !var->is_in_uniform_block())  	 continue;        assert(var->mode == ir_var_uniform); +      if (var->is_interface_instance()) { +         var->location = 0; +         continue; +      } +        bool found = false; +      char sentinel = '\0'; + +      if (var->type->is_record()) { +         sentinel = '.'; +      } else if (var->type->is_array() +                 && var->type->fields.array->is_record()) { +         sentinel = '['; +      } + +      const unsigned l = strlen(var->name);        for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {  	 for (unsigned j = 0; j < shader->UniformBlocks[i].NumUniforms; j++) { -	    if (!strcmp(var->name, shader->UniformBlocks[i].Uniforms[j].Name)) { +            if (sentinel) { +               const char *begin = shader->UniformBlocks[i].Uniforms[j].Name; +               const char *end = strchr(begin, sentinel); + +               if (end == NULL) +                  continue; + +               if (l != (end - begin)) +                  continue; + +               if 
(strncmp(var->name, begin, l) == 0) { +                  found = true; +                  var->location = j; +                  break; +               } +            } else if (!strcmp(var->name, +                               shader->UniformBlocks[i].Uniforms[j].Name)) {  	       found = true; -	       var->uniform_block = i;  	       var->location = j;  	       break;  	    } @@ -494,7 +600,7 @@ link_assign_uniform_block_offsets(struct gl_shader *shader)  	 unsigned alignment = type->std140_base_alignment(ubo_var->RowMajor);  	 unsigned size = type->std140_size(ubo_var->RowMajor); -	 offset = align(offset, alignment); +	 offset = glsl_align(offset, alignment);  	 ubo_var->Offset = offset;  	 offset += size;        } @@ -510,7 +616,7 @@ link_assign_uniform_block_offsets(struct gl_shader *shader)         *      and rounding up to the next multiple of the base         *      alignment required for a vec4."         */ -      block->UniformBufferSize = align(offset, 16); +      block->UniformBufferSize = glsl_align(offset, 16);     }  } @@ -538,13 +644,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog)      */     memset(prog->SamplerUnits, 0, sizeof(prog->SamplerUnits)); -   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { -      if (prog->_LinkedShaders[i] == NULL) -	 continue; - -      link_update_uniform_buffer_variables(prog->_LinkedShaders[i]); -   } -     /* First pass: Count the uniform resources used by the user-defined      * uniforms.  While this happens, each active uniform will have an index      * assigned to it. @@ -557,6 +656,8 @@ link_assign_uniform_locations(struct gl_shader_program *prog)        if (prog->_LinkedShaders[i] == NULL)  	 continue; +      link_update_uniform_buffer_variables(prog->_LinkedShaders[i]); +        /* Reset various per-shader target counts.         
*/        uniform_size.start_shader(); @@ -620,7 +721,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog)  	 if (strncmp("gl_", var->name, 3) == 0)  	    continue; -	 parcel.set_and_process(prog, prog->_LinkedShaders[i], var); +	 parcel.set_and_process(prog, var);        }        prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used; diff --git a/mesalib/src/glsl/link_varyings.cpp b/mesalib/src/glsl/link_varyings.cpp index 5c27f231e..25681d618 100644 --- a/mesalib/src/glsl/link_varyings.cpp +++ b/mesalib/src/glsl/link_varyings.cpp @@ -54,10 +54,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,     foreach_list(node, producer->ir) {        ir_variable *const var = ((ir_instruction *) node)->as_variable(); -      /* FINISHME: For geometry shaders, this should also look for inout -       * FINISHME: variables. -       */ -      if ((var == NULL) || (var->mode != ir_var_out)) +      if ((var == NULL) || (var->mode != ir_var_shader_out))  	 continue;        parameters.add_variable(var); @@ -71,10 +68,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,     foreach_list(node, consumer->ir) {        ir_variable *const input = ((ir_instruction *) node)->as_variable(); -      /* FINISHME: For geometry shaders, this should also look for inout -       * FINISHME: variables. -       */ -      if ((input == NULL) || (input->mode != ir_var_in)) +      if ((input == NULL) || (input->mode != ir_var_shader_in))  	 continue;        ir_variable *const output = parameters.get_variable(input->name); @@ -417,8 +411,17 @@ tfeedback_decl::find_output_var(gl_shader_program *prog,     const char *name = this->is_clip_distance_mesa        ? 
"gl_ClipDistanceMESA" : this->var_name;     ir_variable *var = producer->symbols->get_variable(name); -   if (var && var->mode == ir_var_out) +   if (var && var->mode == ir_var_shader_out) { +      const glsl_type *type = var->type; +      while (type->base_type == GLSL_TYPE_ARRAY) +         type = type->fields.array; +      if (type->base_type == GLSL_TYPE_STRUCT) { +         linker_error(prog, "Transform feedback of varying structs not " +                      "implemented yet."); +         return NULL; +      }        return var; +   }     /* From GL_EXT_transform_feedback:      *   A program will fail to link if: @@ -810,16 +813,15 @@ varying_matches::compute_packing_order(ir_variable *var)  {     const glsl_type *element_type = var->type; -   /* FINISHME: Support for "varying" records in GLSL 1.50. */     while (element_type->base_type == GLSL_TYPE_ARRAY) {        element_type = element_type->fields.array;     } -   switch (element_type->vector_elements) { +   switch (element_type->component_slots() % 4) {     case 1: return PACKING_ORDER_SCALAR;     case 2: return PACKING_ORDER_VEC2;     case 3: return PACKING_ORDER_VEC3; -   case 4: return PACKING_ORDER_VEC4; +   case 0: return PACKING_ORDER_VEC4;     default:        assert(!"Unexpected value of vector_elements");        return PACKING_ORDER_VEC4; @@ -854,7 +856,7 @@ is_varying_var(GLenum shaderType, const ir_variable *var)  {     /* Only fragment shaders will take a varying variable as an input */     if (shaderType == GL_FRAGMENT_SHADER && -       var->mode == ir_var_in) { +       var->mode == ir_var_shader_in) {        switch (var->location) {        case FRAG_ATTRIB_WPOS:        case FRAG_ATTRIB_FACE: @@ -915,13 +917,13 @@ assign_varying_locations(struct gl_context *ctx,     foreach_list(node, producer->ir) {        ir_variable *const output_var = ((ir_instruction *) node)->as_variable(); -      if ((output_var == NULL) || (output_var->mode != ir_var_out)) +      if ((output_var == NULL) || 
(output_var->mode != ir_var_shader_out))  	 continue;        ir_variable *input_var =  	 consumer ? consumer->symbols->get_variable(output_var->name) : NULL; -      if (input_var && input_var->mode != ir_var_in) +      if (input_var && input_var->mode != ir_var_shader_in)           input_var = NULL;        if (input_var) { @@ -965,11 +967,11 @@ assign_varying_locations(struct gl_context *ctx,         */        assert(!ctx->Extensions.EXT_transform_feedback);     } else { -      lower_packed_varyings(mem_ctx, producer_base, slots_used, ir_var_out, -                            producer); +      lower_packed_varyings(mem_ctx, producer_base, slots_used, +                            ir_var_shader_out, producer);        if (consumer) { -         lower_packed_varyings(mem_ctx, consumer_base, slots_used, ir_var_in, -                               consumer); +         lower_packed_varyings(mem_ctx, consumer_base, slots_used, +                               ir_var_shader_in, consumer);        }     } @@ -979,7 +981,7 @@ assign_varying_locations(struct gl_context *ctx,        foreach_list(node, consumer->ir) {           ir_variable *const var = ((ir_instruction *) node)->as_variable(); -         if ((var == NULL) || (var->mode != ir_var_in)) +         if ((var == NULL) || (var->mode != ir_var_shader_in))              continue;           if (var->is_unmatched_generic_inout) { diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp index 63548e071..63ce178f4 100644 --- a/mesalib/src/glsl/linker.cpp +++ b/mesalib/src/glsl/linker.cpp @@ -107,8 +107,8 @@ public:  	 ir_rvalue *param_rval = (ir_rvalue *)iter.get();  	 ir_variable *sig_param = (ir_variable *)sig_iter.get(); -	 if (sig_param->mode == ir_var_out || -	     sig_param->mode == ir_var_inout) { +	 if (sig_param->mode == ir_var_function_out || +	     sig_param->mode == ir_var_function_inout) {  	    ir_variable *var = param_rval->variable_referenced();  	    if (var && strcmp(name, var->name) == 0) {  	       
found = true; @@ -212,10 +212,10 @@ link_invalidate_variable_locations(gl_shader *sh, int input_base,        int base;        switch (var->mode) { -      case ir_var_in: +      case ir_var_shader_in:           base = input_base;           break; -      case ir_var_out: +      case ir_var_shader_out:           base = output_base;           break;        default: @@ -393,10 +393,9 @@ mode_string(const ir_variable *var)     case ir_var_auto:        return (var->read_only) ? "global constant" : "global variable"; -   case ir_var_uniform: return "uniform"; -   case ir_var_in:      return "shader input"; -   case ir_var_out:     return "shader output"; -   case ir_var_inout:   return "shader inout"; +   case ir_var_uniform:    return "uniform"; +   case ir_var_shader_in:  return "shader input"; +   case ir_var_shader_out: return "shader output";     case ir_var_const_in:     case ir_var_temporary: @@ -874,7 +873,6 @@ link_intrastage_shaders(void *mem_ctx,  			unsigned num_shaders)  {     struct gl_uniform_block *uniform_blocks = NULL; -   unsigned num_uniform_blocks = 0;     /* Check that global variables defined in multiple shaders are consistent.      */ @@ -882,23 +880,11 @@ link_intrastage_shaders(void *mem_ctx,        return NULL;     /* Check that uniform blocks between shaders for a stage agree. 
*/ -   for (unsigned i = 0; i < num_shaders; i++) { -      struct gl_shader *sh = shader_list[i]; - -      for (unsigned j = 0; j < sh->NumUniformBlocks; j++) { -	 link_assign_uniform_block_offsets(sh); - -	 int index = link_cross_validate_uniform_block(mem_ctx, -						       &uniform_blocks, -						       &num_uniform_blocks, -						       &sh->UniformBlocks[j]); -	 if (index == -1) { -	    linker_error(prog, "uniform block `%s' has mismatching definitions", -			 sh->UniformBlocks[j].Name); -	    return NULL; -	 } -      } -   } +   const int num_uniform_blocks = +      link_uniform_blocks(mem_ctx, prog, shader_list, num_shaders, +                          &uniform_blocks); +   if (num_uniform_blocks < 0) +      return NULL;     /* Check that there is only a single definition of each function signature      * across all shaders. @@ -1069,8 +1055,8 @@ update_array_sizes(struct gl_shader_program *prog)  	 ir_variable *const var = ((ir_instruction *) node)->as_variable();  	 if ((var == NULL) || (var->mode != ir_var_uniform && -			       var->mode != ir_var_in && -			       var->mode != ir_var_out) || +			       var->mode != ir_var_shader_in && +			       var->mode != ir_var_shader_out) ||  	     !var->type->is_array())  	    continue; @@ -1078,7 +1064,7 @@ update_array_sizes(struct gl_shader_program *prog)  	  * will not be eliminated.  Since we always do std140, just  	  * don't resize arrays in UBOs.  	  */ -	 if (var->uniform_block != -1) +	 if (var->is_in_uniform_block())  	    continue;  	 unsigned int size = var->max_array_access; @@ -1206,7 +1192,8 @@ assign_attribute_or_color_locations(gl_shader_program *prog,        ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0;     const enum ir_variable_mode direction = -      (target_index == MESA_SHADER_VERTEX) ? ir_var_in : ir_var_out; +      (target_index == MESA_SHADER_VERTEX) +      ? ir_var_shader_in : ir_var_shader_out;     /* Temporary storage for the set of attributes that need locations assigned. 
@@ -1428,7 +1415,7 @@ store_fragdepth_layout(struct gl_shader_program *prog)     foreach_list(node, ir) {        ir_variable *const var = ((ir_instruction *) node)->as_variable(); -      if (var == NULL || var->mode != ir_var_out) { +      if (var == NULL || var->mode != ir_var_shader_out) {           continue;        } @@ -1809,7 +1796,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)     if (prog->_LinkedShaders[MESA_SHADER_VERTEX] != NULL) {        demote_shader_inputs_and_outputs(prog->_LinkedShaders[MESA_SHADER_VERTEX], -				       ir_var_out); +				       ir_var_shader_out);        /* Eliminate code that is now dead due to unused vertex outputs being         * demoted. @@ -1821,9 +1808,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)     if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] != NULL) {        gl_shader *const sh = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; -      demote_shader_inputs_and_outputs(sh, ir_var_in); -      demote_shader_inputs_and_outputs(sh, ir_var_inout); -      demote_shader_inputs_and_outputs(sh, ir_var_out); +      demote_shader_inputs_and_outputs(sh, ir_var_shader_in); +      demote_shader_inputs_and_outputs(sh, ir_var_shader_out);        /* Eliminate code that is now dead due to unused geometry outputs being         * demoted. @@ -1835,7 +1821,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)     if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] != NULL) {        gl_shader *const sh = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; -      demote_shader_inputs_and_outputs(sh, ir_var_in); +      demote_shader_inputs_and_outputs(sh, ir_var_shader_in);        /* Eliminate code that is now dead due to unused fragment inputs being         * demoted.  
This shouldn't actually do anything other than remove diff --git a/mesalib/src/glsl/linker.h b/mesalib/src/glsl/linker.h index 67c7f3488..14eb9c1cd 100644 --- a/mesalib/src/glsl/linker.h +++ b/mesalib/src/glsl/linker.h @@ -49,6 +49,17 @@ link_cross_validate_uniform_block(void *mem_ctx,  void  link_assign_uniform_block_offsets(struct gl_shader *shader); +extern bool +link_uniform_blocks_are_compatible(const gl_uniform_block *a, +				   const gl_uniform_block *b); + +extern int +link_uniform_blocks(void *mem_ctx, +                    struct gl_shader_program *prog, +                    struct gl_shader **shader_list, +                    unsigned num_shaders, +                    struct gl_uniform_block **blocks_ret); +  /**   * Class for processing all of the leaf fields of an uniform   * @@ -71,24 +82,60 @@ public:      * \param var  The uniform variable that is to be processed      *      * Calls \c ::visit_field for each leaf of the uniform. +    * +    * \warning +    * This entry should only be used with uniform blocks in cases where the +    * row / column ordering of matrices in the block does not matter.  For +    * example, enumerating the names of members of the block, but not for +    * determining the offsets of members.      */     void process(ir_variable *var); +   /** +    * Begin processing a uniform of a structured type. +    * +    * This flavor of \c process should be used to handle structured types +    * (i.e., structures, interfaces, or arrays there of) that need special +    * name handling.  A common usage is to handle cases where the block name +    * (instead of the instance name) is used for an interface block. +    * +    * \param type  Type that is to be processed, associated with \c name +    * \param name  Base name of the structured uniform being processed +    * +    * \note +    * \c type must be \c GLSL_TYPE_RECORD, \c GLSL_TYPE_INTERFACE, or an array +    * there of. 
+    */ +   void process(const glsl_type *type, const char *name); +  protected:     /**      * Method invoked for each leaf of the uniform      *      * \param type  Type of the field.      * \param name  Fully qualified name of the field. +    * \param row_major  For a matrix type, is it stored row-major. +    */ +   virtual void visit_field(const glsl_type *type, const char *name, +                            bool row_major) = 0; + +   /** +    * Visit a record before visiting its fields +    * +    * For structures-of-structures or interfaces-of-structures, this visits +    * the inner structure before visiting its fields. +    * +    * The default implementation does nothing.      */ -   virtual void visit_field(const glsl_type *type, const char *name) = 0; +   virtual void visit_field(const glsl_struct_field *field);  private:     /**      * \param name_length  Length of the current name \b not including the      *                     terminating \c NUL character.      */ -   void recursion(const glsl_type *t, char **name, size_t name_length); +   void recursion(const glsl_type *t, char **name, size_t name_length, +                  bool row_major);  };  void diff --git a/mesalib/src/glsl/lower_clip_distance.cpp b/mesalib/src/glsl/lower_clip_distance.cpp index 09bdc36e1..643807de8 100644 --- a/mesalib/src/glsl/lower_clip_distance.cpp +++ b/mesalib/src/glsl/lower_clip_distance.cpp @@ -301,8 +301,8 @@ lower_clip_distance_visitor::visit_leave(ir_call *ir)           this->base_ir->insert_before(temp_clip_distance);           actual_param->replace_with(              new(ctx) ir_dereference_variable(temp_clip_distance)); -         if (formal_param->mode == ir_var_in -             || formal_param->mode == ir_var_inout) { +         if (formal_param->mode == ir_var_function_in +             || formal_param->mode == ir_var_function_inout) {              /* Copy from gl_ClipDistance to the temporary before the call.               
* Since we are going to insert this copy before the current               * instruction, we need to visit it afterwards to make sure it @@ -314,8 +314,8 @@ lower_clip_distance_visitor::visit_leave(ir_call *ir)              this->base_ir->insert_before(new_assignment);              this->visit_new_assignment(new_assignment);           } -         if (formal_param->mode == ir_var_out -             || formal_param->mode == ir_var_inout) { +         if (formal_param->mode == ir_var_function_out +             || formal_param->mode == ir_var_function_inout) {              /* Copy from the temporary to gl_ClipDistance after the call.               * Since visit_list_elements() has already decided which               * instruction it's going to visit next, we need to visit diff --git a/mesalib/src/glsl/lower_output_reads.cpp b/mesalib/src/glsl/lower_output_reads.cpp index a6192a517..b93e254ec 100644 --- a/mesalib/src/glsl/lower_output_reads.cpp +++ b/mesalib/src/glsl/lower_output_reads.cpp @@ -41,7 +41,7 @@ class output_read_remover : public ir_hierarchical_visitor {  protected:     /**      * A hash table mapping from the original ir_variable shader outputs -    * (ir_var_out mode) to the new temporaries to be used instead. +    * (ir_var_shader_out mode) to the new temporaries to be used instead.      
*/     hash_table *replacements; @@ -86,7 +86,7 @@ output_read_remover::~output_read_remover()  ir_visitor_status  output_read_remover::visit(ir_dereference_variable *ir)  { -   if (ir->var->mode != ir_var_out) +   if (ir->var->mode != ir_var_shader_out)        return visit_continue;     ir_variable *temp = (ir_variable *) hash_table_find(replacements, ir->var); diff --git a/mesalib/src/glsl/lower_packed_varyings.cpp b/mesalib/src/glsl/lower_packed_varyings.cpp index 9e7f274b7..8a40f5e72 100644 --- a/mesalib/src/glsl/lower_packed_varyings.cpp +++ b/mesalib/src/glsl/lower_packed_varyings.cpp @@ -70,6 +70,10 @@   * This lowering pass also packs flat floats, ints, and uints together, by   * using ivec4 as the base type of flat "varyings", and using appropriate   * casts to convert floats and uints into ints. + * + * This lowering pass also handles varyings whose type is a struct or an array + * of struct.  Structs are packed in order and with no gaps, so there may be a + * performance penalty due to structure elements being double-parked.   */  #include "glsl_symbol_table.h" @@ -135,8 +139,8 @@ private:     ir_variable **packed_varyings;     /** -    * Type of varying which is being lowered in this pass (either ir_var_in or -    * ir_var_out). +    * Type of varying which is being lowered in this pass (either +    * ir_var_shader_in or ir_var_shader_out).      */     const ir_variable_mode mode; @@ -274,10 +278,20 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,                                              ir_variable *unpacked_var,                                              const char *name)  { -   /* FINISHME: Support for "varying" records in GLSL 1.50. 
*/ -   assert(!rvalue->type->is_record()); - -   if (rvalue->type->is_array()) { +   if (rvalue->type->is_record()) { +      for (unsigned i = 0; i < rvalue->type->length; i++) { +         if (i != 0) +            rvalue = rvalue->clone(this->mem_ctx, NULL); +         const char *field_name = rvalue->type->fields.structure[i].name; +         ir_dereference_record *dereference_record = new(this->mem_ctx) +            ir_dereference_record(rvalue, field_name); +         char *deref_name +            = ralloc_asprintf(this->mem_ctx, "%s.%s", name, field_name); +         fine_location = this->lower_rvalue(dereference_record, fine_location, +                                            unpacked_var, deref_name); +      } +      return fine_location; +   } else if (rvalue->type->is_array()) {        /* Arrays are packed/unpacked by considering each array element in         * sequence.         */ @@ -336,7 +350,7 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,                                                            unpacked_var, name));        ir_swizzle *swizzle = new(this->mem_ctx)           ir_swizzle(packed_deref, swizzle_values, components); -      if (this->mode == ir_var_out) { +      if (this->mode == ir_var_shader_out) {           ir_assignment *assignment              = this->bitwise_assign_pack(swizzle, rvalue);           this->main_instructions->push_tail(assignment); diff --git a/mesalib/src/glsl/lower_packing_builtins.cpp b/mesalib/src/glsl/lower_packing_builtins.cpp new file mode 100644 index 000000000..db73c7b0f --- /dev/null +++ b/mesalib/src/glsl/lower_packing_builtins.cpp @@ -0,0 +1,1314 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" +#include "ir_builder.h" +#include "ir_optimization.h" +#include "ir_rvalue_visitor.h" + +namespace { + +using namespace ir_builder; + +/** + * A visitor that lowers built-in floating-point pack/unpack expressions + * such as packSnorm2x16. + */ +class lower_packing_builtins_visitor : public ir_rvalue_visitor { +public: +   /** +    * \param op_mask is a bitmask of `enum lower_packing_builtins_op` +    */ +   explicit lower_packing_builtins_visitor(int op_mask) +      : op_mask(op_mask), +        progress(false) +   { +      /* Mutually exclusive options. 
*/ +      assert(!((op_mask & LOWER_PACK_HALF_2x16) && +               (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT))); + +      assert(!((op_mask & LOWER_UNPACK_HALF_2x16) && +               (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT))); + +      factory.instructions = &factory_instructions; +   } + +   virtual ~lower_packing_builtins_visitor() +   { +      assert(factory_instructions.is_empty()); +   } + +   bool get_progress() { return progress; } + +   void handle_rvalue(ir_rvalue **rvalue) +   { +      if (!*rvalue) +	 return; + +      ir_expression *expr = (*rvalue)->as_expression(); +      if (!expr) +	 return; + +      enum lower_packing_builtins_op lowering_op = +         choose_lowering_op(expr->operation); + +      if (lowering_op == LOWER_PACK_UNPACK_NONE) +         return; + +      setup_factory(ralloc_parent(expr)); + +      ir_rvalue *op0 = expr->operands[0]; +      ralloc_steal(factory.mem_ctx, op0); + +      switch (lowering_op) { +      case LOWER_PACK_SNORM_2x16: +         *rvalue = lower_pack_snorm_2x16(op0); +         break; +      case LOWER_PACK_SNORM_4x8: +         *rvalue = lower_pack_snorm_4x8(op0); +         break; +      case LOWER_PACK_UNORM_2x16: +         *rvalue = lower_pack_unorm_2x16(op0); +         break; +      case LOWER_PACK_UNORM_4x8: +         *rvalue = lower_pack_unorm_4x8(op0); +         break; +      case LOWER_PACK_HALF_2x16: +         *rvalue = lower_pack_half_2x16(op0); +         break; +      case LOWER_PACK_HALF_2x16_TO_SPLIT: +         *rvalue = split_pack_half_2x16(op0); +         break; +      case LOWER_UNPACK_SNORM_2x16: +         *rvalue = lower_unpack_snorm_2x16(op0); +         break; +      case LOWER_UNPACK_SNORM_4x8: +         *rvalue = lower_unpack_snorm_4x8(op0); +         break; +      case LOWER_UNPACK_UNORM_2x16: +         *rvalue = lower_unpack_unorm_2x16(op0); +         break; +      case LOWER_UNPACK_UNORM_4x8: +         *rvalue = lower_unpack_unorm_4x8(op0); +         break; +      case 
LOWER_UNPACK_HALF_2x16: +         *rvalue = lower_unpack_half_2x16(op0); +         break; +      case LOWER_UNPACK_HALF_2x16_TO_SPLIT: +         *rvalue = split_unpack_half_2x16(op0); +         break; +      case LOWER_PACK_UNPACK_NONE: +         assert(!"not reached"); +         break; +      } + +      teardown_factory(); +      progress = true; +   } + +private: +   const int op_mask; +   bool progress; +   ir_factory factory; +   exec_list factory_instructions; + +   /** +    * Determine the needed lowering operation by filtering \a expr_op +    * through \ref op_mask. +    */ +   enum lower_packing_builtins_op +   choose_lowering_op(ir_expression_operation expr_op) +   { +      /* C++ regards int and enum as fundamentally different types. +       * So, we can't simply return from each case; we must cast the return +       * value. +       */ +      int result; + +      switch (expr_op) { +      case ir_unop_pack_snorm_2x16: +         result = op_mask & LOWER_PACK_SNORM_2x16; +         break; +      case ir_unop_pack_snorm_4x8: +         result = op_mask & LOWER_PACK_SNORM_4x8; +         break; +      case ir_unop_pack_unorm_2x16: +         result = op_mask & LOWER_PACK_UNORM_2x16; +         break; +      case ir_unop_pack_unorm_4x8: +         result = op_mask & LOWER_PACK_UNORM_4x8; +         break; +      case ir_unop_pack_half_2x16: +         result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT); +         break; +      case ir_unop_unpack_snorm_2x16: +         result = op_mask & LOWER_UNPACK_SNORM_2x16; +         break; +      case ir_unop_unpack_snorm_4x8: +         result = op_mask & LOWER_UNPACK_SNORM_4x8; +         break; +      case ir_unop_unpack_unorm_2x16: +         result = op_mask & LOWER_UNPACK_UNORM_2x16; +         break; +      case ir_unop_unpack_unorm_4x8: +         result = op_mask & LOWER_UNPACK_UNORM_4x8; +         break; +      case ir_unop_unpack_half_2x16: +         result = op_mask & (LOWER_UNPACK_HALF_2x16 | 
LOWER_UNPACK_HALF_2x16_TO_SPLIT); +         break; +      default: +         result = LOWER_PACK_UNPACK_NONE; +         break; +      } + +      return static_cast<enum lower_packing_builtins_op>(result); +   } + +   void +   setup_factory(void *mem_ctx) +   { +      assert(factory.mem_ctx == NULL); +      assert(factory.instructions->is_empty()); + +      factory.mem_ctx = mem_ctx; +   } + +   void +   teardown_factory() +   { +      base_ir->insert_before(factory.instructions); +      assert(factory.instructions->is_empty()); +      factory.mem_ctx = NULL; +   } + +   template <typename T> +   ir_constant* +   constant(T x) +   { +      return factory.constant(x); +   } + +   /** +    * \brief Pack two uint16's into a single uint32. +    * +    * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32 +    * where the least significant bits specify the first element of the pair. +    * Return the uint32. +    */ +   ir_rvalue* +   pack_uvec2_to_uint(ir_rvalue *uvec2_rval) +   { +      assert(uvec2_rval->type == glsl_type::uvec2_type); + +      /* uvec2 u = UVEC2_RVAL; */ +      ir_variable *u = factory.make_temp(glsl_type::uvec2_type, +                                          "tmp_pack_uvec2_to_uint"); +      factory.emit(assign(u, uvec2_rval)); + +      /* return (u.y << 16) | (u.x & 0xffff); */ +      return bit_or(lshift(swizzle_y(u), constant(16u)), +                    bit_and(swizzle_x(u), constant(0xffffu))); +   } + +   /** +    * \brief Pack four uint8's into a single uint32. +    * +    * Interpret the given uvec4 as a uint8 4-tuple. Pack the 4-tuple into a +    * uint32 where the least significant bits specify the first element of the +    * 4-tuple. Return the uint32. 
+    */ +   ir_rvalue* +   pack_uvec4_to_uint(ir_rvalue *uvec4_rval) +   { +      assert(uvec4_rval->type == glsl_type::uvec4_type); + +      /* uvec4 u = UVEC4_RVAL; */ +      ir_variable *u = factory.make_temp(glsl_type::uvec4_type, +                                          "tmp_pack_uvec4_to_uint"); +      factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); + +      /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ +      return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)), +                           lshift(swizzle_z(u), constant(16u))), +                    bit_or(lshift(swizzle_y(u), constant(8u)), +                           swizzle_x(u))); +   } + +   /** +    * \brief Unpack a uint32 into two uint16's. +    * +    * Interpret the given uint32 as a uint16 pair where the uint32's least +    * significant bits specify the pair's first element. Return the uint16 +    * pair as a uvec2. +    */ +   ir_rvalue* +   unpack_uint_to_uvec2(ir_rvalue *uint_rval) +   { +      assert(uint_rval->type == glsl_type::uint_type); + +      /* uint u = UINT_RVAL; */ +      ir_variable *u = factory.make_temp(glsl_type::uint_type, +                                          "tmp_unpack_uint_to_uvec2_u"); +      factory.emit(assign(u, uint_rval)); + +      /* uvec2 u2; */ +      ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type, +                                           "tmp_unpack_uint_to_uvec2_u2"); + +      /* u2.x = u & 0xffffu; */ +      factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X)); + +      /* u2.y = u >> 16u; */ +      factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y)); + +      return deref(u2).val; +   } + +   /** +    * \brief Unpack a uint32 into four uint8's. +    * +    * Interpret the given uint32 as a uint8 4-tuple where the uint32's least +    * significant bits specify the 4-tuple's first element. Return the uint8 +    * 4-tuple as a uvec4. 
+    */ +   ir_rvalue* +   unpack_uint_to_uvec4(ir_rvalue *uint_rval) +   { +      assert(uint_rval->type == glsl_type::uint_type); + +      /* uint u = UINT_RVAL; */ +      ir_variable *u = factory.make_temp(glsl_type::uint_type, +                                          "tmp_unpack_uint_to_uvec4_u"); +      factory.emit(assign(u, uint_rval)); + +      /* uvec4 u4; */ +      ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, +                                           "tmp_unpack_uint_to_uvec4_u4"); + +      /* u4.x = u & 0xffu; */ +      factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); + +      /* u4.y = (u >> 8u) & 0xffu; */ +      factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), +                                      constant(0xffu)), WRITEMASK_Y)); + +      /* u4.z = (u >> 16u) & 0xffu; */ +      factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), +                                      constant(0xffu)), WRITEMASK_Z)); + +      /* u4.w = (u >> 24u) */ +      factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); + +      return deref(u4).val; +   } + +   /** +    * \brief Lower a packSnorm2x16 expression. +    * +    * \param vec2_rval is packSnorm2x16's input +    * \return packSnorm2x16's output as a uint rvalue +    */ +   ir_rvalue* +   lower_pack_snorm_2x16(ir_rvalue *vec2_rval) +   { +      /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: +       * +       *    highp uint packSnorm2x16(vec2 v) +       *    -------------------------------- +       *    First, converts each component of the normalized floating-point value +       *    v into 16-bit integer values. Then, the results are packed into the +       *    returned 32-bit unsigned integer. 
+       * +       *    The conversion for component c of v to fixed point is done as +       *    follows: +       * +       *       packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) +       * +       *    The first component of the vector will be written to the least +       *    significant bits of the output; the last component will be written to +       *    the most significant bits. +       * +       * This function generates IR that approximates the following pseudo-GLSL: +       * +       *     return pack_uvec2_to_uint( +       *         uvec2(ivec2( +       *           round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f)))); +       * +       * It is necessary to first convert the vec2 to ivec2 rather than directly +       * converting vec2 to uvec2 because the latter conversion is undefined. +       * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to +       * convert a negative floating point value to an uint". +       */ +      assert(vec2_rval->type == glsl_type::vec2_type); + +      ir_rvalue *result = pack_uvec2_to_uint( +            i2u(f2i(round_even(mul(clamp(vec2_rval, +                                         constant(-1.0f), +                                         constant(1.0f)), +                                   constant(32767.0f)))))); + +      assert(result->type == glsl_type::uint_type); +      return result; +   } + +   /** +    * \brief Lower a packSnorm4x8 expression. +    * +    * \param vec4_rval is packSnorm4x8's input +    * \return packSnorm4x8's output as a uint rvalue +    */ +   ir_rvalue* +   lower_pack_snorm_4x8(ir_rvalue *vec4_rval) +   { +      /* From page 137 (143 of pdf) of the GLSL 4.30 spec: +       * +       *    highp uint packSnorm4x8(vec4 v) +       *    ------------------------------- +       *    First, converts each component of the normalized floating-point value +       *    v into 8-bit integer values. Then, the results are packed into the +       *    returned 32-bit unsigned integer. 
+       * +       *    The conversion for component c of v to fixed point is done as +       *    follows: +       * +       *       packSnorm4x8: round(clamp(c, -1, +1) * 127.0) +       * +       *    The first component of the vector will be written to the least +       *    significant bits of the output; the last component will be written to +       *    the most significant bits. +       * +       * This function generates IR that approximates the following pseudo-GLSL: +       * +       *     return pack_uvec4_to_uint( +       *         uvec4(ivec4( +       *           round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); +       * +       * It is necessary to first convert the vec4 to ivec4 rather than directly +       * converting vec4 to uvec4 because the latter conversion is undefined. +       * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to +       * convert a negative floating point value to an uint". +       */ +      assert(vec4_rval->type == glsl_type::vec4_type); + +      ir_rvalue *result = pack_uvec4_to_uint( +            i2u(f2i(round_even(mul(clamp(vec4_rval, +                                         constant(-1.0f), +                                         constant(1.0f)), +                                   constant(127.0f)))))); + +      assert(result->type == glsl_type::uint_type); +      return result; +   } + +   /** +    * \brief Lower an unpackSnorm2x16 expression. +    * +    * \param uint_rval is unpackSnorm2x16's input +    * \return unpackSnorm2x16's output as a vec2 rvalue +    */ +   ir_rvalue* +   lower_unpack_snorm_2x16(ir_rvalue *uint_rval) +   { +      /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: +       * +       *    highp vec2 unpackSnorm2x16 (highp uint p) +       *    ----------------------------------------- +       *    First, unpacks a single 32-bit unsigned integer p into a pair of +       *    16-bit unsigned integers. 
Then, each component is converted to +       *    a normalized floating-point value to generate the returned +       *    two-component vector. +       * +       *    The conversion for unpacked fixed-point value f to floating point is +       *    done as follows: +       * +       *       unpackSnorm2x16: clamp(f / 32767.0, -1,+1) +       * +       *    The first component of the returned vector will be extracted from the +       *    least significant bits of the input; the last component will be +       *    extracted from the most significant bits. +       * +       * This function generates IR that approximates the following pseudo-GLSL: +       * +       *    return clamp( +       *       ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f, +       *       -1.0f, 1.0f); +       * +       * The above IR may appear unnecessarily complex, but the intermediate +       * conversion to ivec2 and the bit shifts are necessary to correctly unpack +       * negative floats. +       * +       * To see why, consider packing and then unpacking vec2(-1.0, 0.0). +       * packSnorm2x16 encodes -1.0 as the int16 0x8001. During unpacking, we +       * place that int16 into an int32, which results in the *positive* integer +       * 0x00008001.  The int16's sign bit becomes, in the int32, the rather +       * unimportant bit 16. We must now extend the int16's sign bit into bits +       * 17-32, which is accomplished by left-shifting then right-shifting. 
+       */ + +      assert(uint_rval->type == glsl_type::uint_type); + +      ir_rvalue *result = +        clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), +                                    constant(16)), +                             constant(16u))), +                  constant(32767.0f)), +              constant(-1.0f), +              constant(1.0f)); + +      assert(result->type == glsl_type::vec2_type); +      return result; +   } + +   /** +    * \brief Lower an unpackSnorm4x8 expression. +    * +    * \param uint_rval is unpackSnorm4x8's input +    * \return unpackSnorm4x8's output as a vec4 rvalue +    */ +   ir_rvalue* +   lower_unpack_snorm_4x8(ir_rvalue *uint_rval) +   { +      /* From page 137 (143 of pdf) of the GLSL 4.30 spec: +       * +       *    highp vec4 unpackSnorm4x8 (highp uint p) +       *    ---------------------------------------- +       *    First, unpacks a single 32-bit unsigned integer p into four +       *    8-bit unsigned integers. Then, each component is converted to +       *    a normalized floating-point value to generate the returned +       *    four-component vector. +       * +       *    The conversion for unpacked fixed-point value f to floating point is +       *    done as follows: +       * +       *       unpackSnorm4x8: clamp(f / 127.0, -1, +1) +       * +       *    The first component of the returned vector will be extracted from the +       *    least significant bits of the input; the last component will be +       *    extracted from the most significant bits. +       * +       * This function generates IR that approximates the following pseudo-GLSL: +       * +       *    return clamp( +       *       ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, +       *       -1.0f, 1.0f); +       * +       * The above IR may appear unnecessarily complex, but the intermediate +       * conversion to ivec4 and the bit shifts are necessary to correctly unpack +       * negative floats. 
+       * +       * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, +       * 0.0). packSnorm4x8 encodes -1.0 as the int8 0x81. During unpacking, we +       * place that int8 into an int32, which results in the *positive* integer +       * 0x00000081.  The int8's sign bit becomes, in the int32, the rather +       * unimportant bit 8. We must now extend the int8's sign bit into bits +       * 9-32, which is accomplished by left-shifting then right-shifting. +       */ + +      assert(uint_rval->type == glsl_type::uint_type); + +      ir_rvalue *result = +        clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), +                                    constant(24u)), +                             constant(24u))), +                  constant(127.0f)), +              constant(-1.0f), +              constant(1.0f)); + +      assert(result->type == glsl_type::vec4_type); +      return result; +   } + +   /** +    * \brief Lower a packUnorm2x16 expression. +    * +    * \param vec2_rval is packUnorm2x16's input +    * \return packUnorm2x16's output as a uint rvalue +    */ +   ir_rvalue* +   lower_pack_unorm_2x16(ir_rvalue *vec2_rval) +   { +      /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: +       * +       *    highp uint packUnorm2x16 (vec2 v) +       *    --------------------------------- +       *    First, converts each component of the normalized floating-point value +       *    v into 16-bit integer values. Then, the results are packed into the +       *    returned 32-bit unsigned integer. +       * +       *    The conversion for component c of v to fixed point is done as +       *    follows: +       * +       *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) +       * +       *    The first component of the vector will be written to the least +       *    significant bits of the output; the last component will be written to +       *    the most significant bits. 
+       * +       * This function generates IR that approximates the following pseudo-GLSL: +       * +       *     return pack_uvec2_to_uint(uvec2( +       *                round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f))); +       * +       * Here it is safe to directly convert the vec2 to uvec2 because the +       * vec2 has been clamped to a non-negative range. +       */ + +      assert(vec2_rval->type == glsl_type::vec2_type); + +      ir_rvalue *result = pack_uvec2_to_uint( +         f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f))))); + +      assert(result->type == glsl_type::uint_type); +      return result; +   } + +   /** +    * \brief Lower a packUnorm4x8 expression. +    * +    * \param vec4_rval is packUnorm4x8's input +    * \return packUnorm4x8's output as a uint rvalue +    */ +   ir_rvalue* +   lower_pack_unorm_4x8(ir_rvalue *vec4_rval) +   { +      /* From page 137 (143 of pdf) of the GLSL 4.30 spec: +       * +       *    highp uint packUnorm4x8 (vec4 v) +       *    -------------------------------- +       *    First, converts each component of the normalized floating-point value +       *    v into 8-bit integer values. Then, the results are packed into the +       *    returned 32-bit unsigned integer. +       * +       *    The conversion for component c of v to fixed point is done as +       *    follows: +       * +       *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0) +       * +       *    The first component of the vector will be written to the least +       *    significant bits of the output; the last component will be written to +       *    the most significant bits. 
+       * +       * This function generates IR that approximates the following pseudo-GLSL: +       * +       *     return pack_uvec4_to_uint(uvec4( +       *                round(clamp(VEC4_RVALUE, 0.0f, 1.0f) * 255.0f))); +       * +       * Here it is safe to directly convert the vec4 to uvec4 because the +       * vec4 has been clamped to a non-negative range. +       */ + +      assert(vec4_rval->type == glsl_type::vec4_type); + +      ir_rvalue *result = pack_uvec4_to_uint( +         f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); + +      assert(result->type == glsl_type::uint_type); +      return result; +   } + +   /** +    * \brief Lower an unpackUnorm2x16 expression. +    * +    * \param uint_rval is unpackUnorm2x16's input +    * \return unpackUnorm2x16's output as a vec2 rvalue +    */ +   ir_rvalue* +   lower_unpack_unorm_2x16(ir_rvalue *uint_rval) +   { +      /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: +       * +       *    highp vec2 unpackUnorm2x16 (highp uint p) +       *    ----------------------------------------- +       *    First, unpacks a single 32-bit unsigned integer p into a pair of +       *    16-bit unsigned integers. Then, each component is converted to +       *    a normalized floating-point value to generate the returned +       *    two-component vector. +       * +       *    The conversion for unpacked fixed-point value f to floating point is +       *    done as follows: +       * +       *       unpackUnorm2x16: f / 65535.0 +       * +       *    The first component of the returned vector will be extracted from the +       *    least significant bits of the input; the last component will be +       *    extracted from the most significant bits. 
+       * +       * This function generates IR that approximates the following pseudo-GLSL: +       * +       *     return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0; +       */ + +      assert(uint_rval->type == glsl_type::uint_type); + +      ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)), +                              constant(65535.0f)); + +      assert(result->type == glsl_type::vec2_type); +      return result; +   } + +   /** +    * \brief Lower an unpackUnorm4x8 expression. +    * +    * \param uint_rval is unpackUnorm4x8's input +    * \return unpackUnorm4x8's output as a vec4 rvalue +    */ +   ir_rvalue* +   lower_unpack_unorm_4x8(ir_rvalue *uint_rval) +   { +      /* From page 137 (143 of pdf) of the GLSL 4.30 spec: +       * +       *    highp vec4 unpackUnorm4x8 (highp uint p) +       *    ---------------------------------------- +       *    First, unpacks a single 32-bit unsigned integer p into four +       *    8-bit unsigned integers. Then, each component is converted to +       *    a normalized floating-point value to generate the returned +       *    four-component vector. +       * +       *    The conversion for unpacked fixed-point value f to floating point is +       *    done as follows: +       * +       *       unpackUnorm4x8: f / 255.0 +       * +       *    The first component of the returned vector will be extracted from the +       *    least significant bits of the input; the last component will be +       *    extracted from the most significant bits. 
+       * +       * This function generates IR that approximates the following pseudo-GLSL: +       * +       *     return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0; +       */ + +      assert(uint_rval->type == glsl_type::uint_type); + +      ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)), +                              constant(255.0f)); + +      assert(result->type == glsl_type::vec4_type); +      return result; +   } + +   /** +    * \brief Lower the component-wise calculation of packHalf2x16. +    * +    * \param f_rval is one component of packHalf2x16's input +    * \param e_rval is the unshifted exponent bits of f_rval +    * \param m_rval is the unshifted mantissa bits of f_rval +    * +    * \return a uint rvalue that encodes a float16 in its lower 16 bits +    */ +   ir_rvalue* +   pack_half_1x16_nosign(ir_rvalue *f_rval, +                         ir_rvalue *e_rval, +                         ir_rvalue *m_rval) +   { +      assert(e_rval->type == glsl_type::uint_type); +      assert(m_rval->type == glsl_type::uint_type); + +      /* uint u16; */ +      ir_variable *u16 = factory.make_temp(glsl_type::uint_type, +                                           "tmp_pack_half_1x16_u16"); + +      /* float f = FLOAT_RVAL; */ +      ir_variable *f = factory.make_temp(glsl_type::float_type, +                                          "tmp_pack_half_1x16_f"); +      factory.emit(assign(f, f_rval)); + +      /* uint e = E_RVAL; */ +      ir_variable *e = factory.make_temp(glsl_type::uint_type, +                                          "tmp_pack_half_1x16_e"); +      factory.emit(assign(e, e_rval)); + +      /* uint m = M_RVAL; */ +      ir_variable *m = factory.make_temp(glsl_type::uint_type, +                                          "tmp_pack_half_1x16_m"); +      factory.emit(assign(m, m_rval)); + +      /* Preliminaries +       * ------------- +       * +       * For a float16, the bit layout is: +       * +       *   sign:     15 +       *   
exponent: 10:14 +       *   mantissa: 0:9 +       * +       * Let f16 be a float16 value. The sign, exponent, and mantissa +       * determine its value thus: +       * +       *   if e16 = 0 and m16 = 0, then zero:       (-1)^s16 * 0                               (1) +       *   if e16 = 0 and m16!= 0, then subnormal:  (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10)     (2) +       *   if 0 < e16 < 31, then normal:            (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) +       *   if e16 = 31 and m16 = 0, then infinite:  (-1)^s16 * inf                             (4) +       *   if e16 = 31 and m16 != 0, then           NaN                                        (5) +       * +       * where 0 <= m16 < 2^10. +       * +       * For a float32, the bit layout is: +       * +       *   sign:     31 +       *   exponent: 23:30 +       *   mantissa: 0:22 +       * +       * Let f32 be a float32 value. The sign, exponent, and mantissa +       * determine its value thus: +       * +       *   if e32 = 0 and m32 = 0, then zero:        (-1)^s * 0                                (10) +       *   if e32 = 0 and m32 != 0, then subnormal:  (-1)^s * 2^(e32 - 126) * (m32 / 2^23)     (11) +       *   if 0 < e32 < 255, then normal:            (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) +       *   if e32 = 255 and m32 = 0, then infinite:  (-1)^s * inf                              (13) +       *   if e32 = 255 and m32 != 0, then           NaN                                       (14) +       * +       * where 0 <= m32 < 2^23. 
+       * +       * The minimum and maximum normal float16 values are +       * +       *   min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14)   (20) +       *   max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10)         (21) +       * +       * The step at max_norm16 is +       * +       *   max_step16 = 2^5                                     (22) +       * +       * Observe that the float16 boundary values in equations 20-21 lie in the +       * range of normal float32 values. +       * +       * +       * Rounding Behavior +       * ----------------- +       * Not all float32 values can be exactly represented as a float16. We +       * round all such intermediate float32 values to the nearest float16; if +       * the float32 is exactly between two float16 values, we round to the one +       * with an even mantissa. This rounding behavior has several benefits: +       * +       *   - It has no sign bias. +       * +       *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's +       *     GPU ISA. +       * +       *   - By reproducing the behavior of the GPU (at least on Intel hardware), +       *     compile-time evaluation of constant packHalf2x16 GLSL expressions will +       *     result in the same value as if the expression were executed on the +       *     GPU. +       * +       * Calculation +       * ----------- +       * Our task is to compute s16, e16, m16 given f32.  Since this function +       * ignores the sign bit, assume that s32 = s16 = 0.  There are several +       * cases to consider. +       */ + +      factory.emit( + +         /* Case 1) f32 is NaN +          * +          *   The resultant f16 will also be NaN. +          */ + +         /* if (e32 == 255 && m32 != 0) { */ +         if_tree(logic_and(equal(e, constant(0xffu << 23u)), +                           logic_not(equal(m, constant(0u)))), + +            assign(u16, constant(0x7fffu)), + +         /* Case 2) f32 lies in the range [0, min_norm16). 
+          * +          *   The resultant float16 will be either zero, subnormal, or normal. +          * +          *   Solving +          * +          *     f32 = min_norm16       (30) +          * +          *   gives +          * +          *     e32 = 113 and m32 = 0  (31) +          * +          *   Therefore this case occurs if and only if +          * +          *     e32 < 113              (32) +          */ + +         /* } else if (e32 < 113) { */ +         if_tree(less(e, constant(113u << 23u)), + +            /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */ +            assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f), +                                           constant((float) (1 << 24)))))), + +         /* Case 3) f32 lies in the range +          *         [min_norm16, max_norm16 + max_step16). +          * +          *   The resultant float16 will be either normal or infinite. +          * +          *   Solving +          * +          *     f32 = max_norm16 + max_step16           (40) +          *         = 2^15 * (1 + 1023 / 2^10) + 2^5    (41) +          *         = 2^16                              (42) +          *   gives +          * +          *     e32 = 143 and m32 = 0                   (43) +          * +          *   We already solved the boundary condition f32 = min_norm16 above +          *   in equation 31. Therefore this case occurs if and only if +          * +          *     113 <= e32 and e32 < 143 +          */ + +         /* } else if (e32 < 143) { */ +         if_tree(less(e, constant(143u << 23u)), + +            /* The addition below handles the case where the mantissa rounds +             * up to 1024 and bumps the exponent. 
+             * +             * u16 = ((e - (112u << 23u)) >> 13u) +             *     + round_to_even((float(m) / (1u << 13u)); +             */ +            assign(u16, add(rshift(sub(e, constant(112u << 23u)), +                                   constant(13u)), +                            f2u(round_even( +                                  div(u2f(m), constant((float) (1 << 13))))))), + +         /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf]. +          * +          *   The resultant float16 will be infinite. +          * +          *   The cases above caught all float32 values in the range +          *   [0, max_norm16 + max_step16), so this is the fall-through case. +          */ + +         /* } else { */ + +            assign(u16, constant(31u << 10u)))))); + +         /* } */ + +       return deref(u16).val; +   } + +   /** +    * \brief Lower a packHalf2x16 expression. +    * +    * \param vec2_rval is packHalf2x16's input +    * \return packHalf2x16's output as a uint rvalue +    */ +   ir_rvalue* +   lower_pack_half_2x16(ir_rvalue *vec2_rval) +   { +      /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: +       * +       *    highp uint packHalf2x16 (mediump vec2 v) +       *    ---------------------------------------- +       *    Returns an unsigned integer obtained by converting the components of +       *    a two-component floating-point vector to the 16-bit floating-point +       *    representation found in the OpenGL ES Specification, and then packing +       *    these two 16-bit integers into a 32-bit unsigned integer. +       * +       *    The first vector component specifies the 16 least- significant bits +       *    of the result; the second component specifies the 16 most-significant +       *    bits. 
+       */ + +      assert(vec2_rval->type == glsl_type::vec2_type); + +      /* vec2 f = VEC2_RVAL; */ +      ir_variable *f = factory.make_temp(glsl_type::vec2_type, +                                         "tmp_pack_half_2x16_f"); +      factory.emit(assign(f, vec2_rval)); + +      /* uvec2 f32 = bitcast_f2u(f); */ +      ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, +                                            "tmp_pack_half_2x16_f32"); +      factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f))); + +      /* uvec2 f16; */ +      ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, +                                        "tmp_pack_half_2x16_f16"); + +      /* Get f32's unshifted exponent bits. +       * +       *   uvec2 e = f32 & 0x7f800000u; +       */ +      ir_variable *e = factory.make_temp(glsl_type::uvec2_type, +                                          "tmp_pack_half_2x16_e"); +      factory.emit(assign(e, bit_and(f32, constant(0x7f800000u)))); + +      /* Get f32's unshifted mantissa bits. +       * +       *   uvec2 m = f32 & 0x007fffffu; +       */ +      ir_variable *m = factory.make_temp(glsl_type::uvec2_type, +                                          "tmp_pack_half_2x16_m"); +      factory.emit(assign(m, bit_and(f32, constant(0x007fffffu)))); + +      /* Set f16's exponent and mantissa bits. 
+       * +       *   f16.x = pack_half_1x16_nosign(f.x, e.x, m.x); +       *   f16.y = pack_half_1x16_nosign(f.y, e.y, m.y); +       */ +      factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f), +                                                     swizzle_x(e), +                                                     swizzle_x(m)), +                           WRITEMASK_X)); +      factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f), +                                                     swizzle_y(e), +                                                     swizzle_y(m)), +                           WRITEMASK_Y)); + +      /* Set f16's sign bits. +       * +       *   f16 |= (f32 & (1u << 31u)) >> 16u; +       */ +      factory.emit( +         assign(f16, bit_or(f16, +                            rshift(bit_and(f32, constant(1u << 31u)), +                                   constant(16u))))); + + +      /* return (f16.y << 16u) | f16.x; */ +      ir_rvalue *result = bit_or(lshift(swizzle_y(f16), +                                        constant(16u)), +                                 swizzle_x(f16)); + +      assert(result->type == glsl_type::uint_type); +      return result; +   } + +   /** +    * \brief Split packHalf2x16's vec2 operand into two floats. +    * +    * \param vec2_rval is packHalf2x16's input +    * \return a uint rvalue +    * +    * Some code generators, such as the i965 fragment shader, require that all +    * vector expressions be lowered to a sequence of scalar expressions. +    * However, packHalf2x16 cannot be scalarized by the same mechanism as +    * a true vector operation because its input and output have a differing +    * number of vector components. +    * +    * This method scalarizes packHalf2x16 by transforming it from a unary +    * operation having vector input to a binary operation having scalar input. 
+    * That is, it transforms +    * +    *    packHalf2x16(VEC2_RVAL); +    * +    * into +    * +    *    vec2 v = VEC2_RVAL; +    *    return packHalf2x16_split(v.x, v.y); +    */ +   ir_rvalue* +   split_pack_half_2x16(ir_rvalue *vec2_rval) +   { +      assert(vec2_rval->type == glsl_type::vec2_type); + +      ir_variable *v = factory.make_temp(glsl_type::vec2_type, +                                         "tmp_split_pack_half_2x16_v"); +      factory.emit(assign(v, vec2_rval)); + +      return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v)); +   } + +   /** +    * \brief Lower the component-wise calculation of unpackHalf2x16. +    * +    * Given a uint that encodes a float16 in its lower 16 bits, this function +    * returns a uint that encodes a float32 with the same value. The sign bit +    * of the float16 is ignored. +    * +    * \param e_rval is the unshifted exponent bits of a float16 +    * \param m_rval is the unshifted mantissa bits of a float16 +    * \return a uint rvalue that encodes a float32 +    */ +   ir_rvalue* +   unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval) +   { +      assert(e_rval->type == glsl_type::uint_type); +      assert(m_rval->type == glsl_type::uint_type); + +      /* uint u32; */ +      ir_variable *u32 = factory.make_temp(glsl_type::uint_type, +                                           "tmp_unpack_half_1x16_u32"); + +      /* uint e = E_RVAL; */ +      ir_variable *e = factory.make_temp(glsl_type::uint_type, +                                          "tmp_unpack_half_1x16_e"); +      factory.emit(assign(e, e_rval)); + +      /* uint m = M_RVAL; */ +      ir_variable *m = factory.make_temp(glsl_type::uint_type, +                                          "tmp_unpack_half_1x16_m"); +      factory.emit(assign(m, m_rval)); + +      /* Preliminaries +       * ------------- +       * +       * For a float16, the bit layout is: +       * +       *   sign:     15 +       *   exponent: 10:14 +       *   
mantissa: 0:9 +       * +       * Let f16 be a float16 value. The sign, exponent, and mantissa +       * determine its value thus: +       * +       *   if e16 = 0 and m16 = 0, then zero:       (-1)^s16 * 0                               (1) +       *   if e16 = 0 and m16!= 0, then subnormal:  (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10)     (2) +       *   if 0 < e16 < 31, then normal:            (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) +       *   if e16 = 31 and m16 = 0, then infinite:  (-1)^s16 * inf                             (4) +       *   if e16 = 31 and m16 != 0, then           NaN                                        (5) +       * +       * where 0 <= m16 < 2^10. +       * +       * For a float32, the bit layout is: +       * +       *   sign: 31 +       *   exponent: 23:30 +       *   mantissa: 0:22 +       * +       * Let f32 be a float32 value. The sign, exponent, and mantissa +       * determine its value thus: +       * +       *   if e32 = 0 and m32 = 0, then zero:        (-1)^s * 0                                (10) +       *   if e32 = 0 and m32 != 0, then subnormal:  (-1)^s * 2^(e32 - 126) * (m32 / 2^23)     (11) +       *   if 0 < e32 < 255, then normal:            (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) +       *   if e32 = 255 and m32 = 0, then infinite:  (-1)^s * inf                              (13) +       *   if e32 = 255 and m32 != 0, then           NaN                                       (14) +       * +       * where 0 <= m32 < 2^23. +       * +       * Calculation +       * ----------- +       * Our task is to compute s32, e32, m32 given f16.  Since this function +       * ignores the sign bit, assume that s32 = s16 = 0.  There are several +       * cases to consider. +       */ + +      factory.emit( + +         /* Case 1) f16 is zero or subnormal. 
+          * +          *   The simplest method of calculating f32 in this case is +          * +          *     f32 = f16                       (20) +          *         = 2^(-14) * (m16 / 2^10)    (21) +          *         = m16 / 2^24                (22) +          */ + +         /* if (e16 == 0) { */ +         if_tree(equal(e, constant(0u)), + +            /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */ +            assign(u32, expr(ir_unop_bitcast_f2u, +                                div(u2f(m), constant((float)(1 << 24))))), + +         /* Case 2) f16 is normal. +          * +          *   The equation +          * +          *     f32 = f16                              (30) +          *     2^(e32 - 127) * (1 + m32 / 2^23) =     (31) +          *       2^(e16 - 15) * (1 + m16 / 2^10) +          * +          *   can be decomposed into two +          * +          *     2^(e32 - 127) = 2^(e16 - 15)           (32) +          *     1 + m32 / 2^23 = 1 + m16 / 2^10        (33) +          * +          *   which solve to +          * +          *     e32 = e16 + 112                        (34) +          *     m32 = m16 * 2^13                       (35) +          */ + +         /* } else if (e16 < 31) { */ +         if_tree(less(e, constant(31u << 10u)), + +              /* u32 = ((e + (112 << 10)) | m) << 13; +               */ +              assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m), +                                 constant(13u))), + + +         /* Case 3) f16 is infinite. */ +         if_tree(equal(m, constant(0u)), + +                 assign(u32, constant(255u << 23u)), + +         /* Case 4) f16 is NaN. */ +         /* } else { */ + +            assign(u32, constant(0x7fffffffu)))))); + +         /* } */ + +      return deref(u32).val; +   } + +   /** +    * \brief Lower an unpackHalf2x16 expression. 
+    * +    * \param uint_rval is unpackHalf2x16's input +    * \return unpackHalf2x16's output as a vec2 rvalue +    */ +   ir_rvalue* +   lower_unpack_half_2x16(ir_rvalue *uint_rval) +   { +      /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: +       * +       *    mediump vec2 unpackHalf2x16 (highp uint v) +       *    ------------------------------------------ +       *    Returns a two-component floating-point vector with components +       *    obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit +       *    values, interpreting those values as 16-bit floating-point numbers +       *    according to the OpenGL ES Specification, and converting them to +       *    32-bit floating-point values. +       * +       *    The first component of the vector is obtained from the +       *    16 least-significant bits of v; the second component is obtained +       *    from the 16 most-significant bits of v. +       */ +      assert(uint_rval->type == glsl_type::uint_type); + +      /* uint u = RVALUE; +       * uvec2 f16 = uvec2(u.x & 0xffff, u.y >> 16); +       */ +      ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, +                                            "tmp_unpack_half_2x16_f16"); +      factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval))); + +      /* uvec2 f32; */ +      ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, +                                            "tmp_unpack_half_2x16_f32"); + +      /* Get f16's unshifted exponent bits. +       * +       *    uvec2 e = f16 & 0x7c00u; +       */ +      ir_variable *e = factory.make_temp(glsl_type::uvec2_type, +                                          "tmp_unpack_half_2x16_e"); +      factory.emit(assign(e, bit_and(f16, constant(0x7c00u)))); + +      /* Get f16's unshifted mantissa bits. 
+       * +       *    uvec2 m = f16 & 0x03ffu; +       */ +      ir_variable *m = factory.make_temp(glsl_type::uvec2_type, +                                          "tmp_unpack_half_2x16_m"); +      factory.emit(assign(m, bit_and(f16, constant(0x03ffu)))); + +      /* Set f32's exponent and mantissa bits. +       * +       *   f32.x = unpack_half_1x16_nosign(e.x, m.x); +       *   f32.y = unpack_half_1x16_nosign(e.y, m.y); +       */ +      factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e), +                                                       swizzle_x(m)), +                           WRITEMASK_X)); +      factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e), +                                                       swizzle_y(m)), +                           WRITEMASK_Y)); + +      /* Set f32's sign bit. +       * +       *    f32 |= (f16 & 0x8000u) << 16u; +       */ +      factory.emit(assign(f32, bit_or(f32, +                                       lshift(bit_and(f16, +                                                      constant(0x8000u)), +                                              constant(16u))))); + +      /* return bitcast_u2f(f32); */ +      ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32); +      assert(result->type == glsl_type::vec2_type); +      return result; +   } + +   /** +    * \brief Split unpackHalf2x16 into two operations. +    * +    * \param uint_rval is unpackHalf2x16's input +    * \return a vec2 rvalue +    * +    * Some code generators, such as the i965 fragment shader, require that all +    * vector expressions be lowered to a sequence of scalar expressions. +    * However, unpackHalf2x16 cannot be scalarized by the same method as +    * a true vector operation because the number of components of its input +    * and output differ. +    * +    * This method scalarizes unpackHalf2x16 by transforming it from a single +    * operation having vec2 output to a pair of operations each having float +    * output. 
That is, it transforms +    * +    *   unpackHalf2x16(UINT_RVAL) +    * +    * into +    * +    *   uint u = UINT_RVAL; +    *   vec2 v; +    * +    *   v.x = unpackHalf2x16_split_x(u); +    *   v.y = unpackHalf2x16_split_y(u); +    * +    *   return v; +    */ +   ir_rvalue* +   split_unpack_half_2x16(ir_rvalue *uint_rval) +   { +      assert(uint_rval->type == glsl_type::uint_type); + +      /* uint u = uint_rval; */ +      ir_variable *u = factory.make_temp(glsl_type::uint_type, +                                          "tmp_split_unpack_half_2x16_u"); +      factory.emit(assign(u, uint_rval)); + +      /* vec2 v; */ +      ir_variable *v = factory.make_temp(glsl_type::vec2_type, +                                          "tmp_split_unpack_half_2x16_v"); + +      /* v.x = unpack_half_2x16_split_x(u); */ +      factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u), +                           WRITEMASK_X)); + +      /* v.y = unpack_half_2x16_split_y(u); */ +      factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u), +                           WRITEMASK_Y)); + +      return deref(v).val; +   } +}; + +} // namespace anonymous + +/** + * \brief Lower the builtin packing functions. + * + * \param op_mask is a bitmask of `enum lower_packing_builtins_op`. 
+ */ +bool +lower_packing_builtins(exec_list *instructions, int op_mask) +{ +   lower_packing_builtins_visitor v(op_mask); +   visit_list_elements(&v, instructions, true); +   return v.get_progress(); +} diff --git a/mesalib/src/glsl/lower_ubo_reference.cpp b/mesalib/src/glsl/lower_ubo_reference.cpp index e8d2c4742..026197df7 100644 --- a/mesalib/src/glsl/lower_ubo_reference.cpp +++ b/mesalib/src/glsl/lower_ubo_reference.cpp @@ -61,10 +61,58 @@ public:     bool progress;  }; -static inline unsigned int -align(unsigned int a, unsigned int align) +/** + * Determine the name of the interface block field + * + * This is the name of the specific member as it would appear in the + * \c gl_uniform_buffer_variable::Name field in the shader's + * \c UniformBlocks array. + */ +static const char * +interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d)  { -   return (a + align - 1) / align * align; +   ir_constant *previous_index = NULL; + +   while (d != NULL) { +      switch (d->ir_type) { +      case ir_type_dereference_variable: { +         ir_dereference_variable *v = (ir_dereference_variable *) d; +         if (previous_index +             && v->var->is_interface_instance() +             && v->var->type->is_array()) +            return ralloc_asprintf(mem_ctx, +                                   "%s[%d]", +                                   base_name, +                                   previous_index->get_uint_component(0)); +         else +            return base_name; + +         break; +      } + +      case ir_type_dereference_record: { +         ir_dereference_record *r = (ir_dereference_record *) d; + +         d = r->record->as_dereference(); +         break; +      } + +      case ir_type_dereference_array: { +         ir_dereference_array *a = (ir_dereference_array *) d; + +         d = a->array->as_dereference(); +         previous_index = a->array_index->as_constant(); +         break; +      } + +      default: +         assert(!"Should not 
get here."); +         break; +      } +   } + +   assert(!"Should not get here."); +   return NULL;  }  void @@ -78,13 +126,30 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)        return;     ir_variable *var = deref->variable_referenced(); -   if (!var || var->uniform_block == -1) +   if (!var || !var->is_in_uniform_block())        return;     mem_ctx = ralloc_parent(*rvalue); -   uniform_block = var->uniform_block; -   struct gl_uniform_block *block = &shader->UniformBlocks[uniform_block]; -   this->ubo_var = &block->Uniforms[var->location]; + +   const char *const field_name = +      interface_field_name(mem_ctx, (char *) var->interface_type->name, deref); + +   this->uniform_block = -1; +   for (unsigned i = 0; i < shader->NumUniformBlocks; i++) { +      if (strcmp(field_name, shader->UniformBlocks[i].Name) == 0) { +         this->uniform_block = i; + +         struct gl_uniform_block *block = &shader->UniformBlocks[i]; + +         this->ubo_var = var->is_interface_instance() +            ? &block->Uniforms[0] : &block->Uniforms[var->location]; + +         break; +      } +   } + +   assert(this->uniform_block != (unsigned) -1); +     ir_rvalue *offset = new(mem_ctx) ir_constant(0u);     unsigned const_offset = 0;     bool row_major = ubo_var->RowMajor; @@ -111,9 +176,21 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)  	     * vector) is handled below in emit_ubo_loads.  	     */  	    array_stride = 4; +         } else if (deref_array->type->is_interface()) { +            /* We're processing an array dereference of an interface instance +	     * array.  The thing being dereferenced *must* be a variable +	     * dereference because intefaces cannot be embedded an other +	     * types.  In terms of calculating the offsets for the lowering +	     * pass, we don't care about the array index.  
All elements of an +	     * interface instance array will have the same offsets relative to +	     * the base of the block that backs them. +             */ +            assert(deref_array->array->as_dereference_variable()); +            deref = deref_array->array->as_dereference(); +            break;  	 } else {  	    array_stride = deref_array->type->std140_size(row_major); -	    array_stride = align(array_stride, 16); +	    array_stride = glsl_align(array_stride, 16);  	 }  	 ir_constant *const_index = deref_array->array_index->as_constant(); @@ -138,7 +215,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)  	    const glsl_type *type = struct_type->fields.structure[i].type;  	    unsigned field_align = type->std140_base_alignment(row_major);  	    max_field_align = MAX2(field_align, max_field_align); -	    intra_struct_offset = align(intra_struct_offset, field_align); +	    intra_struct_offset = glsl_align(intra_struct_offset, field_align);  	    if (strcmp(struct_type->fields.structure[i].name,  		       deref_record->field) == 0) @@ -146,7 +223,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)  	    intra_struct_offset += type->std140_size(row_major);  	 } -	 const_offset = align(const_offset, max_field_align); +	 const_offset = glsl_align(const_offset, max_field_align);  	 const_offset += intra_struct_offset;  	 deref = deref_record->record->as_dereference(); @@ -217,8 +294,8 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,  					       field->name);  	 field_offset = -	    align(field_offset, -		  field->type->std140_base_alignment(ubo_var->RowMajor)); +	    glsl_align(field_offset, +		       field->type->std140_base_alignment(ubo_var->RowMajor));  	 emit_ubo_loads(field_deref, base_offset, deref_offset + field_offset); @@ -229,7 +306,8 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,     if (deref->type->is_array()) {        unsigned array_stride = -	 
align(deref->type->fields.array->std140_size(ubo_var->RowMajor), 16); +	 glsl_align(deref->type->fields.array->std140_size(ubo_var->RowMajor), +		    16);        for (unsigned i = 0; i < deref->type->length; i++) {  	 ir_constant *element = new(mem_ctx) ir_constant(i); diff --git a/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp b/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp index 57771074a..040b0bf83 100644 --- a/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/mesalib/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -364,12 +364,16 @@ public:  	 return this->lower_temps;        case ir_var_uniform:  	 return this->lower_uniforms; -      case ir_var_in: +      case ir_var_function_in:        case ir_var_const_in: -	 return (var->location == -1) ? this->lower_temps : this->lower_inputs; -      case ir_var_out: -	 return (var->location == -1) ? this->lower_temps : this->lower_outputs; -      case ir_var_inout: +         return this->lower_temps; +      case ir_var_shader_in: +         return this->lower_inputs; +      case ir_var_function_out: +         return this->lower_temps; +      case ir_var_shader_out: +         return this->lower_outputs; +      case ir_var_function_inout:  	 return this->lower_temps;        } diff --git a/mesalib/src/glsl/opt_constant_folding.cpp b/mesalib/src/glsl/opt_constant_folding.cpp index 7d94d481c..072fefe9a 100644 --- a/mesalib/src/glsl/opt_constant_folding.cpp +++ b/mesalib/src/glsl/opt_constant_folding.cpp @@ -127,7 +127,8 @@ ir_constant_folding_visitor::visit_enter(ir_call *ir)        ir_rvalue *param_rval = (ir_rvalue *)iter.get();        ir_variable *sig_param = (ir_variable *)sig_iter.get(); -      if (sig_param->mode == ir_var_in || sig_param->mode == ir_var_const_in) { +      if (sig_param->mode == ir_var_function_in +          || sig_param->mode == ir_var_const_in) {  	 ir_rvalue *new_param = param_rval;  	 handle_rvalue(&new_param); diff --git 
a/mesalib/src/glsl/opt_constant_propagation.cpp b/mesalib/src/glsl/opt_constant_propagation.cpp index a03811999..2f65937fe 100644 --- a/mesalib/src/glsl/opt_constant_propagation.cpp +++ b/mesalib/src/glsl/opt_constant_propagation.cpp @@ -285,7 +285,8 @@ ir_constant_propagation_visitor::visit_enter(ir_call *ir)     foreach_iter(exec_list_iterator, iter, ir->actual_parameters) {        ir_variable *sig_param = (ir_variable *)sig_param_iter.get();        ir_rvalue *param = (ir_rvalue *)iter.get(); -      if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) { +      if (sig_param->mode != ir_var_function_out +          && sig_param->mode != ir_var_function_inout) {  	 ir_rvalue *new_param = param;  	 handle_rvalue(&new_param);           if (new_param != param) diff --git a/mesalib/src/glsl/opt_constant_variable.cpp b/mesalib/src/glsl/opt_constant_variable.cpp index 1bbaf8e47..cbe6450c6 100644 --- a/mesalib/src/glsl/opt_constant_variable.cpp +++ b/mesalib/src/glsl/opt_constant_variable.cpp @@ -137,8 +137,8 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)        ir_rvalue *param_rval = (ir_rvalue *)iter.get();        ir_variable *param = (ir_variable *)sig_iter.get(); -      if (param->mode == ir_var_out || -	  param->mode == ir_var_inout) { +      if (param->mode == ir_var_function_out || +	  param->mode == ir_var_function_inout) {  	 ir_variable *var = param_rval->variable_referenced();  	 struct assignment_entry *entry; diff --git a/mesalib/src/glsl/opt_copy_propagation.cpp b/mesalib/src/glsl/opt_copy_propagation.cpp index 2952ce594..7282b611e 100644 --- a/mesalib/src/glsl/opt_copy_propagation.cpp +++ b/mesalib/src/glsl/opt_copy_propagation.cpp @@ -189,7 +189,8 @@ ir_copy_propagation_visitor::visit_enter(ir_call *ir)     foreach_iter(exec_list_iterator, iter, ir->actual_parameters) {        ir_variable *sig_param = (ir_variable *)sig_param_iter.get();        ir_instruction *ir = (ir_instruction *)iter.get(); -      if (sig_param->mode != 
ir_var_out && sig_param->mode != ir_var_inout) { +      if (sig_param->mode != ir_var_function_out +          && sig_param->mode != ir_var_function_inout) {           ir->accept(this);        }        sig_param_iter.next(); diff --git a/mesalib/src/glsl/opt_copy_propagation_elements.cpp b/mesalib/src/glsl/opt_copy_propagation_elements.cpp index de9f4ef6f..6a19da40d 100644 --- a/mesalib/src/glsl/opt_copy_propagation_elements.cpp +++ b/mesalib/src/glsl/opt_copy_propagation_elements.cpp @@ -297,7 +297,8 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)     foreach_iter(exec_list_iterator, iter, ir->actual_parameters) {        ir_variable *sig_param = (ir_variable *)sig_param_iter.get();        ir_instruction *ir = (ir_instruction *)iter.get(); -      if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) { +      if (sig_param->mode != ir_var_function_out +          && sig_param->mode != ir_var_function_inout) {           ir->accept(this);        }        sig_param_iter.next(); diff --git a/mesalib/src/glsl/opt_dead_code.cpp b/mesalib/src/glsl/opt_dead_code.cpp index 47247e20d..b65e5c2ce 100644 --- a/mesalib/src/glsl/opt_dead_code.cpp +++ b/mesalib/src/glsl/opt_dead_code.cpp @@ -77,10 +77,11 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)        if (entry->assign) {  	 /* Remove a single dead assignment to the variable we found. -	  * Don't do so if it's a shader output, though. +	  * Don't do so if it's a shader or function output, though.  	  */ -	 if (entry->var->mode != ir_var_out && -	     entry->var->mode != ir_var_inout) { +	 if (entry->var->mode != ir_var_function_out && +	     entry->var->mode != ir_var_function_inout && +             entry->var->mode != ir_var_shader_out) {  	    entry->assign->remove();  	    progress = true; @@ -97,15 +98,10 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)  	 /* uniform initializers are precious, and could get used by another  	  * stage.  
Also, once uniform locations have been assigned, the  	  * declaration cannot be deleted. -	  * -	  * Also, GL_ARB_uniform_buffer_object says that std140 -	  * uniforms will not be eliminated.  Since we always do -	  * std140, just don't eliminate uniforms in UBOs.  	  */  	 if (entry->var->mode == ir_var_uniform &&  	     (uniform_locations_assigned || -	      entry->var->constant_value || -	      entry->var->uniform_block != -1)) +	      entry->var->constant_value))  	    continue;  	 entry->var->remove(); diff --git a/mesalib/src/glsl/opt_function_inlining.cpp b/mesalib/src/glsl/opt_function_inlining.cpp index f9f5bd442..0733d5180 100644 --- a/mesalib/src/glsl/opt_function_inlining.cpp +++ b/mesalib/src/glsl/opt_function_inlining.cpp @@ -144,9 +144,9 @@ ir_call::generate_inline(ir_instruction *next_ir)        }        /* Move the actual param into our param variable if it's an 'in' type. */ -      if (parameters[i] && (sig_param->mode == ir_var_in || +      if (parameters[i] && (sig_param->mode == ir_var_function_in ||  			    sig_param->mode == ir_var_const_in || -			    sig_param->mode == ir_var_inout)) { +			    sig_param->mode == ir_var_function_inout)) {  	 ir_assignment *assign;  	 assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]), @@ -202,8 +202,8 @@ ir_call::generate_inline(ir_instruction *next_ir)        const ir_variable *const sig_param = (ir_variable *) sig_param_iter.get();        /* Move our param variable into the actual param if it's an 'out' type. 
*/ -      if (parameters[i] && (sig_param->mode == ir_var_out || -			    sig_param->mode == ir_var_inout)) { +      if (parameters[i] && (sig_param->mode == ir_var_function_out || +			    sig_param->mode == ir_var_function_inout)) {  	 ir_assignment *assign;  	 assign = new(ctx) ir_assignment(param->clone(ctx, NULL)->as_rvalue(), diff --git a/mesalib/src/glsl/opt_structure_splitting.cpp b/mesalib/src/glsl/opt_structure_splitting.cpp index 9b3f048e4..806c079e5 100644 --- a/mesalib/src/glsl/opt_structure_splitting.cpp +++ b/mesalib/src/glsl/opt_structure_splitting.cpp @@ -104,7 +104,8 @@ ir_structure_reference_visitor::get_variable_entry(ir_variable *var)  {     assert(var); -   if (!var->type->is_record() || var->mode == ir_var_uniform) +   if (!var->type->is_record() || var->mode == ir_var_uniform +       || var->mode == ir_var_shader_in || var->mode == ir_var_shader_out)        return NULL;     foreach_iter(exec_list_iterator, iter, this->variable_list) { diff --git a/mesalib/src/glsl/opt_tree_grafting.cpp b/mesalib/src/glsl/opt_tree_grafting.cpp index 25b18ea94..113abb7b0 100644 --- a/mesalib/src/glsl/opt_tree_grafting.cpp +++ b/mesalib/src/glsl/opt_tree_grafting.cpp @@ -211,7 +211,8 @@ ir_tree_grafting_visitor::visit_enter(ir_call *ir)        ir_rvalue *ir = (ir_rvalue *)iter.get();        ir_rvalue *new_ir = ir; -      if (sig_param->mode != ir_var_in && sig_param->mode != ir_var_const_in) { +      if (sig_param->mode != ir_var_function_in +          && sig_param->mode != ir_var_const_in) {  	 if (check_graft(ir, sig_param) == visit_stop)  	    return visit_stop;  	 continue; @@ -350,8 +351,9 @@ tree_grafting_basic_block(ir_instruction *bb_first,        if (!lhs_var)  	 continue; -      if (lhs_var->mode == ir_var_out || -	  lhs_var->mode == ir_var_inout) +      if (lhs_var->mode == ir_var_function_out || +	  lhs_var->mode == ir_var_function_inout || +          lhs_var->mode == ir_var_shader_out)  	 continue;        ir_variable_refcount_entry *entry = 
info->refs->get_variable_entry(lhs_var); diff --git a/mesalib/src/glsl/s_expression.cpp b/mesalib/src/glsl/s_expression.cpp index 57de9d334..1bdf6bca6 100644 --- a/mesalib/src/glsl/s_expression.cpp +++ b/mesalib/src/glsl/s_expression.cpp @@ -66,18 +66,18 @@ read_atom(void *ctx, const char *&src, char *&symbol_buffer)        return NULL; // no atom     // Check for the special symbol '+INF', which means +Infinity.  Note: C99 -   // requires strtod to parse '+INF' as +Infinity, but we still support some +   // requires strtof to parse '+INF' as +Infinity, but we still support some     // non-C99-compliant compilers (e.g. MSVC).     if (n == 4 && strncmp(src, "+INF", 4) == 0) {        expr = new(ctx) s_float(std::numeric_limits<float>::infinity());     } else {        // Check if the atom is a number.        char *float_end = NULL; -      double f = glsl_strtod(src, &float_end); +      float f = glsl_strtof(src, &float_end);        if (float_end != src) {           char *int_end = NULL;           int i = strtol(src, &int_end, 10); -         // If strtod matched more characters, it must have a decimal part +         // If strtof matched more characters, it must have a decimal part           if (float_end > int_end)              expr = new(ctx) s_float(f);           else diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp index 33d3804c6..0fb4f5b16 100644 --- a/mesalib/src/glsl/standalone_scaffolding.cpp +++ b/mesalib/src/glsl/standalone_scaffolding.cpp @@ -34,6 +34,24 @@  #include "ralloc.h"  void +_mesa_warning(struct gl_context *ctx, const char *fmt, ...) +{ +    va_list vargs; +    (void) ctx; + +    va_start(vargs, fmt); + +    /* This output is not thread-safe, but that's good enough for the +     * standalone compiler. 
+     */ +    fprintf(stderr, "Mesa warning: "); +    vfprintf(stderr, fmt, vargs); +    fprintf(stderr, "\n"); + +    va_end(vargs); +} + +void  _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,                         struct gl_shader *sh)  { @@ -81,6 +99,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)     ctx->Extensions.EXT_texture3D = true;     ctx->Extensions.OES_EGL_image_external = true;     ctx->Extensions.ARB_shader_bit_encoding = true; +   ctx->Extensions.ARB_shading_language_packing = true;     ctx->Extensions.OES_standard_derivatives = true;     ctx->Extensions.ARB_texture_cube_map_array = true; diff --git a/mesalib/src/glsl/standalone_scaffolding.h b/mesalib/src/glsl/standalone_scaffolding.h index 41ce35bef..096b2f114 100644 --- a/mesalib/src/glsl/standalone_scaffolding.h +++ b/mesalib/src/glsl/standalone_scaffolding.h @@ -34,6 +34,9 @@  #include "main/mtypes.h"  extern "C" void +_mesa_warning(struct gl_context *ctx, const char *fmtString, ... ); + +extern "C" void  _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,                         struct gl_shader *sh); diff --git a/mesalib/src/glsl/strtod.c b/mesalib/src/glsl/strtod.c index 47c1f0ed6..5d4346b5a 100644 --- a/mesalib/src/glsl/strtod.c +++ b/mesalib/src/glsl/strtod.c @@ -55,3 +55,25 @@ glsl_strtod(const char *s, char **end)     return strtod(s, end);  #endif  } + + +/** + * Wrapper around strtof which uses the "C" locale so the decimal + * point is always '.' 
+ */ +float +glsl_strtof(const char *s, char **end) +{ +#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \ +   !defined(__HAIKU__) && !defined(__UCLIBC__) +   static locale_t loc = NULL; +   if (!loc) { +      loc = newlocale(LC_CTYPE_MASK, "C", NULL); +   } +   return strtof_l(s, end, loc); +#elif _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE +   return strtof(s, end); +#else +   return (float) strtod(s, end); +#endif +} diff --git a/mesalib/src/glsl/strtod.h b/mesalib/src/glsl/strtod.h index 0cf6409d4..ad847dbb0 100644 --- a/mesalib/src/glsl/strtod.h +++ b/mesalib/src/glsl/strtod.h @@ -34,6 +34,9 @@ extern "C" {  extern double  glsl_strtod(const char *s, char **end); +extern float +glsl_strtof(const char *s, char **end); +  #ifdef __cplusplus  } diff --git a/mesalib/src/mesa/Android.libmesa_glsl_utils.mk b/mesalib/src/mesa/Android.libmesa_glsl_utils.mk index 9c5f3493c..47f2e151b 100644 --- a/mesalib/src/mesa/Android.libmesa_glsl_utils.mk +++ b/mesalib/src/mesa/Android.libmesa_glsl_utils.mk @@ -35,10 +35,13 @@ include $(CLEAR_VARS)  LOCAL_MODULE := libmesa_glsl_utils -LOCAL_C_INCLUDES := $(MESA_TOP)/src/glsl +LOCAL_C_INCLUDES := \ +	$(MESA_TOP)/src/glsl \ +	$(MESA_TOP)/src/mapi  LOCAL_SRC_FILES := \  	main/hash_table.c \ +	main/imports.c \  	program/prog_hash_table.c \  	program/symbol_table.c @@ -54,10 +57,13 @@ include $(CLEAR_VARS)  LOCAL_MODULE := libmesa_glsl_utils  LOCAL_IS_HOST_MODULE := true -LOCAL_C_INCLUDES := $(MESA_TOP)/src/glsl +LOCAL_C_INCLUDES := \ +	$(MESA_TOP)/src/glsl \ +	$(MESA_TOP)/src/mapi  LOCAL_SRC_FILES := \  	main/hash_table.c \ +	main/imports.c \  	program/prog_hash_table.c \  	program/symbol_table.c diff --git a/mesalib/src/mesa/main/extensions.c b/mesalib/src/mesa/main/extensions.c index 5d01ac8ea..7ae07fb5a 100644 --- a/mesalib/src/mesa/main/extensions.c +++ b/mesalib/src/mesa/main/extensions.c @@ -125,6 +125,7 @@ static const struct extension extension_table[] = {     { "GL_ARB_shader_stencil_export",              
 o(ARB_shader_stencil_export),               GL,             2009 },     { "GL_ARB_shader_texture_lod",                  o(ARB_shader_texture_lod),                  GL,             2009 },     { "GL_ARB_shading_language_100",                o(ARB_shading_language_100),                GLL,            2003 }, +   { "GL_ARB_shading_language_packing",            o(ARB_shading_language_packing),            GL,             2011 },     { "GL_ARB_shadow",                              o(ARB_shadow),                              GLL,            2001 },     { "GL_ARB_sync",                                o(ARB_sync),                                GL,             2003 },     { "GL_ARB_texture_border_clamp",                o(ARB_texture_border_clamp),                GLL,            2000 }, diff --git a/mesalib/src/mesa/main/getstring.c b/mesalib/src/mesa/main/getstring.c index 1f23cc0a4..aa3a528fd 100644 --- a/mesalib/src/mesa/main/getstring.c +++ b/mesalib/src/mesa/main/getstring.c @@ -74,7 +74,9 @@ shading_language_version(struct gl_context *ctx)        break;     case API_OPENGLES2: -      return (const GLubyte *) "OpenGL ES GLSL ES 1.0.16"; +      return (ctx->Version < 30) +         ? (const GLubyte *) "OpenGL ES GLSL ES 1.0.16" +         : (const GLubyte *) "OpenGL ES GLSL ES 3.0";     case API_OPENGLES:        /* fall-through */ diff --git a/mesalib/src/mesa/main/imports.c b/mesalib/src/mesa/main/imports.c index 76f835e0e..e6f754254 100644 --- a/mesalib/src/mesa/main/imports.c +++ b/mesalib/src/mesa/main/imports.c @@ -314,10 +314,43 @@ _mesa_bitcount_64(uint64_t n)  #endif +/* Using C99 rounding functions for roundToEven() implementation is + * difficult, because round(), rint, and nearbyint() are affected by + * fesetenv(), which the application may have done for its own + * purposes.  Mesa's IROUND macro is close to what we want, but it + * rounds away from 0 on n + 0.5. 
+ */ +int +_mesa_round_to_even(float val) +{ +   int rounded = IROUND(val); + +   if (val - floor(val) == 0.5) { +      if (rounded % 2 != 0) +         rounded += val > 0 ? -1 : 1; +   } + +   return rounded; +} + +  /**   * Convert a 4-byte float to a 2-byte half float. - * Based on code from: - * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html + * + * Not all float32 values can be represented exactly as a float16 value. We + * round such intermediate float32 values to the nearest float16. When the + * float32 lies exactly between two float16 values, we round to the one with + * an even mantissa. + * + * This rounding behavior has several benefits: + *   - It has no sign bias. + * + *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's + *     GPU ISA. + * + *   - By reproducing the behavior of the GPU (at least on Intel hardware), + *     compile-time evaluation of constant packHalf2x16 GLSL expressions will + *     result in the same value as if the expression were executed on the GPU.   */  GLhalfARB  _mesa_float_to_half(float val) @@ -356,32 +389,13 @@ _mesa_float_to_half(float val)     else {        /* regular number */        const int new_exp = flt_e - 127; -      if (new_exp < -24) { -         /* this maps to 0 */ -         /* m = 0; - already set */ -         e = 0; -      } -      else if (new_exp < -14) { -         /* this maps to a denorm */ -         unsigned int exp_val = (unsigned int) (-14 - new_exp); /* 2^-exp_val*/ +      if (new_exp < -14) { +         /* The float32 lies in the range (0.0, min_normal16) and is rounded +          * to a nearby float16 value. The result will be either zero, subnormal, +          * or normal. 
+          */           e = 0; -         switch (exp_val) { -            case 0: -               _mesa_warning(NULL, -                   "float_to_half: logical error in denorm creation!\n"); -               /* m = 0; - already set */ -               break; -            case 1: m = 512 + (flt_m >> 14); break; -            case 2: m = 256 + (flt_m >> 15); break; -            case 3: m = 128 + (flt_m >> 16); break; -            case 4: m = 64 + (flt_m >> 17); break; -            case 5: m = 32 + (flt_m >> 18); break; -            case 6: m = 16 + (flt_m >> 19); break; -            case 7: m = 8 + (flt_m >> 20); break; -            case 8: m = 4 + (flt_m >> 21); break; -            case 9: m = 2 + (flt_m >> 22); break; -            case 10: m = 1; break; -         } +         m = _mesa_round_to_even((1 << 24) * fabsf(fi.f));        }        else if (new_exp > 15) {           /* map this value to infinity */ @@ -389,12 +403,26 @@ _mesa_float_to_half(float val)           e = 31;        }        else { -         /* regular */ +         /* The float32 lies in the range +          *   [min_normal16, max_normal16 + max_step16) +          * and is rounded to a nearby float16 value. The result will be +          * either normal or infinite. +          */           e = new_exp + 15; -         m = flt_m >> 13; +         m = _mesa_round_to_even(flt_m / (float) (1 << 13));        }     } +   assert(0 <= m && m <= 1024); +   if (m == 1024) { +      /* The float32 was rounded upwards into the range of the next exponent, +       * so bump the exponent. This correctly handles the case where f32 +       * should be rounded up to float16 infinity. 
+       */ +      ++e; +      m = 0; +   } +     result = (s << 15) | (e << 10) | m;     return result;  } diff --git a/mesalib/src/mesa/main/imports.h b/mesalib/src/mesa/main/imports.h index 8446ea2a3..4b783818b 100644 --- a/mesalib/src/mesa/main/imports.h +++ b/mesalib/src/mesa/main/imports.h @@ -548,6 +548,9 @@ _mesa_fls(unsigned int n)  #endif  } +extern int +_mesa_round_to_even(float val); +  extern GLhalfARB  _mesa_float_to_half(float f); diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h index d37e6c4c0..3369623f7 100644 --- a/mesalib/src/mesa/main/mtypes.h +++ b/mesalib/src/mesa/main/mtypes.h @@ -2273,11 +2273,30 @@ typedef enum  struct gl_uniform_buffer_variable  {     char *Name; + +   /** +    * Name of the uniform as seen by glGetUniformIndices. +    * +    * glGetUniformIndices requires that the block instance index \b not be +    * present in the name of queried uniforms. +    * +    * \note +    * \c gl_uniform_buffer_variable::IndexName and +    * \c gl_uniform_buffer_variable::Name may point to identical storage. +    */ +   char *IndexName; +     const struct glsl_type *Type;     unsigned int Offset;     GLboolean RowMajor;  }; +enum gl_uniform_block_packing { +   ubo_packing_std140, +   ubo_packing_shared, +   ubo_packing_packed +}; +  struct gl_uniform_block  {     /** Declared name of the uniform block */ @@ -2299,6 +2318,14 @@ struct gl_uniform_block      * (GL_UNIFORM_BLOCK_DATA_SIZE).      */     GLuint UniformBufferSize; + +   /** +    * Layout specified in the shader +    * +    * This isn't accessible through the API, but it is used while +    * cross-validating uniform blocks. 
+    */ +   enum gl_uniform_block_packing _Packing;  };  /** @@ -3042,6 +3069,7 @@ struct gl_extensions     GLboolean ARB_shader_stencil_export;     GLboolean ARB_shader_texture_lod;     GLboolean ARB_shading_language_100; +   GLboolean ARB_shading_language_packing;     GLboolean ARB_shadow;     GLboolean ARB_sync;     GLboolean ARB_texture_border_clamp; diff --git a/mesalib/src/mesa/main/remap.c b/mesalib/src/mesa/main/remap.c index c89fba453..a09870561 100644 --- a/mesalib/src/mesa/main/remap.c +++ b/mesalib/src/mesa/main/remap.c @@ -208,8 +208,10 @@ _mesa_do_init_remap_table(const char *pool,        offset = _mesa_map_function_spec(spec);        /* store the dispatch offset in the remap table */        driDispatchRemapTable[i] = offset; -      if (offset < 0) -         _mesa_warning(NULL, "failed to remap index %d", i); +      if (offset < 0) { +         const char *name = spec + strlen(spec) + 1; +         _mesa_warning(NULL, "failed to remap %s", name); +      }     }  } diff --git a/mesalib/src/mesa/main/shader_query.cpp b/mesalib/src/mesa/main/shader_query.cpp index 27b1b8f56..3014a9778 100644 --- a/mesalib/src/mesa/main/shader_query.cpp +++ b/mesalib/src/mesa/main/shader_query.cpp @@ -106,7 +106,7 @@ _mesa_GetActiveAttrib(GLhandleARB program, GLuint desired_index,        const ir_variable *const var = ((ir_instruction *) node)->as_variable();        if (var == NULL -	  || var->mode != ir_var_in +	  || var->mode != ir_var_shader_in  	  || var->location == -1)  	 continue; @@ -169,7 +169,7 @@ _mesa_GetAttribLocation(GLhandleARB program, const GLcharARB * name)         *     attribute, or if an error occurs, -1 will be returned."         
*/        if (var == NULL -	  || var->mode != ir_var_in +	  || var->mode != ir_var_shader_in  	  || var->location == -1  	  || var->location < VERT_ATTRIB_GENERIC0)  	 continue; @@ -197,7 +197,7 @@ _mesa_count_active_attribs(struct gl_shader_program *shProg)        const ir_variable *const var = ((ir_instruction *) node)->as_variable();        if (var == NULL -	  || var->mode != ir_var_in +	  || var->mode != ir_var_shader_in  	  || var->location == -1)  	 continue; @@ -223,7 +223,7 @@ _mesa_longest_attribute_name_length(struct gl_shader_program *shProg)        const ir_variable *const var = ((ir_instruction *) node)->as_variable();        if (var == NULL -	  || var->mode != ir_var_in +	  || var->mode != ir_var_shader_in  	  || var->location == -1)  	 continue; @@ -333,7 +333,7 @@ _mesa_GetFragDataIndex(GLuint program, const GLchar *name)         *     attribute, or if an error occurs, -1 will be returned."         */        if (var == NULL -          || var->mode != ir_var_out +          || var->mode != ir_var_shader_out            || var->location == -1            || var->location < FRAG_RESULT_DATA0)           continue; @@ -389,7 +389,7 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name)         *     attribute, or if an error occurs, -1 will be returned."         
*/        if (var == NULL -	  || var->mode != ir_var_out +	  || var->mode != ir_var_shader_out  	  || var->location == -1  	  || var->location < FRAG_RESULT_DATA0)  	 continue; diff --git a/mesalib/src/mesa/main/texparam.c b/mesalib/src/mesa/main/texparam.c index 8d0ae16fb..52ede13c0 100644 --- a/mesalib/src/mesa/main/texparam.c +++ b/mesalib/src/mesa/main/texparam.c @@ -1388,10 +1388,10 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )           if (ctx->API != API_OPENGLES || !ctx->Extensions.OES_draw_texture)              goto invalid_pname; -         params[0] = obj->CropRect[0]; -         params[1] = obj->CropRect[1]; -         params[2] = obj->CropRect[2]; -         params[3] = obj->CropRect[3]; +         params[0] = (GLfloat) obj->CropRect[0]; +         params[1] = (GLfloat) obj->CropRect[1]; +         params[2] = (GLfloat) obj->CropRect[2]; +         params[3] = (GLfloat) obj->CropRect[3];           break;        case GL_TEXTURE_SWIZZLE_R_EXT: diff --git a/mesalib/src/mesa/main/uniforms.c b/mesalib/src/mesa/main/uniforms.c index 62c85b3c0..d902407a0 100644 --- a/mesalib/src/mesa/main/uniforms.c +++ b/mesalib/src/mesa/main/uniforms.c @@ -695,7 +695,7 @@ _mesa_GetActiveUniformBlockiv(GLuint program,        for (i = 0; i < block->NumUniforms; i++) {  	 unsigned offset;  	 params[i] = _mesa_get_uniform_location(ctx, shProg, -						block->Uniforms[i].Name, +						block->Uniforms[i].IndexName,  						&offset);        }        return; diff --git a/mesalib/src/mesa/main/version.c b/mesalib/src/mesa/main/version.c index 4373d7b91..e944a5518 100644 --- a/mesalib/src/mesa/main/version.c +++ b/mesalib/src/mesa/main/version.c @@ -323,7 +323,30 @@ compute_version_es2(struct gl_context *ctx)                                ctx->Extensions.ARB_fragment_shader &&                                ctx->Extensions.ARB_texture_non_power_of_two &&                                ctx->Extensions.EXT_blend_equation_separate); -   if (ver_2_0) { +   /* 
FINISHME: This list isn't quite right. */ +   const GLboolean ver_3_0 = (ctx->Extensions.ARB_half_float_vertex && +                              ctx->Extensions.ARB_internalformat_query && +                              ctx->Extensions.ARB_map_buffer_range && +                              ctx->Extensions.ARB_shader_texture_lod && +                              ctx->Extensions.ARB_texture_float && +                              ctx->Extensions.ARB_texture_rg && +                              ctx->Extensions.ARB_texture_compression_rgtc && +                              ctx->Extensions.EXT_draw_buffers2 && +                              /* ctx->Extensions.ARB_framebuffer_object && */ +                              ctx->Extensions.EXT_framebuffer_sRGB && +                              ctx->Extensions.EXT_packed_float && +                              ctx->Extensions.EXT_texture_array && +                              ctx->Extensions.EXT_texture_shared_exponent && +                              ctx->Extensions.EXT_transform_feedback && +                              ctx->Extensions.NV_conditional_render && +                              ctx->Extensions.ARB_draw_instanced && +                              ctx->Extensions.ARB_uniform_buffer_object && +                              ctx->Extensions.EXT_texture_snorm && +                              ctx->Extensions.NV_primitive_restart && +                              ctx->Extensions.OES_depth_texture_cube_map); +   if (ver_3_0) { +      ctx->Version = 30; +   } else if (ver_2_0) {        ctx->Version = 20;     } else {        _mesa_problem(ctx, "Incomplete OpenGL ES 2.0 support."); diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp index 0f7439b3b..cd89171da 100644 --- a/mesalib/src/mesa/program/ir_to_mesa.cpp +++ b/mesalib/src/mesa/program/ir_to_mesa.cpp @@ -623,10 +623,14 @@ type_size(const struct glsl_type *type)         * at link time.         
*/        return 1; -   default: -      assert(0); -      return 0; +   case GLSL_TYPE_VOID: +   case GLSL_TYPE_ERROR: +   case GLSL_TYPE_INTERFACE: +      assert(!"Invalid type in type_size"); +      break;     } + +   return 0;  }  /** @@ -1427,7 +1431,21 @@ ir_to_mesa_visitor::visit(ir_expression *ir)     case ir_unop_fract:        emit(ir, OPCODE_FRC, result_dst, op[0]);        break; - +   case ir_unop_pack_snorm_2x16: +   case ir_unop_pack_snorm_4x8: +   case ir_unop_pack_unorm_2x16: +   case ir_unop_pack_unorm_4x8: +   case ir_unop_pack_half_2x16: +   case ir_unop_unpack_snorm_2x16: +   case ir_unop_unpack_snorm_4x8: +   case ir_unop_unpack_unorm_2x16: +   case ir_unop_unpack_unorm_4x8: +   case ir_unop_unpack_half_2x16: +   case ir_unop_unpack_half_2x16_split_x: +   case ir_unop_unpack_half_2x16_split_y: +   case ir_binop_pack_half_2x16_split: +      assert(!"not supported"); +      break;     case ir_binop_min:        emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);        break; @@ -1529,21 +1547,18 @@ ir_to_mesa_visitor::visit(ir_dereference_variable *ir)  					       var->location);  	 this->variables.push_tail(entry);  	 break; -      case ir_var_in: -      case ir_var_inout: +      case ir_var_shader_in:  	 /* The linker assigns locations for varyings and attributes,  	  * including deprecated builtins (like gl_Color),  	  * user-assigned generic attributes (glBindVertexLocation),  	  * and user-defined varyings. -	  * -	  * FINISHME: We would hit this path for function arguments.  Fix!  	  
*/  	 assert(var->location != -1);           entry = new(mem_ctx) variable_storage(var,                                                 PROGRAM_INPUT,                                                 var->location);           break; -      case ir_var_out: +      case ir_var_shader_out:  	 assert(var->location != -1);           entry = new(mem_ctx) variable_storage(var,                                                 PROGRAM_OUTPUT, @@ -2378,7 +2393,8 @@ public:     }  private: -   virtual void visit_field(const glsl_type *type, const char *name); +   virtual void visit_field(const glsl_type *type, const char *name, +                            bool row_major);     struct gl_shader_program *shader_program;     struct gl_program_parameter_list *params; @@ -2386,10 +2402,13 @@ private:  };  void -add_uniform_to_shader::visit_field(const glsl_type *type, const char *name) +add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, +                                   bool row_major)  {     unsigned int size; +   (void) row_major; +     if (type->is_vector() || type->is_scalar()) {        size = type->vector_elements;     } else { @@ -2459,7 +2478,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program        ir_variable *var = ((ir_instruction *) node)->as_variable();        if ((var == NULL) || (var->mode != ir_var_uniform) -	  || var->uniform_block != -1 || (strncmp(var->name, "gl_", 3) == 0)) +	  || var->is_in_uniform_block() || (strncmp(var->name, "gl_", 3) == 0))  	 continue;        add.process(var); @@ -2522,7 +2541,11 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,  	    format = uniform_native;  	    columns = 1;  	    break; -	 default: +         case GLSL_TYPE_ARRAY: +         case GLSL_TYPE_VOID: +         case GLSL_TYPE_STRUCT: +         case GLSL_TYPE_ERROR: +         case GLSL_TYPE_INTERFACE:  	    assert(!"Should not get here.");  	    break;  	 } diff --git a/mesalib/src/mesa/program/program.c 
b/mesalib/src/mesa/program/program.c index 993803dd5..fb0aeb7ed 100644 --- a/mesalib/src/mesa/program/program.c +++ b/mesalib/src/mesa/program/program.c @@ -696,7 +696,7 @@ _mesa_combine_programs(struct gl_context *ctx,     const GLuint newLength = lenA + lenB;     GLboolean usedTemps[MAX_PROGRAM_TEMPS];     GLuint firstTemp = 0; -   GLbitfield inputsB; +   GLbitfield64 inputsB;     GLuint i;     ASSERT(progA->Target == progB->Target); @@ -724,7 +724,7 @@ _mesa_combine_programs(struct gl_context *ctx,     if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) {        const struct gl_fragment_program *fprogA, *fprogB;        struct gl_fragment_program *newFprog; -      GLbitfield progB_inputsRead = progB->InputsRead; +      GLbitfield64 progB_inputsRead = progB->InputsRead;        GLint progB_colorFile, progB_colorIndex;        fprogA = gl_fragment_program_const(progA); @@ -840,8 +840,8 @@ _mesa_find_used_registers(const struct gl_program *prog,        for (j = 0; j < n; j++) {           if (inst->SrcReg[j].File == file) { -            ASSERT(inst->SrcReg[j].Index < usedSize); -            if(inst->SrcReg[j].Index < usedSize) +            ASSERT(inst->SrcReg[j].Index < (GLint) usedSize); +            if (inst->SrcReg[j].Index < (GLint) usedSize)                 used[inst->SrcReg[j].Index] = GL_TRUE;           }        } @@ -908,23 +908,23 @@ _mesa_valid_register_index(const struct gl_context *ctx,        return GL_TRUE;  /* XXX or maybe false? 
*/     case PROGRAM_TEMPORARY: -      return index >= 0 && index < c->MaxTemps; +      return index >= 0 && index < (GLint) c->MaxTemps;     case PROGRAM_ENV_PARAM: -      return index >= 0 && index < c->MaxEnvParams; +      return index >= 0 && index < (GLint) c->MaxEnvParams;     case PROGRAM_LOCAL_PARAM: -      return index >= 0 && index < c->MaxLocalParams; +      return index >= 0 && index < (GLint) c->MaxLocalParams;     case PROGRAM_UNIFORM:     case PROGRAM_STATE_VAR:        /* aka constant buffer */ -      return index >= 0 && index < c->MaxUniformComponents / 4; +      return index >= 0 && index < (GLint) c->MaxUniformComponents / 4;     case PROGRAM_CONSTANT:        /* constant buffer w/ possible relative negative addressing */        return (index > (int) c->MaxUniformComponents / -4 && -              index < c->MaxUniformComponents / 4); +              index < (int) c->MaxUniformComponents / 4);     case PROGRAM_INPUT:        if (index < 0) @@ -932,11 +932,11 @@ _mesa_valid_register_index(const struct gl_context *ctx,        switch (shaderType) {        case MESA_SHADER_VERTEX: -         return index < VERT_ATTRIB_GENERIC0 + c->MaxAttribs; +         return index < VERT_ATTRIB_GENERIC0 + (GLint) c->MaxAttribs;        case MESA_SHADER_FRAGMENT: -         return index < FRAG_ATTRIB_VAR0 + ctx->Const.MaxVarying; +         return index < FRAG_ATTRIB_VAR0 + (GLint) ctx->Const.MaxVarying;        case MESA_SHADER_GEOMETRY: -         return index < GEOM_ATTRIB_VAR0 + ctx->Const.MaxVarying; +         return index < GEOM_ATTRIB_VAR0 + (GLint) ctx->Const.MaxVarying;        default:           return GL_FALSE;        } @@ -947,17 +947,17 @@ _mesa_valid_register_index(const struct gl_context *ctx,        switch (shaderType) {        case MESA_SHADER_VERTEX: -         return index < VERT_RESULT_VAR0 + ctx->Const.MaxVarying; +         return index < VERT_RESULT_VAR0 + (GLint) ctx->Const.MaxVarying;        case MESA_SHADER_FRAGMENT: -         return index < 
FRAG_RESULT_DATA0 + ctx->Const.MaxDrawBuffers; +         return index < FRAG_RESULT_DATA0 + (GLint) ctx->Const.MaxDrawBuffers;        case MESA_SHADER_GEOMETRY: -         return index < GEOM_RESULT_VAR0 + ctx->Const.MaxVarying; +         return index < GEOM_RESULT_VAR0 + (GLint) ctx->Const.MaxVarying;        default:           return GL_FALSE;        }     case PROGRAM_ADDRESS: -      return index >= 0 && index < c->MaxAddressRegs; +      return index >= 0 && index < (GLint) c->MaxAddressRegs;     default:        _mesa_problem(ctx, diff --git a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c index 843dc5be3..63dbdb29b 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c +++ b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c @@ -350,9 +350,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,        tBot = (GLfloat) height;     } -   u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), vbuf_offset, vbuf, -		  (void**)&vertices); -   if (!vbuf) { +   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), +                      vbuf_offset, vbuf, (void **) &vertices) != PIPE_OK) {        return;     } diff --git a/mesalib/src/mesa/state_tracker/st_cb_clear.c b/mesalib/src/mesa/state_tracker/st_cb_clear.c index d01236e28..a5aa8f496 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_clear.c +++ b/mesalib/src/mesa/state_tracker/st_cb_clear.c @@ -141,9 +141,8 @@ draw_quad(struct st_context *st,     GLuint i, offset;     float (*vertices)[2][4];  /**< vertex pos + color */ -   u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), &offset, &vbuf, -		  (void**)&vertices); -   if (!vbuf) { +   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), +                      &offset, &vbuf, (void **) &vertices) != PIPE_OK) {        return;     } diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c index ff8a9dc43..c944b81f6 100644 --- 
a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c @@ -568,9 +568,8 @@ draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,     struct pipe_resource *buf = NULL;     unsigned offset; -   u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset, &buf, -		  (void**)&verts); -   if (!buf) { +   if (u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), &offset, +                      &buf, (void **) &verts) != PIPE_OK) {        return;     } @@ -795,7 +794,7 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,     y1 = y + height * ctx->Pixel.ZoomY;     /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ -   z = z * 2.0 - 1.0; +   z = z * 2.0f - 1.0f;     draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex,               normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width, @@ -1063,7 +1062,7 @@ static void  clamp_size(struct pipe_context *pipe, GLsizei *width, GLsizei *height,             struct gl_pixelstore_attrib *unpack)  { -   const unsigned maxSize =  +   const int maxSize =        1 << (pipe->screen->get_param(pipe->screen,                                      PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c index 269068da2..5ca097004 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c +++ b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c @@ -148,10 +148,9 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,        GLfloat *vbuf = NULL;        GLuint attr; -      u_upload_alloc(st->uploader, 0, -		     numAttribs * 4 * 4 * sizeof(GLfloat), -		     &offset, &vbuffer, (void**)&vbuf); -      if (!vbuffer) { +      if (u_upload_alloc(st->uploader, 0, +                         numAttribs * 4 * 4 * sizeof(GLfloat), +                         &offset, &vbuffer, (void **) &vbuf) != PIPE_OK) {           return;       
 } diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.c b/mesalib/src/mesa/state_tracker/st_cb_texture.c index 7f07b741e..3cea2df07 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_texture.c +++ b/mesalib/src/mesa/state_tracker/st_cb_texture.c @@ -1555,6 +1555,7 @@ void  st_init_texture_functions(struct dd_function_table *functions)  {     functions->ChooseTextureFormat = st_ChooseTextureFormat; +   functions->QuerySamplesForFormat = st_QuerySamplesForFormat;     functions->TexImage = st_TexImage;     functions->TexSubImage = _mesa_store_texsubimage;     functions->CompressedTexSubImage = _mesa_store_compressed_texsubimage; diff --git a/mesalib/src/mesa/state_tracker/st_draw.c b/mesalib/src/mesa/state_tracker/st_draw.c index de539ca5a..de62264a1 100644 --- a/mesalib/src/mesa/state_tracker/st_draw.c +++ b/mesalib/src/mesa/state_tracker/st_draw.c @@ -84,7 +84,12 @@ all_varyings_in_vbos(const struct gl_client_array *arrays[])  } -static void +/** + * Basically, translate Mesa's index buffer information into + * a pipe_index_buffer object. 
+ * \return TRUE or FALSE for success/failure + */ +static boolean  setup_index_buffer(struct st_context *st,                     const struct _mesa_index_buffer *ib,                     struct pipe_index_buffer *ibuffer) @@ -100,8 +105,12 @@ setup_index_buffer(struct st_context *st,        ibuffer->offset = pointer_to_offset(ib->ptr);     }     else if (st->indexbuf_uploader) { -      u_upload_data(st->indexbuf_uploader, 0, ib->count * ibuffer->index_size, -                    ib->ptr, &ibuffer->offset, &ibuffer->buffer); +      if (u_upload_data(st->indexbuf_uploader, 0, +                        ib->count * ibuffer->index_size, ib->ptr, +                        &ibuffer->offset, &ibuffer->buffer) != PIPE_OK) { +         /* out of memory */ +         return FALSE; +      }        u_upload_unmap(st->indexbuf_uploader);     }     else { @@ -110,6 +119,7 @@ setup_index_buffer(struct st_context *st,     }     cso_set_index_buffer(st->cso_context, ibuffer); +   return TRUE;  } @@ -220,7 +230,10 @@ st_draw_vbo(struct gl_context *ctx,              vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index,                                     nr_prims); -      setup_index_buffer(st, ib, &ibuffer); +      if (!setup_index_buffer(st, ib, &ibuffer)) { +         /* out of memory */ +         return; +      }        info.indexed = TRUE;        if (min_index != ~0 && max_index != ~0) { diff --git a/mesalib/src/mesa/state_tracker/st_extensions.c b/mesalib/src/mesa/state_tracker/st_extensions.c index 18d89815d..af54cf7c8 100644 --- a/mesalib/src/mesa/state_tracker/st_extensions.c +++ b/mesalib/src/mesa/state_tracker/st_extensions.c @@ -516,6 +516,7 @@ void st_init_extensions(struct st_context *st)     ctx->Extensions.ARB_fragment_shader = GL_TRUE;     ctx->Extensions.ARB_half_float_pixel = GL_TRUE;     ctx->Extensions.ARB_half_float_vertex = GL_TRUE; +   ctx->Extensions.ARB_internalformat_query = GL_TRUE;     ctx->Extensions.ARB_map_buffer_range = GL_TRUE;     
ctx->Extensions.ARB_shader_objects = GL_TRUE;     ctx->Extensions.ARB_shading_language_100 = GL_TRUE; @@ -594,9 +595,10 @@ void st_init_extensions(struct st_context *st)        ctx->Const.NativeIntegers = GL_TRUE;        ctx->Const.MaxClipPlanes = 8; -      /* Extensions that only depend on GLSL 1.3. */ +      /* Extensions that either depend on GLSL 1.30 or are a subset thereof. */        ctx->Extensions.ARB_conservative_depth = GL_TRUE;        ctx->Extensions.ARB_shader_bit_encoding = GL_TRUE; +      ctx->Extensions.OES_depth_texture_cube_map = GL_TRUE;     } else {        /* Optional integer support for GLSL 1.2. */        if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX, diff --git a/mesalib/src/mesa/state_tracker/st_format.c b/mesalib/src/mesa/state_tracker/st_format.c index af81f732d..7ef063953 100644 --- a/mesalib/src/mesa/state_tracker/st_format.c +++ b/mesalib/src/mesa/state_tracker/st_format.c @@ -1642,6 +1642,40 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,  } +/** + * Called via ctx->Driver.ChooseTextureFormat(). + */ +size_t +st_QuerySamplesForFormat(struct gl_context *ctx, GLenum internalFormat, +                         int samples[16]) +{ +   struct pipe_screen *screen = st_context(ctx)->pipe->screen; +   enum pipe_format format; +   unsigned i, bind, num_sample_counts = 0; + +   if (_mesa_is_depth_or_stencil_format(internalFormat)) +      bind = PIPE_BIND_DEPTH_STENCIL; +   else +      bind = PIPE_BIND_RENDER_TARGET; + +   /* Set sample counts in descending order. 
*/ +   for (i = 16; i > 1; i--) { +      format = st_choose_format(screen, internalFormat, GL_NONE, GL_NONE, +                                PIPE_TEXTURE_2D, i, bind); + +      if (format != PIPE_FORMAT_NONE) { +         samples[num_sample_counts++] = i; +      } +   } + +   if (!num_sample_counts) { +      samples[num_sample_counts++] = 1; +   } + +   return num_sample_counts; +} + +  GLboolean  st_sampler_compat_formats(enum pipe_format format1, enum pipe_format format2)  { diff --git a/mesalib/src/mesa/state_tracker/st_format.h b/mesalib/src/mesa/state_tracker/st_format.h index 39397b17a..cb6e5bc96 100644 --- a/mesalib/src/mesa/state_tracker/st_format.h +++ b/mesalib/src/mesa/state_tracker/st_format.h @@ -67,6 +67,9 @@ st_ChooseTextureFormat(struct gl_context * ctx, GLenum target,                         GLint internalFormat,                         GLenum format, GLenum type); +size_t +st_QuerySamplesForFormat(struct gl_context *ctx, GLenum internalFormat, +                         int samples[16]);  /* can we use a sampler view to translate these formats     only used to make TFP so far */ diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 1d96e905c..c6ac634a2 100644 --- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -984,10 +984,13 @@ type_size(const struct glsl_type *type)         * at link time.         
*/        return 1; -   default: -      assert(0); -      return 0; +   case GLSL_TYPE_INTERFACE: +   case GLSL_TYPE_VOID: +   case GLSL_TYPE_ERROR: +      assert(!"Invalid type in type_size"); +      break;     } +   return 0;  }  /** @@ -1932,10 +1935,23 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)        }        break;     } +   case ir_unop_pack_snorm_2x16: +   case ir_unop_pack_unorm_2x16: +   case ir_unop_pack_half_2x16: +   case ir_unop_pack_snorm_4x8: +   case ir_unop_pack_unorm_4x8: +   case ir_unop_unpack_snorm_2x16: +   case ir_unop_unpack_unorm_2x16: +   case ir_unop_unpack_half_2x16: +   case ir_unop_unpack_half_2x16_split_x: +   case ir_unop_unpack_half_2x16_split_y: +   case ir_unop_unpack_snorm_4x8: +   case ir_unop_unpack_unorm_4x8: +   case ir_binop_pack_half_2x16_split:     case ir_quadop_vector: -      /* This operation should have already been handled. +      /* This operation is not supported, or should have already been handled.         */ -      assert(!"Should not get here."); +      assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");        break;     } @@ -2001,21 +2017,18 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)          				       var->location);           this->variables.push_tail(entry);           break; -      case ir_var_in: -      case ir_var_inout: +      case ir_var_shader_in:           /* The linker assigns locations for varyings and attributes,            * including deprecated builtins (like gl_Color), user-assign            * generic attributes (glBindVertexLocation), and            * user-defined varyings. -          * -          * FINISHME: We would hit this path for function arguments.  Fix!            
*/           assert(var->location != -1);           entry = new(mem_ctx) variable_storage(var,                                                 PROGRAM_INPUT,                                                 var->location);           break; -      case ir_var_out: +      case ir_var_shader_out:           assert(var->location != -1);           entry = new(mem_ctx) variable_storage(var,                                                 PROGRAM_OUTPUT, @@ -2304,7 +2317,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)        assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());        l.writemask = WRITEMASK_XYZW;     } else if (ir->lhs->type->is_scalar() && -              ir->lhs->variable_referenced()->mode == ir_var_out) { +              ir->lhs->variable_referenced()->mode == ir_var_shader_out) {        /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the         * FINISHME: W component of fragment shader output zero, work correctly.         */ @@ -2581,8 +2594,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)        ir_rvalue *param_rval = (ir_rvalue *)iter.get();        ir_variable *param = (ir_variable *)sig_iter.get(); -      if (param->mode == ir_var_in || -          param->mode == ir_var_inout) { +      if (param->mode == ir_var_function_in || +          param->mode == ir_var_function_inout) {           variable_storage *storage = find_variable_storage(param);           assert(storage); @@ -2617,8 +2630,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)        ir_rvalue *param_rval = (ir_rvalue *)iter.get();        ir_variable *param = (ir_variable *)sig_iter.get(); -      if (param->mode == ir_var_out || -          param->mode == ir_var_inout) { +      if (param->mode == ir_var_function_out || +          param->mode == ir_var_function_inout) {           variable_storage *storage = find_variable_storage(param);           assert(storage); diff --git a/mesalib/src/mesa/swrast/s_texfilter.c b/mesalib/src/mesa/swrast/s_texfilter.c index 
0a91cca06..953300f65 100644 --- a/mesalib/src/mesa/swrast/s_texfilter.c +++ b/mesalib/src/mesa/swrast/s_texfilter.c @@ -1647,14 +1647,14 @@ sample_2d_ewa(struct gl_context *ctx,                GLfloat rgba[])  {     GLint level = lod > 0 ? lod : 0; -   GLfloat scaling = 1.0 / (1 << level); +   GLfloat scaling = 1.0f / (1 << level);     const struct gl_texture_image *img =	tObj->Image[0][level];     const struct gl_texture_image *mostDetailedImage =        tObj->Image[0][tObj->BaseLevel];     const struct swrast_texture_image *swImg =        swrast_texture_image_const(mostDetailedImage); -   GLfloat tex_u=-0.5 + texcoord[0] * swImg->WidthScale * scaling; -   GLfloat tex_v=-0.5 + texcoord[1] * swImg->HeightScale * scaling; +   GLfloat tex_u = -0.5f + texcoord[0] * swImg->WidthScale * scaling; +   GLfloat tex_v = -0.5f + texcoord[1] * swImg->HeightScale * scaling;     GLfloat ux = dudx * scaling;     GLfloat vx = dvdx * scaling; @@ -1667,20 +1667,20 @@ sample_2d_ewa(struct gl_context *ctx,     GLfloat A = vx*vx+vy*vy+1;     GLfloat B = -2*(ux*vx+uy*vy);     GLfloat C = ux*ux+uy*uy+1; -   GLfloat F = A*C-B*B/4.0; +   GLfloat F = A*C-B*B/4.0f;     /* check if it is an ellipse */     /* ASSERT(F > 0.0); */     /* Compute the ellipse's (u,v) bounding box in texture space */ -   GLfloat d = -B*B+4.0*C*A; -   GLfloat box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox with   */ -   GLfloat box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */ +   GLfloat d = -B*B+4.0f*C*A; +   GLfloat box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with   */ +   GLfloat box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */ -   GLint u0 = floor(tex_u - box_u); -   GLint u1 = ceil (tex_u + box_u); -   GLint v0 = floor(tex_v - box_v); -   GLint v1 = ceil (tex_v + box_v); +   GLint u0 = (GLint) floorf(tex_u - box_u); +   GLint u1 = (GLint) ceilf (tex_u + box_u); +   GLint v0 = (GLint) floorf(tex_v - box_v); +   GLint v1 = (GLint) ceilf (tex_v + box_v);    
 GLfloat num[4] = {0.0F, 0.0F, 0.0F, 0.0F};     GLfloat newCoord[2]; @@ -1692,7 +1692,7 @@ sample_2d_ewa(struct gl_context *ctx,     /* Scale ellipse formula to directly index the Filter Lookup Table.      * i.e. scale so that F = WEIGHT_LUT_SIZE-1      */ -   double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F; +   GLfloat formScale = (GLfloat) (WEIGHT_LUT_SIZE - 1) / F;     A *= formScale;     B *= formScale;     C *= formScale; @@ -1715,7 +1715,7 @@ sample_2d_ewa(struct gl_context *ctx,              /* as a LUT is used, q must never be negative;               * should not happen, though               */ -            const GLint qClamped = q >= 0.0F ? q : 0; +            const GLint qClamped = q >= 0.0F ? (GLint) q : 0;              GLfloat weight = weightLut[qClamped];              newCoord[0] = u / ((GLfloat) img->Width2); @@ -1795,19 +1795,19 @@ sample_2d_footprint(struct gl_context *ctx,     /*  Calculate the per anisotropic sample offsets in s,t space. */     if (Px2 > Py2) { -      numSamples = ceil(sqrtf(Px2)); +      numSamples = (GLint) ceilf(sqrtf(Px2));        ds = ux / ((GLfloat) img->Width2);        dt = vx / ((GLfloat) img->Height2);     }     else { -      numSamples = ceil(sqrtf(Py2)); +      numSamples = (GLint) ceilf(sqrtf(Py2));        ds = uy / ((GLfloat) img->Width2);        dt = vy / ((GLfloat) img->Height2);     }     for (s = 0; s<numSamples; s++) { -      newCoord[0] = texcoord[0] + ds * ((GLfloat)(s+1) / (numSamples+1) -0.5); -      newCoord[1] = texcoord[1] + dt * ((GLfloat)(s+1) / (numSamples+1) -0.5); +      newCoord[0] = texcoord[0] + ds * ((GLfloat)(s+1) / (numSamples+1) -0.5f); +      newCoord[1] = texcoord[1] + dt * ((GLfloat)(s+1) / (numSamples+1) -0.5f);        sample_2d_linear(ctx, samp, img, newCoord, rgba);        num[0] += rgba[0]; @@ -1956,7 +1956,7 @@ sample_lambda_2d_aniso(struct gl_context *ctx,        /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid         * this since 0.5*log(x) = log(sqrt(x))        
 */ -      lod = 0.5 * LOG2(Pmin2); +      lod = 0.5f * LOG2(Pmin2);        if (adjustLOD) {           /* from swrast/s_texcombine.c _swrast_texture_span */ @@ -1988,7 +1988,7 @@ sample_lambda_2d_aniso(struct gl_context *ctx,            * seem to be worth the extra running time.            */           sample_2d_ewa(ctx, samp, tObj, texcoords[i], -                       dudx, dvdx, dudy, dvdy, floor(lod), rgba[i]); +                       dudx, dvdx, dudy, dvdy, (GLint) floorf(lod), rgba[i]);           /* unused: */           (void) sample_2d_footprint; diff --git a/mesalib/src/mesa/vbo/vbo_exec_api.c b/mesalib/src/mesa/vbo/vbo_exec_api.c index 985f2209c..353f8cfde 100644 --- a/mesalib/src/mesa/vbo/vbo_exec_api.c +++ b/mesalib/src/mesa/vbo/vbo_exec_api.c @@ -124,6 +124,11 @@ void vbo_exec_vtx_wrap( struct vbo_exec_context *exec )      */     vbo_exec_wrap_buffers( exec ); +   if (!exec->vtx.buffer_ptr) { +      /* probably ran out of memory earlier when allocating the VBO */ +      return; +   } +     /* Copy stored stored vertices to start of new list.       */     assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr); diff --git a/mkfontscale/configure.ac b/mkfontscale/configure.ac index 4340f99e1..4c7e599d5 100644 --- a/mkfontscale/configure.ac +++ b/mkfontscale/configure.ac @@ -27,6 +27,7 @@ AC_INIT([mkfontscale], [1.1.0],          [mkfontscale])  AC_CONFIG_SRCDIR([Makefile.am])  AC_CONFIG_HEADERS([config.h]) +AC_USE_SYSTEM_EXTENSIONS  # Initialize Automake  AM_INIT_AUTOMAKE([foreign dist-bzip2]) diff --git a/mkfontscale/hash.c b/mkfontscale/hash.c index c2cf9caa3..3adfb6861 100644 --- a/mkfontscale/hash.c +++ b/mkfontscale/hash.c @@ -20,6 +20,8 @@    THE SOFTWARE.  
*/ +#include "config.h" +  #include <stdlib.h>  #include <stdio.h>  #include <string.h> @@ -41,14 +43,11 @@ hash(const char *string)  }  static void -strcpy_lwr(char *dst, const char *src) +str_tolower(char *s)  { -    while(1) { -        *dst = tolower(*src); -        if(*src == '\0') -            break; -        src++; -        dst++; +    while(*s != '\0') { +        *s = tolower(*s); +        s++;      }  } @@ -97,12 +96,11 @@ putHash(HashTablePtr table, char *key, char *value, int prio)      for(bp = table[i]; bp; bp = bp->next) {          if(strcasecmp(bp->key, key) == 0) {              if(prio > bp->prio) { -                keycopy = malloc(strlen(key) + 1); +                keycopy = strdup(key);                  if(keycopy == NULL) goto fail; -                strcpy_lwr(keycopy, key); -                valuecopy = malloc(strlen(value) + 1); +                str_tolower(keycopy); +                valuecopy = strdup(value);                  if(valuecopy == NULL) goto fail; -                strcpy(valuecopy, value);                  free(bp->key);                  free(bp->value);                  bp->key = keycopy; @@ -111,14 +109,13 @@ putHash(HashTablePtr table, char *key, char *value, int prio)              return 1;          }      } -    keycopy = malloc(strlen(key) + 1); +    keycopy = strdup(key);      if(keycopy == NULL)          goto fail; -    strcpy_lwr(keycopy, key); -    valuecopy = malloc(strlen(value) + 1); +    str_tolower(keycopy); +    valuecopy = strdup(value);      if(valuecopy == NULL)          goto fail; -    strcpy(valuecopy, value);      bp = malloc(sizeof(HashBucketRec));      if(bp == NULL)          goto fail; diff --git a/mkfontscale/ident.c b/mkfontscale/ident.c index bf544832c..41212575e 100644 --- a/mkfontscale/ident.c +++ b/mkfontscale/ident.c @@ -315,10 +315,9 @@ pcfIdentify(fontFile *f, char **name)      if(i >= nprops)          goto fail; -    s = malloc(strlen(strings + props[i].value) + 1); +    s = strdup(strings + 
props[i].value);      if(s == NULL)          goto fail; -    strcpy(s, strings + props[i].value);      *name = s;      free(strings);      free(props); diff --git a/mkfontscale/mkfontscale.c b/mkfontscale/mkfontscale.c index 5cf5cb9af..a67f28338 100644 --- a/mkfontscale/mkfontscale.c +++ b/mkfontscale/mkfontscale.c @@ -20,6 +20,8 @@    THE SOFTWARE.  */ +#include "config.h" +  #include <stdio.h>  #include <stdlib.h>  #include <string.h> @@ -896,10 +898,9 @@ doDirectory(const char *dirname_given, int numEncodings, ListPtr encodingsToDo)                  BDF_PropertyRec prop;                  rc = FT_Get_BDF_Property(face, "FONT", &prop);                  if(rc == 0 && prop.type == BDF_PROPERTY_TYPE_ATOM) { -                    xlfd_name = malloc(strlen(prop.u.atom) + 1); +                    xlfd_name = strdup(prop.u.atom);                      if(xlfd_name == NULL)                          goto done; -                    strcpy(xlfd_name, prop.u.atom);                  }              }          } diff --git a/pixman/configure.ac b/pixman/configure.ac index 515e31218..a93e2905b 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -968,6 +968,22 @@ fi  AC_MSG_RESULT($support_for_attribute_constructor)  AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR) +dnl ===================================== +dnl __float128 + +support_for_float128=no + +AC_MSG_CHECKING(for __float128) +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; } +]])], support_for_float128=yes) + +if test x$support_for_float128 = xyes; then +   AC_DEFINE([HAVE_FLOAT128], [], [Whether the tool chain supports __float128]) +fi + +AC_MSG_RESULT($support_for_float128) +  dnl ==================  dnl libpng diff --git a/pixman/demos/scale.c b/pixman/demos/scale.c index 9100ff72a..869ada12b 100644 --- a/pixman/demos/scale.c +++ b/pixman/demos/scale.c @@ -39,6 +39,7 @@ typedef struct      GtkAdjustment *     scale_x_adjustment;      GtkAdjustment *     
scale_y_adjustment;      GtkAdjustment *     rotate_adjustment; +    GtkAdjustment *	subsample_adjustment;      int                 scaled_width;      int                 scaled_height;  } app_t; @@ -236,7 +237,8 @@ rescale (GtkWidget *may_be_null, app_t *app)  	get_value (app, filters, "reconstruct_y_combo_box"),  	get_value (app, filters, "sample_x_combo_box"),  	get_value (app, filters, "sample_y_combo_box"), -        4, 4); +	gtk_adjustment_get_value (app->subsample_adjustment), +	gtk_adjustment_get_value (app->subsample_adjustment));      pixman_image_set_filter (app->original, PIXMAN_FILTER_SEPARABLE_CONVOLUTION, params, n_params); @@ -360,10 +362,13 @@ app_new (pixman_image_t *original)          GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_y_adjustment"));      app->rotate_adjustment =          GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "rotate_adjustment")); +    app->subsample_adjustment = +	GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "subsample_adjustment"));      g_signal_connect (app->scale_x_adjustment, "value_changed", G_CALLBACK (rescale), app);      g_signal_connect (app->scale_y_adjustment, "value_changed", G_CALLBACK (rescale), app);      g_signal_connect (app->rotate_adjustment, "value_changed", G_CALLBACK (rescale), app); +    g_signal_connect (app->subsample_adjustment, "value_changed", G_CALLBACK (rescale), app);      widget = get_widget (app, "scale_x_scale");      gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL); diff --git a/pixman/demos/scale.ui b/pixman/demos/scale.ui index f7c0c805f..b3450d34d 100644 --- a/pixman/demos/scale.ui +++ b/pixman/demos/scale.ui @@ -23,6 +23,14 @@      <property name="page_increment">10</property>      <property name="page_size">10</property>    </object> +  <object class="GtkAdjustment" id="subsample_adjustment"> +    <property name="lower">1</property> +    <property name="upper">12</property> +    <property name="step_increment">1</property> +    <property 
name="page_increment">1</property> +    <property name="page_size">0</property> +    <property name="value">4</property> +  </object>    <object class="GtkWindow" id="main">      <child>        <object class="GtkHBox" id="u"> @@ -51,6 +59,7 @@          <child>            <object class="GtkVBox" id="box1">              <property name="visible">True</property> +	    <property name="spacing">12</property>              <child>                <object class="GtkHBox" id="box2">                  <property name="visible">True</property> @@ -234,6 +243,17 @@                        </packing>                      </child>                      <child> +                      <object class="GtkLabel" id="label9"> +                        <property name="visible">True</property> +                        <property name="xalign">1</property> +                        <property name="label" translatable="yes"><b>Subsample:</b></property> +                        <property name="use_markup">True</property> +                      </object> +                      <packing> +                        <property name="top_attach">5</property> +                      </packing> +                    </child> +                    <child>                        <object class="GtkComboBox" id="reconstruct_x_combo_box">                          <property name="visible">True</property>                        </object> @@ -277,6 +297,16 @@                          <property name="top_attach">4</property>                        </packing>                      </child> +                    <child> +                      <object class="GtkSpinButton" id="subsample_spin_button"> +                        <property name="visible">True</property> +			<property name="adjustment">subsample_adjustment</property> +                      </object> +                      <packing> +                        <property name="left_attach">1</property> +                        <property name="top_attach">5</property> + 
                     </packing> +                    </child>                    </object>                    <packing>                      <property name="expand">False</property> diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index c625e0c4a..247aea645 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -739,36 +739,6 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp,  }  static void -fast_composite_src_x888_0565 (pixman_implementation_t *imp, -                              pixman_composite_info_t *info) -{ -    PIXMAN_COMPOSITE_ARGS (info); -    uint16_t    *dst_line, *dst; -    uint32_t    *src_line, *src, s; -    int dst_stride, src_stride; -    int32_t w; - -    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); -    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - -    while (height--) -    { -	dst = dst_line; -	dst_line += dst_stride; -	src = src_line; -	src_line += src_stride; -	w = width; - -	while (w--) -	{ -	    s = *src++; -	    *dst = convert_8888_to_0565 (s); -	    dst++; -	} -    } -} - -static void  fast_composite_add_8_8 (pixman_implementation_t *imp,  			pixman_composite_info_t *info)  { @@ -1243,6 +1213,18 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp,      pixman_composite_func_t func;      pixman_format_code_t mask_format;      uint32_t src_flags, mask_flags; +    int32_t sx, sy; +    int32_t width_remain; +    int32_t num_pixels; +    int32_t src_width; +    int32_t i, j; +    pixman_image_t extended_src_image; +    uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; +    pixman_bool_t need_src_extension; +    uint32_t *src_line; +    int32_t src_stride; +    int32_t src_bpp; +    pixman_composite_info_t info2 = *info;      src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |  		    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; @@ -1258,149 +1240,131 @@ fast_composite_tiled_repeat 
(pixman_implementation_t *imp,  	mask_flags = FAST_PATH_IS_OPAQUE;      } -    if (_pixman_implementation_lookup_composite ( -	    imp->toplevel, info->op, -	    src_image->common.extended_format_code, src_flags, -	    mask_format, mask_flags, -	    dest_image->common.extended_format_code, info->dest_flags, -	    &imp, &func)) +    _pixman_implementation_lookup_composite ( +	imp->toplevel, info->op, +	src_image->common.extended_format_code, src_flags, +	mask_format, mask_flags, +	dest_image->common.extended_format_code, info->dest_flags, +	&imp, &func); + +    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); + +    if (src_image->bits.width < REPEAT_MIN_WIDTH		&& +	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&& +	!src_image->bits.indexed)      { -	int32_t sx, sy; -	int32_t width_remain; -	int32_t num_pixels; -	int32_t src_width; -	int32_t i, j; -	pixman_image_t extended_src_image; -	uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; -	pixman_bool_t need_src_extension; -	uint32_t *src_line; -	int32_t src_stride; -	int32_t src_bpp; -	pixman_composite_info_t info2 = *info; - -	src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); - -	if (src_image->bits.width < REPEAT_MIN_WIDTH		&& -	    (src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&& -	    !src_image->bits.indexed) -	{ -	    sx = src_x; -	    sx = MOD (sx, src_image->bits.width); -	    sx += width; -	    src_width = 0; +	sx = src_x; +	sx = MOD (sx, src_image->bits.width); +	sx += width; +	src_width = 0; -	    while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) -		src_width += src_image->bits.width; +	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) +	    src_width += src_image->bits.width; -	    src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); +	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); -	    /* Initialize/validate stack-allocated temporary image */ -	    _pixman_bits_image_init (&extended_src_image, src_image->bits.format, -				     src_width, 
1, &extended_src[0], src_stride, -				     FALSE); -	    _pixman_image_validate (&extended_src_image); +	/* Initialize/validate stack-allocated temporary image */ +	_pixman_bits_image_init (&extended_src_image, src_image->bits.format, +				 src_width, 1, &extended_src[0], src_stride, +				 FALSE); +	_pixman_image_validate (&extended_src_image); -	    info2.src_image = &extended_src_image; -	    need_src_extension = TRUE; -	} -	else -	{ -	    src_width = src_image->bits.width; -	    need_src_extension = FALSE; -	} +	info2.src_image = &extended_src_image; +	need_src_extension = TRUE; +    } +    else +    { +	src_width = src_image->bits.width; +	need_src_extension = FALSE; +    } -	sx = src_x; -	sy = src_y; +    sx = src_x; +    sy = src_y; -	while (--height >= 0) -	{ -	    sx = MOD (sx, src_width); -	    sy = MOD (sy, src_image->bits.height); +    while (--height >= 0) +    { +	sx = MOD (sx, src_width); +	sy = MOD (sy, src_image->bits.height); -	    if (need_src_extension) +	if (need_src_extension) +	{ +	    if (src_bpp == 32)  	    { -		if (src_bpp == 32) -		{ -		    PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); +		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); -		    for (i = 0; i < src_width; ) -		    { -			for (j = 0; j < src_image->bits.width; j++, i++) -			    extended_src[i] = src_line[j]; -		    } -		} -		else if (src_bpp == 16) +		for (i = 0; i < src_width; )  		{ -		    uint16_t *src_line_16; - -		    PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, -					   src_line_16, 1); -		    src_line = (uint32_t*)src_line_16; - -		    for (i = 0; i < src_width; ) -		    { -			for (j = 0; j < src_image->bits.width; j++, i++) -			    ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; -		    } +		    for (j = 0; j < src_image->bits.width; j++, i++) +			extended_src[i] = src_line[j];  		} -		else if (src_bpp == 8) -		{ -		    uint8_t *src_line_8; +	    } +	    else if (src_bpp == 16) +	    
{ +		uint16_t *src_line_16; -		    PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, -					   src_line_8, 1); -		    src_line = (uint32_t*)src_line_8; +		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, +				       src_line_16, 1); +		src_line = (uint32_t*)src_line_16; -		    for (i = 0; i < src_width; ) -		    { -			for (j = 0; j < src_image->bits.width; j++, i++) -			    ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; -		    } +		for (i = 0; i < src_width; ) +		{ +		    for (j = 0; j < src_image->bits.width; j++, i++) +			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];  		} - -		info2.src_y = 0;  	    } -	    else +	    else if (src_bpp == 8)  	    { -		info2.src_y = sy; +		uint8_t *src_line_8; + +		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, +				       src_line_8, 1); +		src_line = (uint32_t*)src_line_8; + +		for (i = 0; i < src_width; ) +		{ +		    for (j = 0; j < src_image->bits.width; j++, i++) +			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; +		}  	    } -	    width_remain = width; +	    info2.src_y = 0; +	} +	else +	{ +	    info2.src_y = sy; +	} -	    while (width_remain > 0) -	    { -		num_pixels = src_width - sx; +	width_remain = width; -		if (num_pixels > width_remain) -		    num_pixels = width_remain; +	while (width_remain > 0) +	{ +	    num_pixels = src_width - sx; -		info2.src_x = sx; -		info2.width = num_pixels; -		info2.height = 1; +	    if (num_pixels > width_remain) +		num_pixels = width_remain; -		func (imp, &info2); +	    info2.src_x = sx; +	    info2.width = num_pixels; +	    info2.height = 1; -		width_remain -= num_pixels; -		info2.mask_x += num_pixels; -		info2.dest_x += num_pixels; -		sx = 0; -	    } +	    func (imp, &info2); -	    sx = src_x; -	    sy++; -	    info2.mask_x = info->mask_x; -	    info2.mask_y++; -	    info2.dest_x = info->dest_x; -	    info2.dest_y++; +	    width_remain -= num_pixels; +	    info2.mask_x += num_pixels; +	    info2.dest_x += num_pixels; +	   
 sx = 0;  	} -	if (need_src_extension) -	    _pixman_image_fini (&extended_src_image); -    } -    else -    { -	_pixman_log_error (FUNC, "Didn't find a suitable function "); +	sx = src_x; +	sy++; +	info2.mask_x = info->mask_x; +	info2.mask_y++; +	info2.dest_x = info->dest_x; +	info2.dest_y++;      } + +    if (need_src_extension) +	_pixman_image_fini (&extended_src_image);  }  /* Use more unrolling for src_0565_0565 because it is typically CPU bound */ @@ -1913,10 +1877,6 @@ static const pixman_fast_path_t c_fast_paths[] =      PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),      PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),      PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), -    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), -    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), -    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), -    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),      PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),      PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), @@ -2199,12 +2159,200 @@ fast_path_fill (pixman_implementation_t *imp,      return TRUE;  } +/*****************************************************************************/ + +static uint32_t * +fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ +    int32_t w = iter->width; +    uint32_t *dst = iter->buffer; +    const uint16_t *src = (const uint16_t *)iter->bits; + +    iter->bits += iter->stride; + +    /* Align the source buffer at 4 bytes boundary */ +    if (w > 0 && ((uintptr_t)src & 3)) +    { +	*dst++ = convert_0565_to_8888 (*src++); +	w--; +    } +    /* Process two pixels per iteration */ +    while ((w -= 2) >= 0) +    { +	uint32_t sr, sb, sg, t0, t1; +	uint32_t s = *(const uint32_t *)src; +	src += 
2; +	sr = (s >> 8) & 0x00F800F8; +	sb = (s << 3) & 0x00F800F8; +	sg = (s >> 3) & 0x00FC00FC; +	sr |= sr >> 5; +	sb |= sb >> 5; +	sg |= sg >> 6; +	t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) | +	     (sb & 0xFF) | 0xFF000000; +	t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) | +	     (sb >> 16) | 0xFF000000; +#ifdef WORDS_BIGENDIAN +	*dst++ = t1; +	*dst++ = t0; +#else +	*dst++ = t0; +	*dst++ = t1; +#endif +    } +    if (w & 1) +    { +	*dst = convert_0565_to_8888 (*src); +    } + +    return iter->buffer; +} + +static uint32_t * +fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) +{ +    iter->bits += iter->stride; +    return iter->buffer; +} + +/* Helper function for a workaround, which tries to ensure that 0x1F001F + * constant is always allocated in a register on RISC architectures. + */ +static force_inline uint32_t +convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F) +{ +    uint32_t a, b; +    a = (s >> 3) & x1F001F; +    b = s & 0xFC00; +    a |= a >> 5; +    a |= b >> 5; +    return a; +} + +static void +fast_write_back_r5g6b5 (pixman_iter_t *iter) +{ +    int32_t w = iter->width; +    uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); +    const uint32_t *src = iter->buffer; +    /* Workaround to ensure that x1F001F variable is allocated in a register */ +    static volatile uint32_t volatile_x1F001F = 0x1F001F; +    uint32_t x1F001F = volatile_x1F001F; + +    while ((w -= 4) >= 0) +    { +	uint32_t s1 = *src++; +	uint32_t s2 = *src++; +	uint32_t s3 = *src++; +	uint32_t s4 = *src++; +	*dst++ = convert_8888_to_0565_workaround (s1, x1F001F); +	*dst++ = convert_8888_to_0565_workaround (s2, x1F001F); +	*dst++ = convert_8888_to_0565_workaround (s3, x1F001F); +	*dst++ = convert_8888_to_0565_workaround (s4, x1F001F); +    } +    if (w & 2) +    { +	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); +	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); +    } +    if (w & 1) +    { +	*dst = 
convert_8888_to_0565_workaround (*src, x1F001F); +    } +} + +typedef struct +{ +    pixman_format_code_t	format; +    pixman_iter_get_scanline_t	get_scanline; +    pixman_iter_write_back_t	write_back; +} fetcher_info_t; + +static const fetcher_info_t fetchers[] = +{ +    { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, +    { PIXMAN_null } +}; + +static pixman_bool_t +fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ +    pixman_image_t *image = iter->image; + +#define FLAGS								\ +    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\ +     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + +    if ((iter->iter_flags & ITER_NARROW)			&& +	(iter->image_flags & FLAGS) == FLAGS) +    { +	const fetcher_info_t *f; + +	for (f = &fetchers[0]; f->format != PIXMAN_null; f++) +	{ +	    if (image->common.extended_format_code == f->format) +	    { +		uint8_t *b = (uint8_t *)image->bits.bits; +		int s = image->bits.rowstride * 4; + +		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; +		iter->stride = s; + +		iter->get_scanline = f->get_scanline; +		return TRUE; +	    } +	} +    } + +    return FALSE; +} + +static pixman_bool_t +fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ +    pixman_image_t *image = iter->image; + +    if ((iter->iter_flags & ITER_NARROW)		&& +	(iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) +    { +	const fetcher_info_t *f; + +	for (f = &fetchers[0]; f->format != PIXMAN_null; f++) +	{ +	    if (image->common.extended_format_code == f->format) +	    { +		uint8_t *b = (uint8_t *)image->bits.bits; +		int s = image->bits.rowstride * 4; + +		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; +		iter->stride = s; + +		if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == +		    (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) +		{ +		    iter->get_scanline = fast_dest_fetch_noop; +		} +		else +		{ 
+		    iter->get_scanline = f->get_scanline; +		} +		iter->write_back = f->write_back; +		return TRUE; +	    } +	} +    } +    return FALSE; +} + +  pixman_implementation_t *  _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)  {      pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);      imp->fill = fast_path_fill; +    imp->src_iter_init = fast_src_iter_init; +    imp->dest_iter_init = fast_dest_iter_init;      return imp;  } diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c index f175d771e..93a1b9acf 100644 --- a/pixman/pixman/pixman-general.c +++ b/pixman/pixman/pixman-general.c @@ -188,9 +188,6 @@ general_composite_rect  (pixman_implementation_t *imp,      compose = _pixman_implementation_lookup_combiner (  	imp->toplevel, op, component_alpha, narrow); -    if (!compose) -	return; -      for (i = 0; i < height; ++i)      {  	uint32_t *s, *m, *d; diff --git a/pixman/pixman/pixman-glyph.c b/pixman/pixman/pixman-glyph.c index 6d2c8bbb7..5a271b64b 100644 --- a/pixman/pixman/pixman-glyph.c +++ b/pixman/pixman/pixman-glyph.c @@ -463,16 +463,13 @@ pixman_composite_glyphs_no_mask (pixman_op_t            op,  		{  		    glyph_format = glyph_img->common.extended_format_code;  		    glyph_flags = glyph_img->common.flags; -		     +  		    _pixman_implementation_lookup_composite (  			get_implementation(), op,  			src->common.extended_format_code, src->common.flags,  			glyph_format, glyph_flags | extra,  			dest_format, dest_flags,  			&implementation, &func); - -		    if (!func) -			goto out;  		}  		info.src_x = src_x + composite_box.x1 - dest_x; @@ -582,9 +579,6 @@ add_glyphs (pixman_glyph_cache_t *cache,  		mask_format, info.mask_flags,  		dest_format, dest_flags,  		&implementation, &func); - -	    if (!func) -		goto out;  	}  	glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x; diff --git a/pixman/pixman/pixman-implementation.c b/pixman/pixman/pixman-implementation.c index 
ec467a619..c0a643633 100644 --- a/pixman/pixman/pixman-implementation.c +++ b/pixman/pixman/pixman-implementation.c @@ -65,7 +65,13 @@ typedef struct  PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); -pixman_bool_t +static void +dummy_composite_rect (pixman_implementation_t *imp, +		      pixman_composite_info_t *info) +{ +} + +void  _pixman_implementation_lookup_composite (pixman_implementation_t  *toplevel,  					 pixman_op_t               op,  					 pixman_format_code_t      src_format, @@ -142,7 +148,11 @@ _pixman_implementation_lookup_composite (pixman_implementation_t  *toplevel,  	    ++info;  	}      } -    return FALSE; + +    /* We should never reach this point */ +    _pixman_log_error (FUNC, "No known composite function\n"); +    *out_imp = NULL; +    *out_func = dummy_composite_rect;  update_cache:      if (i) @@ -160,8 +170,16 @@ update_cache:  	cache->cache[0].fast_path.dest_flags = dest_flags;  	cache->cache[0].fast_path.func = *out_func;      } +} -    return TRUE; +static void +dummy_combine (pixman_implementation_t *imp, +	       pixman_op_t              op, +	       uint32_t *               pd, +	       const uint32_t *         ps, +	       const uint32_t *         pm, +	       int                      w) +{  }  pixman_combine_32_func_t @@ -199,7 +217,9 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp,  	imp = imp->fallback;      } -    return NULL; +    /* We should never reach this point */ +    _pixman_log_error (FUNC, "No known combine function\n"); +    return dummy_combine;  }  pixman_bool_t diff --git a/pixman/pixman/pixman-inlines.h b/pixman/pixman/pixman-inlines.h index ab4def0dc..dd1c2f17f 100644 --- a/pixman/pixman/pixman-inlines.h +++ b/pixman/pixman/pixman-inlines.h @@ -88,6 +88,42 @@ pixman_fixed_to_bilinear_weight (pixman_fixed_t x)  	   ((1 << BILINEAR_INTERPOLATION_BITS) - 1);  } +#if BILINEAR_INTERPOLATION_BITS <= 4 +/* Inspired by Filter_32_opaque from Skia */ +static force_inline uint32_t 
+bilinear_interpolation (uint32_t tl, uint32_t tr, +			uint32_t bl, uint32_t br, +			int distx, int disty) +{ +    int distxy, distxiy, distixy, distixiy; +    uint32_t lo, hi; + +    distx <<= (4 - BILINEAR_INTERPOLATION_BITS); +    disty <<= (4 - BILINEAR_INTERPOLATION_BITS); + +    distxy = distx * disty; +    distxiy = (distx << 4) - distxy;	/* distx * (16 - disty) */ +    distixy = (disty << 4) - distxy;	/* disty * (16 - distx) */ +    distixiy = +	16 * 16 - (disty << 4) - +	(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ + +    lo = (tl & 0xff00ff) * distixiy; +    hi = ((tl >> 8) & 0xff00ff) * distixiy; + +    lo += (tr & 0xff00ff) * distxiy; +    hi += ((tr >> 8) & 0xff00ff) * distxiy; + +    lo += (bl & 0xff00ff) * distixy; +    hi += ((bl >> 8) & 0xff00ff) * distixy; + +    lo += (br & 0xff00ff) * distxy; +    hi += ((br >> 8) & 0xff00ff) * distxy; + +    return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); +} + +#else  #if SIZEOF_LONG > 4  static force_inline uint32_t @@ -184,6 +220,7 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,  }  #endif +#endif // BILINEAR_INTERPOLATION_BITS <= 4  /*   * For each scanline fetched from source image with PAD repeat: diff --git a/pixman/pixman/pixman-matrix.c b/pixman/pixman/pixman-matrix.c index cd2f1b5b8..89b96826b 100644 --- a/pixman/pixman/pixman-matrix.c +++ b/pixman/pixman/pixman-matrix.c @@ -34,6 +34,338 @@  #define F(x)    pixman_int_to_fixed (x) +static force_inline int +count_leading_zeros (uint32_t x) +{ +#ifdef __GNUC__ +    return __builtin_clz (x); +#else +    int n = 0; +    while (x) +    { +        n++; +        x >>= 1; +    } +    return 32 - n; +#endif +} + +/* + * Large signed/unsigned integer division with rounding for the platforms with + * only 64-bit integer data type supported (no 128-bit data type). 
+ * + * Arguments: + *     hi, lo - high and low 64-bit parts of the dividend + *     div    - 48-bit divisor + * + * Returns: lowest 64 bits of the result as a return value and highest 64 + *          bits of the result to "result_hi" pointer + */ + +/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */ +static force_inline uint64_t +rounded_udiv_128_by_48 (uint64_t  hi, +                        uint64_t  lo, +                        uint64_t  div, +                        uint64_t *result_hi) +{ +    uint64_t tmp, remainder, result_lo; +    assert(div < ((uint64_t)1 << 48)); + +    remainder = hi % div; +    *result_hi = hi / div; + +    tmp = (remainder << 16) + (lo >> 48); +    result_lo = tmp / div; +    remainder = tmp % div; + +    tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF); +    result_lo = (result_lo << 16) + (tmp / div); +    remainder = tmp % div; + +    tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF); +    result_lo = (result_lo << 16) + (tmp / div); +    remainder = tmp % div; + +    tmp = (remainder << 16) + (lo & 0xFFFF); +    result_lo = (result_lo << 16) + (tmp / div); +    remainder = tmp % div; + +    /* round to nearest */ +    if (remainder * 2 >= div && ++result_lo == 0) +        *result_hi += 1; + +    return result_lo; +} + +/* signed division (128-bit by 49-bit) with rounding to nearest */ +static inline int64_t +rounded_sdiv_128_by_49 (int64_t   hi, +                        uint64_t  lo, +                        int64_t   div, +                        int64_t  *signed_result_hi) +{ +    uint64_t result_lo, result_hi; +    int sign = 0; +    if (div < 0) +    { +        div = -div; +        sign ^= 1; +    } +    if (hi < 0) +    { +        if (lo != 0) +            hi++; +        hi = -hi; +        lo = -lo; +        sign ^= 1; +    } +    result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi); +    if (sign) +    { +        if (result_lo != 0) +            result_hi++; +        result_hi = 
-result_hi; +        result_lo = -result_lo; +    } +    if (signed_result_hi) +    { +        *signed_result_hi = result_hi; +    } +    return result_lo; +} + +/* + * Multiply 64.16 fixed point value by (2^scalebits) and convert + * to 128-bit integer. + */ +static force_inline void +fixed_64_16_to_int128 (int64_t  hi, +                       int64_t  lo, +                       int64_t *rhi, +                       int64_t *rlo, +                       int      scalebits) +{ +    /* separate integer and fractional parts */ +    hi += lo >> 16; +    lo &= 0xFFFF; + +    if (scalebits <= 0) +    { +        *rlo = hi >> (-scalebits); +        *rhi = *rlo >> 63; +    } +    else +    { +        *rhi = hi >> (64 - scalebits); +        *rlo = (uint64_t)hi << scalebits; +        if (scalebits < 16) +            *rlo += lo >> (16 - scalebits); +        else +            *rlo += lo << (scalebits - 16); +    } +} + +/* + * Convert 112.16 fixed point value to 48.16 with clamping for the out + * of range values. + */ +static force_inline pixman_fixed_48_16_t +fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag) +{ +    if ((lo >> 63) != hi) +    { +        *clampflag = TRUE; +        return hi >= 0 ? INT64_MAX : INT64_MIN; +    } +    else +    { +        return lo; +    } +} + +/* + * Transform a point with 31.16 fixed point coordinates from the destination + * space to a point with 48.16 fixed point coordinates in the source space. + * No overflows are possible for affine transformations and the results are + * accurate including the least significant bit. Projective transformations + * may overflow, in this case the results are just clamped to return maximum + * or minimum 48.16 values (so that the caller can at least handle the NONE + * and PAD repeats correctly) and the return value is FALSE to indicate that + * such clamping has happened. 
+ */ +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t    *t, +                              const pixman_vector_48_16_t *v, +                              pixman_vector_48_16_t       *result) +{ +    pixman_bool_t clampflag = FALSE; +    int i; +    int64_t tmp[3][2], divint; +    uint16_t divfrac; + +    /* input vector values must have no more than 31 bits (including sign) +     * in the integer part */ +    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[2] <   ((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + +    for (i = 0; i < 3; i++) +    { +        tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); +        tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); +        tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); +        tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); +        tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); +        tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); +    } + +    /* +     * separate 64-bit integer and 16-bit fractional parts for the divisor, +     * which is also scaled by 65536 after fixed point multiplication. 
+     */ +    divint  = tmp[2][0] + (tmp[2][1] >> 16); +    divfrac = tmp[2][1] & 0xFFFF; + +    if (divint == pixman_fixed_1 && divfrac == 0) +    { +        /* +         * this is a simple affine transformation +         */ +        result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); +        result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); +        result->v[2] = pixman_fixed_1; +    } +    else if (divint == 0 && divfrac == 0) +    { +        /* +         * handle zero divisor (if the values are non-zero, set the +         * results to maximum positive or minimum negative) +         */ +        clampflag = TRUE; + +        result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); +        result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + +        if (result->v[0] > 0) +            result->v[0] = INT64_MAX; +        else if (result->v[0] < 0) +            result->v[0] = INT64_MIN; + +        if (result->v[1] > 0) +            result->v[1] = INT64_MAX; +        else if (result->v[1] < 0) +            result->v[1] = INT64_MIN; +    } +    else +    { +        /* +         * projective transformation, analyze the top 32 bits of the divisor +         */ +        int32_t hi32divbits = divint >> 32; +        if (hi32divbits < 0) +            hi32divbits = ~hi32divbits; + +        if (hi32divbits == 0) +        { +            /* the divisor is small, we can actually keep all the bits */ +            int64_t hi, rhi, lo, rlo; +            int64_t div = (divint << 16) + divfrac; + +            fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32); +            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); +            result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + +            fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32); +            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); +            result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); +        } +        else +        { +        
    /* the divisor needs to be reduced to 48 bits */ +            int64_t hi, rhi, lo, rlo, div; +            int shift = 32 - count_leading_zeros (hi32divbits); +            fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift); + +            fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift); +            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); +            result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + +            fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift); +            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); +            result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); +        } +    } +    result->v[2] = pixman_fixed_1; +    return !clampflag; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_affine (const pixman_transform_t    *t, +                                     const pixman_vector_48_16_t *v, +                                     pixman_vector_48_16_t       *result) +{ +    int64_t hi0, lo0, hi1, lo1; + +    /* input vector values must have no more than 31 bits (including sign) +     * in the integer part */ +    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + +    hi0  = (int64_t)t->matrix[0][0] * (v->v[0] >> 16); +    lo0  = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF); +    hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16); +    lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF); +    hi0 += (int64_t)t->matrix[0][2]; + +    hi1  = (int64_t)t->matrix[1][0] * (v->v[0] >> 16); +    lo1  = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF); +    hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16); +    lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF); +    hi1 += (int64_t)t->matrix[1][2]; + +    result->v[0] = hi0 + ((lo0 + 
0x8000) >> 16); +    result->v[1] = hi1 + ((lo1 + 0x8000) >> 16); +    result->v[2] = pixman_fixed_1; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_3d (const pixman_transform_t    *t, +                                 const pixman_vector_48_16_t *v, +                                 pixman_vector_48_16_t       *result) +{ +    int i; +    int64_t tmp[3][2]; + +    /* input vector values must have no more than 31 bits (including sign) +     * in the integer part */ +    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[2] <   ((pixman_fixed_48_16_t)1 << (30 + 16))); +    assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + +    for (i = 0; i < 3; i++) +    { +        tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); +        tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); +        tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); +        tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); +        tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); +        tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); +    } + +    result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); +    result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); +    result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16); +} +  PIXMAN_EXPORT void  pixman_transform_init_identity (struct pixman_transform *matrix)  { @@ -50,69 +382,41 @@ PIXMAN_EXPORT pixman_bool_t  pixman_transform_point_3d (const struct pixman_transform *transform,                             struct pixman_vector *         vector)  { -    struct pixman_vector result; -    pixman_fixed_32_32_t partial; -    pixman_fixed_48_16_t v; -    int i, j; +    pixman_vector_48_16_t tmp; +    tmp.v[0] = vector->vector[0]; +    tmp.v[1] = 
vector->vector[1]; +    tmp.v[2] = vector->vector[2]; -    for (j = 0; j < 3; j++) -    { -	v = 0; -	for (i = 0; i < 3; i++) -	{ -	    partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] * -	               (pixman_fixed_48_16_t) vector->vector[i]); -	    v += (partial + 0x8000) >> 16; -	} -	 -	if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) -	    return FALSE; -	 -	result.vector[j] = (pixman_fixed_t) v; -    } -     -    *vector = result; +    pixman_transform_point_31_16_3d (transform, &tmp, &tmp); -    if (!result.vector[2]) -	return FALSE; +    vector->vector[0] = tmp.v[0]; +    vector->vector[1] = tmp.v[1]; +    vector->vector[2] = tmp.v[2]; -    return TRUE; +    return vector->vector[0] == tmp.v[0] && +           vector->vector[1] == tmp.v[1] && +           vector->vector[2] == tmp.v[2];  }  PIXMAN_EXPORT pixman_bool_t  pixman_transform_point (const struct pixman_transform *transform,                          struct pixman_vector *         vector)  { -    pixman_fixed_32_32_t partial; -    pixman_fixed_34_30_t v[3]; -    pixman_fixed_48_16_t quo; -    int i, j; +    pixman_vector_48_16_t tmp; +    tmp.v[0] = vector->vector[0]; +    tmp.v[1] = vector->vector[1]; +    tmp.v[2] = vector->vector[2]; -    for (j = 0; j < 3; j++) -    { -	v[j] = 0; -	 -	for (i = 0; i < 3; i++) -	{ -	    partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] * -	               (pixman_fixed_32_32_t) vector->vector[i]); -	    v[j] += (partial + 2) >> 2; -	} -    } -     -    if (!((v[2] + 0x8000) >> 16)) -	return FALSE; +    if (!pixman_transform_point_31_16 (transform, &tmp, &tmp)) +        return FALSE; -    for (j = 0; j < 2; j++) -    { -	quo = v[j] / ((v[2] + 0x8000) >> 16); -	if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16) -	    return FALSE; -	vector->vector[j] = (pixman_fixed_t) quo; -    } -     -    vector->vector[2] = pixman_fixed_1; -    return TRUE; +    vector->vector[0] = tmp.v[0]; +    vector->vector[1] = tmp.v[1]; +    
vector->vector[2] = tmp.v[2]; + +    return vector->vector[0] == tmp.v[0] && +           vector->vector[1] == tmp.v[1] && +           vector->vector[2] == tmp.v[2];  }  PIXMAN_EXPORT pixman_bool_t diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index e5ab873ed..cb78a2ed8 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -497,7 +497,7 @@ pixman_implementation_t *  _pixman_implementation_create (pixman_implementation_t *fallback,  			       const pixman_fast_path_t *fast_paths); -pixman_bool_t +void  _pixman_implementation_lookup_composite (pixman_implementation_t  *toplevel,  					 pixman_op_t               op,  					 pixman_format_code_t      src_format, @@ -1052,7 +1052,7 @@ _pixman_log_error (const char *function, const char *message);  #else -#define _pixman_log_error(f,m) do { } while (0)				\ +#define _pixman_log_error(f,m) do { } while (0)  #define return_if_fail(expr)						\      do                                                                  \ @@ -1078,6 +1078,27 @@ _pixman_log_error (const char *function, const char *message);  #endif  /* + * Matrix + */ + +typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t; + +pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t    *t, +                              const pixman_vector_48_16_t *v, +                              pixman_vector_48_16_t       *result); + +void +pixman_transform_point_31_16_3d (const pixman_transform_t    *t, +                                 const pixman_vector_48_16_t *v, +                                 pixman_vector_48_16_t       *result); + +void +pixman_transform_point_31_16_affine (const pixman_transform_t    *t, +                                     const pixman_vector_48_16_t *v, +                                     pixman_vector_48_16_t       *result); + +/*   * Timers   */ diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index 5a0e0626a..fc873cc96 100644 --- 
a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -4523,7 +4523,163 @@ sse2_composite_add_8888_8888 (pixman_implementation_t *imp,  	sse2_combine_add_u (imp, op, dst, src, NULL, width);      } +} + +static void +sse2_composite_add_n_8888 (pixman_implementation_t *imp, +			   pixman_composite_info_t *info) +{ +    PIXMAN_COMPOSITE_ARGS (info); +    uint32_t *dst_line, *dst, src; +    int dst_stride; + +    __m128i xmm_src; + +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); +    if (src == 0) +	return; + +    if (src == ~0) +    { +	pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, +		     dest_x, dest_y, width, height, ~0); + +	return; +    } + +    xmm_src = _mm_set_epi32 (src, src, src, src); +    while (height--) +    { +	int w = width; +	uint32_t d; + +	dst = dst_line; +	dst_line += dst_stride; + +	while (w && (unsigned long)dst & 15) +	{ +	    d = *dst; +	    *dst++ = +		_mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d))); +	    w--; +	} + +	while (w >= 4) +	{ +	    save_128_aligned +		((__m128i*)dst, +		 _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); + +	    dst += 4; +	    w -= 4; +	} + +	while (w--) +	{ +	    d = *dst; +	    *dst++ = +		_mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src, +						  _mm_cvtsi32_si128 (d))); +	} +    } +} + +static void +sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, +			     pixman_composite_info_t *info) +{ +    PIXMAN_COMPOSITE_ARGS (info); +    uint32_t     *dst_line, *dst; +    uint8_t     *mask_line, *mask; +    int dst_stride, mask_stride; +    int32_t w; +    uint32_t src; + +    __m128i xmm_src; + +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); +    if (src == 0) +	return; +    xmm_src = expand_pixel_32_1x128 (src); + +    PIXMAN_IMAGE_GET_LINE ( +	dest_image, dest_x, dest_y, uint32_t, dst_stride, 
dst_line, 1); +    PIXMAN_IMAGE_GET_LINE ( +	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + +    while (height--) +    { +	dst = dst_line; +	dst_line += dst_stride; +	mask = mask_line; +	mask_line += mask_stride; +	w = width; + +	while (w && ((unsigned long)dst & 15)) +	{ +	    uint8_t m = *mask++; +	    if (m) +	    { +		*dst = pack_1x128_32 +		    (_mm_adds_epu16 +		     (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), +		      unpack_32_1x128 (*dst))); +	    } +	    dst++; +	    w--; +	} + +	while (w >= 4) +	{ +	    uint32_t m = *(uint32_t*)mask; +	    if (m) +	    { +		__m128i xmm_mask_lo, xmm_mask_hi; +		__m128i xmm_dst_lo, xmm_dst_hi; + +		__m128i xmm_dst = load_128_aligned ((__m128i*)dst); +		__m128i xmm_mask = +		    _mm_unpacklo_epi8 (unpack_32_1x128(m), +				       _mm_setzero_si128 ()); + +		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); +		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + +		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, +					&xmm_mask_lo, &xmm_mask_hi); + +		pix_multiply_2x128 (&xmm_src, &xmm_src, +				    &xmm_mask_lo, &xmm_mask_hi, +				    &xmm_mask_lo, &xmm_mask_hi); + +		xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); +		xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); + +		save_128_aligned ( +		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); +	    } +	    w -= 4; +	    dst += 4; +	    mask += 4; +	} + +	while (w) +	{ +	    uint8_t m = *mask++; +	    if (m) +	    { +		*dst = pack_1x128_32 +		    (_mm_adds_epu16 +		     (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), +		      unpack_32_1x128 (*dst))); +	    } +	    dst++; +	    w--; +	} +    }  }  static pixman_bool_t @@ -5786,6 +5942,121 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,  			       uint32_t, uint8_t, uint32_t,  			       NORMAL, FLAG_HAVE_NON_SOLID_MASK) +static force_inline void +scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t *       dst, +						const 
uint32_t * mask, +						const uint32_t * src_top, +						const uint32_t * src_bottom, +						int32_t          w, +						int              wt, +						int              wb, +						pixman_fixed_t   vx, +						pixman_fixed_t   unit_x, +						pixman_fixed_t   max_vx, +						pixman_bool_t    zero_src) +{ +    BILINEAR_DECLARE_VARIABLES; +    uint32_t pix1, pix2, pix3, pix4; +    __m128i xmm_mask; + +    if (zero_src || (*mask >> 24) == 0) +	return; + +    xmm_mask = create_mask_16_128 (*mask >> 24); + +    while (w && ((uintptr_t)dst & 15)) +    { +	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); +	if (pix1) +	{ +		uint32_t d = *dst; + +		__m128i ms = unpack_32_1x128 (pix1); +		__m128i alpha     = expand_alpha_1x128 (ms); +		__m128i dest      = xmm_mask; +		__m128i alpha_dst = unpack_32_1x128 (d); + +		*dst = pack_1x128_32 +			(in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); +	} + +	dst++; +	w--; +    } + +    while (w >= 4) +    { +	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); +	BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); +	BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); +	BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + +	if (pix1 | pix2 | pix3 | pix4) +	{ +	    __m128i xmm_src, xmm_src_lo, xmm_src_hi; +	    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; +	    __m128i xmm_alpha_lo, xmm_alpha_hi; + +	    xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + +	    xmm_dst = load_128_aligned ((__m128i*)dst); + +	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); +	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); +	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, +				&xmm_alpha_lo, &xmm_alpha_hi); + +	    in_over_2x128 (&xmm_src_lo, &xmm_src_hi, +			   &xmm_alpha_lo, &xmm_alpha_hi, +			   &xmm_mask, &xmm_mask, +			   &xmm_dst_lo, &xmm_dst_hi); + +	    save_128_aligned +		((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); +	} + +	dst += 4; +	w -= 4; +    } + +    while (w) +    { +	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); +	if (pix1) +	{ +		uint32_t d = *dst; + +		__m128i ms = unpack_32_1x128 
(pix1); +		__m128i alpha     = expand_alpha_1x128 (ms); +		__m128i dest      = xmm_mask; +		__m128i alpha_dst = unpack_32_1x128 (d); + +		*dst = pack_1x128_32 +			(in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); +	} + +	dst++; +	w--; +    } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, +			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER, +			       uint32_t, uint32_t, uint32_t, +			       COVER, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, +			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER, +			       uint32_t, uint32_t, uint32_t, +			       PAD, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, +			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER, +			       uint32_t, uint32_t, uint32_t, +			       NONE, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, +			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER, +			       uint32_t, uint32_t, uint32_t, +			       NORMAL, FLAG_HAVE_SOLID_MASK) +  static const pixman_fast_path_t sse2_fast_paths[] =  {      /* PIXMAN_OP_OVER */ @@ -5848,6 +6119,14 @@ static const pixman_fast_path_t sse2_fast_paths[] =      PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),      PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),      PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8), +    PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888), +    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888), +    PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888), +    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888), +    PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888), +    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888), +    PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, 
sse2_composite_add_n_8_8888), +    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888),      /* PIXMAN_OP_SRC */      PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888), @@ -5912,6 +6191,11 @@ static const pixman_fast_path_t sse2_fast_paths[] =      SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),      SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), +    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), +    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), +    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), +    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), +      SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),      SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),      SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888), diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c index 3fabed161..184f0c4e6 100644 --- a/pixman/pixman/pixman.c +++ b/pixman/pixman/pixman.c @@ -581,11 +581,13 @@ pixman_image_composite32 (pixman_op_t      op,                            int32_t          height)  {      pixman_format_code_t src_format, mask_format, dest_format; -    uint32_t src_flags, mask_flags, dest_flags;      pixman_region32_t region;      pixman_box32_t extents;      pixman_implementation_t *imp;      pixman_composite_func_t func; +    pixman_composite_info_t info; +    const pixman_box32_t *pbox; +    int n;      _pixman_image_validate (src);      if (mask) @@ -593,27 +595,27 @@ pixman_image_composite32 (pixman_op_t      op,      _pixman_image_validate (dest);      src_format = src->common.extended_format_code; -    src_flags = src->common.flags; +    info.src_flags = src->common.flags;      if (mask && !(mask->common.flags & 
FAST_PATH_IS_OPAQUE))      {  	mask_format = mask->common.extended_format_code; -	mask_flags = mask->common.flags; +	info.mask_flags = mask->common.flags;      }      else      {  	mask_format = PIXMAN_null; -	mask_flags = FAST_PATH_IS_OPAQUE; +	info.mask_flags = FAST_PATH_IS_OPAQUE;      }      dest_format = dest->common.extended_format_code; -    dest_flags = dest->common.flags; +    info.dest_flags = dest->common.flags;      /* Check for pixbufs */      if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) &&  	(src->type == BITS && src->bits.bits == mask->bits.bits)	   &&  	(src->common.repeat == mask->common.repeat)			   && -	(src_flags & mask_flags & FAST_PATH_ID_TRANSFORM)		   && +	(info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM)	   &&  	(src_x == mask_x && src_y == mask_y))      {  	if (src_format == PIXMAN_x8b8g8r8) @@ -638,7 +640,7 @@ pixman_image_composite32 (pixman_op_t      op,      extents.x2 -= dest_x - src_x;      extents.y2 -= dest_y - src_y; -    if (!analyze_extent (src, &extents, &src_flags)) +    if (!analyze_extent (src, &extents, &info.src_flags))  	goto out;      extents.x1 -= src_x - mask_x; @@ -646,7 +648,7 @@ pixman_image_composite32 (pixman_op_t      op,      extents.x2 -= src_x - mask_x;      extents.y2 -= src_y - mask_y; -    if (!analyze_extent (mask, &extents, &mask_flags)) +    if (!analyze_extent (mask, &extents, &info.mask_flags))  	goto out;      /* If the clip is within the source samples, and the samples are @@ -659,16 +661,16 @@ pixman_image_composite32 (pixman_op_t      op,  			 FAST_PATH_BILINEAR_FILTER |			\  			 FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR) -    if ((src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || -	(src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) +    if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || +	(info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)      { -	src_flags |= FAST_PATH_IS_OPAQUE; +	info.src_flags |= FAST_PATH_IS_OPAQUE;      } -    if ((mask_flags & 
NEAREST_OPAQUE) == NEAREST_OPAQUE || -	(mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) +    if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || +	(info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)      { -	mask_flags |= FAST_PATH_IS_OPAQUE; +	info.mask_flags |= FAST_PATH_IS_OPAQUE;      }      /* @@ -676,42 +678,35 @@ pixman_image_composite32 (pixman_op_t      op,       * if the src or dest are opaque. The output operator should be       * mathematically equivalent to the source.       */ -    op = optimize_operator (op, src_flags, mask_flags, dest_flags); +    info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags); -    if (_pixman_implementation_lookup_composite ( -	    get_implementation (), op, -	    src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags, -	    &imp, &func)) -    { -	pixman_composite_info_t info; -	const pixman_box32_t *pbox; -	int n; +    _pixman_implementation_lookup_composite ( +	get_implementation (), info.op, +	src_format, info.src_flags, +	mask_format, info.mask_flags, +	dest_format, info.dest_flags, +	&imp, &func); -	info.op = op; -	info.src_image = src; -	info.mask_image = mask; -	info.dest_image = dest; -	info.src_flags = src_flags; -	info.mask_flags = mask_flags; -	info.dest_flags = dest_flags; +    info.src_image = src; +    info.mask_image = mask; +    info.dest_image = dest; -	pbox = pixman_region32_rectangles (&region, &n); +    pbox = pixman_region32_rectangles (&region, &n); -	while (n--) -	{ -	    info.src_x = pbox->x1 + src_x - dest_x; -	    info.src_y = pbox->y1 + src_y - dest_y; -	    info.mask_x = pbox->x1 + mask_x - dest_x; -	    info.mask_y = pbox->y1 + mask_y - dest_y; -	    info.dest_x = pbox->x1; -	    info.dest_y = pbox->y1; -	    info.width = pbox->x2 - pbox->x1; -	    info.height = pbox->y2 - pbox->y1; - -	    func (imp, &info); - -	    pbox++; -	} +    while (n--) +    { +	info.src_x = pbox->x1 + src_x - dest_x; +	info.src_y = pbox->y1 + src_y - dest_y; +	
info.mask_x = pbox->x1 + mask_x - dest_x; +	info.mask_y = pbox->y1 + mask_y - dest_y; +	info.dest_x = pbox->x1; +	info.dest_y = pbox->y1; +	info.width = pbox->x2 - pbox->x1; +	info.height = pbox->y2 - pbox->y1; + +	func (imp, &info); + +	pbox++;      }  out: diff --git a/pixman/test/Makefile.sources b/pixman/test/Makefile.sources index 8c0b505df..e323a8e8c 100644 --- a/pixman/test/Makefile.sources +++ b/pixman/test/Makefile.sources @@ -17,6 +17,7 @@ TESTPROGRAMS =			\  	gradient-crash-test	\  	region-contains-test	\  	alphamap		\ +	matrix-test		\  	stress-test		\  	composite-traps-test	\  	blitters-test		\ diff --git a/pixman/test/affine-test.c b/pixman/test/affine-test.c index 678fbe844..2506250db 100644 --- a/pixman/test/affine-test.c +++ b/pixman/test/affine-test.c @@ -307,11 +307,11 @@ test_composite (int      testnum,  }  #if BILINEAR_INTERPOLATION_BITS == 8 -#define CHECKSUM 0x97097336 +#define CHECKSUM 0x2CDF1F07  #elif BILINEAR_INTERPOLATION_BITS == 7 -#define CHECKSUM 0x31D2DC21 +#define CHECKSUM 0xBC00B1DF  #elif BILINEAR_INTERPOLATION_BITS == 4 -#define CHECKSUM 0x8B925154 +#define CHECKSUM 0xA227306B  #else  #define CHECKSUM 0x00000000  #endif diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c index 2f97b7b24..7336fa0d5 100644 --- a/pixman/test/lowlevel-blt-bench.c +++ b/pixman/test/lowlevel-blt-bench.c @@ -630,6 +630,8 @@ tests_tbl[] =      { "src_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },      { "src_1555_0565",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },      { "src_0565_1555",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 }, +    { "src_8_8",               PIXMAN_a8,          0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8 }, +    { "src_n_8",               PIXMAN_a8,          1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8 },      { "src_n_8_0565",          PIXMAN_a8r8g8b8,  
  1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },      { "src_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },      { "src_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 }, @@ -772,7 +774,7 @@ main (int argc, char *argv[])      for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++)      { -	if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern)) +	if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0)  	{  	    bench_composite (tests_tbl[i].testname,  			     tests_tbl[i].src_fmt, diff --git a/pixman/test/matrix-test.c b/pixman/test/matrix-test.c new file mode 100644 index 000000000..8437dd291 --- /dev/null +++ b/pixman/test/matrix-test.c @@ -0,0 +1,186 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "utils.h" +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> + +#ifdef HAVE_FLOAT128 + +#define pixman_fixed_to_float128(x) (((__float128)(x)) / 65536.0Q) + +typedef struct { __float128 v[3]; } pixman_vector_f128_t; +typedef struct { __float128 m[3][3]; } pixman_transform_f128_t; + +pixman_bool_t +pixman_transform_point_f128 (const pixman_transform_f128_t *t, +                             const pixman_vector_f128_t    *v, +                             pixman_vector_f128_t          *result) +{ +    int i; +    for (i = 0; i < 3; i++) +    { +        result->v[i] = t->m[i][0] * v->v[0] + +                       t->m[i][1] * v->v[1] + +                       t->m[i][2] * v->v[2]; +    } +    if (result->v[2] != 0) +    { +        result->v[0] /= result->v[2]; +        result->v[1] /= result->v[2]; +        result->v[2] = 1; +        return TRUE; +    } +    else +    { +        return FALSE; +    } +} + +pixman_bool_t does_it_fit_fixed_48_16 (__float128 x) +{ +    if (x >= 65536.0Q * 65536.0Q * 32768.0Q) +        return FALSE; +    if (x <= -65536.0Q * 65536.0Q * 32768.0Q) +        return FALSE; +    return TRUE; +} + +#endif + +uint32_t +test_matrix (int testnum, int verbose) +{ +    uint32_t crc32 = 0; +    int i, j, k; +    pixman_bool_t is_affine; + +    prng_srand (testnum); + +    for (i = 0; i < 100; i++) +    { +        pixman_bool_t           transform_ok; +        pixman_transform_t      ti; +        pixman_vector_48_16_t   vi, result_i; +#ifdef HAVE_FLOAT128 +        pixman_transform_f128_t tf; +        pixman_vector_f128_t    vf, result_f; +#endif +        prng_randmemset (&ti, sizeof(ti), 0); +        prng_randmemset (&vi, sizeof(vi), 0); 
+ +        for (j = 0; j < 3; j++) +        { +            /* make sure that "vi" contains 31.16 fixed point data */ +            vi.v[j] >>= 17; +            /* and apply random shift */ +            if (prng_rand_n (3) == 0) +                vi.v[j] >>= prng_rand_n (46); +        } + +        if (prng_rand_n (2)) +        { +            /* random shift for the matrix */ +            for (j = 0; j < 3; j++) +                for (k = 0; k < 3; k++) +                    ti.matrix[j][k] >>= prng_rand_n (30); +        } + +        if (prng_rand_n (2)) +        { +            /* affine matrix */ +            ti.matrix[2][0] = 0; +            ti.matrix[2][1] = 0; +            ti.matrix[2][2] = pixman_fixed_1; +        } + +        if (prng_rand_n (2)) +        { +            /* cartesian coordinates */ +            vi.v[2] = pixman_fixed_1; +        } + +        is_affine = (ti.matrix[2][0] == 0 && ti.matrix[2][1] == 0 && +                     ti.matrix[2][2] == pixman_fixed_1 && +                     vi.v[2] == pixman_fixed_1); + +        transform_ok = TRUE; +        if (is_affine && prng_rand_n (2)) +            pixman_transform_point_31_16_affine (&ti, &vi, &result_i); +        else +            transform_ok = pixman_transform_point_31_16 (&ti, &vi, &result_i); + +        crc32 = compute_crc32 (crc32, &result_i, sizeof(result_i)); + +#ifdef HAVE_FLOAT128 +        /* compare with a reference 128-bit floating point implementation */ +        for (j = 0; j < 3; j++) +        { +            vf.v[j] = pixman_fixed_to_float128 (vi.v[j]); +            for (k = 0; k < 3; k++) +            { +                tf.m[j][k] = pixman_fixed_to_float128 (ti.matrix[j][k]); +            } +        } + +        if (pixman_transform_point_f128 (&tf, &vf, &result_f)) +        { +            if (transform_ok || +                (does_it_fit_fixed_48_16 (result_f.v[0]) && +                 does_it_fit_fixed_48_16 (result_f.v[1]) && +                 does_it_fit_fixed_48_16 
(result_f.v[2]))) +            { +                for (j = 0; j < 3; j++) +                { +                    double diff = fabs (result_f.v[j] - +                                        pixman_fixed_to_float128 (result_i.v[j])); + +                    if (is_affine && diff > (0.51 / 65536.0)) +                    { +                        printf ("%d:%d: bad precision for affine (%.12f)\n", +                               testnum, i, diff); +                        abort (); +                    } +                    else if (diff > (0.71 / 65536.0)) +                    { +                        printf ("%d:%d: bad precision for projective (%.12f)\n", +                               testnum, i, diff); +                        abort (); +                    } +                } +            } +        } +#endif +    } +    return crc32; +} + +int +main (int argc, const char *argv[]) +{ +    return fuzzer_test_main ("matrix", 20000, +			     0xBEBF98C3, +			     test_matrix, argc, argv); +} | 
