7 files changed, 247 insertions, 217 deletions
diff --git a/mesalib/src/gallium/auxiliary/util/.gitignore b/mesalib/src/gallium/auxiliary/util/.gitignore
index 5dd0408ef..da74de623 100644
--- a/mesalib/src/gallium/auxiliary/util/.gitignore
+++ b/mesalib/src/gallium/auxiliary/util/.gitignore
@@ -1,3 +1,2 @@
 u_format_srgb.c
 u_format_table.c
-u_half.c
diff --git a/mesalib/src/gallium/auxiliary/util/u_format.c b/mesalib/src/gallium/auxiliary/util/u_format.c
index cfc4a17a0..6f4529835 100644
--- a/mesalib/src/gallium/auxiliary/util/u_format.c
+++ b/mesalib/src/gallium/auxiliary/util/u_format.c
@@ -159,6 +159,38 @@ util_format_is_pure_uint(enum pipe_format format)
 }
 
 boolean
+util_format_is_array(const struct util_format_description *desc)
+{
+   unsigned chan;
+
+   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+       desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
+       desc->block.width != 1 ||
+       desc->block.height != 1) {
+      return FALSE;
+   }
+
+   for (chan = 0; chan < desc->nr_channels; ++chan) {
+      if (desc->swizzle[chan] != chan)
+         return FALSE;
+
+      if (desc->channel[chan].type != desc->channel[0].type)
+         return FALSE;
+
+      if (desc->channel[chan].normalized != desc->channel[0].normalized)
+         return FALSE;
+
+      if (desc->channel[chan].pure_integer != desc->channel[0].pure_integer)
+         return FALSE;
+
+      if (desc->channel[chan].size != desc->channel[0].size)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+boolean
 util_format_is_luminance_alpha(enum pipe_format format)
 {
    const struct util_format_description *desc =
diff --git a/mesalib/src/gallium/auxiliary/util/u_format.h b/mesalib/src/gallium/auxiliary/util/u_format.h
index 1718fb5e2..e35e164b4 100644
--- a/mesalib/src/gallium/auxiliary/util/u_format.h
+++ b/mesalib/src/gallium/auxiliary/util/u_format.h
@@ -591,6 +591,13 @@ boolean
 util_format_is_pure_uint(enum pipe_format format);
 
 /**
+ * Whether the format is a simple array format where all channels
+ * are of the same type and can be loaded from memory as a vector
+ */
+boolean
+util_format_is_array(const struct util_format_description *desc);
+
+/**
  * Check if the src format can be blitted to the destination format with
  * a simple memcpy.  For example, blitting from RGBA to RGBx is OK, but not
  * the reverse.
diff --git a/mesalib/src/gallium/auxiliary/util/u_format_tests.c b/mesalib/src/gallium/auxiliary/util/u_format_tests.c
index fc29d8d48..457fda6c8 100644
--- a/mesalib/src/gallium/auxiliary/util/u_format_tests.c
+++ b/mesalib/src/gallium/auxiliary/util/u_format_tests.c
@@ -26,6 +26,9 @@
  **************************************************************************/
 
 
+#include <float.h>
+
+#include "pipe/p_config.h"
 #include "u_memory.h"
 #include "u_format_tests.h"
 
@@ -63,6 +66,9 @@
        {{ 0,  0,  0,  0}, { 0,  0,  0,  0}, {0, 0, 0, 0}, {0, 0, 0, 0}}}
 
 
+#define NAN (0.0 / 0.0)
+#define INF (1.0 / 0.0)
+
 /**
  * Test cases.
  *
@@ -876,7 +882,39 @@ util_format_test_cases[] =
     * Half float formats
     */
 
-   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(  0.0, 0.0, 0.0, 1.0)},
+   /* Minimum positive normal */
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0400), UNPACKED_1x1( 6.10352E-5, 0.0, 0.0, 1.0)},
+
+   /* Max denormal */
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x03FF), UNPACKED_1x1( 6.09756E-5, 0.0, 0.0, 1.0)},
+
+   /* Minimum positive denormal */
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0001), UNPACKED_1x1( 5.96046E-8, 0.0, 0.0, 1.0)},
+
+   /* Min representable value */
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xfbff), UNPACKED_1x1(   -65504.0, 0.0, 0.0, 1.0)},
+
+   /* Max representable value */
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7bff), UNPACKED_1x1(    65504.0, 0.0, 0.0, 1.0)},
+
+#if !defined(PIPE_CC_MSVC)
+
+   /* NaNs */
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7c01), UNPACKED_1x1(        NAN, 0.0, 0.0, 1.0)},
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xfc01), UNPACKED_1x1(       -NAN, 0.0, 0.0, 1.0)},
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), UNPACKED_1x1(        NAN, 0.0, 0.0, 1.0)},
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1(       -NAN, 0.0, 0.0, 1.0)},
+
+   /* Inf */
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7c00), UNPACKED_1x1(        INF, 0.0, 0.0, 1.0)},
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xfc00), UNPACKED_1x1(       -INF, 0.0, 0.0, 1.0)},
+
+#endif
+
+   /* Zero, ignore sign */
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0x7fff), PACKED_1x16(0x8000), UNPACKED_1x1( -0.0, 0.0, 0.0, 1.0)},
+   {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0x7fff), PACKED_1x16(0x0000), UNPACKED_1x1(  0.0, 0.0, 0.0, 1.0)},
+
    {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x3c00), UNPACKED_1x1(  1.0, 0.0, 0.0, 1.0)},
    {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xbc00), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)},
 
diff --git a/mesalib/src/gallium/auxiliary/util/u_half.h b/mesalib/src/gallium/auxiliary/util/u_half.h
index ad030e90c..f7009f548 100644
--- a/mesalib/src/gallium/auxiliary/util/u_half.h
+++ b/mesalib/src/gallium/auxiliary/util/u_half.h
@@ -35,51 +35,84 @@
 extern "C" {
 #endif
 
-extern const uint32_t util_half_to_float_mantissa_table[2048];
-extern const uint32_t util_half_to_float_exponent_table[64];
-extern const uint32_t util_half_to_float_offset_table[64];
-extern const uint16_t util_float_to_half_base_table[512];
-extern const uint8_t util_float_to_half_shift_table[512];
-
 /*
- * Note that if the half float is a signaling NaN, the x87 FPU will turn
- * it into a quiet NaN immediately upon loading into a float.
- *
- * Additionally, denormals may be flushed to zero.
+ * References for float <-> half conversions
  *
- * To avoid this, use the floatui functions instead of the float ones
- * when just doing conversion rather than computation on the resulting
- * floats.
+ *  http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
+ *  https://gist.github.com/2156668
+ *  https://gist.github.com/2144712
  */
 
-static INLINE uint32_t
-util_half_to_floatui(uint16_t h)
+static INLINE uint16_t
+util_float_to_half(float f)
 {
-   unsigned exp = h >> 10;
-   return util_half_to_float_mantissa_table[util_half_to_float_offset_table[exp] + (h & 0x3ff)] + util_half_to_float_exponent_table[exp];
+   uint32_t sign_mask  = 0x80000000;
+   uint32_t round_mask = ~0xfff;
+   uint32_t f32inf = 0xff << 23;
+   uint32_t f16inf = 0x1f << 23;
+   uint32_t sign;
+   union fi magic;
+   union fi f32;
+   uint16_t f16;
+
+   magic.ui = 0xf << 23;
+
+   f32.f = f;
+
+   /* Sign */
+   sign = f32.ui & sign_mask;
+   f32.ui ^= sign;
+
+   if (f32.ui == f32inf) {
+      /* Inf */
+      f16 = 0x7c00;
+   } else if (f32.ui > f32inf) {
+      /* NaN */
+      f16 = 0x7e00;
+   } else {
+      /* Number */
+      f32.ui &= round_mask;
+      f32.f  *= magic.f;
+      f32.ui -= round_mask;
+
+      /* Clamp to infinity if overflowed */
+      if (f32.ui > f16inf)
+         f32.ui = f16inf;
+
+      f16 = f32.ui >> 13;
+   }
+
+   /* Sign */
+   f16 |= sign >> 16;
+
+   return f16;
 }
 
 static INLINE float
-util_half_to_float(uint16_t h)
+util_half_to_float(uint16_t f16)
 {
-   union fi r;
-   r.ui = util_half_to_floatui(h);
-   return r.f;
-}
+   union fi infnan;
+   union fi magic;
+   union fi f32;
 
-static INLINE uint16_t
-util_floatui_to_half(uint32_t v)
-{
-   unsigned signexp = v >> 23;
-   return util_float_to_half_base_table[signexp] + ((v & 0x007fffff) >> util_float_to_half_shift_table[signexp]);
-}
+   infnan.ui = 0x8f << 23;
+   infnan.f = 65536.0f;
+   magic.ui  = 0xef << 23;
 
-static INLINE uint16_t
-util_float_to_half(float f)
-{
-   union fi i;
-   i.f = f;
-   return util_floatui_to_half(i.ui);
+   /* Exponent / Mantissa */
+   f32.ui = (f16 & 0x7fff) << 13;
+
+   /* Adjust */
+   f32.f *= magic.f;
+
+   /* Inf / NaN */
+   if (f32.f >= infnan.f)
+      f32.ui |= 0xff << 23;
+
+   /* Sign */
+   f32.ui |= (f16 & 0x8000) << 16;
+
+   return f32.f;
 }
 
 #ifdef __cplusplus
diff --git a/mesalib/src/gallium/auxiliary/util/u_half.py b/mesalib/src/gallium/auxiliary/util/u_half.py
deleted file mode 100644
index 915cf3b92..000000000
--- a/mesalib/src/gallium/auxiliary/util/u_half.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# Copyright 2010 Luca Barbieri
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial
-# portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-# *************************************************************************
-
-# The code is a reimplementation of the algorithm in
-#  www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
-# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
-#
-# The table contents have been slightly changed so that the exponent
-# bias is now in the exponent table instead of the mantissa table (mostly
-# for cosmetic reasons, and because it theoretically allows a variant
-# that flushes denormal to zero but uses a mantissa table with 24-bit
-# entries).
-#
-# The tables are also constructed slightly differently.
-#
-
-# Note that using a 64K * 4 table is a terrible idea since it will not fit
-# in the L1 cache and will massively pollute the L2 cache as well
-#
-# These should instead fit in the L1 cache.
-#
-# TODO: we could use a denormal bias table instead of the mantissa/offset
-# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
-# but would involve more computation
-#
-# Note however that if denormals are never encountered, the L1 cache usage
-# is only about 4608 bytes anyway.
-
-table_index = None
-table_length = None
-
-def begin(t, n, l):
-	global table_length
-	global table_index
-	table_index = 0
-	table_length = l
-	print
-	print "const " + t + " " + n + "[" + str(l) + "] = {"
-
-def value(v):
-	global table_index
-	table_index += 1
-	print "\t" + hex(v) + ","
-
-def end():
-	global table_length
-	global table_index
-	print "};"
-	assert table_index == table_length
-
-print "/* This file is autogenerated by u_half.py. Do not edit directly. */"
-print "#include \"util/u_half.h\""
-
-begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
-# zero
-value(0)
-
-# denormals
-for i in xrange(1, 1024):
-	m = i << 13
-	e = 0
-
-	# normalize number
-	while (m & 0x00800000) == 0:
-		e -= 0x00800000
-		m <<= 1
-
-	m &= ~0x00800000
-	e += 0x38800000
-	value(m | e)
-
-# normals
-for i in xrange(1024, 2048):
-	value((i - 1024) << 13)
-end()
-
-begin("uint32_t", "util_half_to_float_exponent_table", 64)
-# positive zero or denormals
-value(0)
-
-# positive numbers
-for i in xrange(1, 31):
-	value(0x38000000 + (i << 23))
-
-# positive infinity/NaN
-value(0x7f800000)
-
-# negative zero or denormals
-value(0x80000000)
-
-# negative numbers
-for i in range(33, 63):
-	value(0xb8000000 + ((i - 32) << 23))
-
-# negative infinity/NaN
-value(0xff800000)
-end()
-
-begin("uint32_t", "util_half_to_float_offset_table", 64)
-# positive zero or denormals
-value(0)
-
-# positive normals
-for i in range(1, 32):
-	value(1024)
-
-# negative zero or denormals
-value(0)
-
-# negative normals
-for i in xrange(33, 64):
-	value(1024)
-end()
-
-begin("uint16_t", "util_float_to_half_base_table", 512)
-for sign in (0, 0x8000):
-	# very small numbers mapping to zero
-	for i in xrange(-127, -24):
-		value(sign | 0)
-
-	# small numbers mapping to denormals
-	for i in xrange(-24, -14):
-		value(sign | (0x400 >> (-14 -i)))
-
-	# normal numbers
-	for i in xrange(-14, 16):
-		value(sign | ((i + 15) << 10))
-
-	# large numbers mapping to infinity
-	for i in xrange(16, 128):
-		value(sign | 0x7c00)
-
-	# infinity and NaNs
-	value(sign | 0x7c00)
-end()
-
-begin("uint8_t", "util_float_to_half_shift_table", 512)
-for sign in (0, 0x8000):
-	# very small numbers mapping to zero
-	for i in xrange(-127, -24):
-		value(24)
-
-	# small numbers mapping to denormals
-	for i in xrange(-24, -14):
-		value(-1 - i)
-
-	# normal numbers
-	for i in xrange(-14, 16):
-		value(13)
-
-	# large numbers mapping to infinity
-	for i in xrange(16, 128):
-		value(24)
-
-	# infinity and NaNs
-	value(13)
-end()
-
diff --git a/mesalib/src/gallium/auxiliary/util/u_math.h b/mesalib/src/gallium/auxiliary/util/u_math.h
index f6196665f..724b136b5 100644
--- a/mesalib/src/gallium/auxiliary/util/u_math.h
+++ b/mesalib/src/gallium/auxiliary/util/u_math.h
@@ -183,6 +183,13 @@ union fi {
 };
 
 
+union di {
+   double d;
+   int64_t i;
+   uint64_t ui;
+};
+
+
 /**
  * Fast version of 2^x
  * Identity: exp2(a + b) = exp2(a) * exp2(b)
@@ -325,14 +332,107 @@ util_is_approx(float a, float b, float tol)
 
 
 /**
- * Test if x is NaN or +/- infinity.
+ * util_is_X_inf_or_nan = test if x is NaN or +/- Inf
+ * util_is_X_nan        = test if x is NaN
+ * util_X_inf_sign      = return +1 for +Inf, -1 for -Inf, or 0 for not Inf
+ *
+ * NaN can be checked with x != x, however this fails with the fast math flag
+ **/
+
+
+/**
+ * Single-float
  */
 static INLINE boolean
 util_is_inf_or_nan(float x)
 {
    union fi tmp;
    tmp.f = x;
-   return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31);
+   return (tmp.ui & 0x7f800000) == 0x7f800000;
+}
+
+
+static INLINE boolean
+util_is_nan(float x)
+{
+   union fi tmp;
+   tmp.f = x;
+   return (tmp.ui & 0x7fffffff) > 0x7f800000;
+}
+
+
+static INLINE int
+util_inf_sign(float x)
+{
+   union fi tmp;
+   tmp.f = x;
+   if ((tmp.ui & 0x7fffffff) != 0x7f800000) {
+      return 0;
+   }
+
+   return (x < 0) ? -1 : 1;
+}
+
+
+/**
+ * Double-float
+ */
+static INLINE boolean
+util_is_double_inf_or_nan(double x)
+{
+   union di tmp;
+   tmp.d = x;
+   return (tmp.ui & 0x7ff0000000000000) == 0x7ff0000000000000;
+}
+
+
+static INLINE boolean
+util_is_double_nan(double x)
+{
+   union di tmp;
+   tmp.d = x;
+   return (tmp.ui & 0x7fffffffffffffff) > 0x7ff0000000000000;
+}
+
+
+static INLINE int
+util_double_inf_sign(double x)
+{
+   union di tmp;
+   tmp.d = x;
+   if ((tmp.ui & 0x7fffffffffffffff) != 0x7ff0000000000000) {
+      return 0;
+   }
+
+   return (x < 0) ? -1 : 1;
+}
+
+
+/**
+ * Half-float
+ */
+static INLINE boolean
+util_is_half_inf_or_nan(int16_t x)
+{
+   return (x & 0x7c00) == 0x7c00;
+}
+
+
+static INLINE boolean
+util_is_half_nan(int16_t x)
+{
+   return (x & 0x7fff) > 0x7c00;
+}
+
+
+static INLINE int
+util_half_inf_sign(int16_t x)
+{
+   if ((x & 0x7fff) != 0x7c00) {
+      return 0;
+   }
+
+   return (x < 0) ? -1 : 1;
 }