diff options
Diffstat (limited to 'mesalib/src/gallium/auxiliary/util')
-rw-r--r-- | mesalib/src/gallium/auxiliary/util/.gitignore | 1 | ||||
-rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_format.c | 32 | ||||
-rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_format.h | 7 | ||||
-rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_format_tests.c | 40 | ||||
-rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_half.h | 101 | ||||
-rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_half.py | 179 | ||||
-rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_math.h | 104 |
7 files changed, 247 insertions, 217 deletions
diff --git a/mesalib/src/gallium/auxiliary/util/.gitignore b/mesalib/src/gallium/auxiliary/util/.gitignore index 5dd0408ef..da74de623 100644 --- a/mesalib/src/gallium/auxiliary/util/.gitignore +++ b/mesalib/src/gallium/auxiliary/util/.gitignore @@ -1,3 +1,2 @@ u_format_srgb.c u_format_table.c -u_half.c diff --git a/mesalib/src/gallium/auxiliary/util/u_format.c b/mesalib/src/gallium/auxiliary/util/u_format.c index cfc4a17a0..6f4529835 100644 --- a/mesalib/src/gallium/auxiliary/util/u_format.c +++ b/mesalib/src/gallium/auxiliary/util/u_format.c @@ -159,6 +159,38 @@ util_format_is_pure_uint(enum pipe_format format) } boolean +util_format_is_array(const struct util_format_description *desc) +{ + unsigned chan; + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB || + desc->block.width != 1 || + desc->block.height != 1) { + return FALSE; + } + + for (chan = 0; chan < desc->nr_channels; ++chan) { + if (desc->swizzle[chan] != chan) + return FALSE; + + if (desc->channel[chan].type != desc->channel[0].type) + return FALSE; + + if (desc->channel[chan].normalized != desc->channel[0].normalized) + return FALSE; + + if (desc->channel[chan].pure_integer != desc->channel[0].pure_integer) + return FALSE; + + if (desc->channel[chan].size != desc->channel[0].size) + return FALSE; + } + + return TRUE; +} + +boolean util_format_is_luminance_alpha(enum pipe_format format) { const struct util_format_description *desc = diff --git a/mesalib/src/gallium/auxiliary/util/u_format.h b/mesalib/src/gallium/auxiliary/util/u_format.h index 1718fb5e2..e35e164b4 100644 --- a/mesalib/src/gallium/auxiliary/util/u_format.h +++ b/mesalib/src/gallium/auxiliary/util/u_format.h @@ -591,6 +591,13 @@ boolean util_format_is_pure_uint(enum pipe_format format); /** + * Whether the format is a simple array format where all channels + * are of the same type and can be loaded from memory as a vector + */ +boolean +util_format_is_array(const struct util_format_description *desc); + +/** * Check if the src format can be blitted to the destination format with * a simple memcpy. For example, blitting from RGBA to RGBx is OK, but not * the reverse. diff --git a/mesalib/src/gallium/auxiliary/util/u_format_tests.c b/mesalib/src/gallium/auxiliary/util/u_format_tests.c index fc29d8d48..457fda6c8 100644 --- a/mesalib/src/gallium/auxiliary/util/u_format_tests.c +++ b/mesalib/src/gallium/auxiliary/util/u_format_tests.c @@ -26,6 +26,9 @@ **************************************************************************/ +#include <float.h> + +#include "pipe/p_config.h" #include "u_memory.h" #include "u_format_tests.h" @@ -63,6 +66,9 @@ {{ 0, 0, 0, 0}, { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}} +#define NAN (0.0 / 0.0) +#define INF (1.0 / 0.0) + /** * Test cases. * @@ -876,7 +882,39 @@ util_format_test_cases[] = * Half float formats */ - {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + /* Minimum positive normal */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0400), UNPACKED_1x1( 6.10352E-5, 0.0, 0.0, 1.0)}, + + /* Max denormal */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x03FF), UNPACKED_1x1( 6.09756E-5, 0.0, 0.0, 1.0)}, + + /* Minimum positive denormal */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0001), UNPACKED_1x1( 5.96046E-8, 0.0, 0.0, 1.0)}, + + /* Min representable value */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xfbff), UNPACKED_1x1( -65504.0, 0.0, 0.0, 1.0)}, + + /* Max representable value */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7bff), UNPACKED_1x1( 65504.0, 0.0, 0.0, 1.0)}, + +#if !defined(PIPE_CC_MSVC) + + /* NaNs */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7c01), UNPACKED_1x1( NAN, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xfc01), UNPACKED_1x1( -NAN, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), UNPACKED_1x1( NAN, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1( -NAN, 0.0, 0.0, 1.0)}, + + /* Inf */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7c00), UNPACKED_1x1( INF, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xfc00), UNPACKED_1x1( -INF, 0.0, 0.0, 1.0)}, + +#endif + + /* Zero, ignore sign */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0x7fff), PACKED_1x16(0x8000), UNPACKED_1x1( -0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0x7fff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x3c00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xbc00), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, diff --git a/mesalib/src/gallium/auxiliary/util/u_half.h b/mesalib/src/gallium/auxiliary/util/u_half.h index ad030e90c..f7009f548 100644 --- a/mesalib/src/gallium/auxiliary/util/u_half.h +++ b/mesalib/src/gallium/auxiliary/util/u_half.h @@ -35,51 +35,84 @@ extern "C" { #endif -extern const uint32_t util_half_to_float_mantissa_table[2048]; -extern const uint32_t util_half_to_float_exponent_table[64]; -extern const uint32_t util_half_to_float_offset_table[64]; -extern const uint16_t util_float_to_half_base_table[512]; -extern const uint8_t util_float_to_half_shift_table[512]; - /* - * Note that if the half float is a signaling NaN, the x87 FPU will turn - * it into a quiet NaN immediately upon loading into a float. - * - * Additionally, denormals may be flushed to zero. + * References for float <-> half conversions * - * To avoid this, use the floatui functions instead of the float ones - * when just doing conversion rather than computation on the resulting - * floats. + * http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ + * https://gist.github.com/2156668 + * https://gist.github.com/2144712 */ -static INLINE uint32_t -util_half_to_floatui(uint16_t h) +static INLINE uint16_t +util_float_to_half(float f) { - unsigned exp = h >> 10; - return util_half_to_float_mantissa_table[util_half_to_float_offset_table[exp] + (h & 0x3ff)] + util_half_to_float_exponent_table[exp]; + uint32_t sign_mask = 0x80000000; + uint32_t round_mask = ~0xfff; + uint32_t f32inf = 0xff << 23; + uint32_t f16inf = 0x1f << 23; + uint32_t sign; + union fi magic; + union fi f32; + uint16_t f16; + + magic.ui = 0xf << 23; + + f32.f = f; + + /* Sign */ + sign = f32.ui & sign_mask; + f32.ui ^= sign; + + if (f32.ui == f32inf) { + /* Inf */ + f16 = 0x7c00; + } else if (f32.ui > f32inf) { + /* NaN */ + f16 = 0x7e00; + } else { + /* Number */ + f32.ui &= round_mask; + f32.f *= magic.f; + f32.ui -= round_mask; + + /* Clamp to infinity if overflowed */ + if (f32.ui > f16inf) + f32.ui = f16inf; + + f16 = f32.ui >> 13; + } + + /* Sign */ + f16 |= sign >> 16; + + return f16; } static INLINE float -util_half_to_float(uint16_t h) +util_half_to_float(uint16_t f16) { - union fi r; - r.ui = util_half_to_floatui(h); - return r.f; -} + union fi infnan; + union fi magic; + union fi f32; -static INLINE uint16_t -util_floatui_to_half(uint32_t v) -{ - unsigned signexp = v >> 23; - return util_float_to_half_base_table[signexp] + ((v & 0x007fffff) >> util_float_to_half_shift_table[signexp]); -} + infnan.ui = 0x8f << 23; + infnan.f = 65536.0f; + magic.ui = 0xef << 23; -static INLINE uint16_t -util_float_to_half(float f) -{ - union fi i; - i.f = f; - return util_floatui_to_half(i.ui); + /* Exponent / Mantissa */ + f32.ui = (f16 & 0x7fff) << 13; + + /* Adjust */ + f32.f *= magic.f; + + /* Inf / NaN */ + if (f32.f >= infnan.f) + f32.ui |= 0xff << 23; + + /* Sign */ + f32.ui |= (f16 & 0x8000) << 16; + + return f32.f; } #ifdef __cplusplus diff --git a/mesalib/src/gallium/auxiliary/util/u_half.py b/mesalib/src/gallium/auxiliary/util/u_half.py deleted file mode 100644 index 915cf3b92..000000000 --- a/mesalib/src/gallium/auxiliary/util/u_half.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright 2010 Luca Barbieri -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice (including the -# next paragraph) shall be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# ************************************************************************* - -# The code is a reimplementation of the algorithm in -# www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf -# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008 -# -# The table contents have been slightly changed so that the exponent -# bias is now in the exponent table instead of the mantissa table (mostly -# for cosmetic reasons, and because it theoretically allows a variant -# that flushes denormal to zero but uses a mantissa table with 24-bit -# entries). -# -# The tables are also constructed slightly differently. -# - -# Note that using a 64K * 4 table is a terrible idea since it will not fit -# in the L1 cache and will massively pollute the L2 cache as well -# -# These should instead fit in the L1 cache. -# -# TODO: we could use a denormal bias table instead of the mantissa/offset -# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes -# but would involve more computation -# -# Note however that if denormals are never encountered, the L1 cache usage -# is only about 4608 bytes anyway. - -table_index = None -table_length = None - -def begin(t, n, l): - global table_length - global table_index - table_index = 0 - table_length = l - print - print "const " + t + " " + n + "[" + str(l) + "] = {" - -def value(v): - global table_index - table_index += 1 - print "\t" + hex(v) + "," - -def end(): - global table_length - global table_index - print "};" - assert table_index == table_length - -print "/* This file is autogenerated by u_half.py. Do not edit directly. */" -print "#include \"util/u_half.h\"" - -begin("uint32_t", "util_half_to_float_mantissa_table", 2048) -# zero -value(0) - -# denormals -for i in xrange(1, 1024): - m = i << 13 - e = 0 - - # normalize number - while (m & 0x00800000) == 0: - e -= 0x00800000 - m <<= 1 - - m &= ~0x00800000 - e += 0x38800000 - value(m | e) - -# normals -for i in xrange(1024, 2048): - value((i - 1024) << 13) -end() - -begin("uint32_t", "util_half_to_float_exponent_table", 64) -# positive zero or denormals -value(0) - -# positive numbers -for i in xrange(1, 31): - value(0x38000000 + (i << 23)) - -# positive infinity/NaN -value(0x7f800000) - -# negative zero or denormals -value(0x80000000) - -# negative numbers -for i in range(33, 63): - value(0xb8000000 + ((i - 32) << 23)) - -# negative infinity/NaN -value(0xff800000) -end() - -begin("uint32_t", "util_half_to_float_offset_table", 64) -# positive zero or denormals -value(0) - -# positive normals -for i in range(1, 32): - value(1024) - -# negative zero or denormals -value(0) - -# negative normals -for i in xrange(33, 64): - value(1024) -end() - -begin("uint16_t", "util_float_to_half_base_table", 512) -for sign in (0, 0x8000): - # very small numbers mapping to zero - for i in xrange(-127, -24): - value(sign | 0) - - # small numbers mapping to denormals - for i in xrange(-24, -14): - value(sign | (0x400 >> (-14 -i))) - - # normal numbers - for i in xrange(-14, 16): - value(sign | ((i + 15) << 10)) - - # large numbers mapping to infinity - for i in xrange(16, 128): - value(sign | 0x7c00) - - # infinity and NaNs - value(sign | 0x7c00) -end() - -begin("uint8_t", "util_float_to_half_shift_table", 512) -for sign in (0, 0x8000): - # very small numbers mapping to zero - for i in xrange(-127, -24): - value(24) - - # small numbers mapping to denormals - for i in xrange(-24, -14): - value(-1 - i) - - # normal numbers - for i in xrange(-14, 16): - value(13) - - # large numbers mapping to infinity - for i in xrange(16, 128): - value(24) - - # infinity and NaNs - value(13) -end() - diff --git a/mesalib/src/gallium/auxiliary/util/u_math.h b/mesalib/src/gallium/auxiliary/util/u_math.h index f6196665f..724b136b5 100644 --- a/mesalib/src/gallium/auxiliary/util/u_math.h +++ b/mesalib/src/gallium/auxiliary/util/u_math.h @@ -183,6 +183,13 @@ union fi { }; +union di { + double d; + int64_t i; + uint64_t ui; +}; + + /** * Fast version of 2^x * Identity: exp2(a + b) = exp2(a) * exp2(b) @@ -325,14 +332,107 @@ util_is_approx(float a, float b, float tol) /** - * Test if x is NaN or +/- infinity. + * util_is_X_inf_or_nan = test if x is NaN or +/- Inf + * util_is_X_nan = test if x is NaN + * util_X_inf_sign = return +1 for +Inf, -1 for -Inf, or 0 for not Inf + * + * NaN can be checked with x != x, however this fails with the fast math flag + **/ + + +/** + * Single-float */ static INLINE boolean util_is_inf_or_nan(float x) { union fi tmp; tmp.f = x; - return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31); + return (tmp.ui & 0x7f800000) == 0x7f800000; +} + + +static INLINE boolean +util_is_nan(float x) +{ + union fi tmp; + tmp.f = x; + return (tmp.ui & 0x7fffffff) > 0x7f800000; +} + + +static INLINE int +util_inf_sign(float x) +{ + union fi tmp; + tmp.f = x; + if ((tmp.ui & 0x7fffffff) != 0x7f800000) { + return 0; + } + + return (x < 0) ? -1 : 1; +} + + +/** + * Double-float + */ +static INLINE boolean +util_is_double_inf_or_nan(double x) +{ + union di tmp; + tmp.d = x; + return (tmp.ui & 0x7ff0000000000000) == 0x7ff0000000000000; +} + + +static INLINE boolean +util_is_double_nan(double x) +{ + union di tmp; + tmp.d = x; + return (tmp.ui & 0x7fffffffffffffff) > 0x7ff0000000000000; +} + + +static INLINE int +util_double_inf_sign(double x) +{ + union di tmp; + tmp.d = x; + if ((tmp.ui & 0x7fffffffffffffff) != 0x7ff0000000000000) { + return 0; + } + + return (x < 0) ? -1 : 1; +} + + +/** + * Half-float + */ +static INLINE boolean +util_is_half_inf_or_nan(int16_t x) +{ + return (x & 0x7c00) == 0x7c00; +} + + +static INLINE boolean +util_is_half_nan(int16_t x) +{ + return (x & 0x7fff) > 0x7c00; +} + + +static INLINE int +util_half_inf_sign(int16_t x) +{ + if ((x & 0x7fff) != 0x7c00) { + return 0; + } + + return (x < 0) ? -1 : 1; } |