diff options
Diffstat (limited to 'mesalib/src/gallium/auxiliary/util/u_half.py')
-rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_half.py | 358 |
1 files changed, 179 insertions, 179 deletions
diff --git a/mesalib/src/gallium/auxiliary/util/u_half.py b/mesalib/src/gallium/auxiliary/util/u_half.py index 88ee43c25..915cf3b92 100644 --- a/mesalib/src/gallium/auxiliary/util/u_half.py +++ b/mesalib/src/gallium/auxiliary/util/u_half.py @@ -1,179 +1,179 @@ -# Copyright 2010 Luca Barbieri
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial
-# portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-# *************************************************************************
-
-# The code is a reimplementation of the algorithm in
-# www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
-# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
-#
-# The table contents have been slightly changed so that the exponent
-# bias is now in the exponent table instead of the mantissa table (mostly
-# for cosmetic reasons, and because it theoretically allows a variant
-# that flushes denormal to zero but uses a mantissa table with 24-bit
-# entries).
-#
-# The tables are also constructed slightly differently.
-#
-
-# Note that using a 64K * 4 table is a terrible idea since it will not fit
-# in the L1 cache and will massively pollute the L2 cache as well
-#
-# These should instead fit in the L1 cache.
-#
-# TODO: we could use a denormal bias table instead of the mantissa/offset
-# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
-# but would involve more computation
-#
-# Note however that if denormals are never encountered, the L1 cache usage
-# is only about 4608 bytes anyway.
-
-table_index = None
-table_length = None
-
-def begin(t, n, l):
- global table_length
- global table_index
- table_index = 0
- table_length = l
- print
- print "const " + t + " " + n + "[" + str(l) + "] = {"
-
-def value(v):
- global table_index
- table_index += 1
- print "\t" + hex(v) + ","
-
-def end():
- global table_length
- global table_index
- print "};"
- assert table_index == table_length
-
-print "/* This file is autogenerated by u_half.py. Do not edit directly. */"
-print "#include \"util/u_half.h\""
-
-begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
-# zero
-value(0)
-
-# denormals
-for i in xrange(1, 1024):
- m = i << 13
- e = 0
-
- # normalize number
- while (m & 0x00800000) == 0:
- e -= 0x00800000
- m <<= 1
-
- m &= ~0x00800000
- e += 0x38800000
- value(m | e)
-
-# normals
-for i in xrange(1024, 2048):
- value((i - 1024) << 13)
-end()
-
-begin("uint32_t", "util_half_to_float_exponent_table", 64)
-# positive zero or denormals
-value(0)
-
-# positive numbers
-for i in xrange(1, 31):
- value(0x38000000 + (i << 23))
-
-# positive infinity/NaN
-value(0x7f800000)
-
-# negative zero or denormals
-value(0x80000000)
-
-# negative numbers
-for i in range(33, 63):
- value(0xb8000000 + ((i - 32) << 23))
-
-# negative infinity/NaN
-value(0xff800000)
-end()
-
-begin("uint32_t", "util_half_to_float_offset_table", 64)
-# positive zero or denormals
-value(0)
-
-# positive normals
-for i in range(1, 32):
- value(1024)
-
-# negative zero or denormals
-value(0)
-
-# negative normals
-for i in xrange(33, 64):
- value(1024)
-end()
-
-begin("uint16_t", "util_float_to_half_base_table", 512)
-for sign in (0, 0x8000):
- # very small numbers mapping to zero
- for i in xrange(-127, -24):
- value(sign | 0)
-
- # small numbers mapping to denormals
- for i in xrange(-24, -14):
- value(sign | (0x400 >> (-14 -i)))
-
- # normal numbers
- for i in xrange(-14, 16):
- value(sign | ((i + 15) << 10))
-
- # large numbers mapping to infinity
- for i in xrange(16, 128):
- value(sign | 0x7c00)
-
- # infinity and NaNs
- value(sign | 0x7c00)
-end()
-
-begin("uint8_t", "util_float_to_half_shift_table", 512)
-for sign in (0, 0x8000):
- # very small numbers mapping to zero
- for i in xrange(-127, -24):
- value(24)
-
- # small numbers mapping to denormals
- for i in xrange(-24, -14):
- value(-1 - i)
-
- # normal numbers
- for i in xrange(-14, 16):
- value(13)
-
- # large numbers mapping to infinity
- for i in xrange(16, 128):
- value(24)
-
- # infinity and NaNs
- value(13)
-end()
-
+# Copyright 2010 Luca Barbieri +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial +# portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# ************************************************************************* + +# The code is a reimplementation of the algorithm in +# www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf +# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008 +# +# The table contents have been slightly changed so that the exponent +# bias is now in the exponent table instead of the mantissa table (mostly +# for cosmetic reasons, and because it theoretically allows a variant +# that flushes denormal to zero but uses a mantissa table with 24-bit +# entries). +# +# The tables are also constructed slightly differently. +# + +# Note that using a 64K * 4 table is a terrible idea since it will not fit +# in the L1 cache and will massively pollute the L2 cache as well +# +# These should instead fit in the L1 cache. +# +# TODO: we could use a denormal bias table instead of the mantissa/offset +# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes +# but would involve more computation +# +# Note however that if denormals are never encountered, the L1 cache usage +# is only about 4608 bytes anyway. + +table_index = None +table_length = None + +def begin(t, n, l): + global table_length + global table_index + table_index = 0 + table_length = l + print + print "const " + t + " " + n + "[" + str(l) + "] = {" + +def value(v): + global table_index + table_index += 1 + print "\t" + hex(v) + "," + +def end(): + global table_length + global table_index + print "};" + assert table_index == table_length + +print "/* This file is autogenerated by u_half.py. Do not edit directly. */" +print "#include \"util/u_half.h\"" + +begin("uint32_t", "util_half_to_float_mantissa_table", 2048) +# zero +value(0) + +# denormals +for i in xrange(1, 1024): + m = i << 13 + e = 0 + + # normalize number + while (m & 0x00800000) == 0: + e -= 0x00800000 + m <<= 1 + + m &= ~0x00800000 + e += 0x38800000 + value(m | e) + +# normals +for i in xrange(1024, 2048): + value((i - 1024) << 13) +end() + +begin("uint32_t", "util_half_to_float_exponent_table", 64) +# positive zero or denormals +value(0) + +# positive numbers +for i in xrange(1, 31): + value(0x38000000 + (i << 23)) + +# positive infinity/NaN +value(0x7f800000) + +# negative zero or denormals +value(0x80000000) + +# negative numbers +for i in range(33, 63): + value(0xb8000000 + ((i - 32) << 23)) + +# negative infinity/NaN +value(0xff800000) +end() + +begin("uint32_t", "util_half_to_float_offset_table", 64) +# positive zero or denormals +value(0) + +# positive normals +for i in range(1, 32): + value(1024) + +# negative zero or denormals +value(0) + +# negative normals +for i in xrange(33, 64): + value(1024) +end() + +begin("uint16_t", "util_float_to_half_base_table", 512) +for sign in (0, 0x8000): + # very small numbers mapping to zero + for i in xrange(-127, -24): + value(sign | 0) + + # small numbers mapping to denormals + for i in xrange(-24, -14): + value(sign | (0x400 >> (-14 -i))) + + # normal numbers + for i in xrange(-14, 16): + value(sign | ((i + 15) << 10)) + + # large numbers mapping to infinity + for i in xrange(16, 128): + value(sign | 0x7c00) + + # infinity and NaNs + value(sign | 0x7c00) +end() + +begin("uint8_t", "util_float_to_half_shift_table", 512) +for sign in (0, 0x8000): + # very small numbers mapping to zero + for i in xrange(-127, -24): + value(24) + + # small numbers mapping to denormals + for i in xrange(-24, -14): + value(-1 - i) + + # normal numbers + for i in xrange(-14, 16): + value(13) + + # large numbers mapping to infinity + for i in xrange(16, 128): + value(24) + + # infinity and NaNs + value(13) +end() + |