diff options
Diffstat (limited to 'mesalib/src/gallium/auxiliary/util')
| -rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_cpu_detect.c | 29 | ||||
| -rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_cpu_detect.h | 1 | ||||
| -rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_format_srgb.h | 56 | ||||
| -rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_format_srgb.py | 57 | ||||
| -rw-r--r-- | mesalib/src/gallium/auxiliary/util/u_math.c | 2 | 
5 files changed, 133 insertions, 12 deletions
| diff --git a/mesalib/src/gallium/auxiliary/util/u_cpu_detect.c b/mesalib/src/gallium/auxiliary/util/u_cpu_detect.c index 588fc7c72..87ad78095 100644 --- a/mesalib/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/mesalib/src/gallium/auxiliary/util/u_cpu_detect.c @@ -230,8 +230,28 @@ static INLINE uint64_t xgetbv(void)  #else     return 0;  #endif +} + +#if defined(PIPE_ARCH_X86) +static INLINE boolean sse2_has_daz(void) +{ +   struct { +      uint32_t pad1[7]; +      uint32_t mxcsr_mask; +      uint32_t pad2[128-8]; +   } PIPE_ALIGN_VAR(16) fxarea; + +   fxarea.mxcsr_mask = 0; +#if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) +   __asm __volatile ("fxsave %0" : "+m" (fxarea)); +#elif (defined(PIPE_CC_MSVC) || defined(PIPE_CC_ICL)) +   _fxsave(&fxarea); +#endif +   return !!(fxarea.mxcsr_mask & (1 << 6));  } +#endif +  #endif /* X86 or X86_64 */  void @@ -310,6 +330,12 @@ util_cpu_detect(void)                                      ((xgetbv() & 6) == 6);    // XMM & YMM           util_cpu_caps.has_f16c   = (regs2[2] >> 29) & 1;           util_cpu_caps.has_mmx2   = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */ +#if defined(PIPE_ARCH_X86_64) +         util_cpu_caps.has_daz = 1; +#else +         util_cpu_caps.has_daz = util_cpu_caps.has_sse3 || +            (util_cpu_caps.has_sse2 && sse2_has_daz()); +#endif           cacheline = ((regs2[1] >> 8) & 0xFF) * 8;           if (cacheline > 0) @@ -368,9 +394,12 @@ util_cpu_detect(void)        debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);        debug_printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2);        debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx); +      debug_printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c); +      debug_printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt);        debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);        debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);        debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec); +      debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);     }  #endif diff --git a/mesalib/src/gallium/auxiliary/util/u_cpu_detect.h b/mesalib/src/gallium/auxiliary/util/u_cpu_detect.h index f9cd6475e..cc3e0ce03 100644 --- a/mesalib/src/gallium/auxiliary/util/u_cpu_detect.h +++ b/mesalib/src/gallium/auxiliary/util/u_cpu_detect.h @@ -68,6 +68,7 @@ struct util_cpu_caps {     unsigned has_3dnow:1;     unsigned has_3dnow_ext:1;     unsigned has_altivec:1; +   unsigned has_daz:1;  };  extern struct util_cpu_caps diff --git a/mesalib/src/gallium/auxiliary/util/u_format_srgb.h b/mesalib/src/gallium/auxiliary/util/u_format_srgb.h index 82ed9575d..740a91974 100644 --- a/mesalib/src/gallium/auxiliary/util/u_format_srgb.h +++ b/mesalib/src/gallium/auxiliary/util/u_format_srgb.h @@ -39,6 +39,7 @@  #include "pipe/p_compiler.h" +#include "u_pack_color.h"  #include "u_math.h" @@ -51,23 +52,58 @@ util_format_srgb_to_linear_8unorm_table[256];  extern const uint8_t  util_format_linear_to_srgb_8unorm_table[256]; +extern const unsigned +util_format_linear_to_srgb_helper_table[104]; +  /**   * Convert a unclamped linear float to srgb value in the [0,255]. - * XXX this hasn't been tested (render to srgb surface). - * XXX this needs optimization.   */  static INLINE uint8_t  util_format_linear_float_to_srgb_8unorm(float x)  { -   if (x >= 1.0f) -      return 255; -   else if (x >= 0.0031308f) -      return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f); -   else if (x > 0.0f) -      return float_to_ubyte(12.92f * x); -   else -      return 0; +   /* this would be exact but (probably much) slower */ +   if (0) { +      if (x >= 1.0f) +         return 255; +      else if (x >= 0.0031308f) +         return float_to_ubyte(1.055f * powf(x, 0.41666666f) - 0.055f); +      else if (x > 0.0f) +         return float_to_ubyte(12.92f * x); +      else +         return 0; +   } +   else { +      /* +       * This is taken from https://gist.github.com/rygorous/2203834 +       * Use LUT and do linear interpolation. +       */ +      union fi almostone, minval, f; +      unsigned tab, bias, scale, t; + +      almostone.ui = 0x3f7fffff; +      minval.ui = (127-13) << 23; + +      /* +       * Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively. +       * The tests are carefully written so that NaNs map to 0, same as in the +       * reference implementation. +       */ +      if (!(x > minval.f)) +         x = minval.f; +      if (x > almostone.f) +         x = almostone.f; + +      /* Do the table lookup and unpack bias, scale */ +      f.f = x; +      tab = util_format_linear_to_srgb_helper_table[(f.ui - minval.ui) >> 20]; +      bias = (tab >> 16) << 9; +      scale = tab & 0xffff; + +      /* Grab next-highest mantissa bits and perform linear interpolation */ +      t = (f.ui >> 12) & 0xff; +      return (uint8_t) ((bias + scale*t) >> 16); +   }  } diff --git a/mesalib/src/gallium/auxiliary/util/u_format_srgb.py b/mesalib/src/gallium/auxiliary/util/u_format_srgb.py index cd63ae789..c6c02f053 100644 --- a/mesalib/src/gallium/auxiliary/util/u_format_srgb.py +++ b/mesalib/src/gallium/auxiliary/util/u_format_srgb.py @@ -40,6 +40,7 @@ CopyRight = '''  import math +import struct  def srgb_to_linear(x): @@ -51,10 +52,11 @@ def srgb_to_linear(x):  def linear_to_srgb(x):      if x >= 0.0031308: -        return 1.055 * math.pow(x, 0.41666) - 0.055 +        return 1.055 * math.pow(x, 0.41666666) - 0.055      else:          return 12.92 * x +  def generate_srgb_tables():      print 'const float'      print 'util_format_srgb_8unorm_to_linear_float_table[256] = {' @@ -84,6 +86,59 @@ def generate_srgb_tables():      print '};'      print +# calculate the table interpolation values used in float linear to unorm8 srgb +    numexp = 13 +    mantissa_msb = 3 +# stepshift is just used to only use every x-th float to make things faster, +# 5 is largest value which still gives exact same table as 0 +    stepshift = 5 +    nbuckets = numexp << mantissa_msb +    bucketsize = (1 << (23 - mantissa_msb)) >> stepshift +    mantshift = 12 +    valtable = [] +    sum_aa = float(bucketsize) +    sum_ab = 0.0 +    sum_bb = 0.0 +    for i in range(0, bucketsize): +        j = (i << stepshift) >> mantshift +        sum_ab += j +        sum_bb += j*j +    inv_det = 1.0 / (sum_aa * sum_bb - sum_ab * sum_ab) + +    for bucket in range(0, nbuckets): +        start = ((127 - numexp) << 23) + bucket*(bucketsize << stepshift) +        sum_a = 0.0 +        sum_b = 0.0 +  +        for i in range(0, bucketsize): +            j = (i << stepshift) >> mantshift +            fint = start + (i << stepshift) +            ffloat = struct.unpack('f', struct.pack('I', fint))[0] +            val = linear_to_srgb(ffloat) * 255.0 + 0.5 +            sum_a += val +            sum_b += j*val + +        solved_a = inv_det * (sum_bb*sum_a - sum_ab*sum_b) +        solved_b = inv_det * (sum_aa*sum_b - sum_ab*sum_a) + +        scaled_a = solved_a * 65536.0 / 512.0 +        scaled_b = solved_b * 65536.0 +  +        int_a = int(scaled_a + 0.5) +        int_b = int(scaled_b + 0.5) + +        valtable.append((int_a << 16) + int_b) + +    print 'const unsigned' +    print 'util_format_linear_to_srgb_helper_table[104] = {' + +    for j in range(0, nbuckets, 4): +        print '   ', +        for i in range(j, j + 4): +            print '0x%08x,' % (valtable[i],), +        print +    print '};' +    print  def main():      print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. */' diff --git a/mesalib/src/gallium/auxiliary/util/u_math.c b/mesalib/src/gallium/auxiliary/util/u_math.c index f3fe392ba..6981ee939 100644 --- a/mesalib/src/gallium/auxiliary/util/u_math.c +++ b/mesalib/src/gallium/auxiliary/util/u_math.c @@ -111,7 +111,7 @@ util_fpstate_set_denorms_to_zero(unsigned current_mxcsr)     if (util_cpu_caps.has_sse) {        /* Enable flush to zero mode */        current_mxcsr |= _MM_FLUSH_ZERO_MASK; -      if (util_cpu_caps.has_sse3) { +      if (util_cpu_caps.has_daz) {           /* Enable denormals are zero mode */           current_mxcsr |= _MM_DENORMALS_ZERO_MASK;        } | 
