diff options
26 files changed, 898 insertions, 427 deletions
diff --git a/mesalib/src/gallium/auxiliary/Android.mk b/mesalib/src/gallium/auxiliary/Android.mk index 0c37dd31a..11fc2256a 100644 --- a/mesalib/src/gallium/auxiliary/Android.mk +++ b/mesalib/src/gallium/auxiliary/Android.mk @@ -44,8 +44,7 @@ $(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@ $(intermediates)/indices/u_indices_gen.c \ $(intermediates)/indices/u_unfilled_gen.c \ -$(intermediates)/util/u_format_srgb.c \ -$(intermediates)/util/u_half.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py +$(intermediates)/util/u_format_srgb.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py $(transform-generated-source) $(intermediates)/util/u_format_table.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py $(LOCAL_PATH)/util/u_format.csv diff --git a/mesalib/src/gallium/auxiliary/Makefile b/mesalib/src/gallium/auxiliary/Makefile index a70ae7384..3ba3f9c40 100644 --- a/mesalib/src/gallium/auxiliary/Makefile +++ b/mesalib/src/gallium/auxiliary/Makefile @@ -39,6 +39,4 @@ util/u_format_srgb.c: util/u_format_srgb.py util/u_format_table.c: util/u_format_table.py util/u_format_pack.py util/u_format_parse.py util/u_format.csv $(PYTHON2) util/u_format_table.py util/u_format.csv > $@ -util/u_half.c: util/u_half.py - $(PYTHON2) util/u_half.py > $@ # DO NOT DELETE diff --git a/mesalib/src/gallium/auxiliary/Makefile.sources b/mesalib/src/gallium/auxiliary/Makefile.sources index 277428b38..28a176d68 100644 --- a/mesalib/src/gallium/auxiliary/Makefile.sources +++ b/mesalib/src/gallium/auxiliary/Makefile.sources @@ -155,8 +155,7 @@ GENERATED_SOURCES := \ indices/u_indices_gen.c \ indices/u_unfilled_gen.c \ util/u_format_srgb.c \ - util/u_format_table.c \ - util/u_half.c + util/u_format_table.c GALLIVM_SOURCES := \ gallivm/lp_bld_arit.c \ diff --git a/mesalib/src/gallium/auxiliary/SConscript b/mesalib/src/gallium/auxiliary/SConscript index 07c420e13..bfd5ec34c 100644 --- a/mesalib/src/gallium/auxiliary/SConscript +++ b/mesalib/src/gallium/auxiliary/SConscript @@ -35,13 +35,6 @@ env.CodeGenerate( command = python_cmd + ' $SCRIPT $SOURCE > $TARGET' ) -env.CodeGenerate( - target = 'util/u_half.c', - script = 'util/u_half.py', - source = [], - command = python_cmd + ' $SCRIPT > $TARGET' -) - env.Depends('util/u_format_table.c', [ '#src/gallium/auxiliary/util/u_format_parse.py', 'util/u_format_pack.py', diff --git a/mesalib/src/gallium/auxiliary/util/.gitignore b/mesalib/src/gallium/auxiliary/util/.gitignore index 5dd0408ef..da74de623 100644 --- a/mesalib/src/gallium/auxiliary/util/.gitignore +++ b/mesalib/src/gallium/auxiliary/util/.gitignore @@ -1,3 +1,2 @@ u_format_srgb.c u_format_table.c -u_half.c diff --git a/mesalib/src/gallium/auxiliary/util/u_format.c b/mesalib/src/gallium/auxiliary/util/u_format.c index cfc4a17a0..6f4529835 100644 --- a/mesalib/src/gallium/auxiliary/util/u_format.c +++ b/mesalib/src/gallium/auxiliary/util/u_format.c @@ -159,6 +159,38 @@ util_format_is_pure_uint(enum pipe_format format) } boolean +util_format_is_array(const struct util_format_description *desc) +{ + unsigned chan; + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB || + desc->block.width != 1 || + desc->block.height != 1) { + return FALSE; + } + + for (chan = 0; chan < desc->nr_channels; ++chan) { + if (desc->swizzle[chan] != chan) + return FALSE; + + if (desc->channel[chan].type != desc->channel[0].type) + return FALSE; + + if (desc->channel[chan].normalized != desc->channel[0].normalized) + return FALSE; + + if (desc->channel[chan].pure_integer != desc->channel[0].pure_integer) + return FALSE; + + if (desc->channel[chan].size != desc->channel[0].size) + return FALSE; + } + + return TRUE; +} + +boolean util_format_is_luminance_alpha(enum pipe_format format) { const struct util_format_description *desc = diff --git a/mesalib/src/gallium/auxiliary/util/u_format.h b/mesalib/src/gallium/auxiliary/util/u_format.h index 1718fb5e2..e35e164b4 100644 --- a/mesalib/src/gallium/auxiliary/util/u_format.h +++ b/mesalib/src/gallium/auxiliary/util/u_format.h @@ -591,6 +591,13 @@ boolean util_format_is_pure_uint(enum pipe_format format); /** + * Whether the format is a simple array format where all channels + * are of the same type and can be loaded from memory as a vector + */ +boolean +util_format_is_array(const struct util_format_description *desc); + +/** * Check if the src format can be blitted to the destination format with * a simple memcpy. For example, blitting from RGBA to RGBx is OK, but not * the reverse. diff --git a/mesalib/src/gallium/auxiliary/util/u_format_tests.c b/mesalib/src/gallium/auxiliary/util/u_format_tests.c index fc29d8d48..457fda6c8 100644 --- a/mesalib/src/gallium/auxiliary/util/u_format_tests.c +++ b/mesalib/src/gallium/auxiliary/util/u_format_tests.c @@ -26,6 +26,9 @@ **************************************************************************/ +#include <float.h> + +#include "pipe/p_config.h" #include "u_memory.h" #include "u_format_tests.h" @@ -63,6 +66,9 @@ {{ 0, 0, 0, 0}, { 0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}} +#define NAN (0.0 / 0.0) +#define INF (1.0 / 0.0) + /** * Test cases. * @@ -876,7 +882,39 @@ util_format_test_cases[] = * Half float formats */ - {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + /* Minimum positive normal */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0400), UNPACKED_1x1( 6.10352E-5, 0.0, 0.0, 1.0)}, + + /* Max denormal */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x03FF), UNPACKED_1x1( 6.09756E-5, 0.0, 0.0, 1.0)}, + + /* Minimum positive denormal */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x0001), UNPACKED_1x1( 5.96046E-8, 0.0, 0.0, 1.0)}, + + /* Min representable value */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xfbff), UNPACKED_1x1( -65504.0, 0.0, 0.0, 1.0)}, + + /* Max representable value */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7bff), UNPACKED_1x1( 65504.0, 0.0, 0.0, 1.0)}, + +#if !defined(PIPE_CC_MSVC) + + /* NaNs */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7c01), UNPACKED_1x1( NAN, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xfc01), UNPACKED_1x1( -NAN, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7fff), UNPACKED_1x1( NAN, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xffff), UNPACKED_1x1( -NAN, 0.0, 0.0, 1.0)}, + + /* Inf */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x7c00), UNPACKED_1x1( INF, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xfc00), UNPACKED_1x1( -INF, 0.0, 0.0, 1.0)}, + +#endif + + /* Zero, ignore sign */ + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0x7fff), PACKED_1x16(0x8000), UNPACKED_1x1( -0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0x7fff), PACKED_1x16(0x0000), UNPACKED_1x1( 0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0x3c00), UNPACKED_1x1( 1.0, 0.0, 0.0, 1.0)}, {PIPE_FORMAT_R16_FLOAT, PACKED_1x16(0xffff), PACKED_1x16(0xbc00), UNPACKED_1x1( -1.0, 0.0, 0.0, 1.0)}, diff --git a/mesalib/src/gallium/auxiliary/util/u_half.h b/mesalib/src/gallium/auxiliary/util/u_half.h index ad030e90c..f7009f548 100644 --- a/mesalib/src/gallium/auxiliary/util/u_half.h +++ b/mesalib/src/gallium/auxiliary/util/u_half.h @@ -35,51 +35,84 @@ extern "C" { #endif -extern const uint32_t util_half_to_float_mantissa_table[2048]; -extern const uint32_t util_half_to_float_exponent_table[64]; -extern const uint32_t util_half_to_float_offset_table[64]; -extern const uint16_t util_float_to_half_base_table[512]; -extern const uint8_t util_float_to_half_shift_table[512]; - /* - * Note that if the half float is a signaling NaN, the x87 FPU will turn - * it into a quiet NaN immediately upon loading into a float. - * - * Additionally, denormals may be flushed to zero. + * References for float <-> half conversions * - * To avoid this, use the floatui functions instead of the float ones - * when just doing conversion rather than computation on the resulting - * floats. + * http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ + * https://gist.github.com/2156668 + * https://gist.github.com/2144712 */ -static INLINE uint32_t -util_half_to_floatui(uint16_t h) +static INLINE uint16_t +util_float_to_half(float f) { - unsigned exp = h >> 10; - return util_half_to_float_mantissa_table[util_half_to_float_offset_table[exp] + (h & 0x3ff)] + util_half_to_float_exponent_table[exp]; + uint32_t sign_mask = 0x80000000; + uint32_t round_mask = ~0xfff; + uint32_t f32inf = 0xff << 23; + uint32_t f16inf = 0x1f << 23; + uint32_t sign; + union fi magic; + union fi f32; + uint16_t f16; + + magic.ui = 0xf << 23; + + f32.f = f; + + /* Sign */ + sign = f32.ui & sign_mask; + f32.ui ^= sign; + + if (f32.ui == f32inf) { + /* Inf */ + f16 = 0x7c00; + } else if (f32.ui > f32inf) { + /* NaN */ + f16 = 0x7e00; + } else { + /* Number */ + f32.ui &= round_mask; + f32.f *= magic.f; + f32.ui -= round_mask; + + /* Clamp to infinity if overflowed */ + if (f32.ui > f16inf) + f32.ui = f16inf; + + f16 = f32.ui >> 13; + } + + /* Sign */ + f16 |= sign >> 16; + + return f16; } static INLINE float -util_half_to_float(uint16_t h) +util_half_to_float(uint16_t f16) { - union fi r; - r.ui = util_half_to_floatui(h); - return r.f; -} + union fi infnan; + union fi magic; + union fi f32; -static INLINE uint16_t -util_floatui_to_half(uint32_t v) -{ - unsigned signexp = v >> 23; - return util_float_to_half_base_table[signexp] + ((v & 0x007fffff) >> util_float_to_half_shift_table[signexp]); -} + infnan.ui = 0x8f << 23; + infnan.f = 65536.0f; + magic.ui = 0xef << 23; -static INLINE uint16_t -util_float_to_half(float f) -{ - union fi i; - i.f = f; - return util_floatui_to_half(i.ui); + /* Exponent / Mantissa */ + f32.ui = (f16 & 0x7fff) << 13; + + /* Adjust */ + f32.f *= magic.f; + + /* Inf / NaN */ + if (f32.f >= infnan.f) + f32.ui |= 0xff << 23; + + /* Sign */ + f32.ui |= (f16 & 0x8000) << 16; + + return f32.f; } #ifdef __cplusplus diff --git a/mesalib/src/gallium/auxiliary/util/u_half.py b/mesalib/src/gallium/auxiliary/util/u_half.py deleted file mode 100644 index 915cf3b92..000000000 --- a/mesalib/src/gallium/auxiliary/util/u_half.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright 2010 Luca Barbieri -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice (including the -# next paragraph) shall be included in all copies or substantial -# portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# ************************************************************************* - -# The code is a reimplementation of the algorithm in -# www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf -# "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008 -# -# The table contents have been slightly changed so that the exponent -# bias is now in the exponent table instead of the mantissa table (mostly -# for cosmetic reasons, and because it theoretically allows a variant -# that flushes denormal to zero but uses a mantissa table with 24-bit -# entries). -# -# The tables are also constructed slightly differently. -# - -# Note that using a 64K * 4 table is a terrible idea since it will not fit -# in the L1 cache and will massively pollute the L2 cache as well -# -# These should instead fit in the L1 cache. -# -# TODO: we could use a denormal bias table instead of the mantissa/offset -# tables: this would reduce the L1 cache usage from 8704 to 2304 bytes -# but would involve more computation -# -# Note however that if denormals are never encountered, the L1 cache usage -# is only about 4608 bytes anyway. - -table_index = None -table_length = None - -def begin(t, n, l): - global table_length - global table_index - table_index = 0 - table_length = l - print - print "const " + t + " " + n + "[" + str(l) + "] = {" - -def value(v): - global table_index - table_index += 1 - print "\t" + hex(v) + "," - -def end(): - global table_length - global table_index - print "};" - assert table_index == table_length - -print "/* This file is autogenerated by u_half.py. Do not edit directly. */" -print "#include \"util/u_half.h\"" - -begin("uint32_t", "util_half_to_float_mantissa_table", 2048) -# zero -value(0) - -# denormals -for i in xrange(1, 1024): - m = i << 13 - e = 0 - - # normalize number - while (m & 0x00800000) == 0: - e -= 0x00800000 - m <<= 1 - - m &= ~0x00800000 - e += 0x38800000 - value(m | e) - -# normals -for i in xrange(1024, 2048): - value((i - 1024) << 13) -end() - -begin("uint32_t", "util_half_to_float_exponent_table", 64) -# positive zero or denormals -value(0) - -# positive numbers -for i in xrange(1, 31): - value(0x38000000 + (i << 23)) - -# positive infinity/NaN -value(0x7f800000) - -# negative zero or denormals -value(0x80000000) - -# negative numbers -for i in range(33, 63): - value(0xb8000000 + ((i - 32) << 23)) - -# negative infinity/NaN -value(0xff800000) -end() - -begin("uint32_t", "util_half_to_float_offset_table", 64) -# positive zero or denormals -value(0) - -# positive normals -for i in range(1, 32): - value(1024) - -# negative zero or denormals -value(0) - -# negative normals -for i in xrange(33, 64): - value(1024) -end() - -begin("uint16_t", "util_float_to_half_base_table", 512) -for sign in (0, 0x8000): - # very small numbers mapping to zero - for i in xrange(-127, -24): - value(sign | 0) - - # small numbers mapping to denormals - for i in xrange(-24, -14): - value(sign | (0x400 >> (-14 -i))) - - # normal numbers - for i in xrange(-14, 16): - value(sign | ((i + 15) << 10)) - - # large numbers mapping to infinity - for i in xrange(16, 128): - value(sign | 0x7c00) - - # infinity and NaNs - value(sign | 0x7c00) -end() - -begin("uint8_t", "util_float_to_half_shift_table", 512) -for sign in (0, 0x8000): - # very small numbers mapping to zero - for i in xrange(-127, -24): - value(24) - - # small numbers mapping to denormals - for i in xrange(-24, -14): - value(-1 - i) - - # normal numbers - for i in xrange(-14, 16): - value(13) - - # large numbers mapping to infinity - for i in xrange(16, 128): - value(24) - - # infinity and NaNs - value(13) -end() - diff --git a/mesalib/src/gallium/auxiliary/util/u_math.h b/mesalib/src/gallium/auxiliary/util/u_math.h index f6196665f..724b136b5 100644 --- a/mesalib/src/gallium/auxiliary/util/u_math.h +++ b/mesalib/src/gallium/auxiliary/util/u_math.h @@ -183,6 +183,13 @@ union fi { }; +union di { + double d; + int64_t i; + uint64_t ui; +}; + + /** * Fast version of 2^x * Identity: exp2(a + b) = exp2(a) * exp2(b) @@ -325,14 +332,107 @@ util_is_approx(float a, float b, float tol) /** - * Test if x is NaN or +/- infinity. + * util_is_X_inf_or_nan = test if x is NaN or +/- Inf + * util_is_X_nan = test if x is NaN + * util_X_inf_sign = return +1 for +Inf, -1 for -Inf, or 0 for not Inf + * + * NaN can be checked with x != x, however this fails with the fast math flag + **/ + + +/** + * Single-float */ static INLINE boolean util_is_inf_or_nan(float x) { union fi tmp; tmp.f = x; - return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31); + return (tmp.ui & 0x7f800000) == 0x7f800000; +} + + +static INLINE boolean +util_is_nan(float x) +{ + union fi tmp; + tmp.f = x; + return (tmp.ui & 0x7fffffff) > 0x7f800000; +} + + +static INLINE int +util_inf_sign(float x) +{ + union fi tmp; + tmp.f = x; + if ((tmp.ui & 0x7fffffff) != 0x7f800000) { + return 0; + } + + return (x < 0) ? -1 : 1; +} + + +/** + * Double-float + */ +static INLINE boolean +util_is_double_inf_or_nan(double x) +{ + union di tmp; + tmp.d = x; + return (tmp.ui & 0x7ff0000000000000) == 0x7ff0000000000000; +} + + +static INLINE boolean +util_is_double_nan(double x) +{ + union di tmp; + tmp.d = x; + return (tmp.ui & 0x7fffffffffffffff) > 0x7ff0000000000000; +} + + +static INLINE int +util_double_inf_sign(double x) +{ + union di tmp; + tmp.d = x; + if ((tmp.ui & 0x7fffffffffffffff) != 0x7ff0000000000000) { + return 0; + } + + return (x < 0) ? -1 : 1; +} + + +/** + * Half-float + */ +static INLINE boolean +util_is_half_inf_or_nan(int16_t x) +{ + return (x & 0x7c00) == 0x7c00; +} + + +static INLINE boolean +util_is_half_nan(int16_t x) +{ + return (x & 0x7fff) > 0x7c00; +} + + +static INLINE int +util_half_inf_sign(int16_t x) +{ + if ((x & 0x7fff) != 0x7c00) { + return 0; + } + + return (x < 0) ? -1 : 1; } diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h index 5768ed7cd..bdbb5137e 100644 --- a/mesalib/src/mesa/main/mtypes.h +++ b/mesalib/src/mesa/main/mtypes.h @@ -3200,12 +3200,6 @@ struct gl_dlist_state GLubyte ActiveMaterialSize[MAT_ATTRIB_MAX]; GLfloat CurrentMaterial[MAT_ATTRIB_MAX][4]; - GLubyte ActiveIndex; - GLfloat CurrentIndex; - - GLubyte ActiveEdgeFlag; - GLboolean CurrentEdgeFlag; - struct { /* State known to have been set by the currently-compiling display * list. Used to eliminate some redundant state changes. diff --git a/mesalib/src/mesa/state_tracker/st_cb_fbo.c b/mesalib/src/mesa/state_tracker/st_cb_fbo.c index 10f4e09cf..e1818abb9 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_fbo.c +++ b/mesalib/src/mesa/state_tracker/st_cb_fbo.c @@ -57,10 +57,6 @@ #include "util/u_surface.h" -/** Set to 1 to enable extra debug code */ -#define ST_DEBUG_FBO 0 - - static GLboolean st_renderbuffer_alloc_sw_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, @@ -480,9 +476,9 @@ st_finish_render_texture(struct gl_context *ctx, static void st_fbo_invalid(const char *reason) { -#if ST_DEBUG_FBO - debug_printf("Invalid FBO: %s\n", reason); -#endif + if (MESA_DEBUG_FLAGS & DEBUG_INCOMPLETE_FBO) { + _mesa_debug(NULL, "Invalid FBO: %s\n", reason); + } } diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am index 1b232ad0f..deacf8728 100644 --- a/pixman/pixman/Makefile.am +++ b/pixman/pixman/Makefile.am @@ -92,6 +92,7 @@ endif # iwmmxt code if USE_ARM_IWMMXT +libpixman_iwmmxt_la_SOURCES = pixman-mmx.c noinst_LTLIBRARIES += libpixman-iwmmxt.la libpixman_1_la_LIBADD += libpixman-iwmmxt.la diff --git a/pixman/pixman/loongson-mmintrin.h b/pixman/pixman/loongson-mmintrin.h index 1a114fe0f..086c6e0f1 100644 --- a/pixman/pixman/loongson-mmintrin.h +++ b/pixman/pixman/loongson-mmintrin.h @@ -45,6 +45,28 @@ _mm_setzero_si64 (void) } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("paddh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_pi32 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("paddw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_pu16 (__m64 __m1, __m64 __m2) { __m64 ret; @@ -150,6 +172,78 @@ _mm_packs_pu16 (__m64 __m1, __m64 __m2) } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_packs_pi32 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("packsswh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ + (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_pi16 (uint16_t __w3, uint16_t __w2, uint16_t __w1, uint16_t __w0) +{ + if (__builtin_constant_p (__w3) && + __builtin_constant_p (__w2) && + __builtin_constant_p (__w1) && + __builtin_constant_p (__w0)) + { + uint64_t val = ((uint64_t)__w3 << 48) + | ((uint64_t)__w2 << 32) + | ((uint64_t)__w1 << 16) + | ((uint64_t)__w0 << 0); + return *(__m64 *)&val; + } + else if (__w3 == __w2 && __w2 == __w1 && __w1 == __w0) + { + /* TODO: handle other cases */ + uint64_t val = __w3; + uint64_t imm = _MM_SHUFFLE (0, 0, 0, 0); + __m64 ret; + asm("pshufh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (*(__m64 *)&val), "f" (*(__m64 *)&imm) + ); + return ret; + } + uint64_t val = ((uint64_t)__w3 << 48) + | ((uint64_t)__w2 << 32) + | ((uint64_t)__w1 << 16) + | ((uint64_t)__w0 << 0); + return *(__m64 *)&val; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_pi32 (unsigned __i1, unsigned __i0) +{ + if (__builtin_constant_p (__i1) && + __builtin_constant_p (__i0)) + { + uint64_t val = ((uint64_t)__i1 << 32) + | ((uint64_t)__i0 << 0); + return *(__m64 *)&val; + } + else if (__i1 == __i0) + { + uint64_t imm = _MM_SHUFFLE (1, 0, 1, 0); + __m64 ret; + asm("pshufh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (*(__m32 *)&__i1), "f" (*(__m64 *)&imm) + ); + return ret; + } + uint64_t val = ((uint64_t)__i1 << 32) + | ((uint64_t)__i0 << 0); + return *(__m64 *)&val; +} +#undef _MM_SHUFFLE + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_pi16 (__m64 __m, int64_t __n) { __m64 ret; @@ -193,6 +287,17 @@ _mm_srli_pi16 (__m64 __m, int64_t __count) } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srli_pi32 (__m64 __m, int64_t __count) +{ + __m64 ret; + asm("psrlw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_si64 (__m64 __m, int64_t __count) { __m64 ret; @@ -204,6 +309,17 @@ _mm_srli_si64 (__m64 __m, int64_t __count) } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_pi16 (__m64 __m1, __m64 __m2) +{ + __m64 ret; + asm("psubh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + return ret; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) { __m64 ret; diff --git a/pixman/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman/pixman-arm-neon-asm-bilinear.S index f7913adb7..e37b5c298 100644 --- a/pixman/pixman/pixman-arm-neon-asm-bilinear.S +++ b/pixman/pixman/pixman-arm-neon-asm-bilinear.S @@ -64,6 +64,7 @@ .altmacro .p2align 2 +#include "pixman-private.h" #include "pixman-arm-neon-asm.h" /* @@ -488,12 +489,12 @@ fname: vmull.u8 q1, d0, d28 vmlal.u8 q1, d1, d29 /* 5 cycles bubble */ - vshll.u16 q0, d2, #8 + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d2, d30 vmlal.u16 q0, d3, d30 /* 5 cycles bubble */ bilinear_duplicate_mask mask_fmt, 1, d4 - vshrn.u32 d0, q0, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) /* 3 cycles bubble */ vmovn.u16 d0, q0 /* 1 cycle bubble */ @@ -514,16 +515,16 @@ fname: q1, q11, d0, d1, d20, d21, d22, d23 bilinear_load_mask mask_fmt, 2, d4 bilinear_load_dst dst_fmt, op, 2, d18, d19, q9 - vshll.u16 q0, d2, #8 + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d2, d30 vmlal.u16 q0, d3, d30 - vshll.u16 q10, d22, #8 + vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q10, d22, d31 vmlal.u16 q10, d23, d31 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q10, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) bilinear_duplicate_mask mask_fmt, 2, d4 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 vmovn.u16 d0, q0 bilinear_interleave_src_dst \ @@ -544,29 +545,29 @@ fname: q3, q9, d4, d5, d16, d17, d18, d19 pld [TMP1, PF_OFFS] sub TMP1, TMP1, STRIDE - vshll.u16 q0, d2, #8 + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d2, d30 vmlal.u16 q0, d3, d30 - vshll.u16 q10, d22, #8 + vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q10, d22, d31 vmlal.u16 q10, d23, d31 - vshr.u16 q15, q12, #8 - vshll.u16 q2, d6, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d6, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q2, d6, d30 vmlal.u16 q2, d7, d30 - vshll.u16 q8, d18, #8 + vshll.u16 q8, d18, #BILINEAR_INTERPOLATION_BITS bilinear_load_mask mask_fmt, 4, d22 bilinear_load_dst dst_fmt, op, 4, d2, d3, q1 pld [TMP1, PF_OFFS] vmlsl.u16 q8, d18, d31 vmlal.u16 q8, d19, d31 vadd.u16 q12, q12, q13 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q10, #16 - vshrn.u32 d4, q2, #16 - vshrn.u32 d5, q8, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS) bilinear_duplicate_mask mask_fmt, 4, d22 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vmovn.u16 d0, q0 vmovn.u16 d1, q2 vadd.u16 q12, q12, q13 @@ -694,13 +695,13 @@ pixman_asm_function fname blt 0f tst OUT, #(1 << dst_bpp_shift) beq 0f - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 bilinear_process_last_pixel sub WIDTH, WIDTH, #1 0: vadd.u16 q13, q13, q13 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 cmp WIDTH, #2 @@ -921,7 +922,7 @@ pixman_asm_function fname vmull.u8 q10, d22, d28 vmlal.u8 q10, d23, d29 - vshll.u16 q0, d16, #8 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d16, d30 vmlal.u16 q0, d17, d30 @@ -932,27 +933,27 @@ pixman_asm_function fname vmull.u8 q11, d16, d28 vmlal.u8 q11, d17, d29 - vshll.u16 q1, d18, #8 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q1, d18, d31 vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 .endm .macro bilinear_over_8888_8888_process_pixblock_tail - vshll.u16 q2, d20, #8 + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q2, d20, d30 vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #8 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q3, d22, d31 vmlal.u16 q3, d23, d31 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q1, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) vld1.32 {d2, d3}, [OUT, :128] pld [OUT, #(prefetch_offset * 4)] - vshrn.u32 d4, q2, #16 - vshr.u16 q15, q12, #8 - vshrn.u32 d5, q3, #16 + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) vmovn.u16 d6, q0 vmovn.u16 d7, q2 vuzp.8 d6, d7 @@ -975,7 +976,7 @@ pixman_asm_function fname .endm .macro bilinear_over_8888_8888_process_pixblock_tail_head - vshll.u16 q2, d20, #8 + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS mov TMP1, X, asr #16 add X, X, UX add TMP1, TOP, TMP1, asl #2 @@ -984,21 +985,21 @@ pixman_asm_function fname add X, X, UX add TMP2, TOP, TMP2, asl #2 vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #8 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS vld1.32 {d20}, [TMP1], STRIDE vmlsl.u16 q3, d22, d31 vmlal.u16 q3, d23, d31 vld1.32 {d21}, [TMP1] vmull.u8 q8, d20, d28 vmlal.u8 q8, d21, d29 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q1, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) vld1.32 {d2, d3}, [OUT, :128] pld [OUT, PF_OFFS] - vshrn.u32 d4, q2, #16 - vshr.u16 q15, q12, #8 + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vld1.32 {d22}, [TMP2], STRIDE - vshrn.u32 d5, q3, #16 + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) vmovn.u16 d6, q0 vld1.32 {d23}, [TMP2] vmull.u8 q9, d22, d28 @@ -1022,7 +1023,7 @@ pixman_asm_function fname vmlal.u8 q10, d23, d29 vmull.u8 q11, d2, d4 vmull.u8 q2, d3, d4 - vshll.u16 q0, d16, #8 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d16, d30 vrshr.u16 q1, q11, #8 vmlal.u16 q0, d17, d30 @@ -1037,12 +1038,12 @@ pixman_asm_function fname vmull.u8 q11, d16, d28 vmlal.u8 q11, d17, d29 vuzp.8 d6, d7 - vshll.u16 q1, d18, #8 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS vuzp.8 d6, d7 vmlsl.u16 q1, d18, d31 vadd.u16 q12, q12, q13 vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 vst1.32 {d6, d7}, [OUT, :128]! .endm @@ -1081,14 +1082,14 @@ pixman_asm_function fname vmull.u8 q3, d2, d28 vmlal.u8 q2, d1, d29 vmlal.u8 q3, d3, d29 - vshll.u16 q0, d4, #8 - vshll.u16 q1, d6, #8 + vshll.u16 q0, d4, #BILINEAR_INTERPOLATION_BITS + vshll.u16 q1, d6, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d4, d30 vmlsl.u16 q1, d6, d31 vmlal.u16 q0, d5, d30 vmlal.u16 q1, d7, d31 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q1, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) vld1.32 {d2}, [TMP3], STRIDE vld1.32 {d3}, [TMP3] pld [TMP4, PF_OFFS] @@ -1099,7 +1100,7 @@ pixman_asm_function fname vmlal.u8 q3, d3, d29 vmull.u8 q1, d4, d28 vmlal.u8 q1, d5, d29 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vld1.32 {d22[0]}, [MASK]! pld [MASK, #prefetch_offset] vadd.u16 q12, q12, q13 @@ -1107,17 +1108,17 @@ pixman_asm_function fname .endm .macro bilinear_over_8888_8_8888_process_pixblock_tail - vshll.u16 q9, d6, #8 - vshll.u16 q10, d2, #8 + vshll.u16 q9, d6, #BILINEAR_INTERPOLATION_BITS + vshll.u16 q10, d2, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q9, d6, d30 vmlsl.u16 q10, d2, d31 vmlal.u16 q9, d7, d30 vmlal.u16 q10, d3, d31 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 vdup.32 d22, d22[0] - vshrn.u32 d18, q9, #16 - vshrn.u32 d19, q10, #16 + vshrn.u32 d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS) vmovn.u16 d17, q9 vld1.32 {d18, d19}, [OUT, :128] pld [OUT, PF_OFFS] @@ -1146,11 +1147,11 @@ pixman_asm_function fname .endm .macro bilinear_over_8888_8_8888_process_pixblock_tail_head - vshll.u16 q9, d6, #8 + vshll.u16 q9, d6, #BILINEAR_INTERPOLATION_BITS mov TMP1, X, asr #16 add X, X, UX add TMP1, TOP, TMP1, asl #2 - vshll.u16 q10, d2, #8 + vshll.u16 q10, d2, #BILINEAR_INTERPOLATION_BITS vld1.32 {d0}, [TMP1], STRIDE mov TMP2, X, asr #16 add X, X, UX @@ -1167,12 +1168,12 @@ pixman_asm_function fname mov TMP4, X, asr #16 add X, X, UX add TMP4, TOP, TMP4, asl #2 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 vld1.32 {d3}, [TMP2] vdup.32 d22, d22[0] - vshrn.u32 d18, q9, #16 - vshrn.u32 d19, q10, #16 + vshrn.u32 d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS) vmull.u8 q2, d0, d28 vmull.u8 q3, d2, d28 vmovn.u16 d17, q9 @@ -1182,8 +1183,8 @@ pixman_asm_function fname vmlal.u8 q3, d3, d29 vuzp.8 d16, d17 vuzp.8 d18, d19 - vshll.u16 q0, d4, #8 - vshll.u16 q1, d6, #8 + vshll.u16 q0, d4, #BILINEAR_INTERPOLATION_BITS + vshll.u16 q1, d6, #BILINEAR_INTERPOLATION_BITS vuzp.8 d16, d17 vuzp.8 d18, d19 vmlsl.u16 q0, d4, d30 @@ -1194,8 +1195,8 @@ pixman_asm_function fname vmlal.u16 q1, d7, d31 vrsra.u16 q10, q10, #8 vrsra.u16 q11, q11, #8 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q1, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) vrshrn.u16 d16, q10, #8 vrshrn.u16 d17, q11, #8 vld1.32 {d2}, [TMP3], STRIDE @@ -1216,7 +1217,7 @@ pixman_asm_function fname vraddhn.u16 d18, q9, q10 vraddhn.u16 d19, q15, q11 vmlal.u8 q1, d5, d29 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vqadd.u8 q9, q8, q9 vld1.32 {d22[0]}, [MASK]! vuzp.8 d18, d19 diff --git a/pixman/pixman/pixman-arm-neon-asm.S b/pixman/pixman/pixman-arm-neon-asm.S index 87aae1d55..187197dc3 100644 --- a/pixman/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman/pixman-arm-neon-asm.S @@ -49,6 +49,7 @@ .altmacro .p2align 2 +#include "pixman-private.h" #include "pixman-arm-neon-asm.h" /* Global configuration options and preferences */ @@ -2986,11 +2987,11 @@ fname: vmull.u8 q1, d0, d28 vmlal.u8 q1, d1, d29 /* 5 cycles bubble */ - vshll.u16 q0, d2, #8 + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d2, d30 vmlal.u16 q0, d3, d30 /* 5 cycles bubble */ - vshrn.u32 d0, q0, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) /* 3 cycles bubble */ vmovn.u16 d0, q0 /* 1 cycle bubble */ @@ -3000,15 +3001,15 @@ fname: .macro bilinear_interpolate_two_pixels src_fmt, dst_fmt bilinear_load_and_vertical_interpolate_two_&src_fmt \ q1, q11, d0, d1, d20, d21, d22, d23 - vshll.u16 q0, d2, #8 + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d2, d30 vmlal.u16 q0, d3, d30 - vshll.u16 q10, d22, #8 + vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q10, d22, d31 vmlal.u16 q10, d23, d31 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q10, #16 - vshr.u16 q15, q12, #8 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 vmovn.u16 d0, q0 bilinear_store_&dst_fmt 2, q2, q3 @@ -3020,26 +3021,26 @@ fname: q3, q9, d4, d5, d16, d17, d18, d19 pld [TMP1, PF_OFFS] sub TMP1, TMP1, STRIDE - vshll.u16 q0, d2, #8 + vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d2, d30 vmlal.u16 q0, d3, d30 - vshll.u16 q10, d22, #8 + vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q10, d22, d31 vmlal.u16 q10, d23, d31 - vshr.u16 q15, q12, #8 - vshll.u16 q2, d6, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d6, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q2, d6, d30 vmlal.u16 q2, d7, d30 - vshll.u16 q8, d18, #8 + vshll.u16 q8, d18, #BILINEAR_INTERPOLATION_BITS pld [TMP2, PF_OFFS] vmlsl.u16 q8, d18, d31 vmlal.u16 q8, d19, d31 vadd.u16 q12, q12, q13 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q10, #16 - vshrn.u32 d4, q2, #16 - vshrn.u32 d5, q8, #16 - vshr.u16 q15, q12, #8 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vmovn.u16 d0, q0 vmovn.u16 d1, q2 vadd.u16 q12, q12, q13 @@ -3158,13 +3159,13 @@ pixman_asm_function fname blt 0f tst OUT, #(1 << dst_bpp_shift) beq 0f - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 bilinear_interpolate_last_pixel src_fmt, dst_fmt sub WIDTH, WIDTH, #1 0: vadd.u16 q13, q13, q13 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 cmp WIDTH, #2 @@ -3282,7 +3283,7 @@ pixman_asm_function fname vmull.u8 q10, d22, d28 vmlal.u8 q10, d23, d29 - vshll.u16 q0, d16, #8 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d16, d30 vmlal.u16 q0, d17, d30 @@ -3293,25 +3294,25 @@ pixman_asm_function fname vmull.u8 q11, d16, d28 vmlal.u8 q11, d17, d29 - vshll.u16 q1, d18, #8 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q1, d18, d31 .endm .macro bilinear_interpolate_four_pixels_8888_8888_tail vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #8 - vshll.u16 q2, d20, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q2, d20, d30 vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #8 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q3, d22, d31 vmlal.u16 q3, d23, d31 vadd.u16 q12, q12, q13 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q1, #16 - vshrn.u32 d4, q2, #16 - vshr.u16 q15, q12, #8 - vshrn.u32 d5, q3, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) vmovn.u16 d6, q0 vmovn.u16 d7, q2 vadd.u16 q12, q12, q13 @@ -3326,22 +3327,22 @@ pixman_asm_function fname add X, X, UX add TMP2, TOP, TMP2, asl #2 vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #8 - vshll.u16 q2, d20, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q2, d20, d30 vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #8 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS vld1.32 {d20}, [TMP1], STRIDE vmlsl.u16 q3, d22, d31 vmlal.u16 q3, d23, d31 vld1.32 {d21}, [TMP1] vmull.u8 q8, d20, d28 vmlal.u8 q8, d21, d29 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q1, #16 - vshrn.u32 d4, q2, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) vld1.32 {d22}, [TMP2], STRIDE - vshrn.u32 d5, q3, #16 + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 vld1.32 {d23}, [TMP2] vmull.u8 q9, d22, d28 @@ -3353,12 +3354,12 @@ pixman_asm_function fname add TMP4, TOP, TMP4, asl #2 vmlal.u8 q9, d23, d29 vld1.32 {d22}, [TMP3], STRIDE - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vld1.32 {d23}, [TMP3] vmull.u8 q10, d22, d28 vmlal.u8 q10, d23, d29 vmovn.u16 d6, q0 - vshll.u16 q0, d16, #8 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS vmovn.u16 d7, q2 vmlsl.u16 q0, d16, d30 vmlal.u16 q0, d17, d30 @@ -3370,7 +3371,7 @@ pixman_asm_function fname vmull.u8 q11, d16, d28 vmlal.u8 q11, d17, d29 vst1.32 {d6, d7}, [OUT, :128]! - vshll.u16 q1, d18, #8 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q1, d18, d31 .endm @@ -3403,7 +3404,7 @@ pixman_asm_function fname vld1.32 {d23}, [TMP3] vmull.u8 q10, d22, d28 vmlal.u8 q10, d23, d29 - vshll.u16 q0, d16, #8 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q0, d16, d30 vmlal.u16 q0, d17, d30 pld [TMP4, PF_OFFS] @@ -3412,7 +3413,7 @@ pixman_asm_function fname pld [TMP4, PF_OFFS] vmull.u8 q11, d16, d28 vmlal.u8 q11, d17, d29 - vshll.u16 q1, d18, #8 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q1, d18, d31 mov TMP1, X, asr #16 @@ -3422,22 +3423,22 @@ pixman_asm_function fname add X, X, UX add TMP2, TOP, TMP2, asl #2 vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #8 - vshll.u16 q2, d20, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q2, d20, d30 vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #8 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS vld1.32 {d20}, [TMP1], STRIDE vmlsl.u16 q3, d22, d31 vmlal.u16 q3, d23, d31 vld1.32 {d21}, [TMP1] vmull.u8 q8, d20, d28 vmlal.u8 q8, d21, d29 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q1, #16 - vshrn.u32 d4, q2, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) vld1.32 {d22}, [TMP2], STRIDE - vshrn.u32 d5, q3, #16 + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 vld1.32 {d23}, [TMP2] vmull.u8 q9, d22, d28 @@ -3449,12 +3450,12 @@ pixman_asm_function fname add TMP4, TOP, TMP4, asl #2 vmlal.u8 q9, d23, d29 vld1.32 {d22}, [TMP3], STRIDE - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vld1.32 {d23}, [TMP3] vmull.u8 q10, d22, d28 vmlal.u8 q10, d23, d29 vmovn.u16 d8, q0 - vshll.u16 q0, d16, #8 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS vmovn.u16 d9, q2 vmlsl.u16 q0, d16, d30 vmlal.u16 q0, d17, d30 @@ -3465,25 +3466,25 @@ pixman_asm_function fname pld [TMP4, PF_OFFS] vmull.u8 q11, d16, d28 vmlal.u8 q11, d17, d29 - vshll.u16 q1, d18, #8 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q1, d18, d31 .endm .macro bilinear_interpolate_eight_pixels_8888_0565_tail vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #8 - vshll.u16 q2, d20, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q2, d20, d30 vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #8 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q3, d22, d31 vmlal.u16 q3, d23, d31 vadd.u16 q12, q12, q13 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q1, #16 - vshrn.u32 d4, q2, #16 - vshr.u16 q15, q12, #8 - vshrn.u32 d5, q3, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) vmovn.u16 d10, q0 vmovn.u16 d11, q2 vadd.u16 q12, q12, q13 @@ -3508,23 +3509,23 @@ pixman_asm_function fname add X, X, UX add TMP2, TOP, TMP2, asl #2 vmlal.u16 q1, d19, d31 - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vuzp.u8 d8, d9 - vshll.u16 q2, d20, #8 + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q2, d20, d30 vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #8 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS vld1.32 {d20}, [TMP1], STRIDE vmlsl.u16 q3, d22, d31 vmlal.u16 q3, d23, d31 vld1.32 {d21}, [TMP1] vmull.u8 q8, d20, d28 vmlal.u8 q8, d21, d29 - vshrn.u32 d0, q0, #16 - vshrn.u32 d1, q1, #16 - vshrn.u32 d4, q2, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) vld1.32 {d22}, [TMP2], STRIDE - vshrn.u32 d5, q3, #16 + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 vld1.32 {d23}, [TMP2] vmull.u8 q9, d22, d28 @@ -3536,12 +3537,12 @@ pixman_asm_function fname add TMP4, TOP, TMP4, asl #2 vmlal.u8 q9, d23, d29 vld1.32 {d22}, [TMP3], STRIDE - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vld1.32 {d23}, [TMP3] vmull.u8 q10, d22, d28 vmlal.u8 q10, d23, d29 vmovn.u16 d10, q0 - vshll.u16 q0, d16, #8 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS vmovn.u16 d11, q2 vmlsl.u16 q0, d16, d30 vmlal.u16 q0, d17, d30 @@ -3553,7 +3554,7 @@ pixman_asm_function fname vmull.u8 q11, d16, d28 vmlal.u8 q11, d17, d29 vuzp.u8 d10, d11 - vshll.u16 q1, d18, #8 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS vmlsl.u16 q1, d18, d31 mov TMP1, X, asr #16 @@ -3564,12 +3565,12 @@ pixman_asm_function fname add TMP2, TOP, TMP2, asl #2 vmlal.u16 q1, d19, d31 vuzp.u8 d9, d11 - vshr.u16 q15, q12, #8 - vshll.u16 q2, d20, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) + vshll.u16 q2, d20, #BILINEAR_INTERPOLATION_BITS vuzp.u8 d8, d10 vmlsl.u16 q2, d20, d30 vmlal.u16 q2, d21, d30 - vshll.u16 q3, d22, #8 + vshll.u16 q3, d22, #BILINEAR_INTERPOLATION_BITS vld1.32 {d20}, [TMP1], STRIDE vmlsl.u16 q3, d22, d31 vmlal.u16 q3, d23, d31 @@ -3579,13 +3580,13 @@ pixman_asm_function fname vshll.u8 q6, d9, #8 vshll.u8 q5, d10, #8 vshll.u8 q7, d8, #8 - vshrn.u32 d0, q0, #16 + vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS) vsri.u16 q5, q6, #5 - vshrn.u32 d1, q1, #16 + vshrn.u32 d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS) vsri.u16 q5, q7, #11 - vshrn.u32 d4, q2, #16 + vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS) vld1.32 {d22}, [TMP2], STRIDE - vshrn.u32 d5, q3, #16 + vshrn.u32 d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS) vadd.u16 q12, q12, q13 vld1.32 {d23}, [TMP2] vmull.u8 q9, d22, d28 @@ -3597,12 +3598,12 @@ pixman_asm_function fname add TMP4, TOP, TMP4, asl #2 vmlal.u8 q9, d23, d29 vld1.32 {d22}, [TMP3], STRIDE - vshr.u16 q15, q12, #8 + vshr.u16 q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS) vld1.32 {d23}, [TMP3] vmull.u8 q10, d22, d28 vmlal.u8 q10, d23, d29 vmovn.u16 d8, q0 - vshll.u16 q0, d16, #8 + vshll.u16 q0, d16, #BILINEAR_INTERPOLATION_BITS vmovn.u16 d9, q2 vmlsl.u16 q0, d16, d30 vmlal.u16 q0, d17, d30 @@ -3613,7 +3614,7 @@ pixman_asm_function fname pld [TMP4, PF_OFFS] vmull.u8 q11, d16, d28 vmlal.u8 q11, d17, d29 - vshll.u16 q1, d18, #8 + vshll.u16 q1, d18, #BILINEAR_INTERPOLATION_BITS vst1.32 {d10, d11}, [OUT, :128]! vmlsl.u16 q1, d18, d31 .endm diff --git a/pixman/pixman/pixman-bits-image.c b/pixman/pixman/pixman-bits-image.c index 05eab9634..b6c8630f4 100644 --- a/pixman/pixman/pixman-bits-image.c +++ b/pixman/pixman/pixman-bits-image.c @@ -131,8 +131,8 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image, x1 = x - pixman_fixed_1 / 2; y1 = y - pixman_fixed_1 / 2; - distx = (x1 >> 8) & 0xff; - disty = (y1 >> 8) & 0xff; + distx = pixman_fixed_to_bilinear_weight (x1); + disty = pixman_fixed_to_bilinear_weight (y1); x1 = pixman_fixed_to_int (x1); y1 = pixman_fixed_to_int (y1); @@ -200,7 +200,7 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter, x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; y = v.vector[1] - pixman_fixed_1/2; - disty = (y >> 8) & 0xff; + disty = pixman_fixed_to_bilinear_weight (y); /* Load the pointers to the first and second lines from the source * image that bilinear code must read. @@ -309,7 +309,7 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter, tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask; br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - distx = (x >> 8) & 0xff; + distx = pixman_fixed_to_bilinear_weight (x); *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty); @@ -334,7 +334,7 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter, bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - distx = (x >> 8) & 0xff; + distx = pixman_fixed_to_bilinear_weight (x); *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty); } @@ -358,7 +358,7 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter, tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; - distx = (x >> 8) & 0xff; + distx = pixman_fixed_to_bilinear_weight (x); *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty); } @@ -695,8 +695,8 @@ bits_image_fetch_bilinear_affine (pixman_image_t * image, x1 = x - pixman_fixed_1 / 2; y1 = y - pixman_fixed_1 / 2; - distx = (x1 >> 8) & 0xff; - disty = (y1 >> 8) & 0xff; + distx = pixman_fixed_to_bilinear_weight (x1); + disty = pixman_fixed_to_bilinear_weight (y1); y1 = pixman_fixed_to_int (y1); y2 = y1 + 1; diff --git a/pixman/pixman/pixman-inlines.h b/pixman/pixman/pixman-inlines.h index 3532867a4..5517de5a5 100644 --- a/pixman/pixman/pixman-inlines.h +++ b/pixman/pixman/pixman-inlines.h @@ -81,6 +81,13 @@ repeat (pixman_repeat_t repeat, int *c, int size) return TRUE; } +static force_inline int +pixman_fixed_to_bilinear_weight (pixman_fixed_t x) +{ + return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) & + ((1 << BILINEAR_INTERPOLATION_BITS) - 1); +} + #if SIZEOF_LONG > 4 static force_inline uint32_t @@ -92,6 +99,9 @@ bilinear_interpolation (uint32_t tl, uint32_t tr, uint64_t tl64, tr64, bl64, br64; uint64_t f, r; + distx <<= (8 - BILINEAR_INTERPOLATION_BITS); + disty <<= (8 - BILINEAR_INTERPOLATION_BITS); + distxy = distx * disty; distxiy = distx * (256 - disty); distixy = (256 - distx) * disty; @@ -135,6 +145,9 @@ bilinear_interpolation (uint32_t tl, uint32_t tr, int distxy, distxiy, distixy, distixiy; uint32_t f, r; + distx <<= (8 - BILINEAR_INTERPOLATION_BITS); + disty <<= (8 - BILINEAR_INTERPOLATION_BITS); + distxy = distx * disty; distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ @@ -758,12 +771,14 @@ bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, * all source pixels are fetched from zero padding * zone for NONE repeat * - * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256, - * but sometimes it may be less than that for NONE repeat when handling - * fuzzy antialiased top or bottom image edges. Also both top and - * bottom weight variables are guaranteed to have value in 0-255 - * range and can fit into unsigned byte or be used with 8-bit SIMD - * multiplication instructions. + * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to + * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that + * for NONE repeat when handling fuzzy antialiased top or bottom image + * edges. Also both top and bottom weight variables are guaranteed to + * have value, which is less than BILINEAR_INTERPOLATION_RANGE. + * For example, the weights can fit into unsigned byte or be used + * with 8-bit SIMD multiplication instructions for 8-bit interpolation + * precision. */ #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ dst_type_t, repeat_mode, flags) \ @@ -877,18 +892,18 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, } \ \ y1 = pixman_fixed_to_int (vy); \ - weight2 = (vy >> 8) & 0xff; \ + weight2 = pixman_fixed_to_bilinear_weight (vy); \ if (weight2) \ { \ - /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \ + /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \ y2 = y1 + 1; \ - weight1 = 256 - weight2; \ + weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \ } \ else \ { \ - /* set both top and bottom row to the same scanline, and weights to 128+128 */ \ + /* set both top and bottom row to the same scanline and tweak weights */ \ y2 = y1; \ - weight1 = weight2 = 128; \ + weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \ } \ vy += unit_y; \ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ diff --git a/pixman/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman/pixman-mips-dspr2-asm.S index 87558f032..48f108ed9 100644 --- a/pixman/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman/pixman-mips-dspr2-asm.S @@ -29,6 +29,7 @@ * Author: Nemanja Lukic (nlukic@mips.com) */ +#include "pixman-private.h" #include "pixman-mips-dspr2-asm.h" LEAF_MIPS_DSPR2(pixman_fill_buff16_mips) @@ -771,11 +772,15 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips) lw s1, 48(sp) /* s1 = wb */ lw s2, 52(sp) /* s2 = vx */ lw s3, 56(sp) /* s3 = unit_x */ - li v0, 256 + li v0, BILINEAR_INTERPOLATION_RANGE li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + 0: andi t4, s2, 0xffff /* t4 = (short)vx */ - srl t4, t4, 8 /* t4 = vx >> 8 */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c index d869c04c6..5441d6bc2 100644 --- a/pixman/pixman/pixman-mmx.c +++ b/pixman/pixman/pixman-mmx.c @@ -42,6 +42,7 @@ #endif #include "pixman-private.h" #include "pixman-combine32.h" +#include "pixman-inlines.h" #define no_vERBOSE @@ -694,6 +695,24 @@ combine (const uint32_t *src, const uint32_t *mask) return vsrc; } +static force_inline __m64 +core_combine_over_u_pixel_mmx (__m64 vsrc, __m64 vdst) +{ + vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ()); + + if (is_opaque (vsrc)) + { + return vsrc; + } + else if (!is_zero (vsrc)) + { + return over (vsrc, expand_alpha (vsrc), + _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ())); + } + + return _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ()); +} + static void mmx_combine_over_u (pixman_implementation_t *imp, pixman_op_t op, @@ -1599,9 +1618,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); - mask &= 0xff000000; - mask = mask | mask >> 8 | mask >> 16 | mask >> 24; - vmask = load8888 (&mask); + vmask = expand_alpha (load8888 (&mask)); while (height--) { @@ -1670,9 +1687,7 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); - mask &= 0xff000000; - mask = mask | mask >> 8 | mask >> 16 | mask >> 24; - vmask = load8888 (&mask); + vmask = expand_alpha (load8888 (&mask)); srca = MC (4x00ff); while (height--) @@ -3506,6 +3521,242 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, _mm_empty (); } +#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS)) +#define BMSK (BSHIFT - 1) + +#define BILINEAR_DECLARE_VARIABLES \ + const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); \ + const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); \ + const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT); \ + const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1); \ + const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK); \ + const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); \ + const __m64 mm_zero = _mm_setzero_si64 (); \ + __m64 mm_x = _mm_set_pi16 (vx, vx, vx, vx) + +#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ +do { \ + /* fetch 2x2 pixel block into 2 mmx registers */ \ + __m64 t = ldq_u ((__m64 *)&src_top [pixman_fixed_to_int (vx)]); \ + __m64 b = ldq_u ((__m64 *)&src_bottom [pixman_fixed_to_int (vx)]); \ + vx += unit_x; \ + /* vertical interpolation */ \ + __m64 t_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (t, mm_zero), mm_wt); \ + __m64 t_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (t, mm_zero), mm_wt); \ + __m64 b_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (b, mm_zero), mm_wb); \ + __m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb); \ + __m64 hi = _mm_add_pi16 (t_hi, b_hi); \ + __m64 lo = _mm_add_pi16 (t_lo, b_lo); \ + if (BILINEAR_INTERPOLATION_BITS < 8) \ + { \ + /* calculate horizontal weights */ \ + __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \ + _mm_srli_pi16 (mm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS))); \ + mm_x = _mm_add_pi16 (mm_x, mm_ux); \ + /* horizontal interpolation */ \ + __m64 p = _mm_unpacklo_pi16 (lo, hi); \ + __m64 q = _mm_unpackhi_pi16 (lo, hi); \ + lo = _mm_madd_pi16 (p, mm_wh); \ + hi = _mm_madd_pi16 (q, mm_wh); \ + } \ + else \ + { \ + /* calculate horizontal weights */ \ + __m64 mm_wh_lo = _mm_sub_pi16 (mm_BSHIFT, _mm_srli_pi16 (mm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ + __m64 mm_wh_hi = _mm_srli_pi16 (mm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS); \ + mm_x = _mm_add_pi16 (mm_x, mm_ux); \ + /* horizontal interpolation */ \ + __m64 mm_lo_lo = _mm_mullo_pi16 (lo, mm_wh_lo); \ + __m64 mm_lo_hi = _mm_mullo_pi16 (hi, mm_wh_hi); \ + __m64 mm_hi_lo = _mm_mulhi_pu16 (lo, mm_wh_lo); \ + __m64 mm_hi_hi = _mm_mulhi_pu16 (hi, mm_wh_hi); \ + lo = _mm_add_pi32 (_mm_unpacklo_pi16 (mm_lo_lo, mm_hi_lo), \ + _mm_unpacklo_pi16 (mm_lo_hi, mm_hi_hi)); \ + hi = _mm_add_pi32 (_mm_unpackhi_pi16 (mm_lo_lo, mm_hi_lo), \ + _mm_unpackhi_pi16 (mm_lo_hi, mm_hi_hi)); \ + } \ + /* shift and pack the result */ \ + hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \ + lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2); \ + lo = _mm_packs_pi32 (lo, hi); \ + lo = _mm_packs_pu16 (lo, lo); \ + pix = lo; \ +} while (0) + +#define BILINEAR_SKIP_ONE_PIXEL() \ +do { \ + vx += unit_x; \ + mm_x = _mm_add_pi16 (mm_x, mm_ux); \ +} while(0) + +static force_inline void +scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + __m64 pix; + + while (w--) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix); + store (dst, pix); + dst++; + } + + _mm_empty (); +} + +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_SRC, + scaled_bilinear_scanline_mmx_8888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_mmx_8888_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + __m64 pix1, pix2; + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (!is_zero (pix1)) + { + pix2 = load (dst); + store8888 (dst, core_combine_over_u_pixel_mmx (pix1, pix2)); + } + + w--; + dst++; + } + + _mm_empty (); +} + +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER, + scaled_bilinear_scanline_mmx_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t * dst, + const uint8_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + __m64 pix1, pix2; + uint32_t m; + + while (w) + { + m = (uint32_t) *mask++; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (m == 0xff && is_opaque (pix1)) + { + store (dst, pix1); + } + else + { + __m64 ms, md, ma, msa; + + pix2 = load (dst); + ma = expand_alpha_rev (to_m64 (m)); + ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ()); + md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ()); + + msa = expand_alpha (ms); + + store8888 (dst, (in_over (ms, msa, ma, md))); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + } + + w--; + dst++; + } + + _mm_empty (); +} + +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + COVER, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + PAD, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NONE, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NORMAL, FLAG_HAVE_NON_SOLID_MASK) + static uint32_t * mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) { @@ -3761,6 +4012,23 @@ static const pixman_fast_path_t mmx_fast_paths[] = PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ), PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, mmx_8888_8888 ), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888 ), + { PIXMAN_OP_NONE }, }; diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index 72e3b4f6d..0c27798b0 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -1,10 +1,24 @@ +#ifndef PIXMAN_PRIVATE_H +#define PIXMAN_PRIVATE_H + +/* + * The defines which are shared between C and assembly code + */ + +/* bilinear interpolation precision (must be <= 8) */ +#define BILINEAR_INTERPOLATION_BITS 7 +#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS) + +/* + * C specific part + */ + +#ifndef __ASSEMBLER__ + #ifndef PACKAGE # error config.h must be included before pixman-private.h #endif -#ifndef PIXMAN_PRIVATE_H -#define PIXMAN_PRIVATE_H - #define PIXMAN_DISABLE_DEPRECATED #define PIXMAN_USE_INTERNAL_API @@ -1052,4 +1066,6 @@ void pixman_timer_register (pixman_timer_t *timer); #endif /* PIXMAN_TIMERS */ +#endif /* __ASSEMBLER__ */ + #endif /* PIXMAN_PRIVATE_H */ diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index ef82a18c3..665eeadbe 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -5364,11 +5364,15 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, scaled_nearest_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) +#define BMSK ((1 << BILINEAR_INTERPOLATION_BITS) - 1) + #define BILINEAR_DECLARE_VARIABLES \ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ - const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);\ - const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ + const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);\ + const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ + const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);\ + const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \ unit_x, unit_x, unit_x, unit_x); \ const __m128i xmm_zero = _mm_setzero_si128 (); \ @@ -5388,18 +5392,30 @@ do { \ xmm_wt), \ _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), \ xmm_wb)); \ - /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc, \ - _mm_xor_si128 (xmm_xorc, \ - _mm_srli_epi16 (xmm_x, 8))); \ - xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ - /* horizontal interpolation */ \ - xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ - xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \ - a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \ - _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \ + if (BILINEAR_INTERPOLATION_BITS < 8) \ + { \ + /* calculate horizontal weights */ \ + xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, \ + _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ + /* horizontal interpolation */ \ + a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ + a, _MM_SHUFFLE (1, 0, 3, 2)), a), xmm_wh); \ + } \ + else \ + { \ + /* calculate horizontal weights */ \ + xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, \ + _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ + /* horizontal interpolation */ \ + xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ + xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \ + a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \ + _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \ + } \ /* shift and pack the result */ \ - a = _mm_srli_epi32 (a, 16); \ + a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2); \ a = _mm_packs_epi32 (a, a); \ a = _mm_packus_epi16 (a, a); \ pix = _mm_cvtsi128_si32 (a); \ @@ -5845,6 +5861,9 @@ static const pixman_fast_path_t sse2_fast_paths[] = SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), diff --git a/pixman/test/affine-test.c b/pixman/test/affine-test.c index a4ceed3da..6827cc3a8 100644 --- a/pixman/test/affine-test.c +++ b/pixman/test/affine-test.c @@ -301,11 +301,21 @@ test_composite (int testnum, return crc32; } +#if BILINEAR_INTERPOLATION_BITS == 8 +#define CHECKSUM 0x1EF2175A +#elif BILINEAR_INTERPOLATION_BITS == 7 +#define CHECKSUM 0x74050F50 +#elif BILINEAR_INTERPOLATION_BITS == 4 +#define CHECKSUM 0x4362EAE8 +#else +#define CHECKSUM 0x00000000 +#endif + int main (int argc, const char *argv[]) { pixman_disable_out_of_bounds_workaround (); - return fuzzer_test_main ("affine", 8000000, 0x1EF2175A, + return fuzzer_test_main ("affine", 8000000, CHECKSUM, test_composite, argc, argv); } diff --git a/pixman/test/scaling-test.c b/pixman/test/scaling-test.c index 6f2da1432..44c4f3de4 100644 --- a/pixman/test/scaling-test.c +++ b/pixman/test/scaling-test.c @@ -357,11 +357,21 @@ test_composite (int testnum, return crc32; } +#if BILINEAR_INTERPOLATION_BITS == 8 +#define CHECKSUM 0x80DF1CB2 +#elif BILINEAR_INTERPOLATION_BITS == 7 +#define CHECKSUM 0x2818D5FB +#elif BILINEAR_INTERPOLATION_BITS == 4 +#define CHECKSUM 0x387540A5 +#else +#define CHECKSUM 0x00000000 +#endif + int main (int argc, const char *argv[]) { pixman_disable_out_of_bounds_workaround (); - return fuzzer_test_main("scaling", 8000000, 0x80DF1CB2, + return fuzzer_test_main("scaling", 8000000, CHECKSUM, test_composite, argc, argv); } diff --git a/xorg-server/randr/rrscreen.c b/xorg-server/randr/rrscreen.c index 55110e088..c564d1f96 100644 --- a/xorg-server/randr/rrscreen.c +++ b/xorg-server/randr/rrscreen.c @@ -195,7 +195,7 @@ ProcRRGetScreenSizeRange(ClientPtr client) rrScrPrivPtr pScrPriv; int rc; - REQUEST_SIZE_MATCH(xRRGetScreenInfoReq); + REQUEST_SIZE_MATCH(xRRGetScreenSizeRangeReq); rc = dixLookupWindow(&pWin, stuff->window, client, DixGetAttrAccess); if (rc != Success) return rc; |