diff options
Diffstat (limited to 'pixman')
-rw-r--r-- | pixman/configure.ac | 18 | ||||
-rw-r--r-- | pixman/pixman/Makefile.am | 2 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-asm.h | 37 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-neon-asm-bilinear.S | 12 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-neon-asm.S | 12 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-simd-asm-scaled.S | 11 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-simd-asm.S | 78 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-simd.c | 6 | ||||
-rw-r--r-- | pixman/test/tolerance-test.c | 4 | ||||
-rw-r--r-- | pixman/test/utils-prng.c | 10 | ||||
-rw-r--r-- | pixman/test/utils-prng.h | 9 |
11 files changed, 155 insertions, 44 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac index 632797241..0339494b6 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -1061,6 +1061,24 @@ fi AC_MSG_RESULT($support_for_builtin_clz) +dnl ===================================== +dnl GCC vector extensions + +support_for_gcc_vector_extensions=no + +AC_MSG_CHECKING(for GCC vector extensions) +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +unsigned int __attribute__ ((vector_size(16))) e, a, b; +int main (void) { e = a - ((b << 27) + (b >> (32 - 27))) + 1; return e[0]; } +]])], support_for_gcc_vector_extensions=yes) + +if test x$support_for_gcc_vector_extensions = xyes; then + AC_DEFINE([HAVE_GCC_VECTOR_EXTENSIONS], [], + [Whether the compiler supports GCC vector extensions]) +fi + +AC_MSG_RESULT($support_for_gcc_vector_extensions) + dnl ================== dnl libpng diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am index b376d9aeb..581b6f61e 100644 --- a/pixman/pixman/Makefile.am +++ b/pixman/pixman/Makefile.am @@ -72,6 +72,7 @@ libpixman_arm_simd_la_SOURCES = \ pixman-arm-common.h \ pixman-arm-simd-asm.S \ pixman-arm-simd-asm-scaled.S \ + pixman-arm-asm.h \ pixman-arm-simd-asm.h libpixman_1_la_LIBADD += libpixman-arm-simd.la @@ -86,6 +87,7 @@ libpixman_arm_neon_la_SOURCES = \ pixman-arm-common.h \ pixman-arm-neon-asm.S \ pixman-arm-neon-asm-bilinear.S \ + pixman-arm-asm.h \ pixman-arm-neon-asm.h libpixman_1_la_LIBADD += libpixman-arm-neon.la diff --git a/pixman/pixman/pixman-arm-asm.h b/pixman/pixman/pixman-arm-asm.h new file mode 100644 index 000000000..ee7854108 --- /dev/null +++ b/pixman/pixman/pixman-arm-asm.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2008 Mozilla Corporation + * Copyright © 2010 Nokia Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice 
appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ + +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname + .global fname +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: +.endm diff --git a/pixman/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman/pixman-arm-neon-asm-bilinear.S index e37b5c298..0fd92d61c 100644 --- a/pixman/pixman/pixman-arm-neon-asm-bilinear.S +++ b/pixman/pixman/pixman-arm-neon-asm-bilinear.S @@ -65,23 +65,13 @@ .p2align 2 #include "pixman-private.h" +#include "pixman-arm-asm.h" #include "pixman-arm-neon-asm.h" /* * Bilinear macros from pixman-arm-neon-asm.S */ -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm - /* * Bilinear scaling support code which tries to provide pixel fetching, color * format conversion, and interpolation as separate macros which can be used diff --git a/pixman/pixman/pixman-arm-neon-asm.S 
b/pixman/pixman/pixman-arm-neon-asm.S index 187197dc3..7e949a38f 100644 --- a/pixman/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman/pixman-arm-neon-asm.S @@ -50,6 +50,7 @@ .p2align 2 #include "pixman-private.h" +#include "pixman-arm-asm.h" #include "pixman-arm-neon-asm.h" /* Global configuration options and preferences */ @@ -2830,17 +2831,6 @@ generate_composite_function_nearest_scanline \ /******************************************************************************/ -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm - /* * Bilinear scaling support code which tries to provide pixel fetching, color * format conversion, and interpolation as separate macros which can be used diff --git a/pixman/pixman/pixman-arm-simd-asm-scaled.S b/pixman/pixman/pixman-arm-simd-asm-scaled.S index 711099548..e050292e0 100644 --- a/pixman/pixman/pixman-arm-simd-asm-scaled.S +++ b/pixman/pixman/pixman-arm-simd-asm-scaled.S @@ -37,16 +37,7 @@ .altmacro .p2align 2 -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm +#include "pixman-arm-asm.h" /* * Note: This code is only using armv5te instructions (not even armv6), diff --git a/pixman/pixman/pixman-arm-simd-asm.S b/pixman/pixman/pixman-arm-simd-asm.S index c20968879..dd6f78817 100644 --- a/pixman/pixman/pixman-arm-simd-asm.S +++ b/pixman/pixman/pixman-arm-simd-asm.S @@ -611,3 +611,81 @@ generate_composite_function \ /******************************************************************************/ +.macro over_reverse_n_8888_init + ldr SRC, [sp, #ARGS_STACK_OFFSET] + ldr MASK, =0x00800080 + /* Split source pixel into RB/AG parts */ + uxtb16 STRIDE_S, SRC + uxtb16 STRIDE_M, SRC, ror #8 + /* Set GE[3:0] to 0101 so SEL instructions do what 
we want */ + uadd8 SCRATCH, MASK, MASK + line_saved_regs STRIDE_D, ORIG_W +.endm + +.macro over_reverse_n_8888_newline + mov STRIDE_D, #0xFF +.endm + +.macro over_reverse_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + pixld , numbytes, firstreg, DST, 0 +.endm + +.macro over_reverse_n_8888_1pixel d, is_only + teq WK&d, #0 + beq 8f /* replace with source */ + bics ORIG_W, STRIDE_D, WK&d, lsr #24 + .if is_only == 1 + beq 49f /* skip store */ + .else + beq 9f /* write same value back */ + .endif + mla SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */ + mla ORIG_W, STRIDE_M, ORIG_W, MASK /* alpha/green */ + uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 + uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8 + mov SCRATCH, SCRATCH, ror #8 + sel ORIG_W, SCRATCH, ORIG_W + uqadd8 WK&d, WK&d, ORIG_W + b 9f +8: mov WK&d, SRC +9: +.endm + +.macro over_reverse_n_8888_tail numbytes, reg1, reg2, reg3, reg4 + .if numbytes == 4 + over_reverse_n_8888_1pixel reg1, 1 + .else + and SCRATCH, WK&reg1, WK&reg2 + .if numbytes == 16 + and SCRATCH, SCRATCH, WK&reg3 + and SCRATCH, SCRATCH, WK&reg4 + .endif + mvns SCRATCH, SCRATCH, asr #24 + beq 49f /* skip store if all opaque */ + over_reverse_n_8888_1pixel reg1, 0 + over_reverse_n_8888_1pixel reg2, 0 + .if numbytes == 16 + over_reverse_n_8888_1pixel reg3, 0 + over_reverse_n_8888_1pixel reg4, 0 + .endif + .endif + pixst , numbytes, reg1, DST +49: +.endm + +.macro over_reverse_n_8888_process_tail cond, numbytes, firstreg + over_reverse_n_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3) +.endm + +generate_composite_function \ + pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \ + 3, /* prefetch distance */ \ + over_reverse_n_8888_init, \ + over_reverse_n_8888_newline, \ + nop_macro, /* cleanup */ \ + over_reverse_n_8888_process_head, \ + 
over_reverse_n_8888_process_tail + +/******************************************************************************/ + diff --git a/pixman/pixman/pixman-arm-simd.c b/pixman/pixman/pixman-arm-simd.c index af062e19d..8fbc4397d 100644 --- a/pixman/pixman/pixman-arm-simd.c +++ b/pixman/pixman/pixman-arm-simd.c @@ -47,6 +47,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888, + uint32_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, uint32_t, 1, uint32_t, 1) @@ -225,6 +228,9 @@ static const pixman_fast_path_t arm_simd_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8), PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888), diff --git a/pixman/test/tolerance-test.c b/pixman/test/tolerance-test.c index 562563026..320bb7fe0 100644 --- a/pixman/test/tolerance-test.c +++ b/pixman/test/tolerance-test.c @@ -347,12 +347,12 @@ main (int argc, const char *argv[]) else { #ifdef USE_OPENMP -# pragma omp parallel for default(none) shared(i) private (result) +# pragma omp parallel for default(none) reduction(|:result) #endif for (i = 0; i < N_TESTS; ++i) { if (!do_check (i)) - result = 1; + result |= 1; } } diff --git a/pixman/test/utils-prng.c b/pixman/test/utils-prng.c index 7b32e3531..c27b5be83 100644 --- a/pixman/test/utils-prng.c +++ b/pixman/test/utils-prng.c @@ -27,7 +27,7 @@ #include "utils.h" #include "utils-prng.h" -#if defined(GCC_VECTOR_EXTENSIONS_SUPPORTED) 
&& defined(__SSE2__) +#if defined(HAVE_GCC_VECTOR_EXTENSIONS) && defined(__SSE2__) #include <xmmintrin.h> #endif @@ -52,7 +52,7 @@ void smallprng_srand_r (smallprng_t *x, uint32_t seed) */ void prng_srand_r (prng_t *x, uint32_t seed) { -#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED +#ifdef HAVE_GCC_VECTOR_EXTENSIONS int i; prng_rand_128_data_t dummy; smallprng_srand_r (&x->p0, seed); @@ -75,7 +75,7 @@ void prng_srand_r (prng_t *x, uint32_t seed) static force_inline void store_rand_128_data (void *addr, prng_rand_128_data_t *d, int aligned) { -#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED +#ifdef HAVE_GCC_VECTOR_EXTENSIONS if (aligned) { *(uint8x16 *)addr = d->vb; @@ -120,7 +120,7 @@ randmemset_internal (prng_t *prng, { prng_rand_128_r (&local_prng, &t); prng_rand_128_r (&local_prng, &randdata); -#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED +#ifdef HAVE_GCC_VECTOR_EXTENSIONS if (flags & RANDMEMSET_MORE_FF) { const uint8x16 const_C0 = @@ -199,7 +199,7 @@ randmemset_internal (prng_t *prng, } else { -#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED +#ifdef HAVE_GCC_VECTOR_EXTENSIONS const uint8x16 bswap_shufflemask = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 diff --git a/pixman/test/utils-prng.h b/pixman/test/utils-prng.h index 564ffcef1..f9ae8ddf7 100644 --- a/pixman/test/utils-prng.h +++ b/pixman/test/utils-prng.h @@ -79,8 +79,7 @@ /*****************************************************************************/ -#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) -#define GCC_VECTOR_EXTENSIONS_SUPPORTED +#ifdef HAVE_GCC_VECTOR_EXTENSIONS typedef uint32_t uint32x4 __attribute__ ((vector_size(16))); typedef uint8_t uint8x16 __attribute__ ((vector_size(16))); #endif @@ -92,7 +91,7 @@ typedef struct typedef struct { -#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED +#ifdef HAVE_GCC_VECTOR_EXTENSIONS uint32x4 a, b, c, d; #else smallprng_t p1, p2, p3, p4; @@ -104,7 +103,7 @@ typedef union { uint8_t b[16]; uint32_t w[4]; -#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED +#ifdef 
HAVE_GCC_VECTOR_EXTENSIONS uint8x16 vb; uint32x4 vw; #endif @@ -134,7 +133,7 @@ prng_rand_r (prng_t *x) static force_inline void prng_rand_128_r (prng_t *x, prng_rand_128_data_t *data) { -#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED +#ifdef HAVE_GCC_VECTOR_EXTENSIONS uint32x4 e = x->a - ((x->b << 27) + (x->b >> (32 - 27))); x->a = x->b ^ ((x->c << 17) ^ (x->c >> (32 - 17))); x->b = x->c + x->d; |