From 18a8f9f6839dfd899814a76a971e720c1e622236 Mon Sep 17 00:00:00 2001 From: marha Date: Wed, 28 Sep 2011 15:18:21 +0200 Subject: mesa pixman xkeyboard-config git update 28 sep 2011 --- pixman/configure.ac | 56 +- pixman/demos/Makefile.am | 72 +-- pixman/pixman/Makefile.am | 14 +- pixman/pixman/Makefile.win32 | 2 +- pixman/pixman/pixman-cpu.c | 1244 ++++++++++++++++++++-------------------- pixman/pixman/pixman-mmx.c | 213 +++++-- pixman/pixman/pixman-private.h | 2 +- pixman/test/Makefile.am | 8 +- 8 files changed, 889 insertions(+), 722 deletions(-) (limited to 'pixman') diff --git a/pixman/configure.ac b/pixman/configure.ac index dc523df0f..481d0bb00 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -303,7 +303,7 @@ CFLAGS=$xserver_save_CFLAGS AC_ARG_ENABLE(mmx, [AC_HELP_STRING([--disable-mmx], - [disable MMX fast paths])], + [disable x86 MMX fast paths])], [enable_mmx=$enableval], [enable_mmx=auto]) if test $enable_mmx = no ; then @@ -311,17 +311,17 @@ if test $enable_mmx = no ; then fi if test $have_mmx_intrinsics = yes ; then - AC_DEFINE(USE_MMX, 1, [use MMX compiler intrinsics]) + AC_DEFINE(USE_X86_MMX, 1, [use x86 MMX compiler intrinsics]) else MMX_CFLAGS= fi AC_MSG_RESULT($have_mmx_intrinsics) if test $enable_mmx = yes && test $have_mmx_intrinsics = no ; then - AC_MSG_ERROR([MMX intrinsics not detected]) + AC_MSG_ERROR([x86 MMX intrinsics not detected]) fi -AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes) +AM_CONDITIONAL(USE_X86_MMX, test $have_mmx_intrinsics = yes) dnl =========================================================================== dnl Check for SSE2 @@ -540,6 +540,54 @@ if test $enable_arm_neon = yes && test $have_arm_neon = no ; then AC_MSG_ERROR([ARM NEON intrinsics not detected]) fi +dnl =========================================================================== +dnl Check for IWMMXT + +if test "x$IWMMXT_CFLAGS" = "x" ; then + IWMMXT_CFLAGS="-march=iwmmxt -flax-vector-conversions -Winline" +fi + +have_iwmmxt_intrinsics=no +AC_MSG_CHECKING(whether to use ARM IWMMXT intrinsics) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$IWMMXT_CFLAGS $CFLAGS" +AC_COMPILE_IFELSE([ +#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 3 && __GNUC_MINOR__ < 6)) +#error "Need GCC >= 4.6 for IWMMXT intrinsics" +#endif +#include +int main () { + union { + __m64 v; + [char c[8];] + } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} }; + int b = 4; + __m64 c = _mm_srli_si64 (a.v, b); +}], have_iwmmxt_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(arm-iwmmxt, + [AC_HELP_STRING([--disable-arm-iwmmxt], + [disable ARM IWMMXT fast paths])], + [enable_iwmmxt=$enableval], [enable_iwmmxt=auto]) + +if test $enable_iwmmxt = no ; then + have_iwmmxt_intrinsics=disabled +fi + +if test $have_iwmmxt_intrinsics = yes ; then + AC_DEFINE(USE_ARM_IWMMXT, 1, [use ARM IWMMXT compiler intrinsics]) +else + IWMMXT_CFLAGS= +fi + +AC_MSG_RESULT($have_iwmmxt_intrinsics) +if test $enable_iwmmxt = yes && test $have_iwmmxt_intrinsics = no ; then + AC_MSG_ERROR([IWMMXT intrinsics not detected]) +fi + +AM_CONDITIONAL(USE_ARM_IWMMXT, test $have_iwmmxt_intrinsics = yes) + dnl ========================================================================================= dnl Check for GNU-style inline assembly support diff --git a/pixman/demos/Makefile.am b/pixman/demos/Makefile.am index 171f8f419..070c2d748 100644 --- a/pixman/demos/Makefile.am +++ b/pixman/demos/Makefile.am @@ -1,36 +1,36 @@ -if HAVE_GTK - -AM_CFLAGS = @OPENMP_CFLAGS@ -AM_LDFLAGS = @OPENMP_CFLAGS@ - -LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS) -INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) - -GTK_UTILS = gtk-utils.c gtk-utils.h - -DEMOS = \ - clip-test \ - clip-in \ - composite-test \ - gradient-test \ - radial-test \ - alpha-test \ - screen-test \ - convolution-test \ - trap-test \ - tri-test - -gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) -alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) -composite_test_SOURCES = composite-test.c $(GTK_UTILS) -clip_test_SOURCES = clip-test.c $(GTK_UTILS) -clip_in_SOURCES = clip-in.c $(GTK_UTILS) -trap_test_SOURCES = trap-test.c $(GTK_UTILS) -screen_test_SOURCES = screen-test.c $(GTK_UTILS) -convolution_test_SOURCES = convolution-test.c $(GTK_UTILS) -radial_test_SOURCES = radial-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS) -tri_test_SOURCES = tri-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS) - -noinst_PROGRAMS = $(DEMOS) - -endif +if HAVE_GTK + +AM_CFLAGS = $(OPENMP_CFLAGS) +AM_LDFLAGS = $(OPENMP_CFLAGS) + +LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS) $(PNG_LIBS) +INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS) + +GTK_UTILS = gtk-utils.c gtk-utils.h + +DEMOS = \ + clip-test \ + clip-in \ + composite-test \ + gradient-test \ + radial-test \ + alpha-test \ + screen-test \ + convolution-test \ + trap-test \ + tri-test + +gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) +alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) +composite_test_SOURCES = composite-test.c $(GTK_UTILS) +clip_test_SOURCES = clip-test.c $(GTK_UTILS) +clip_in_SOURCES = clip-in.c $(GTK_UTILS) +trap_test_SOURCES = trap-test.c $(GTK_UTILS) +screen_test_SOURCES = screen-test.c $(GTK_UTILS) +convolution_test_SOURCES = convolution-test.c $(GTK_UTILS) +radial_test_SOURCES = radial-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS) +tri_test_SOURCES = tri-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS) + +noinst_PROGRAMS = $(DEMOS) + +endif diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am index 2421a4f9e..286b7cf36 100644 --- a/pixman/pixman/Makefile.am +++ b/pixman/pixman/Makefile.am @@ -22,7 +22,7 @@ EXTRA_DIST = \ DISTCLEANFILES = $(BUILT_SOURCES) # mmx code -if USE_MMX +if USE_X86_MMX noinst_LTLIBRARIES += libpixman-mmx.la libpixman_mmx_la_SOURCES = \ pixman-mmx.c @@ -90,5 +90,17 @@ libpixman_1_la_LIBADD += libpixman-arm-neon.la ASM_CFLAGS_arm_neon= endif +# iwmmxt code +if USE_ARM_IWMMXT +noinst_LTLIBRARIES += libpixman-iwmmxt.la +libpixman_iwmmxt_la_SOURCES = pixman-mmx.c +libpixman_iwmmxt_la_CFLAGS = $(DEP_CFLAGS) $(IWMMXT_CFLAGS) +libpixman_iwmmxt_la_LIBADD = $(DEP_LIBS) +libpixman_1_la_LDFLAGS += $(IWMMXT_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-iwmmxt.la + +ASM_CFLAGS_IWMMXT=$(IWMMXT_CFLAGS) +endif + .c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES) $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $< diff --git a/pixman/pixman/Makefile.win32 b/pixman/pixman/Makefile.win32 index beff4a068..381f2cd9d 100644 --- a/pixman/pixman/Makefile.win32 +++ b/pixman/pixman/Makefile.win32 @@ -14,7 +14,7 @@ ifeq ($(SSE2_VAR),) SSE2_VAR=on endif -MMX_CFLAGS = -DUSE_MMX -w14710 -w14714 +MMX_CFLAGS = -DUSE_X86_MMX -w14710 -w14714 SSE2_CFLAGS = -DUSE_SSE2 # MMX compilation flags diff --git a/pixman/pixman/pixman-cpu.c b/pixman/pixman/pixman-cpu.c index 77e5b7f7a..dff27d1ea 100644 --- a/pixman/pixman/pixman-cpu.c +++ b/pixman/pixman/pixman-cpu.c @@ -1,613 +1,631 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include - -#if defined(USE_ARM_SIMD) && defined(_MSC_VER) -/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ -#include -#endif - -#include "pixman-private.h" - -#ifdef USE_VMX - -/* The CPU detection code needs to be in a file not compiled with - * "-maltivec -mabi=altivec", as gcc would try to save vector register - * across function calls causing SIGILL on cpus without Altivec/vmx. - */ -static pixman_bool_t initialized = FALSE; -static volatile pixman_bool_t have_vmx = TRUE; - -#ifdef __APPLE__ -#include - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - size_t length = sizeof(have_vmx); - int error = - sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); - - if (error) - have_vmx = FALSE; - - initialized = TRUE; - } - return have_vmx; -} - -#elif defined (__OpenBSD__) -#include -#include -#include - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; - size_t length = sizeof(have_vmx); - int error = - sysctl (mib, 2, &have_vmx, &length, NULL, 0); - - if (error != 0) - have_vmx = FALSE; - - initialized = TRUE; - } - return have_vmx; -} - -#elif defined (__linux__) -#include -#include -#include -#include -#include -#include -#include - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - char fname[64]; - unsigned long buf[64]; - ssize_t count = 0; - pid_t pid; - int fd, i; - - pid = getpid (); - snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid); - - fd = open (fname, O_RDONLY); - if (fd >= 0) - { - for (i = 0; i <= (count / sizeof(unsigned long)); i += 2) - { - /* Read more if buf is empty... */ - if (i == (count / sizeof(unsigned long))) - { - count = read (fd, buf, sizeof(buf)); - if (count <= 0) - break; - i = 0; - } - - if (buf[i] == AT_HWCAP) - { - have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC); - initialized = TRUE; - break; - } - else if (buf[i] == AT_NULL) - { - break; - } - } - close (fd); - } - } - if (!initialized) - { - /* Something went wrong. Assume 'no' rather than playing - fragile tricks with catching SIGILL. */ - have_vmx = FALSE; - initialized = TRUE; - } - - return have_vmx; -} - -#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */ -#include -#include - -static jmp_buf jump_env; - -static void -vmx_test (int sig, - siginfo_t *si, - void * unused) -{ - longjmp (jump_env, 1); -} - -static pixman_bool_t -pixman_have_vmx (void) -{ - struct sigaction sa, osa; - int jmp_result; - - if (!initialized) - { - sa.sa_flags = SA_SIGINFO; - sigemptyset (&sa.sa_mask); - sa.sa_sigaction = vmx_test; - sigaction (SIGILL, &sa, &osa); - jmp_result = setjmp (jump_env); - if (jmp_result == 0) - { - asm volatile ( "vor 0, 0, 0" ); - } - sigaction (SIGILL, &osa, NULL); - have_vmx = (jmp_result == 0); - initialized = TRUE; - } - return have_vmx; -} - -#endif /* __APPLE__ */ -#endif /* USE_VMX */ - -#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) - -#if defined(_MSC_VER) - -#if defined(USE_ARM_SIMD) -extern int pixman_msvc_try_arm_simd_op (); - -pixman_bool_t -pixman_have_arm_simd (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t have_arm_simd = FALSE; - - if (!initialized) - { - __try { - pixman_msvc_try_arm_simd_op (); - have_arm_simd = TRUE; - } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { - have_arm_simd = FALSE; - } - initialized = TRUE; - } - - return have_arm_simd; -} - -#endif /* USE_ARM_SIMD */ - -#if defined(USE_ARM_NEON) -extern int pixman_msvc_try_arm_neon_op (); - -pixman_bool_t -pixman_have_arm_neon (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t have_arm_neon = FALSE; - - if (!initialized) - { - __try - { - pixman_msvc_try_arm_neon_op (); - have_arm_neon = TRUE; - } - __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) - { - have_arm_neon = FALSE; - } - initialized = TRUE; - } - - return have_arm_neon; -} - -#endif /* USE_ARM_NEON */ - -#elif defined (__linux__) /* linux ELF */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static pixman_bool_t arm_has_v7 = FALSE; -static pixman_bool_t arm_has_v6 = FALSE; -static pixman_bool_t arm_has_vfp = FALSE; -static pixman_bool_t arm_has_neon = FALSE; -static pixman_bool_t arm_has_iwmmxt = FALSE; -static pixman_bool_t arm_tests_initialized = FALSE; - -static void -pixman_arm_read_auxv () -{ - int fd; - Elf32_auxv_t aux; - - fd = open ("/proc/self/auxv", O_RDONLY); - if (fd >= 0) - { - while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) - { - if (aux.a_type == AT_HWCAP) - { - uint32_t hwcap = aux.a_un.a_val; - /* hardcode these values to avoid depending on specific - * versions of the hwcap header, e.g. HWCAP_NEON - */ - arm_has_vfp = (hwcap & 64) != 0; - arm_has_iwmmxt = (hwcap & 512) != 0; - /* this flag is only present on kernel 2.6.29 */ - arm_has_neon = (hwcap & 4096) != 0; - } - else if (aux.a_type == AT_PLATFORM) - { - const char *plat = (const char*) aux.a_un.a_val; - if (strncmp (plat, "v7l", 3) == 0) - { - arm_has_v7 = TRUE; - arm_has_v6 = TRUE; - } - else if (strncmp (plat, "v6l", 3) == 0) - { - arm_has_v6 = TRUE; - } - } - } - close (fd); - } - - arm_tests_initialized = TRUE; -} - -#if defined(USE_ARM_SIMD) -pixman_bool_t -pixman_have_arm_simd (void) -{ - if (!arm_tests_initialized) - pixman_arm_read_auxv (); - - return arm_has_v6; -} - -#endif /* USE_ARM_SIMD */ - -#if defined(USE_ARM_NEON) -pixman_bool_t -pixman_have_arm_neon (void) -{ - if (!arm_tests_initialized) - pixman_arm_read_auxv (); - - return arm_has_neon; -} - -#endif /* USE_ARM_NEON */ - -#else /* linux ELF */ - -#define pixman_have_arm_simd() FALSE -#define pixman_have_arm_neon() FALSE - -#endif - -#endif /* USE_ARM_SIMD || USE_ARM_NEON */ - -#if defined(USE_MMX) || defined(USE_SSE2) -/* The CPU detection code needs to be in a file not compiled with - * "-mmmx -msse", as gcc would generate CMOV instructions otherwise - * that would lead to SIGILL instructions on old CPUs that don't have - * it. - */ -#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) - -#ifdef HAVE_GETISAX -#include -#endif - -typedef enum -{ - NO_FEATURES = 0, - MMX = 0x1, - MMX_EXTENSIONS = 0x2, - SSE = 0x6, - SSE2 = 0x8, - CMOV = 0x10 -} cpu_features_t; - - -static unsigned int -detect_cpu_features (void) -{ - unsigned int features = 0; - unsigned int result = 0; - -#ifdef HAVE_GETISAX - if (getisax (&result, 1)) - { - if (result & AV_386_CMOV) - features |= CMOV; - if (result & AV_386_MMX) - features |= MMX; - if (result & AV_386_AMD_MMX) - features |= MMX_EXTENSIONS; - if (result & AV_386_SSE) - features |= SSE; - if (result & AV_386_SSE2) - features |= SSE2; - } -#else - char vendor[13]; -#ifdef _MSC_VER - int vendor0 = 0, vendor1, vendor2; -#endif - vendor[0] = 0; - vendor[12] = 0; - -#ifdef __GNUC__ - /* see p. 118 of amd64 instruction set manual Vol3 */ - /* We need to be careful about the handling of %ebx and - * %esp here. We can't declare either one as clobbered - * since they are special registers (%ebx is the "PIC - * register" holding an offset to global data, %esp the - * stack pointer), so we need to make sure they have their - * original values when we access the output operands. - */ - __asm__ ( - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ecx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "mov $0x0, %%edx\n" - "xor %%ecx, %%eax\n" - "jz 1f\n" - - "mov $0x00000000, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "mov %%ebx, %%eax\n" - "pop %%ebx\n" - "mov %%eax, %1\n" - "mov %%edx, %2\n" - "mov %%ecx, %3\n" - "mov $0x00000001, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "pop %%ebx\n" - "1:\n" - "mov %%edx, %0\n" - : "=r" (result), - "=m" (vendor[0]), - "=m" (vendor[4]), - "=m" (vendor[8]) - : - : "%eax", "%ecx", "%edx" - ); - -#elif defined (_MSC_VER) - - _asm { - pushfd - pop eax - mov ecx, eax - xor eax, 00200000h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ecx - jz nocpuid - - mov eax, 0 - push ebx - cpuid - mov eax, ebx - pop ebx - mov vendor0, eax - mov vendor1, edx - mov vendor2, ecx - mov eax, 1 - push ebx - cpuid - pop ebx - nocpuid: - mov result, edx - } - memmove (vendor + 0, &vendor0, 4); - memmove (vendor + 4, &vendor1, 4); - memmove (vendor + 8, &vendor2, 4); - -#else -# error unsupported compiler -#endif - - features = 0; - if (result) - { - /* result now contains the standard feature bits */ - if (result & (1 << 15)) - features |= CMOV; - if (result & (1 << 23)) - features |= MMX; - if (result & (1 << 25)) - features |= SSE; - if (result & (1 << 26)) - features |= SSE2; - if ((features & MMX) && !(features & SSE) && - (strcmp (vendor, "AuthenticAMD") == 0 || - strcmp (vendor, "Geode by NSC") == 0)) - { - /* check for AMD MMX extensions */ -#ifdef __GNUC__ - __asm__ ( - " push %%ebx\n" - " mov $0x80000000, %%eax\n" - " cpuid\n" - " xor %%edx, %%edx\n" - " cmp $0x1, %%eax\n" - " jge 2f\n" - " mov $0x80000001, %%eax\n" - " cpuid\n" - "2:\n" - " pop %%ebx\n" - " mov %%edx, %0\n" - : "=r" (result) - : - : "%eax", "%ecx", "%edx" - ); -#elif defined _MSC_VER - _asm { - push ebx - mov eax, 80000000h - cpuid - xor edx, edx - cmp eax, 1 - jge notamd - mov eax, 80000001h - cpuid - notamd: - pop ebx - mov result, edx - } -#endif - if (result & (1 << 22)) - features |= MMX_EXTENSIONS; - } - } -#endif /* HAVE_GETISAX */ - - return features; -} - -static pixman_bool_t -pixman_have_mmx (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t mmx_present; - - if (!initialized) - { - unsigned int features = detect_cpu_features (); - mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS); - initialized = TRUE; - } - - return mmx_present; -} - -#ifdef USE_SSE2 -static pixman_bool_t -pixman_have_sse2 (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t sse2_present; - - if (!initialized) - { - unsigned int features = detect_cpu_features (); - sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2); - initialized = TRUE; - } - - return sse2_present; -} - -#endif - -#else /* __amd64__ */ -#ifdef USE_MMX -#define pixman_have_mmx() TRUE -#endif -#ifdef USE_SSE2 -#define pixman_have_sse2() TRUE -#endif -#endif /* __amd64__ */ -#endif - -pixman_implementation_t * -_pixman_choose_implementation (void) -{ - pixman_implementation_t *imp; - - imp = _pixman_implementation_create_general(); - imp = _pixman_implementation_create_fast_path (imp); - -#ifdef USE_MMX - if (pixman_have_mmx ()) - imp = _pixman_implementation_create_mmx (imp); -#endif - -#ifdef USE_SSE2 - if (pixman_have_sse2 ()) - imp = _pixman_implementation_create_sse2 (imp); -#endif - -#ifdef USE_ARM_SIMD - if (pixman_have_arm_simd ()) - imp = _pixman_implementation_create_arm_simd (imp); -#endif - -#ifdef USE_ARM_NEON - if (pixman_have_arm_neon ()) - imp = _pixman_implementation_create_arm_neon (imp); -#endif - -#ifdef USE_VMX - if (pixman_have_vmx ()) - imp = _pixman_implementation_create_vmx (imp); -#endif - - imp = _pixman_implementation_create_noop (imp); - - return imp; -} - +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#if defined(USE_ARM_SIMD) && defined(_MSC_VER) +/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ +#include +#endif + +#include "pixman-private.h" + +#ifdef USE_VMX + +/* The CPU detection code needs to be in a file not compiled with + * "-maltivec -mabi=altivec", as gcc would try to save vector register + * across function calls causing SIGILL on cpus without Altivec/vmx. + */ +static pixman_bool_t initialized = FALSE; +static volatile pixman_bool_t have_vmx = TRUE; + +#ifdef __APPLE__ +#include + +static pixman_bool_t +pixman_have_vmx (void) +{ + if (!initialized) + { + size_t length = sizeof(have_vmx); + int error = + sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); + + if (error) + have_vmx = FALSE; + + initialized = TRUE; + } + return have_vmx; +} + +#elif defined (__OpenBSD__) +#include +#include +#include + +static pixman_bool_t +pixman_have_vmx (void) +{ + if (!initialized) + { + int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; + size_t length = sizeof(have_vmx); + int error = + sysctl (mib, 2, &have_vmx, &length, NULL, 0); + + if (error != 0) + have_vmx = FALSE; + + initialized = TRUE; + } + return have_vmx; +} + +#elif defined (__linux__) +#include +#include +#include +#include +#include +#include +#include + +static pixman_bool_t +pixman_have_vmx (void) +{ + if (!initialized) + { + char fname[64]; + unsigned long buf[64]; + ssize_t count = 0; + pid_t pid; + int fd, i; + + pid = getpid (); + snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid); + + fd = open (fname, O_RDONLY); + if (fd >= 0) + { + for (i = 0; i <= (count / sizeof(unsigned long)); i += 2) + { + /* Read more if buf is empty... */ + if (i == (count / sizeof(unsigned long))) + { + count = read (fd, buf, sizeof(buf)); + if (count <= 0) + break; + i = 0; + } + + if (buf[i] == AT_HWCAP) + { + have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC); + initialized = TRUE; + break; + } + else if (buf[i] == AT_NULL) + { + break; + } + } + close (fd); + } + } + if (!initialized) + { + /* Something went wrong. Assume 'no' rather than playing + fragile tricks with catching SIGILL. */ + have_vmx = FALSE; + initialized = TRUE; + } + + return have_vmx; +} + +#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */ +#include +#include + +static jmp_buf jump_env; + +static void +vmx_test (int sig, + siginfo_t *si, + void * unused) +{ + longjmp (jump_env, 1); +} + +static pixman_bool_t +pixman_have_vmx (void) +{ + struct sigaction sa, osa; + int jmp_result; + + if (!initialized) + { + sa.sa_flags = SA_SIGINFO; + sigemptyset (&sa.sa_mask); + sa.sa_sigaction = vmx_test; + sigaction (SIGILL, &sa, &osa); + jmp_result = setjmp (jump_env); + if (jmp_result == 0) + { + asm volatile ( "vor 0, 0, 0" ); + } + sigaction (SIGILL, &osa, NULL); + have_vmx = (jmp_result == 0); + initialized = TRUE; + } + return have_vmx; +} + +#endif /* __APPLE__ */ +#endif /* USE_VMX */ + +#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT) + +#if defined(_MSC_VER) + +#if defined(USE_ARM_SIMD) +extern int pixman_msvc_try_arm_simd_op (); + +pixman_bool_t +pixman_have_arm_simd (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t have_arm_simd = FALSE; + + if (!initialized) + { + __try { + pixman_msvc_try_arm_simd_op (); + have_arm_simd = TRUE; + } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { + have_arm_simd = FALSE; + } + initialized = TRUE; + } + + return have_arm_simd; +} + +#endif /* USE_ARM_SIMD */ + +#if defined(USE_ARM_NEON) +extern int pixman_msvc_try_arm_neon_op (); + +pixman_bool_t +pixman_have_arm_neon (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t have_arm_neon = FALSE; + + if (!initialized) + { + __try + { + pixman_msvc_try_arm_neon_op (); + have_arm_neon = TRUE; + } + __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) + { + have_arm_neon = FALSE; + } + initialized = TRUE; + } + + return have_arm_neon; +} + +#endif /* USE_ARM_NEON */ + +#elif defined (__linux__) /* linux ELF */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static pixman_bool_t arm_has_v7 = FALSE; +static pixman_bool_t arm_has_v6 = FALSE; +static pixman_bool_t arm_has_vfp = FALSE; +static pixman_bool_t arm_has_neon = FALSE; +static pixman_bool_t arm_has_iwmmxt = FALSE; +static pixman_bool_t arm_tests_initialized = FALSE; + +static void +pixman_arm_read_auxv () +{ + int fd; + Elf32_auxv_t aux; + + fd = open ("/proc/self/auxv", O_RDONLY); + if (fd >= 0) + { + while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) + { + if (aux.a_type == AT_HWCAP) + { + uint32_t hwcap = aux.a_un.a_val; + /* hardcode these values to avoid depending on specific + * versions of the hwcap header, e.g. HWCAP_NEON + */ + arm_has_vfp = (hwcap & 64) != 0; + arm_has_iwmmxt = (hwcap & 512) != 0; + /* this flag is only present on kernel 2.6.29 */ + arm_has_neon = (hwcap & 4096) != 0; + } + else if (aux.a_type == AT_PLATFORM) + { + const char *plat = (const char*) aux.a_un.a_val; + if (strncmp (plat, "v7l", 3) == 0) + { + arm_has_v7 = TRUE; + arm_has_v6 = TRUE; + } + else if (strncmp (plat, "v6l", 3) == 0) + { + arm_has_v6 = TRUE; + } + } + } + close (fd); + } + + arm_tests_initialized = TRUE; +} + +#if defined(USE_ARM_SIMD) +pixman_bool_t +pixman_have_arm_simd (void) +{ + if (!arm_tests_initialized) + pixman_arm_read_auxv (); + + return arm_has_v6; +} + +#endif /* USE_ARM_SIMD */ + +#if defined(USE_ARM_NEON) +pixman_bool_t +pixman_have_arm_neon (void) +{ + if (!arm_tests_initialized) + pixman_arm_read_auxv (); + + return arm_has_neon; +} + +#endif /* USE_ARM_NEON */ + +#if defined(USE_ARM_IWMMXT) +pixman_bool_t +pixman_have_arm_iwmmxt (void) +{ + if (!arm_tests_initialized) + pixman_arm_read_auxv (); + + return arm_has_iwmmxt; +} + +#endif /* USE_ARM_IWMMXT */ + +#else /* linux ELF */ + +#define pixman_have_arm_simd() FALSE +#define pixman_have_arm_neon() FALSE +#define pixman_have_arm_iwmmxt() FALSE + +#endif + +#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */ + +#if defined(USE_X86_MMX) || defined(USE_SSE2) +/* The CPU detection code needs to be in a file not compiled with + * "-mmmx -msse", as gcc would generate CMOV instructions otherwise + * that would lead to SIGILL instructions on old CPUs that don't have + * it. + */ +#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) + +#ifdef HAVE_GETISAX +#include +#endif + +typedef enum +{ + NO_FEATURES = 0, + MMX = 0x1, + MMX_EXTENSIONS = 0x2, + SSE = 0x6, + SSE2 = 0x8, + CMOV = 0x10 +} cpu_features_t; + + +static unsigned int +detect_cpu_features (void) +{ + unsigned int features = 0; + unsigned int result = 0; + +#ifdef HAVE_GETISAX + if (getisax (&result, 1)) + { + if (result & AV_386_CMOV) + features |= CMOV; + if (result & AV_386_MMX) + features |= MMX; + if (result & AV_386_AMD_MMX) + features |= MMX_EXTENSIONS; + if (result & AV_386_SSE) + features |= SSE; + if (result & AV_386_SSE2) + features |= SSE2; + } +#else + char vendor[13]; +#ifdef _MSC_VER + int vendor0 = 0, vendor1, vendor2; +#endif + vendor[0] = 0; + vendor[12] = 0; + +#ifdef __GNUC__ + /* see p. 118 of amd64 instruction set manual Vol3 */ + /* We need to be careful about the handling of %ebx and + * %esp here. We can't declare either one as clobbered + * since they are special registers (%ebx is the "PIC + * register" holding an offset to global data, %esp the + * stack pointer), so we need to make sure they have their + * original values when we access the output operands. + */ + __asm__ ( + "pushf\n" + "pop %%eax\n" + "mov %%eax, %%ecx\n" + "xor $0x00200000, %%eax\n" + "push %%eax\n" + "popf\n" + "pushf\n" + "pop %%eax\n" + "mov $0x0, %%edx\n" + "xor %%ecx, %%eax\n" + "jz 1f\n" + + "mov $0x00000000, %%eax\n" + "push %%ebx\n" + "cpuid\n" + "mov %%ebx, %%eax\n" + "pop %%ebx\n" + "mov %%eax, %1\n" + "mov %%edx, %2\n" + "mov %%ecx, %3\n" + "mov $0x00000001, %%eax\n" + "push %%ebx\n" + "cpuid\n" + "pop %%ebx\n" + "1:\n" + "mov %%edx, %0\n" + : "=r" (result), + "=m" (vendor[0]), + "=m" (vendor[4]), + "=m" (vendor[8]) + : + : "%eax", "%ecx", "%edx" + ); + +#elif defined (_MSC_VER) + + _asm { + pushfd + pop eax + mov ecx, eax + xor eax, 00200000h + push eax + popfd + pushfd + pop eax + mov edx, 0 + xor eax, ecx + jz nocpuid + + mov eax, 0 + push ebx + cpuid + mov eax, ebx + pop ebx + mov vendor0, eax + mov vendor1, edx + mov vendor2, ecx + mov eax, 1 + push ebx + cpuid + pop ebx + nocpuid: + mov result, edx + } + memmove (vendor + 0, &vendor0, 4); + memmove (vendor + 4, &vendor1, 4); + memmove (vendor + 8, &vendor2, 4); + +#else +# error unsupported compiler +#endif + + features = 0; + if (result) + { + /* result now contains the standard feature bits */ + if (result & (1 << 15)) + features |= CMOV; + if (result & (1 << 23)) + features |= MMX; + if (result & (1 << 25)) + features |= SSE; + if (result & (1 << 26)) + features |= SSE2; + if ((features & MMX) && !(features & SSE) && + (strcmp (vendor, "AuthenticAMD") == 0 || + strcmp (vendor, "Geode by NSC") == 0)) + { + /* check for AMD MMX extensions */ +#ifdef __GNUC__ + __asm__ ( + " push %%ebx\n" + " mov $0x80000000, %%eax\n" + " cpuid\n" + " xor %%edx, %%edx\n" + " cmp $0x1, %%eax\n" + " jge 2f\n" + " mov $0x80000001, %%eax\n" + " cpuid\n" + "2:\n" + " pop %%ebx\n" + " mov %%edx, %0\n" + : "=r" (result) + : + : "%eax", "%ecx", "%edx" + ); +#elif defined _MSC_VER + _asm { + push ebx + mov eax, 80000000h + cpuid + xor edx, edx + cmp eax, 1 + jge notamd + mov eax, 80000001h + cpuid + notamd: + pop ebx + mov result, edx + } +#endif + if (result & (1 << 22)) + features |= MMX_EXTENSIONS; + } + } +#endif /* HAVE_GETISAX */ + + return features; +} + +static pixman_bool_t +pixman_have_mmx (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t mmx_present; + + if (!initialized) + { + unsigned int features = detect_cpu_features (); + mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS); + initialized = TRUE; + } + + return mmx_present; +} + +#ifdef USE_SSE2 +static pixman_bool_t +pixman_have_sse2 (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t sse2_present; + + if (!initialized) + { + unsigned int features = detect_cpu_features (); + sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2); + initialized = TRUE; + } + + return sse2_present; +} + +#endif + +#else /* __amd64__ */ +#ifdef USE_X86_MMX +#define pixman_have_mmx() TRUE +#endif +#ifdef USE_SSE2 +#define pixman_have_sse2() TRUE +#endif +#endif /* __amd64__ */ +#endif + +pixman_implementation_t * +_pixman_choose_implementation (void) +{ + pixman_implementation_t *imp; + + imp = _pixman_implementation_create_general(); + imp = _pixman_implementation_create_fast_path (imp); + +#ifdef USE_X86_MMX + if (pixman_have_mmx ()) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_SSE2 + if (pixman_have_sse2 ()) + imp = _pixman_implementation_create_sse2 (imp); +#endif + +#ifdef USE_ARM_SIMD + if (pixman_have_arm_simd ()) + imp = _pixman_implementation_create_arm_simd (imp); +#endif + +#ifdef USE_ARM_IWMMXT + if (pixman_have_arm_iwmmxt ()) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_ARM_NEON + if (pixman_have_arm_neon ()) + imp = _pixman_implementation_create_arm_neon (imp); +#endif + +#ifdef USE_VMX + if (pixman_have_vmx ()) + imp = _pixman_implementation_create_vmx (imp); +#endif + + imp = _pixman_implementation_create_noop (imp); + + return imp; +} + diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c index c044593d3..f848ab4d5 100644 --- a/pixman/pixman/pixman-mmx.c +++ b/pixman/pixman/pixman-mmx.c @@ -33,7 +33,7 @@ #include #endif -#ifdef USE_MMX +#if defined USE_X86_MMX || defined USE_ARM_IWMMXT #include #include "pixman-private.h" @@ -47,6 +47,15 @@ #define CHECKPOINT() #endif +#ifdef USE_ARM_IWMMXT +/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_empty (void) +{ + +} +#endif + /* Notes about writing mmx code * * give memory operands as the second operand. If you give it as the @@ -298,6 +307,39 @@ in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest) #endif +/* Elemental unaligned loads */ + +static __inline__ uint64_t ldq_u(uint64_t *p) +{ +#ifdef USE_X86_MMX + /* x86's alignment restrictions are very relaxed. */ + return *p; +#elif defined USE_ARM_IWMMXT + int align = (uintptr_t)p & 7; + __m64 *aligned_p; + if (align == 0) + return *p; + aligned_p = (__m64 *)((uintptr_t)p & ~7); + return _mm_align_si64 (aligned_p[0], aligned_p[1], align); +#else + struct __una_u64 { uint64_t x __attribute__((packed)); }; + const struct __una_u64 *ptr = (const struct __una_u64 *) p; + return ptr->x; +#endif +} + +static __inline__ uint32_t ldl_u(uint32_t *p) +{ +#ifdef USE_X86_MMX + /* x86's alignment restrictions are very relaxed. */ + return *p; +#else + struct __una_u32 { uint32_t x __attribute__((packed)); }; + const struct __una_u32 *ptr = (const struct __una_u32 *) p; + return ptr->x; +#endif +} + static force_inline __m64 load8888 (uint32_t v) { @@ -1366,7 +1408,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, while (w >= 2) { - __m64 vs = *(__m64 *)src; + __m64 vs = (__m64)ldq_u((uint64_t *)src); __m64 vd = *(__m64 *)dst; __m64 vsrc0 = expand8888 (vs, 0); __m64 vsrc1 = expand8888 (vs, 1); @@ -1447,14 +1489,14 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, __m64 vd6 = *(__m64 *)(dst + 12); __m64 vd7 = *(__m64 *)(dst + 14); - __m64 vs0 = *(__m64 *)(src + 0); - __m64 vs1 = *(__m64 *)(src + 2); - __m64 vs2 = *(__m64 *)(src + 4); - __m64 vs3 = *(__m64 *)(src + 6); - __m64 vs4 = *(__m64 *)(src + 8); - __m64 vs5 = *(__m64 *)(src + 10); - __m64 vs6 = *(__m64 *)(src + 12); - __m64 vs7 = *(__m64 *)(src + 14); + __m64 vs0 = (__m64)ldq_u((uint64_t *)(src + 0)); + __m64 vs1 = (__m64)ldq_u((uint64_t *)(src + 2)); + __m64 vs2 = (__m64)ldq_u((uint64_t *)(src + 4)); + __m64 vs3 = (__m64)ldq_u((uint64_t *)(src + 6)); + __m64 vs4 = (__m64)ldq_u((uint64_t *)(src + 8)); + __m64 vs5 = (__m64)ldq_u((uint64_t *)(src + 10)); + __m64 vs6 = (__m64)ldq_u((uint64_t *)(src + 12)); + __m64 vs7 = (__m64)ldq_u((uint64_t *)(src + 14)); vd0 = pack8888 ( in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), @@ -1784,7 +1826,7 @@ pixman_fill_mmx (uint32_t *bits, uint32_t byte_width; uint8_t *byte_line; -#ifdef __GNUC__ +#if defined __GNUC__ && defined USE_X86_MMX __m64 v1, v2, v3, v4, v5, v6, v7; #endif @@ -1818,7 +1860,7 @@ pixman_fill_mmx (uint32_t *bits, fill = ((uint64_t)xor << 32) | xor; vfill = to_m64 (fill); -#ifdef __GNUC__ +#if defined __GNUC__ && defined USE_X86_MMX __asm__ ( "movq %7, %0\n" "movq %7, %1\n" @@ -1864,7 +1906,7 @@ pixman_fill_mmx (uint32_t *bits, while (w >= 64) { -#ifdef __GNUC__ +#if defined __GNUC__ && defined USE_X86_MMX __asm__ ( "movq %1, (%0)\n" "movq %2, 8(%0)\n" @@ -2491,23 +2533,35 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - if ((((unsigned long)dest_image & 3) == 0) && - (((unsigned long)src_image & 3) == 0)) + while (w && (unsigned long)dst & 7) { - while (w >= 4) - { - __m64 vmask; - __m64 vdest; + uint16_t tmp; + uint8_t a; + uint32_t m, d; - vmask = load8888 (*(uint32_t *)mask); - vdest = load8888 (*(uint32_t *)dst); + a = *mask++; + d = *dst; - *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest)); + m = MUL_UN8 (sa, a, tmp); + d = MUL_UN8 (m, d, tmp); - dst += 4; - mask += 4; - w -= 4; - } + *dst++ = d; + w--; + } + + while (w >= 4) + { + __m64 vmask; + __m64 vdest; + + vmask = load8888 (ldl_u((uint32_t *)mask)); + vdest = load8888 (*(uint32_t *)dst); + + *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest)); + + dst += 4; + mask += 4; + w -= 4; } while (w--) @@ -2550,20 +2604,31 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp, src_line += src_stride; w = width; - if ((((unsigned long)dest_image & 3) == 0) && - (((unsigned long)src_image & 3) == 0)) + while (w && (unsigned long)dst & 3) { - while (w >= 4) - { - uint32_t *s = (uint32_t *)src; - uint32_t *d = (uint32_t *)dst; + uint8_t s, d; + uint16_t tmp; - *d = store8888 (in (load8888 (*s), load8888 (*d))); + s = *src; + d = *dst; - w -= 4; - dst += 4; - src += 4; - } + *dst = MUL_UN8 (s, d, tmp); + + src++; + dst++; + w--; + } + + while (w >= 4) + { + uint32_t *s = (uint32_t *)src; + uint32_t *d = (uint32_t *)dst; + + *d = store8888 (in (load8888 (ldl_u((uint32_t *)s)), load8888 (*d))); + + w -= 4; + dst += 4; + src += 4; } while (w--) @@ -2618,20 +2683,36 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - if ((((unsigned long)mask_image & 3) == 0) && - (((unsigned long)dest_image & 3) == 0)) + while (w && (unsigned long)dst & 3) { - while (w >= 4) - { - __m64 vmask = load8888 (*(uint32_t *)mask); - __m64 vdest = load8888 (*(uint32_t *)dst); + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; - *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest)); + a = *mask++; + d = *dst; - w -= 4; - dst += 4; - mask += 4; - } + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); + + *dst++ = r; + w--; + } + + while (w >= 4) + { + __m64 vmask; + __m64 vdest; + + vmask = load8888 (ldl_u((uint32_t *)mask)); + vdest = load8888 (*(uint32_t *)dst); + + *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest)); + + dst += 4; + mask += 4; + w -= 4; } while (w--) @@ -2694,7 +2775,7 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp, while (w >= 8) { - *(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst); + *(__m64*)dst = _mm_adds_pu8 ((__m64)ldq_u((uint64_t *)src), *(__m64*)dst); dst += 8; src += 8; w -= 8; @@ -2752,7 +2833,7 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp, while (w >= 2) { - dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst); + dst64 = _mm_adds_pu8 ((__m64)ldq_u((uint64_t *)src), *(__m64*)dst); *(uint64_t*)dst = to_uint64 (dst64); dst += 2; src += 2; @@ -2825,6 +2906,14 @@ pixman_blt_mmx (uint32_t *src_bits, dst_bytes += dst_stride; w = byte_width; + while (w >= 1 && ((unsigned long)d & 1)) + { + *(uint8_t *)d = *(uint8_t *)s; + w -= 1; + s += 1; + d += 1; + } + while (w >= 2 && ((unsigned long)d & 3)) { *(uint16_t *)d = *(uint16_t *)s; @@ -2835,7 +2924,7 @@ pixman_blt_mmx (uint32_t *src_bits, while (w >= 4 && ((unsigned long)d & 7)) { - *(uint32_t *)d = *(uint32_t *)s; + *(uint32_t *)d = ldl_u((uint32_t *)s); w -= 4; s += 4; @@ -2844,7 +2933,7 @@ pixman_blt_mmx (uint32_t *src_bits, while (w >= 64) { -#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) +#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX __asm__ ( "movq (%1), %%mm0\n" "movq 8(%1), %%mm1\n" @@ -2869,14 +2958,14 @@ pixman_blt_mmx (uint32_t *src_bits, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); #else - __m64 v0 = *(__m64 *)(s + 0); - __m64 v1 = *(__m64 *)(s + 8); - __m64 v2 = *(__m64 *)(s + 16); - __m64 v3 = *(__m64 *)(s + 24); - __m64 v4 = *(__m64 *)(s + 32); - __m64 v5 = *(__m64 *)(s + 40); - __m64 v6 = *(__m64 *)(s + 48); - __m64 v7 = *(__m64 *)(s + 56); + __m64 v0 = ldq_u((uint64_t *)(s + 0)); + __m64 v1 = ldq_u((uint64_t *)(s + 8)); + __m64 v2 = ldq_u((uint64_t *)(s + 16)); + __m64 v3 = ldq_u((uint64_t *)(s + 24)); + __m64 v4 = ldq_u((uint64_t *)(s + 32)); + __m64 v5 = ldq_u((uint64_t *)(s + 40)); + __m64 v6 = ldq_u((uint64_t *)(s + 48)); + __m64 v7 = ldq_u((uint64_t *)(s + 56)); *(__m64 *)(d + 0) = v0; *(__m64 *)(d + 8) = v1; *(__m64 *)(d + 16) = v2; @@ -2893,7 +2982,7 @@ pixman_blt_mmx (uint32_t *src_bits, } while (w >= 4) { - *(uint32_t *)d = *(uint32_t *)s; + *(uint32_t *)d = ldl_u((uint32_t *)s); w -= 4; s += 4; @@ -3145,4 +3234,4 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback) return imp; } -#endif /* USE_MMX */ +#endif /* USE_X86_MMX || USE_ARM_IWMMXT */ diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index 6e716c6e6..4d645fe4b 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -545,7 +545,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback); pixman_implementation_t * _pixman_implementation_create_noop (pixman_implementation_t *fallback); -#ifdef USE_MMX +#if defined USE_X86_MMX || defined USE_ARM_IWMMXT pixman_implementation_t * _pixman_implementation_create_mmx (pixman_implementation_t *fallback); #endif diff --git a/pixman/test/Makefile.am b/pixman/test/Makefile.am index 802d3f4fc..eeb3679f0 100644 --- a/pixman/test/Makefile.am +++ b/pixman/test/Makefile.am @@ -1,9 +1,9 @@ include $(top_srcdir)/test/Makefile.sources -AM_CFLAGS = @OPENMP_CFLAGS@ -AM_LDFLAGS = @OPENMP_CFLAGS@ @TESTPROGS_EXTRA_LDFLAGS@ -LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm @PNG_LIBS@ -INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman @PNG_CFLAGS@ +AM_CFLAGS = $(OPENMP_CFLAGS) +AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS) +LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS) +INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS) libutils_la_SOURCES = $(libutils_sources) $(libutils_headers) -- cgit v1.2.3