diff options
Diffstat (limited to 'pixman')
-rw-r--r-- | pixman/configure.ac | 56 | ||||
-rw-r--r-- | pixman/demos/Makefile.am | 72 | ||||
-rw-r--r-- | pixman/pixman/Makefile.am | 14 | ||||
-rw-r--r-- | pixman/pixman/Makefile.win32 | 2 | ||||
-rw-r--r-- | pixman/pixman/pixman-cpu.c | 1244 | ||||
-rw-r--r-- | pixman/pixman/pixman-mmx.c | 213 | ||||
-rw-r--r-- | pixman/pixman/pixman-private.h | 2 | ||||
-rw-r--r-- | pixman/test/Makefile.am | 8 |
8 files changed, 889 insertions, 722 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac index dc523df0f..481d0bb00 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -303,7 +303,7 @@ CFLAGS=$xserver_save_CFLAGS AC_ARG_ENABLE(mmx, [AC_HELP_STRING([--disable-mmx], - [disable MMX fast paths])], + [disable x86 MMX fast paths])], [enable_mmx=$enableval], [enable_mmx=auto]) if test $enable_mmx = no ; then @@ -311,17 +311,17 @@ if test $enable_mmx = no ; then fi if test $have_mmx_intrinsics = yes ; then - AC_DEFINE(USE_MMX, 1, [use MMX compiler intrinsics]) + AC_DEFINE(USE_X86_MMX, 1, [use x86 MMX compiler intrinsics]) else MMX_CFLAGS= fi AC_MSG_RESULT($have_mmx_intrinsics) if test $enable_mmx = yes && test $have_mmx_intrinsics = no ; then - AC_MSG_ERROR([MMX intrinsics not detected]) + AC_MSG_ERROR([x86 MMX intrinsics not detected]) fi -AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes) +AM_CONDITIONAL(USE_X86_MMX, test $have_mmx_intrinsics = yes) dnl =========================================================================== dnl Check for SSE2 @@ -540,6 +540,54 @@ if test $enable_arm_neon = yes && test $have_arm_neon = no ; then AC_MSG_ERROR([ARM NEON intrinsics not detected]) fi +dnl =========================================================================== +dnl Check for IWMMXT + +if test "x$IWMMXT_CFLAGS" = "x" ; then + IWMMXT_CFLAGS="-march=iwmmxt -flax-vector-conversions -Winline" +fi + +have_iwmmxt_intrinsics=no +AC_MSG_CHECKING(whether to use ARM IWMMXT intrinsics) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$IWMMXT_CFLAGS $CFLAGS" +AC_COMPILE_IFELSE([ +#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 3 && __GNUC_MINOR__ < 6)) +#error "Need GCC >= 4.6 for IWMMXT intrinsics" +#endif +#include <mmintrin.h> +int main () { + union { + __m64 v; + [char c[8];] + } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} }; + int b = 4; + __m64 c = _mm_srli_si64 (a.v, b); +}], have_iwmmxt_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(arm-iwmmxt, + [AC_HELP_STRING([--disable-arm-iwmmxt], + [disable ARM IWMMXT fast paths])], + [enable_iwmmxt=$enableval], [enable_iwmmxt=auto]) + +if test $enable_iwmmxt = no ; then + have_iwmmxt_intrinsics=disabled +fi + +if test $have_iwmmxt_intrinsics = yes ; then + AC_DEFINE(USE_ARM_IWMMXT, 1, [use ARM IWMMXT compiler intrinsics]) +else + IWMMXT_CFLAGS= +fi + +AC_MSG_RESULT($have_iwmmxt_intrinsics) +if test $enable_iwmmxt = yes && test $have_iwmmxt_intrinsics = no ; then + AC_MSG_ERROR([IWMMXT intrinsics not detected]) +fi + +AM_CONDITIONAL(USE_ARM_IWMMXT, test $have_iwmmxt_intrinsics = yes) + dnl ========================================================================================= dnl Check for GNU-style inline assembly support diff --git a/pixman/demos/Makefile.am b/pixman/demos/Makefile.am index 171f8f419..070c2d748 100644 --- a/pixman/demos/Makefile.am +++ b/pixman/demos/Makefile.am @@ -1,36 +1,36 @@ -if HAVE_GTK
-
-AM_CFLAGS = @OPENMP_CFLAGS@
-AM_LDFLAGS = @OPENMP_CFLAGS@
-
-LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS)
-INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS)
-
-GTK_UTILS = gtk-utils.c gtk-utils.h
-
-DEMOS = \
- clip-test \
- clip-in \
- composite-test \
- gradient-test \
- radial-test \
- alpha-test \
- screen-test \
- convolution-test \
- trap-test \
- tri-test
-
-gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
-alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
-composite_test_SOURCES = composite-test.c $(GTK_UTILS)
-clip_test_SOURCES = clip-test.c $(GTK_UTILS)
-clip_in_SOURCES = clip-in.c $(GTK_UTILS)
-trap_test_SOURCES = trap-test.c $(GTK_UTILS)
-screen_test_SOURCES = screen-test.c $(GTK_UTILS)
-convolution_test_SOURCES = convolution-test.c $(GTK_UTILS)
-radial_test_SOURCES = radial-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS)
-tri_test_SOURCES = tri-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS)
-
-noinst_PROGRAMS = $(DEMOS)
-
-endif
+if HAVE_GTK + +AM_CFLAGS = $(OPENMP_CFLAGS) +AM_LDFLAGS = $(OPENMP_CFLAGS) + +LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS) $(PNG_LIBS) +INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS) + +GTK_UTILS = gtk-utils.c gtk-utils.h + +DEMOS = \ + clip-test \ + clip-in \ + composite-test \ + gradient-test \ + radial-test \ + alpha-test \ + screen-test \ + convolution-test \ + trap-test \ + tri-test + +gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) +alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) +composite_test_SOURCES = composite-test.c $(GTK_UTILS) +clip_test_SOURCES = clip-test.c $(GTK_UTILS) +clip_in_SOURCES = clip-in.c $(GTK_UTILS) +trap_test_SOURCES = trap-test.c $(GTK_UTILS) +screen_test_SOURCES = screen-test.c $(GTK_UTILS) +convolution_test_SOURCES = convolution-test.c $(GTK_UTILS) +radial_test_SOURCES = radial-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS) +tri_test_SOURCES = tri-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS) + +noinst_PROGRAMS = $(DEMOS) + +endif diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am index 2421a4f9e..286b7cf36 100644 --- a/pixman/pixman/Makefile.am +++ b/pixman/pixman/Makefile.am @@ -22,7 +22,7 @@ EXTRA_DIST = \ DISTCLEANFILES = $(BUILT_SOURCES) # mmx code -if USE_MMX +if USE_X86_MMX noinst_LTLIBRARIES += libpixman-mmx.la libpixman_mmx_la_SOURCES = \ pixman-mmx.c @@ -90,5 +90,17 @@ libpixman_1_la_LIBADD += libpixman-arm-neon.la ASM_CFLAGS_arm_neon= endif +# iwmmxt code +if USE_ARM_IWMMXT +noinst_LTLIBRARIES += libpixman-iwmmxt.la +libpixman_iwmmxt_la_SOURCES = pixman-mmx.c +libpixman_iwmmxt_la_CFLAGS = $(DEP_CFLAGS) $(IWMMXT_CFLAGS) +libpixman_iwmmxt_la_LIBADD = $(DEP_LIBS) +libpixman_1_la_LDFLAGS += $(IWMMXT_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-iwmmxt.la + +ASM_CFLAGS_IWMMXT=$(IWMMXT_CFLAGS) +endif + .c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES) $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $< diff --git a/pixman/pixman/Makefile.win32 b/pixman/pixman/Makefile.win32 index beff4a068..381f2cd9d 100644 --- a/pixman/pixman/Makefile.win32 +++ b/pixman/pixman/Makefile.win32 @@ -14,7 +14,7 @@ ifeq ($(SSE2_VAR),) SSE2_VAR=on endif -MMX_CFLAGS = -DUSE_MMX -w14710 -w14714 +MMX_CFLAGS = -DUSE_X86_MMX -w14710 -w14714 SSE2_CFLAGS = -DUSE_SSE2 # MMX compilation flags diff --git a/pixman/pixman/pixman-cpu.c b/pixman/pixman/pixman-cpu.c index 77e5b7f7a..dff27d1ea 100644 --- a/pixman/pixman/pixman-cpu.c +++ b/pixman/pixman/pixman-cpu.c @@ -1,613 +1,631 @@ -/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <string.h>
-
-#if defined(USE_ARM_SIMD) && defined(_MSC_VER)
-/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
-#include <windows.h>
-#endif
-
-#include "pixman-private.h"
-
-#ifdef USE_VMX
-
-/* The CPU detection code needs to be in a file not compiled with
- * "-maltivec -mabi=altivec", as gcc would try to save vector register
- * across function calls causing SIGILL on cpus without Altivec/vmx.
- */
-static pixman_bool_t initialized = FALSE;
-static volatile pixman_bool_t have_vmx = TRUE;
-
-#ifdef __APPLE__
-#include <sys/sysctl.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- if (!initialized)
- {
- size_t length = sizeof(have_vmx);
- int error =
- sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0);
-
- if (error)
- have_vmx = FALSE;
-
- initialized = TRUE;
- }
- return have_vmx;
-}
-
-#elif defined (__OpenBSD__)
-#include <sys/param.h>
-#include <sys/sysctl.h>
-#include <machine/cpu.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- if (!initialized)
- {
- int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
- size_t length = sizeof(have_vmx);
- int error =
- sysctl (mib, 2, &have_vmx, &length, NULL, 0);
-
- if (error != 0)
- have_vmx = FALSE;
-
- initialized = TRUE;
- }
- return have_vmx;
-}
-
-#elif defined (__linux__)
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <linux/auxvec.h>
-#include <asm/cputable.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- if (!initialized)
- {
- char fname[64];
- unsigned long buf[64];
- ssize_t count = 0;
- pid_t pid;
- int fd, i;
-
- pid = getpid ();
- snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid);
-
- fd = open (fname, O_RDONLY);
- if (fd >= 0)
- {
- for (i = 0; i <= (count / sizeof(unsigned long)); i += 2)
- {
- /* Read more if buf is empty... */
- if (i == (count / sizeof(unsigned long)))
- {
- count = read (fd, buf, sizeof(buf));
- if (count <= 0)
- break;
- i = 0;
- }
-
- if (buf[i] == AT_HWCAP)
- {
- have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC);
- initialized = TRUE;
- break;
- }
- else if (buf[i] == AT_NULL)
- {
- break;
- }
- }
- close (fd);
- }
- }
- if (!initialized)
- {
- /* Something went wrong. Assume 'no' rather than playing
- fragile tricks with catching SIGILL. */
- have_vmx = FALSE;
- initialized = TRUE;
- }
-
- return have_vmx;
-}
-
-#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */
-#include <signal.h>
-#include <setjmp.h>
-
-static jmp_buf jump_env;
-
-static void
-vmx_test (int sig,
- siginfo_t *si,
- void * unused)
-{
- longjmp (jump_env, 1);
-}
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- struct sigaction sa, osa;
- int jmp_result;
-
- if (!initialized)
- {
- sa.sa_flags = SA_SIGINFO;
- sigemptyset (&sa.sa_mask);
- sa.sa_sigaction = vmx_test;
- sigaction (SIGILL, &sa, &osa);
- jmp_result = setjmp (jump_env);
- if (jmp_result == 0)
- {
- asm volatile ( "vor 0, 0, 0" );
- }
- sigaction (SIGILL, &osa, NULL);
- have_vmx = (jmp_result == 0);
- initialized = TRUE;
- }
- return have_vmx;
-}
-
-#endif /* __APPLE__ */
-#endif /* USE_VMX */
-
-#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON)
-
-#if defined(_MSC_VER)
-
-#if defined(USE_ARM_SIMD)
-extern int pixman_msvc_try_arm_simd_op ();
-
-pixman_bool_t
-pixman_have_arm_simd (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t have_arm_simd = FALSE;
-
- if (!initialized)
- {
- __try {
- pixman_msvc_try_arm_simd_op ();
- have_arm_simd = TRUE;
- } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) {
- have_arm_simd = FALSE;
- }
- initialized = TRUE;
- }
-
- return have_arm_simd;
-}
-
-#endif /* USE_ARM_SIMD */
-
-#if defined(USE_ARM_NEON)
-extern int pixman_msvc_try_arm_neon_op ();
-
-pixman_bool_t
-pixman_have_arm_neon (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t have_arm_neon = FALSE;
-
- if (!initialized)
- {
- __try
- {
- pixman_msvc_try_arm_neon_op ();
- have_arm_neon = TRUE;
- }
- __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
- {
- have_arm_neon = FALSE;
- }
- initialized = TRUE;
- }
-
- return have_arm_neon;
-}
-
-#endif /* USE_ARM_NEON */
-
-#elif defined (__linux__) /* linux ELF */
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-#include <string.h>
-#include <elf.h>
-
-static pixman_bool_t arm_has_v7 = FALSE;
-static pixman_bool_t arm_has_v6 = FALSE;
-static pixman_bool_t arm_has_vfp = FALSE;
-static pixman_bool_t arm_has_neon = FALSE;
-static pixman_bool_t arm_has_iwmmxt = FALSE;
-static pixman_bool_t arm_tests_initialized = FALSE;
-
-static void
-pixman_arm_read_auxv ()
-{
- int fd;
- Elf32_auxv_t aux;
-
- fd = open ("/proc/self/auxv", O_RDONLY);
- if (fd >= 0)
- {
- while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t))
- {
- if (aux.a_type == AT_HWCAP)
- {
- uint32_t hwcap = aux.a_un.a_val;
- /* hardcode these values to avoid depending on specific
- * versions of the hwcap header, e.g. HWCAP_NEON
- */
- arm_has_vfp = (hwcap & 64) != 0;
- arm_has_iwmmxt = (hwcap & 512) != 0;
- /* this flag is only present on kernel 2.6.29 */
- arm_has_neon = (hwcap & 4096) != 0;
- }
- else if (aux.a_type == AT_PLATFORM)
- {
- const char *plat = (const char*) aux.a_un.a_val;
- if (strncmp (plat, "v7l", 3) == 0)
- {
- arm_has_v7 = TRUE;
- arm_has_v6 = TRUE;
- }
- else if (strncmp (plat, "v6l", 3) == 0)
- {
- arm_has_v6 = TRUE;
- }
- }
- }
- close (fd);
- }
-
- arm_tests_initialized = TRUE;
-}
-
-#if defined(USE_ARM_SIMD)
-pixman_bool_t
-pixman_have_arm_simd (void)
-{
- if (!arm_tests_initialized)
- pixman_arm_read_auxv ();
-
- return arm_has_v6;
-}
-
-#endif /* USE_ARM_SIMD */
-
-#if defined(USE_ARM_NEON)
-pixman_bool_t
-pixman_have_arm_neon (void)
-{
- if (!arm_tests_initialized)
- pixman_arm_read_auxv ();
-
- return arm_has_neon;
-}
-
-#endif /* USE_ARM_NEON */
-
-#else /* linux ELF */
-
-#define pixman_have_arm_simd() FALSE
-#define pixman_have_arm_neon() FALSE
-
-#endif
-
-#endif /* USE_ARM_SIMD || USE_ARM_NEON */
-
-#if defined(USE_MMX) || defined(USE_SSE2)
-/* The CPU detection code needs to be in a file not compiled with
- * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
- * that would lead to SIGILL instructions on old CPUs that don't have
- * it.
- */
-#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
-
-#ifdef HAVE_GETISAX
-#include <sys/auxv.h>
-#endif
-
-typedef enum
-{
- NO_FEATURES = 0,
- MMX = 0x1,
- MMX_EXTENSIONS = 0x2,
- SSE = 0x6,
- SSE2 = 0x8,
- CMOV = 0x10
-} cpu_features_t;
-
-
-static unsigned int
-detect_cpu_features (void)
-{
- unsigned int features = 0;
- unsigned int result = 0;
-
-#ifdef HAVE_GETISAX
- if (getisax (&result, 1))
- {
- if (result & AV_386_CMOV)
- features |= CMOV;
- if (result & AV_386_MMX)
- features |= MMX;
- if (result & AV_386_AMD_MMX)
- features |= MMX_EXTENSIONS;
- if (result & AV_386_SSE)
- features |= SSE;
- if (result & AV_386_SSE2)
- features |= SSE2;
- }
-#else
- char vendor[13];
-#ifdef _MSC_VER
- int vendor0 = 0, vendor1, vendor2;
-#endif
- vendor[0] = 0;
- vendor[12] = 0;
-
-#ifdef __GNUC__
- /* see p. 118 of amd64 instruction set manual Vol3 */
- /* We need to be careful about the handling of %ebx and
- * %esp here. We can't declare either one as clobbered
- * since they are special registers (%ebx is the "PIC
- * register" holding an offset to global data, %esp the
- * stack pointer), so we need to make sure they have their
- * original values when we access the output operands.
- */
- __asm__ (
- "pushf\n"
- "pop %%eax\n"
- "mov %%eax, %%ecx\n"
- "xor $0x00200000, %%eax\n"
- "push %%eax\n"
- "popf\n"
- "pushf\n"
- "pop %%eax\n"
- "mov $0x0, %%edx\n"
- "xor %%ecx, %%eax\n"
- "jz 1f\n"
-
- "mov $0x00000000, %%eax\n"
- "push %%ebx\n"
- "cpuid\n"
- "mov %%ebx, %%eax\n"
- "pop %%ebx\n"
- "mov %%eax, %1\n"
- "mov %%edx, %2\n"
- "mov %%ecx, %3\n"
- "mov $0x00000001, %%eax\n"
- "push %%ebx\n"
- "cpuid\n"
- "pop %%ebx\n"
- "1:\n"
- "mov %%edx, %0\n"
- : "=r" (result),
- "=m" (vendor[0]),
- "=m" (vendor[4]),
- "=m" (vendor[8])
- :
- : "%eax", "%ecx", "%edx"
- );
-
-#elif defined (_MSC_VER)
-
- _asm {
- pushfd
- pop eax
- mov ecx, eax
- xor eax, 00200000h
- push eax
- popfd
- pushfd
- pop eax
- mov edx, 0
- xor eax, ecx
- jz nocpuid
-
- mov eax, 0
- push ebx
- cpuid
- mov eax, ebx
- pop ebx
- mov vendor0, eax
- mov vendor1, edx
- mov vendor2, ecx
- mov eax, 1
- push ebx
- cpuid
- pop ebx
- nocpuid:
- mov result, edx
- }
- memmove (vendor + 0, &vendor0, 4);
- memmove (vendor + 4, &vendor1, 4);
- memmove (vendor + 8, &vendor2, 4);
-
-#else
-# error unsupported compiler
-#endif
-
- features = 0;
- if (result)
- {
- /* result now contains the standard feature bits */
- if (result & (1 << 15))
- features |= CMOV;
- if (result & (1 << 23))
- features |= MMX;
- if (result & (1 << 25))
- features |= SSE;
- if (result & (1 << 26))
- features |= SSE2;
- if ((features & MMX) && !(features & SSE) &&
- (strcmp (vendor, "AuthenticAMD") == 0 ||
- strcmp (vendor, "Geode by NSC") == 0))
- {
- /* check for AMD MMX extensions */
-#ifdef __GNUC__
- __asm__ (
- " push %%ebx\n"
- " mov $0x80000000, %%eax\n"
- " cpuid\n"
- " xor %%edx, %%edx\n"
- " cmp $0x1, %%eax\n"
- " jge 2f\n"
- " mov $0x80000001, %%eax\n"
- " cpuid\n"
- "2:\n"
- " pop %%ebx\n"
- " mov %%edx, %0\n"
- : "=r" (result)
- :
- : "%eax", "%ecx", "%edx"
- );
-#elif defined _MSC_VER
- _asm {
- push ebx
- mov eax, 80000000h
- cpuid
- xor edx, edx
- cmp eax, 1
- jge notamd
- mov eax, 80000001h
- cpuid
- notamd:
- pop ebx
- mov result, edx
- }
-#endif
- if (result & (1 << 22))
- features |= MMX_EXTENSIONS;
- }
- }
-#endif /* HAVE_GETISAX */
-
- return features;
-}
-
-static pixman_bool_t
-pixman_have_mmx (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t mmx_present;
-
- if (!initialized)
- {
- unsigned int features = detect_cpu_features ();
- mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS);
- initialized = TRUE;
- }
-
- return mmx_present;
-}
-
-#ifdef USE_SSE2
-static pixman_bool_t
-pixman_have_sse2 (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t sse2_present;
-
- if (!initialized)
- {
- unsigned int features = detect_cpu_features ();
- sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2);
- initialized = TRUE;
- }
-
- return sse2_present;
-}
-
-#endif
-
-#else /* __amd64__ */
-#ifdef USE_MMX
-#define pixman_have_mmx() TRUE
-#endif
-#ifdef USE_SSE2
-#define pixman_have_sse2() TRUE
-#endif
-#endif /* __amd64__ */
-#endif
-
-pixman_implementation_t *
-_pixman_choose_implementation (void)
-{
- pixman_implementation_t *imp;
-
- imp = _pixman_implementation_create_general();
- imp = _pixman_implementation_create_fast_path (imp);
-
-#ifdef USE_MMX
- if (pixman_have_mmx ())
- imp = _pixman_implementation_create_mmx (imp);
-#endif
-
-#ifdef USE_SSE2
- if (pixman_have_sse2 ())
- imp = _pixman_implementation_create_sse2 (imp);
-#endif
-
-#ifdef USE_ARM_SIMD
- if (pixman_have_arm_simd ())
- imp = _pixman_implementation_create_arm_simd (imp);
-#endif
-
-#ifdef USE_ARM_NEON
- if (pixman_have_arm_neon ())
- imp = _pixman_implementation_create_arm_neon (imp);
-#endif
-
-#ifdef USE_VMX
- if (pixman_have_vmx ())
- imp = _pixman_implementation_create_vmx (imp);
-#endif
-
- imp = _pixman_implementation_create_noop (imp);
-
- return imp;
-}
-
+/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <string.h> + +#if defined(USE_ARM_SIMD) && defined(_MSC_VER) +/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ +#include <windows.h> +#endif + +#include "pixman-private.h" + +#ifdef USE_VMX + +/* The CPU detection code needs to be in a file not compiled with + * "-maltivec -mabi=altivec", as gcc would try to save vector register + * across function calls causing SIGILL on cpus without Altivec/vmx. + */ +static pixman_bool_t initialized = FALSE; +static volatile pixman_bool_t have_vmx = TRUE; + +#ifdef __APPLE__ +#include <sys/sysctl.h> + +static pixman_bool_t +pixman_have_vmx (void) +{ + if (!initialized) + { + size_t length = sizeof(have_vmx); + int error = + sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); + + if (error) + have_vmx = FALSE; + + initialized = TRUE; + } + return have_vmx; +} + +#elif defined (__OpenBSD__) +#include <sys/param.h> +#include <sys/sysctl.h> +#include <machine/cpu.h> + +static pixman_bool_t +pixman_have_vmx (void) +{ + if (!initialized) + { + int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; + size_t length = sizeof(have_vmx); + int error = + sysctl (mib, 2, &have_vmx, &length, NULL, 0); + + if (error != 0) + have_vmx = FALSE; + + initialized = TRUE; + } + return have_vmx; +} + +#elif defined (__linux__) +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <linux/auxvec.h> +#include <asm/cputable.h> + +static pixman_bool_t +pixman_have_vmx (void) +{ + if (!initialized) + { + char fname[64]; + unsigned long buf[64]; + ssize_t count = 0; + pid_t pid; + int fd, i; + + pid = getpid (); + snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid); + + fd = open (fname, O_RDONLY); + if (fd >= 0) + { + for (i = 0; i <= (count / sizeof(unsigned long)); i += 2) + { + /* Read more if buf is empty... */ + if (i == (count / sizeof(unsigned long))) + { + count = read (fd, buf, sizeof(buf)); + if (count <= 0) + break; + i = 0; + } + + if (buf[i] == AT_HWCAP) + { + have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC); + initialized = TRUE; + break; + } + else if (buf[i] == AT_NULL) + { + break; + } + } + close (fd); + } + } + if (!initialized) + { + /* Something went wrong. Assume 'no' rather than playing + fragile tricks with catching SIGILL. */ + have_vmx = FALSE; + initialized = TRUE; + } + + return have_vmx; +} + +#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */ +#include <signal.h> +#include <setjmp.h> + +static jmp_buf jump_env; + +static void +vmx_test (int sig, + siginfo_t *si, + void * unused) +{ + longjmp (jump_env, 1); +} + +static pixman_bool_t +pixman_have_vmx (void) +{ + struct sigaction sa, osa; + int jmp_result; + + if (!initialized) + { + sa.sa_flags = SA_SIGINFO; + sigemptyset (&sa.sa_mask); + sa.sa_sigaction = vmx_test; + sigaction (SIGILL, &sa, &osa); + jmp_result = setjmp (jump_env); + if (jmp_result == 0) + { + asm volatile ( "vor 0, 0, 0" ); + } + sigaction (SIGILL, &osa, NULL); + have_vmx = (jmp_result == 0); + initialized = TRUE; + } + return have_vmx; +} + +#endif /* __APPLE__ */ +#endif /* USE_VMX */ + +#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT) + +#if defined(_MSC_VER) + +#if defined(USE_ARM_SIMD) +extern int pixman_msvc_try_arm_simd_op (); + +pixman_bool_t +pixman_have_arm_simd (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t have_arm_simd = FALSE; + + if (!initialized) + { + __try { + pixman_msvc_try_arm_simd_op (); + have_arm_simd = TRUE; + } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { + have_arm_simd = FALSE; + } + initialized = TRUE; + } + + return have_arm_simd; +} + +#endif /* USE_ARM_SIMD */ + +#if defined(USE_ARM_NEON) +extern int pixman_msvc_try_arm_neon_op (); + +pixman_bool_t +pixman_have_arm_neon (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t have_arm_neon = FALSE; + + if (!initialized) + { + __try + { + pixman_msvc_try_arm_neon_op (); + have_arm_neon = TRUE; + } + __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) + { + have_arm_neon = FALSE; + } + initialized = TRUE; + } + + return have_arm_neon; +} + +#endif /* USE_ARM_NEON */ + +#elif defined (__linux__) /* linux ELF */ + +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <string.h> +#include <elf.h> + +static pixman_bool_t arm_has_v7 = FALSE; +static pixman_bool_t arm_has_v6 = FALSE; +static pixman_bool_t arm_has_vfp = FALSE; +static pixman_bool_t arm_has_neon = FALSE; +static pixman_bool_t arm_has_iwmmxt = FALSE; +static pixman_bool_t arm_tests_initialized = FALSE; + +static void +pixman_arm_read_auxv () +{ + int fd; + Elf32_auxv_t aux; + + fd = open ("/proc/self/auxv", O_RDONLY); + if (fd >= 0) + { + while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) + { + if (aux.a_type == AT_HWCAP) + { + uint32_t hwcap = aux.a_un.a_val; + /* hardcode these values to avoid depending on specific + * versions of the hwcap header, e.g. HWCAP_NEON + */ + arm_has_vfp = (hwcap & 64) != 0; + arm_has_iwmmxt = (hwcap & 512) != 0; + /* this flag is only present on kernel 2.6.29 */ + arm_has_neon = (hwcap & 4096) != 0; + } + else if (aux.a_type == AT_PLATFORM) + { + const char *plat = (const char*) aux.a_un.a_val; + if (strncmp (plat, "v7l", 3) == 0) + { + arm_has_v7 = TRUE; + arm_has_v6 = TRUE; + } + else if (strncmp (plat, "v6l", 3) == 0) + { + arm_has_v6 = TRUE; + } + } + } + close (fd); + } + + arm_tests_initialized = TRUE; +} + +#if defined(USE_ARM_SIMD) +pixman_bool_t +pixman_have_arm_simd (void) +{ + if (!arm_tests_initialized) + pixman_arm_read_auxv (); + + return arm_has_v6; +} + +#endif /* USE_ARM_SIMD */ + +#if defined(USE_ARM_NEON) +pixman_bool_t +pixman_have_arm_neon (void) +{ + if (!arm_tests_initialized) + pixman_arm_read_auxv (); + + return arm_has_neon; +} + +#endif /* USE_ARM_NEON */ + +#if defined(USE_ARM_IWMMXT) +pixman_bool_t +pixman_have_arm_iwmmxt (void) +{ + if (!arm_tests_initialized) + pixman_arm_read_auxv (); + + return arm_has_iwmmxt; +} + +#endif /* USE_ARM_IWMMXT */ + +#else /* linux ELF */ + +#define pixman_have_arm_simd() FALSE +#define pixman_have_arm_neon() FALSE +#define pixman_have_arm_iwmmxt() FALSE + +#endif + +#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */ + +#if defined(USE_X86_MMX) || defined(USE_SSE2) +/* The CPU detection code needs to be in a file not compiled with + * "-mmmx -msse", as gcc would generate CMOV instructions otherwise + * that would lead to SIGILL instructions on old CPUs that don't have + * it. + */ +#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) + +#ifdef HAVE_GETISAX +#include <sys/auxv.h> +#endif + +typedef enum +{ + NO_FEATURES = 0, + MMX = 0x1, + MMX_EXTENSIONS = 0x2, + SSE = 0x6, + SSE2 = 0x8, + CMOV = 0x10 +} cpu_features_t; + + +static unsigned int +detect_cpu_features (void) +{ + unsigned int features = 0; + unsigned int result = 0; + +#ifdef HAVE_GETISAX + if (getisax (&result, 1)) + { + if (result & AV_386_CMOV) + features |= CMOV; + if (result & AV_386_MMX) + features |= MMX; + if (result & AV_386_AMD_MMX) + features |= MMX_EXTENSIONS; + if (result & AV_386_SSE) + features |= SSE; + if (result & AV_386_SSE2) + features |= SSE2; + } +#else + char vendor[13]; +#ifdef _MSC_VER + int vendor0 = 0, vendor1, vendor2; +#endif + vendor[0] = 0; + vendor[12] = 0; + +#ifdef __GNUC__ + /* see p. 118 of amd64 instruction set manual Vol3 */ + /* We need to be careful about the handling of %ebx and + * %esp here. We can't declare either one as clobbered + * since they are special registers (%ebx is the "PIC + * register" holding an offset to global data, %esp the + * stack pointer), so we need to make sure they have their + * original values when we access the output operands. + */ + __asm__ ( + "pushf\n" + "pop %%eax\n" + "mov %%eax, %%ecx\n" + "xor $0x00200000, %%eax\n" + "push %%eax\n" + "popf\n" + "pushf\n" + "pop %%eax\n" + "mov $0x0, %%edx\n" + "xor %%ecx, %%eax\n" + "jz 1f\n" + + "mov $0x00000000, %%eax\n" + "push %%ebx\n" + "cpuid\n" + "mov %%ebx, %%eax\n" + "pop %%ebx\n" + "mov %%eax, %1\n" + "mov %%edx, %2\n" + "mov %%ecx, %3\n" + "mov $0x00000001, %%eax\n" + "push %%ebx\n" + "cpuid\n" + "pop %%ebx\n" + "1:\n" + "mov %%edx, %0\n" + : "=r" (result), + "=m" (vendor[0]), + "=m" (vendor[4]), + "=m" (vendor[8]) + : + : "%eax", "%ecx", "%edx" + ); + +#elif defined (_MSC_VER) + + _asm { + pushfd + pop eax + mov ecx, eax + xor eax, 00200000h + push eax + popfd + pushfd + pop eax + mov edx, 0 + xor eax, ecx + jz nocpuid + + mov eax, 0 + push ebx + cpuid + mov eax, ebx + pop ebx + mov vendor0, eax + mov vendor1, edx + mov vendor2, ecx + mov eax, 1 + push ebx + cpuid + pop ebx + nocpuid: + mov result, edx + } + memmove (vendor + 0, &vendor0, 4); + memmove (vendor + 4, &vendor1, 4); + memmove (vendor + 8, &vendor2, 4); + +#else +# error unsupported compiler +#endif + + features = 0; + if (result) + { + /* result now contains the standard feature bits */ + if (result & (1 << 15)) + features |= CMOV; + if (result & (1 << 23)) + features |= MMX; + if (result & (1 << 25)) + features |= SSE; + if (result & (1 << 26)) + features |= SSE2; + if ((features & MMX) && !(features & SSE) && + (strcmp (vendor, "AuthenticAMD") == 0 || + strcmp (vendor, "Geode by NSC") == 0)) + { + /* check for AMD MMX extensions */ +#ifdef __GNUC__ + __asm__ ( + " push %%ebx\n" + " mov $0x80000000, %%eax\n" + " cpuid\n" + " xor %%edx, %%edx\n" + " cmp $0x1, %%eax\n" + " jge 2f\n" + " mov $0x80000001, %%eax\n" + " cpuid\n" + "2:\n" + " pop %%ebx\n" + " mov %%edx, %0\n" + : "=r" (result) + : + : "%eax", "%ecx", "%edx" + ); +#elif defined _MSC_VER + _asm { + push ebx + mov eax, 80000000h + cpuid + xor edx, edx + cmp eax, 1 + jge notamd + mov eax, 80000001h + cpuid + notamd: + pop ebx + mov result, edx + } +#endif + if (result & (1 << 22)) + features |= MMX_EXTENSIONS; + } + } +#endif /* HAVE_GETISAX */ + + return features; +} + +static pixman_bool_t +pixman_have_mmx (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t mmx_present; + + if (!initialized) + { + unsigned int features = detect_cpu_features (); + mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS); + initialized = TRUE; + } + + return mmx_present; +} + +#ifdef USE_SSE2 +static pixman_bool_t +pixman_have_sse2 (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t sse2_present; + + if (!initialized) + { + unsigned int features = detect_cpu_features (); + sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2); + initialized = TRUE; + } + + return sse2_present; +} + +#endif + +#else /* __amd64__ */ +#ifdef USE_X86_MMX +#define pixman_have_mmx() TRUE +#endif +#ifdef USE_SSE2 +#define pixman_have_sse2() TRUE +#endif +#endif /* __amd64__ */ +#endif + +pixman_implementation_t * +_pixman_choose_implementation (void) +{ + pixman_implementation_t *imp; + + imp = _pixman_implementation_create_general(); + imp = _pixman_implementation_create_fast_path (imp); + +#ifdef USE_X86_MMX + if (pixman_have_mmx ()) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_SSE2 + if (pixman_have_sse2 ()) + imp = _pixman_implementation_create_sse2 (imp); +#endif + +#ifdef USE_ARM_SIMD + if (pixman_have_arm_simd ()) + imp = _pixman_implementation_create_arm_simd (imp); +#endif + +#ifdef USE_ARM_IWMMXT + if (pixman_have_arm_iwmmxt ()) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_ARM_NEON + if (pixman_have_arm_neon ()) + imp = _pixman_implementation_create_arm_neon (imp); +#endif + +#ifdef USE_VMX + if (pixman_have_vmx ()) + imp = _pixman_implementation_create_vmx (imp); +#endif + + imp = _pixman_implementation_create_noop (imp); + + return imp; +} + diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c index 11644e87b..9c428a3de 100644 --- a/pixman/pixman/pixman-mmx.c +++ b/pixman/pixman/pixman-mmx.c @@ -33,7 +33,7 @@ #include <config.h> #endif -#ifdef USE_MMX +#if defined USE_X86_MMX || defined USE_ARM_IWMMXT #include <mmintrin.h> #include "pixman-private.h" @@ -47,6 +47,15 @@ #define CHECKPOINT() #endif +#ifdef USE_ARM_IWMMXT +/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_empty (void) +{ + +} +#endif + /* Notes about writing mmx code * * give memory operands as the second operand. If you give it as the @@ -302,6 +311,39 @@ in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest) #endif +/* Elemental unaligned loads */ + +static __inline__ uint64_t ldq_u(uint64_t *p) +{ +#ifdef USE_X86_MMX + /* x86's alignment restrictions are very relaxed. */ + return *p; +#elif defined USE_ARM_IWMMXT + int align = (uintptr_t)p & 7; + __m64 *aligned_p; + if (align == 0) + return *p; + aligned_p = (__m64 *)((uintptr_t)p & ~7); + return _mm_align_si64 (aligned_p[0], aligned_p[1], align); +#else + struct __una_u64 { uint64_t x __attribute__((packed)); }; + const struct __una_u64 *ptr = (const struct __una_u64 *) p; + return ptr->x; +#endif +} + +static __inline__ uint32_t ldl_u(uint32_t *p) +{ +#ifdef USE_X86_MMX + /* x86's alignment restrictions are very relaxed. */ + return *p; +#else + struct __una_u32 { uint32_t x __attribute__((packed)); }; + const struct __una_u32 *ptr = (const struct __una_u32 *) p; + return ptr->x; +#endif +} + static force_inline __m64 load8888 (uint32_t v) { @@ -1382,7 +1424,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, while (w >= 2) { - __m64 vs = *(__m64 *)src; + __m64 vs = (__m64)ldq_u((uint64_t *)src); __m64 vd = *(__m64 *)dst; __m64 vsrc0 = expand8888 (vs, 0); __m64 vsrc1 = expand8888 (vs, 1); @@ -1463,14 +1505,14 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, __m64 vd6 = *(__m64 *)(dst + 12); __m64 vd7 = *(__m64 *)(dst + 14); - __m64 vs0 = *(__m64 *)(src + 0); - __m64 vs1 = *(__m64 *)(src + 2); - __m64 vs2 = *(__m64 *)(src + 4); - __m64 vs3 = *(__m64 *)(src + 6); - __m64 vs4 = *(__m64 *)(src + 8); - __m64 vs5 = *(__m64 *)(src + 10); - __m64 vs6 = *(__m64 *)(src + 12); - __m64 vs7 = *(__m64 *)(src + 14); + __m64 vs0 = (__m64)ldq_u((uint64_t *)(src + 0)); + __m64 vs1 = (__m64)ldq_u((uint64_t *)(src + 2)); + __m64 vs2 = (__m64)ldq_u((uint64_t *)(src + 4)); + __m64 vs3 = (__m64)ldq_u((uint64_t *)(src + 6)); + __m64 vs4 = (__m64)ldq_u((uint64_t *)(src + 8)); + __m64 vs5 = (__m64)ldq_u((uint64_t *)(src + 10)); + __m64 vs6 = (__m64)ldq_u((uint64_t *)(src + 12)); + __m64 vs7 = (__m64)ldq_u((uint64_t *)(src + 14)); vd0 = pack8888 ( in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), @@ -1800,7 +1842,7 @@ pixman_fill_mmx (uint32_t *bits, uint32_t byte_width; uint8_t *byte_line; -#ifdef __GNUC__ +#if defined __GNUC__ && defined USE_X86_MMX __m64 v1, v2, v3, v4, v5, v6, v7; #endif @@ -1834,7 +1876,7 @@ pixman_fill_mmx (uint32_t *bits, fill = ((uint64_t)xor << 32) | xor; vfill = to_m64 (fill); -#ifdef __GNUC__ +#if defined __GNUC__ && defined USE_X86_MMX __asm__ ( "movq %7, %0\n" "movq %7, %1\n" @@ -1880,7 +1922,7 @@ pixman_fill_mmx (uint32_t *bits, while (w >= 64) { -#ifdef __GNUC__ +#if defined __GNUC__ && defined USE_X86_MMX __asm__ ( "movq %1, (%0)\n" "movq %2, 8(%0)\n" @@ -2507,23 +2549,35 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - if ((((unsigned long)dest_image & 3) == 0) && - (((unsigned long)src_image & 3) == 0)) + while (w && (unsigned long)dst & 7) { - while (w >= 4) - { - __m64 vmask; - __m64 vdest; + uint16_t tmp; + uint8_t a; + uint32_t m, d; - vmask = load8888 (*(uint32_t *)mask); - vdest = load8888 (*(uint32_t *)dst); + a = *mask++; + d = *dst; - *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest)); + m = MUL_UN8 (sa, a, tmp); + d = MUL_UN8 (m, d, tmp); - dst += 4; - mask += 4; - w -= 4; - } + *dst++ = d; + w--; + } + + while (w >= 4) + { + __m64 vmask; + __m64 vdest; + + vmask = load8888 (ldl_u((uint32_t *)mask)); + vdest = load8888 (*(uint32_t *)dst); + + *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest)); + + dst += 4; + mask += 4; + w -= 4; } while (w--) @@ -2566,20 +2620,31 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp, src_line += src_stride; w = width; - if ((((unsigned long)dest_image & 3) == 0) && - (((unsigned long)src_image & 3) == 0)) + while (w && (unsigned long)dst & 3) { - while (w >= 4) - { - uint32_t *s = (uint32_t *)src; - uint32_t *d = (uint32_t *)dst; + uint8_t s, d; + uint16_t tmp; - *d = store8888 (in (load8888 (*s), load8888 (*d))); + s = *src; + d = *dst; - w -= 4; - dst += 4; - src += 4; - } + *dst = MUL_UN8 (s, d, tmp); + + src++; + dst++; + w--; + } + + while (w >= 4) + { + uint32_t *s = (uint32_t *)src; + uint32_t *d = (uint32_t *)dst; + + *d = store8888 (in (load8888 (ldl_u((uint32_t *)s)), load8888 (*d))); + + w -= 4; + dst += 4; + src += 4; } while (w--) @@ -2634,20 +2699,36 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - if ((((unsigned long)mask_image & 3) == 0) && - (((unsigned long)dest_image & 3) == 0)) + while (w && (unsigned long)dst & 3) { - while (w >= 4) - { - __m64 vmask = load8888 (*(uint32_t *)mask); - __m64 vdest = load8888 (*(uint32_t *)dst); + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; - *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest)); + a = *mask++; + d = *dst; - w -= 4; - dst += 4; - mask += 4; - } + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); + + *dst++ = r; + w--; + } + + while (w >= 4) + { + __m64 vmask; + __m64 vdest; + + vmask = load8888 (ldl_u((uint32_t *)mask)); + vdest = load8888 (*(uint32_t *)dst); + + *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest)); + + dst += 4; + mask += 4; + w -= 4; } while (w--) @@ -2710,7 +2791,7 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp, while (w >= 8) { - *(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst); + *(__m64*)dst = _mm_adds_pu8 ((__m64)ldq_u((uint64_t *)src), *(__m64*)dst); dst += 8; src += 8; w -= 8; @@ -2768,7 +2849,7 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp, while (w >= 2) { - dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst); + dst64 = _mm_adds_pu8 ((__m64)ldq_u((uint64_t *)src), *(__m64*)dst); *(uint64_t*)dst = to_uint64 (dst64); dst += 2; src += 2; @@ -2841,6 +2922,14 @@ pixman_blt_mmx (uint32_t *src_bits, dst_bytes += dst_stride; w = byte_width; + while (w >= 1 && ((unsigned long)d & 1)) + { + *(uint8_t *)d = *(uint8_t *)s; + w -= 1; + s += 1; + d += 1; + } + while (w >= 2 && ((unsigned long)d & 3)) { *(uint16_t *)d = *(uint16_t *)s; @@ -2851,7 +2940,7 @@ pixman_blt_mmx (uint32_t *src_bits, while (w >= 4 && ((unsigned long)d & 7)) { - *(uint32_t *)d = *(uint32_t *)s; + *(uint32_t *)d = ldl_u((uint32_t *)s); w -= 4; s += 4; @@ -2860,7 +2949,7 @@ pixman_blt_mmx (uint32_t *src_bits, while (w >= 64) { -#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) +#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX __asm__ ( "movq (%1), %%mm0\n" "movq 8(%1), %%mm1\n" @@ -2885,14 +2974,14 @@ pixman_blt_mmx (uint32_t *src_bits, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); #else - __m64 v0 = *(__m64 *)(s + 0); - __m64 v1 = *(__m64 *)(s + 8); - __m64 v2 = *(__m64 *)(s + 16); - __m64 v3 = *(__m64 *)(s + 24); - __m64 v4 = *(__m64 *)(s + 32); - __m64 v5 = *(__m64 *)(s + 40); - __m64 v6 = *(__m64 *)(s + 48); - __m64 v7 = *(__m64 *)(s + 56); + __m64 v0 = ldq_u((uint64_t *)(s + 0)); + __m64 v1 = ldq_u((uint64_t *)(s + 8)); + __m64 v2 = ldq_u((uint64_t *)(s + 16)); + __m64 v3 = ldq_u((uint64_t *)(s + 24)); + __m64 v4 = ldq_u((uint64_t *)(s + 32)); + __m64 v5 = ldq_u((uint64_t *)(s + 40)); + __m64 v6 = ldq_u((uint64_t *)(s + 48)); + __m64 v7 = ldq_u((uint64_t *)(s + 56)); *(__m64 *)(d + 0) = v0; *(__m64 *)(d + 8) = v1; *(__m64 *)(d + 16) = v2; @@ -2909,7 +2998,7 @@ pixman_blt_mmx (uint32_t *src_bits, } while (w >= 4) { - *(uint32_t *)d = *(uint32_t *)s; + *(uint32_t *)d = ldl_u((uint32_t *)s); w -= 4; s += 4; @@ -3161,4 +3250,4 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback) return imp; } -#endif /* USE_MMX */ +#endif /* USE_X86_MMX || USE_ARM_IWMMXT */ diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index 6e716c6e6..4d645fe4b 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -545,7 +545,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback); pixman_implementation_t * _pixman_implementation_create_noop (pixman_implementation_t *fallback); -#ifdef USE_MMX +#if defined USE_X86_MMX || defined USE_ARM_IWMMXT pixman_implementation_t * _pixman_implementation_create_mmx (pixman_implementation_t *fallback); #endif diff --git a/pixman/test/Makefile.am b/pixman/test/Makefile.am index 802d3f4fc..eeb3679f0 100644 --- a/pixman/test/Makefile.am +++ b/pixman/test/Makefile.am @@ -1,9 +1,9 @@ include $(top_srcdir)/test/Makefile.sources -AM_CFLAGS = @OPENMP_CFLAGS@ -AM_LDFLAGS = @OPENMP_CFLAGS@ @TESTPROGS_EXTRA_LDFLAGS@ -LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm @PNG_LIBS@ -INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman @PNG_CFLAGS@ +AM_CFLAGS = $(OPENMP_CFLAGS) +AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS) +LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS) +INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS) libutils_la_SOURCES = $(libutils_sources) $(libutils_headers) |