aboutsummaryrefslogtreecommitdiff
path: root/pixman
diff options
context:
space:
mode:
Diffstat (limited to 'pixman')
-rw-r--r--pixman/configure.ac56
-rw-r--r--pixman/demos/Makefile.am72
-rw-r--r--pixman/pixman/Makefile.am14
-rw-r--r--pixman/pixman/Makefile.win322
-rw-r--r--pixman/pixman/pixman-cpu.c1244
-rw-r--r--pixman/pixman/pixman-mmx.c213
-rw-r--r--pixman/pixman/pixman-private.h2
-rw-r--r--pixman/test/Makefile.am8
8 files changed, 889 insertions, 722 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac
index dc523df0f..481d0bb00 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -303,7 +303,7 @@ CFLAGS=$xserver_save_CFLAGS
AC_ARG_ENABLE(mmx,
[AC_HELP_STRING([--disable-mmx],
- [disable MMX fast paths])],
+ [disable x86 MMX fast paths])],
[enable_mmx=$enableval], [enable_mmx=auto])
if test $enable_mmx = no ; then
@@ -311,17 +311,17 @@ if test $enable_mmx = no ; then
fi
if test $have_mmx_intrinsics = yes ; then
- AC_DEFINE(USE_MMX, 1, [use MMX compiler intrinsics])
+ AC_DEFINE(USE_X86_MMX, 1, [use x86 MMX compiler intrinsics])
else
MMX_CFLAGS=
fi
AC_MSG_RESULT($have_mmx_intrinsics)
if test $enable_mmx = yes && test $have_mmx_intrinsics = no ; then
- AC_MSG_ERROR([MMX intrinsics not detected])
+ AC_MSG_ERROR([x86 MMX intrinsics not detected])
fi
-AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
+AM_CONDITIONAL(USE_X86_MMX, test $have_mmx_intrinsics = yes)
dnl ===========================================================================
dnl Check for SSE2
@@ -540,6 +540,54 @@ if test $enable_arm_neon = yes && test $have_arm_neon = no ; then
AC_MSG_ERROR([ARM NEON intrinsics not detected])
fi
+dnl ===========================================================================
+dnl Check for IWMMXT
+
+if test "x$IWMMXT_CFLAGS" = "x" ; then
+ IWMMXT_CFLAGS="-march=iwmmxt -flax-vector-conversions -Winline"
+fi
+
+have_iwmmxt_intrinsics=no
+AC_MSG_CHECKING(whether to use ARM IWMMXT intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$IWMMXT_CFLAGS $CFLAGS"
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 3 && __GNUC_MINOR__ < 6))
+#error "Need GCC >= 4.6 for IWMMXT intrinsics"
+#endif
+#include <mmintrin.h>
+int main () {
+ union {
+ __m64 v;
+ [char c[8];]
+ } a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} };
+ int b = 4;
+ __m64 c = _mm_srli_si64 (a.v, b);
+}], have_iwmmxt_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(arm-iwmmxt,
+ [AC_HELP_STRING([--disable-arm-iwmmxt],
+ [disable ARM IWMMXT fast paths])],
+ [enable_iwmmxt=$enableval], [enable_iwmmxt=auto])
+
+if test $enable_iwmmxt = no ; then
+ have_iwmmxt_intrinsics=disabled
+fi
+
+if test $have_iwmmxt_intrinsics = yes ; then
+ AC_DEFINE(USE_ARM_IWMMXT, 1, [use ARM IWMMXT compiler intrinsics])
+else
+ IWMMXT_CFLAGS=
+fi
+
+AC_MSG_RESULT($have_iwmmxt_intrinsics)
+if test $enable_iwmmxt = yes && test $have_iwmmxt_intrinsics = no ; then
+ AC_MSG_ERROR([IWMMXT intrinsics not detected])
+fi
+
+AM_CONDITIONAL(USE_ARM_IWMMXT, test $have_iwmmxt_intrinsics = yes)
+
dnl =========================================================================================
dnl Check for GNU-style inline assembly support
diff --git a/pixman/demos/Makefile.am b/pixman/demos/Makefile.am
index 171f8f419..070c2d748 100644
--- a/pixman/demos/Makefile.am
+++ b/pixman/demos/Makefile.am
@@ -1,36 +1,36 @@
-if HAVE_GTK
-
-AM_CFLAGS = @OPENMP_CFLAGS@
-AM_LDFLAGS = @OPENMP_CFLAGS@
-
-LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS)
-INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS)
-
-GTK_UTILS = gtk-utils.c gtk-utils.h
-
-DEMOS = \
- clip-test \
- clip-in \
- composite-test \
- gradient-test \
- radial-test \
- alpha-test \
- screen-test \
- convolution-test \
- trap-test \
- tri-test
-
-gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
-alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
-composite_test_SOURCES = composite-test.c $(GTK_UTILS)
-clip_test_SOURCES = clip-test.c $(GTK_UTILS)
-clip_in_SOURCES = clip-in.c $(GTK_UTILS)
-trap_test_SOURCES = trap-test.c $(GTK_UTILS)
-screen_test_SOURCES = screen-test.c $(GTK_UTILS)
-convolution_test_SOURCES = convolution-test.c $(GTK_UTILS)
-radial_test_SOURCES = radial-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS)
-tri_test_SOURCES = tri-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS)
-
-noinst_PROGRAMS = $(DEMOS)
-
-endif
+if HAVE_GTK
+
+AM_CFLAGS = $(OPENMP_CFLAGS)
+AM_LDFLAGS = $(OPENMP_CFLAGS)
+
+LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS) $(PNG_LIBS)
+INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS)
+
+GTK_UTILS = gtk-utils.c gtk-utils.h
+
+DEMOS = \
+ clip-test \
+ clip-in \
+ composite-test \
+ gradient-test \
+ radial-test \
+ alpha-test \
+ screen-test \
+ convolution-test \
+ trap-test \
+ tri-test
+
+gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
+alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
+composite_test_SOURCES = composite-test.c $(GTK_UTILS)
+clip_test_SOURCES = clip-test.c $(GTK_UTILS)
+clip_in_SOURCES = clip-in.c $(GTK_UTILS)
+trap_test_SOURCES = trap-test.c $(GTK_UTILS)
+screen_test_SOURCES = screen-test.c $(GTK_UTILS)
+convolution_test_SOURCES = convolution-test.c $(GTK_UTILS)
+radial_test_SOURCES = radial-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS)
+tri_test_SOURCES = tri-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS)
+
+noinst_PROGRAMS = $(DEMOS)
+
+endif
diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am
index 2421a4f9e..286b7cf36 100644
--- a/pixman/pixman/Makefile.am
+++ b/pixman/pixman/Makefile.am
@@ -22,7 +22,7 @@ EXTRA_DIST = \
DISTCLEANFILES = $(BUILT_SOURCES)
# mmx code
-if USE_MMX
+if USE_X86_MMX
noinst_LTLIBRARIES += libpixman-mmx.la
libpixman_mmx_la_SOURCES = \
pixman-mmx.c
@@ -90,5 +90,17 @@ libpixman_1_la_LIBADD += libpixman-arm-neon.la
ASM_CFLAGS_arm_neon=
endif
+# iwmmxt code
+if USE_ARM_IWMMXT
+noinst_LTLIBRARIES += libpixman-iwmmxt.la
+libpixman_iwmmxt_la_SOURCES = pixman-mmx.c
+libpixman_iwmmxt_la_CFLAGS = $(DEP_CFLAGS) $(IWMMXT_CFLAGS)
+libpixman_iwmmxt_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LDFLAGS += $(IWMMXT_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-iwmmxt.la
+
+ASM_CFLAGS_IWMMXT=$(IWMMXT_CFLAGS)
+endif
+
.c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES)
$(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
diff --git a/pixman/pixman/Makefile.win32 b/pixman/pixman/Makefile.win32
index beff4a068..381f2cd9d 100644
--- a/pixman/pixman/Makefile.win32
+++ b/pixman/pixman/Makefile.win32
@@ -14,7 +14,7 @@ ifeq ($(SSE2_VAR),)
SSE2_VAR=on
endif
-MMX_CFLAGS = -DUSE_MMX -w14710 -w14714
+MMX_CFLAGS = -DUSE_X86_MMX -w14710 -w14714
SSE2_CFLAGS = -DUSE_SSE2
# MMX compilation flags
diff --git a/pixman/pixman/pixman-cpu.c b/pixman/pixman/pixman-cpu.c
index 77e5b7f7a..dff27d1ea 100644
--- a/pixman/pixman/pixman-cpu.c
+++ b/pixman/pixman/pixman-cpu.c
@@ -1,613 +1,631 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <string.h>
-
-#if defined(USE_ARM_SIMD) && defined(_MSC_VER)
-/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
-#include <windows.h>
-#endif
-
-#include "pixman-private.h"
-
-#ifdef USE_VMX
-
-/* The CPU detection code needs to be in a file not compiled with
- * "-maltivec -mabi=altivec", as gcc would try to save vector register
- * across function calls causing SIGILL on cpus without Altivec/vmx.
- */
-static pixman_bool_t initialized = FALSE;
-static volatile pixman_bool_t have_vmx = TRUE;
-
-#ifdef __APPLE__
-#include <sys/sysctl.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- if (!initialized)
- {
- size_t length = sizeof(have_vmx);
- int error =
- sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0);
-
- if (error)
- have_vmx = FALSE;
-
- initialized = TRUE;
- }
- return have_vmx;
-}
-
-#elif defined (__OpenBSD__)
-#include <sys/param.h>
-#include <sys/sysctl.h>
-#include <machine/cpu.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- if (!initialized)
- {
- int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
- size_t length = sizeof(have_vmx);
- int error =
- sysctl (mib, 2, &have_vmx, &length, NULL, 0);
-
- if (error != 0)
- have_vmx = FALSE;
-
- initialized = TRUE;
- }
- return have_vmx;
-}
-
-#elif defined (__linux__)
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <linux/auxvec.h>
-#include <asm/cputable.h>
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- if (!initialized)
- {
- char fname[64];
- unsigned long buf[64];
- ssize_t count = 0;
- pid_t pid;
- int fd, i;
-
- pid = getpid ();
- snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid);
-
- fd = open (fname, O_RDONLY);
- if (fd >= 0)
- {
- for (i = 0; i <= (count / sizeof(unsigned long)); i += 2)
- {
- /* Read more if buf is empty... */
- if (i == (count / sizeof(unsigned long)))
- {
- count = read (fd, buf, sizeof(buf));
- if (count <= 0)
- break;
- i = 0;
- }
-
- if (buf[i] == AT_HWCAP)
- {
- have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC);
- initialized = TRUE;
- break;
- }
- else if (buf[i] == AT_NULL)
- {
- break;
- }
- }
- close (fd);
- }
- }
- if (!initialized)
- {
- /* Something went wrong. Assume 'no' rather than playing
- fragile tricks with catching SIGILL. */
- have_vmx = FALSE;
- initialized = TRUE;
- }
-
- return have_vmx;
-}
-
-#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */
-#include <signal.h>
-#include <setjmp.h>
-
-static jmp_buf jump_env;
-
-static void
-vmx_test (int sig,
- siginfo_t *si,
- void * unused)
-{
- longjmp (jump_env, 1);
-}
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
- struct sigaction sa, osa;
- int jmp_result;
-
- if (!initialized)
- {
- sa.sa_flags = SA_SIGINFO;
- sigemptyset (&sa.sa_mask);
- sa.sa_sigaction = vmx_test;
- sigaction (SIGILL, &sa, &osa);
- jmp_result = setjmp (jump_env);
- if (jmp_result == 0)
- {
- asm volatile ( "vor 0, 0, 0" );
- }
- sigaction (SIGILL, &osa, NULL);
- have_vmx = (jmp_result == 0);
- initialized = TRUE;
- }
- return have_vmx;
-}
-
-#endif /* __APPLE__ */
-#endif /* USE_VMX */
-
-#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON)
-
-#if defined(_MSC_VER)
-
-#if defined(USE_ARM_SIMD)
-extern int pixman_msvc_try_arm_simd_op ();
-
-pixman_bool_t
-pixman_have_arm_simd (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t have_arm_simd = FALSE;
-
- if (!initialized)
- {
- __try {
- pixman_msvc_try_arm_simd_op ();
- have_arm_simd = TRUE;
- } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) {
- have_arm_simd = FALSE;
- }
- initialized = TRUE;
- }
-
- return have_arm_simd;
-}
-
-#endif /* USE_ARM_SIMD */
-
-#if defined(USE_ARM_NEON)
-extern int pixman_msvc_try_arm_neon_op ();
-
-pixman_bool_t
-pixman_have_arm_neon (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t have_arm_neon = FALSE;
-
- if (!initialized)
- {
- __try
- {
- pixman_msvc_try_arm_neon_op ();
- have_arm_neon = TRUE;
- }
- __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
- {
- have_arm_neon = FALSE;
- }
- initialized = TRUE;
- }
-
- return have_arm_neon;
-}
-
-#endif /* USE_ARM_NEON */
-
-#elif defined (__linux__) /* linux ELF */
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-#include <string.h>
-#include <elf.h>
-
-static pixman_bool_t arm_has_v7 = FALSE;
-static pixman_bool_t arm_has_v6 = FALSE;
-static pixman_bool_t arm_has_vfp = FALSE;
-static pixman_bool_t arm_has_neon = FALSE;
-static pixman_bool_t arm_has_iwmmxt = FALSE;
-static pixman_bool_t arm_tests_initialized = FALSE;
-
-static void
-pixman_arm_read_auxv ()
-{
- int fd;
- Elf32_auxv_t aux;
-
- fd = open ("/proc/self/auxv", O_RDONLY);
- if (fd >= 0)
- {
- while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t))
- {
- if (aux.a_type == AT_HWCAP)
- {
- uint32_t hwcap = aux.a_un.a_val;
- /* hardcode these values to avoid depending on specific
- * versions of the hwcap header, e.g. HWCAP_NEON
- */
- arm_has_vfp = (hwcap & 64) != 0;
- arm_has_iwmmxt = (hwcap & 512) != 0;
- /* this flag is only present on kernel 2.6.29 */
- arm_has_neon = (hwcap & 4096) != 0;
- }
- else if (aux.a_type == AT_PLATFORM)
- {
- const char *plat = (const char*) aux.a_un.a_val;
- if (strncmp (plat, "v7l", 3) == 0)
- {
- arm_has_v7 = TRUE;
- arm_has_v6 = TRUE;
- }
- else if (strncmp (plat, "v6l", 3) == 0)
- {
- arm_has_v6 = TRUE;
- }
- }
- }
- close (fd);
- }
-
- arm_tests_initialized = TRUE;
-}
-
-#if defined(USE_ARM_SIMD)
-pixman_bool_t
-pixman_have_arm_simd (void)
-{
- if (!arm_tests_initialized)
- pixman_arm_read_auxv ();
-
- return arm_has_v6;
-}
-
-#endif /* USE_ARM_SIMD */
-
-#if defined(USE_ARM_NEON)
-pixman_bool_t
-pixman_have_arm_neon (void)
-{
- if (!arm_tests_initialized)
- pixman_arm_read_auxv ();
-
- return arm_has_neon;
-}
-
-#endif /* USE_ARM_NEON */
-
-#else /* linux ELF */
-
-#define pixman_have_arm_simd() FALSE
-#define pixman_have_arm_neon() FALSE
-
-#endif
-
-#endif /* USE_ARM_SIMD || USE_ARM_NEON */
-
-#if defined(USE_MMX) || defined(USE_SSE2)
-/* The CPU detection code needs to be in a file not compiled with
- * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
- * that would lead to SIGILL instructions on old CPUs that don't have
- * it.
- */
-#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
-
-#ifdef HAVE_GETISAX
-#include <sys/auxv.h>
-#endif
-
-typedef enum
-{
- NO_FEATURES = 0,
- MMX = 0x1,
- MMX_EXTENSIONS = 0x2,
- SSE = 0x6,
- SSE2 = 0x8,
- CMOV = 0x10
-} cpu_features_t;
-
-
-static unsigned int
-detect_cpu_features (void)
-{
- unsigned int features = 0;
- unsigned int result = 0;
-
-#ifdef HAVE_GETISAX
- if (getisax (&result, 1))
- {
- if (result & AV_386_CMOV)
- features |= CMOV;
- if (result & AV_386_MMX)
- features |= MMX;
- if (result & AV_386_AMD_MMX)
- features |= MMX_EXTENSIONS;
- if (result & AV_386_SSE)
- features |= SSE;
- if (result & AV_386_SSE2)
- features |= SSE2;
- }
-#else
- char vendor[13];
-#ifdef _MSC_VER
- int vendor0 = 0, vendor1, vendor2;
-#endif
- vendor[0] = 0;
- vendor[12] = 0;
-
-#ifdef __GNUC__
- /* see p. 118 of amd64 instruction set manual Vol3 */
- /* We need to be careful about the handling of %ebx and
- * %esp here. We can't declare either one as clobbered
- * since they are special registers (%ebx is the "PIC
- * register" holding an offset to global data, %esp the
- * stack pointer), so we need to make sure they have their
- * original values when we access the output operands.
- */
- __asm__ (
- "pushf\n"
- "pop %%eax\n"
- "mov %%eax, %%ecx\n"
- "xor $0x00200000, %%eax\n"
- "push %%eax\n"
- "popf\n"
- "pushf\n"
- "pop %%eax\n"
- "mov $0x0, %%edx\n"
- "xor %%ecx, %%eax\n"
- "jz 1f\n"
-
- "mov $0x00000000, %%eax\n"
- "push %%ebx\n"
- "cpuid\n"
- "mov %%ebx, %%eax\n"
- "pop %%ebx\n"
- "mov %%eax, %1\n"
- "mov %%edx, %2\n"
- "mov %%ecx, %3\n"
- "mov $0x00000001, %%eax\n"
- "push %%ebx\n"
- "cpuid\n"
- "pop %%ebx\n"
- "1:\n"
- "mov %%edx, %0\n"
- : "=r" (result),
- "=m" (vendor[0]),
- "=m" (vendor[4]),
- "=m" (vendor[8])
- :
- : "%eax", "%ecx", "%edx"
- );
-
-#elif defined (_MSC_VER)
-
- _asm {
- pushfd
- pop eax
- mov ecx, eax
- xor eax, 00200000h
- push eax
- popfd
- pushfd
- pop eax
- mov edx, 0
- xor eax, ecx
- jz nocpuid
-
- mov eax, 0
- push ebx
- cpuid
- mov eax, ebx
- pop ebx
- mov vendor0, eax
- mov vendor1, edx
- mov vendor2, ecx
- mov eax, 1
- push ebx
- cpuid
- pop ebx
- nocpuid:
- mov result, edx
- }
- memmove (vendor + 0, &vendor0, 4);
- memmove (vendor + 4, &vendor1, 4);
- memmove (vendor + 8, &vendor2, 4);
-
-#else
-# error unsupported compiler
-#endif
-
- features = 0;
- if (result)
- {
- /* result now contains the standard feature bits */
- if (result & (1 << 15))
- features |= CMOV;
- if (result & (1 << 23))
- features |= MMX;
- if (result & (1 << 25))
- features |= SSE;
- if (result & (1 << 26))
- features |= SSE2;
- if ((features & MMX) && !(features & SSE) &&
- (strcmp (vendor, "AuthenticAMD") == 0 ||
- strcmp (vendor, "Geode by NSC") == 0))
- {
- /* check for AMD MMX extensions */
-#ifdef __GNUC__
- __asm__ (
- " push %%ebx\n"
- " mov $0x80000000, %%eax\n"
- " cpuid\n"
- " xor %%edx, %%edx\n"
- " cmp $0x1, %%eax\n"
- " jge 2f\n"
- " mov $0x80000001, %%eax\n"
- " cpuid\n"
- "2:\n"
- " pop %%ebx\n"
- " mov %%edx, %0\n"
- : "=r" (result)
- :
- : "%eax", "%ecx", "%edx"
- );
-#elif defined _MSC_VER
- _asm {
- push ebx
- mov eax, 80000000h
- cpuid
- xor edx, edx
- cmp eax, 1
- jge notamd
- mov eax, 80000001h
- cpuid
- notamd:
- pop ebx
- mov result, edx
- }
-#endif
- if (result & (1 << 22))
- features |= MMX_EXTENSIONS;
- }
- }
-#endif /* HAVE_GETISAX */
-
- return features;
-}
-
-static pixman_bool_t
-pixman_have_mmx (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t mmx_present;
-
- if (!initialized)
- {
- unsigned int features = detect_cpu_features ();
- mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS);
- initialized = TRUE;
- }
-
- return mmx_present;
-}
-
-#ifdef USE_SSE2
-static pixman_bool_t
-pixman_have_sse2 (void)
-{
- static pixman_bool_t initialized = FALSE;
- static pixman_bool_t sse2_present;
-
- if (!initialized)
- {
- unsigned int features = detect_cpu_features ();
- sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2);
- initialized = TRUE;
- }
-
- return sse2_present;
-}
-
-#endif
-
-#else /* __amd64__ */
-#ifdef USE_MMX
-#define pixman_have_mmx() TRUE
-#endif
-#ifdef USE_SSE2
-#define pixman_have_sse2() TRUE
-#endif
-#endif /* __amd64__ */
-#endif
-
-pixman_implementation_t *
-_pixman_choose_implementation (void)
-{
- pixman_implementation_t *imp;
-
- imp = _pixman_implementation_create_general();
- imp = _pixman_implementation_create_fast_path (imp);
-
-#ifdef USE_MMX
- if (pixman_have_mmx ())
- imp = _pixman_implementation_create_mmx (imp);
-#endif
-
-#ifdef USE_SSE2
- if (pixman_have_sse2 ())
- imp = _pixman_implementation_create_sse2 (imp);
-#endif
-
-#ifdef USE_ARM_SIMD
- if (pixman_have_arm_simd ())
- imp = _pixman_implementation_create_arm_simd (imp);
-#endif
-
-#ifdef USE_ARM_NEON
- if (pixman_have_arm_neon ())
- imp = _pixman_implementation_create_arm_neon (imp);
-#endif
-
-#ifdef USE_VMX
- if (pixman_have_vmx ())
- imp = _pixman_implementation_create_vmx (imp);
-#endif
-
- imp = _pixman_implementation_create_noop (imp);
-
- return imp;
-}
-
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. SuSE makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#if defined(USE_ARM_SIMD) && defined(_MSC_VER)
+/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
+#include <windows.h>
+#endif
+
+#include "pixman-private.h"
+
+#ifdef USE_VMX
+
+/* The CPU detection code needs to be in a file not compiled with
+ * "-maltivec -mabi=altivec", as gcc would try to save vector register
+ * across function calls causing SIGILL on cpus without Altivec/vmx.
+ */
+static pixman_bool_t initialized = FALSE;
+static volatile pixman_bool_t have_vmx = TRUE;
+
+#ifdef __APPLE__
+#include <sys/sysctl.h>
+
+static pixman_bool_t
+pixman_have_vmx (void)
+{
+ if (!initialized)
+ {
+ size_t length = sizeof(have_vmx);
+ int error =
+ sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0);
+
+ if (error)
+ have_vmx = FALSE;
+
+ initialized = TRUE;
+ }
+ return have_vmx;
+}
+
+#elif defined (__OpenBSD__)
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
+
+static pixman_bool_t
+pixman_have_vmx (void)
+{
+ if (!initialized)
+ {
+ int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
+ size_t length = sizeof(have_vmx);
+ int error =
+ sysctl (mib, 2, &have_vmx, &length, NULL, 0);
+
+ if (error != 0)
+ have_vmx = FALSE;
+
+ initialized = TRUE;
+ }
+ return have_vmx;
+}
+
+#elif defined (__linux__)
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <linux/auxvec.h>
+#include <asm/cputable.h>
+
+static pixman_bool_t
+pixman_have_vmx (void)
+{
+ if (!initialized)
+ {
+ char fname[64];
+ unsigned long buf[64];
+ ssize_t count = 0;
+ pid_t pid;
+ int fd, i;
+
+ pid = getpid ();
+ snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid);
+
+ fd = open (fname, O_RDONLY);
+ if (fd >= 0)
+ {
+ for (i = 0; i <= (count / sizeof(unsigned long)); i += 2)
+ {
+ /* Read more if buf is empty... */
+ if (i == (count / sizeof(unsigned long)))
+ {
+ count = read (fd, buf, sizeof(buf));
+ if (count <= 0)
+ break;
+ i = 0;
+ }
+
+ if (buf[i] == AT_HWCAP)
+ {
+ have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC);
+ initialized = TRUE;
+ break;
+ }
+ else if (buf[i] == AT_NULL)
+ {
+ break;
+ }
+ }
+ close (fd);
+ }
+ }
+ if (!initialized)
+ {
+ /* Something went wrong. Assume 'no' rather than playing
+ fragile tricks with catching SIGILL. */
+ have_vmx = FALSE;
+ initialized = TRUE;
+ }
+
+ return have_vmx;
+}
+
+#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */
+#include <signal.h>
+#include <setjmp.h>
+
+static jmp_buf jump_env;
+
+static void
+vmx_test (int sig,
+ siginfo_t *si,
+ void * unused)
+{
+ longjmp (jump_env, 1);
+}
+
+static pixman_bool_t
+pixman_have_vmx (void)
+{
+ struct sigaction sa, osa;
+ int jmp_result;
+
+ if (!initialized)
+ {
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset (&sa.sa_mask);
+ sa.sa_sigaction = vmx_test;
+ sigaction (SIGILL, &sa, &osa);
+ jmp_result = setjmp (jump_env);
+ if (jmp_result == 0)
+ {
+ asm volatile ( "vor 0, 0, 0" );
+ }
+ sigaction (SIGILL, &osa, NULL);
+ have_vmx = (jmp_result == 0);
+ initialized = TRUE;
+ }
+ return have_vmx;
+}
+
+#endif /* __APPLE__ */
+#endif /* USE_VMX */
+
+#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT)
+
+#if defined(_MSC_VER)
+
+#if defined(USE_ARM_SIMD)
+extern int pixman_msvc_try_arm_simd_op ();
+
+pixman_bool_t
+pixman_have_arm_simd (void)
+{
+ static pixman_bool_t initialized = FALSE;
+ static pixman_bool_t have_arm_simd = FALSE;
+
+ if (!initialized)
+ {
+ __try {
+ pixman_msvc_try_arm_simd_op ();
+ have_arm_simd = TRUE;
+ } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) {
+ have_arm_simd = FALSE;
+ }
+ initialized = TRUE;
+ }
+
+ return have_arm_simd;
+}
+
+#endif /* USE_ARM_SIMD */
+
+#if defined(USE_ARM_NEON)
+extern int pixman_msvc_try_arm_neon_op ();
+
+pixman_bool_t
+pixman_have_arm_neon (void)
+{
+ static pixman_bool_t initialized = FALSE;
+ static pixman_bool_t have_arm_neon = FALSE;
+
+ if (!initialized)
+ {
+ __try
+ {
+ pixman_msvc_try_arm_neon_op ();
+ have_arm_neon = TRUE;
+ }
+ __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
+ {
+ have_arm_neon = FALSE;
+ }
+ initialized = TRUE;
+ }
+
+ return have_arm_neon;
+}
+
+#endif /* USE_ARM_NEON */
+
+#elif defined (__linux__) /* linux ELF */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <string.h>
+#include <elf.h>
+
+static pixman_bool_t arm_has_v7 = FALSE;
+static pixman_bool_t arm_has_v6 = FALSE;
+static pixman_bool_t arm_has_vfp = FALSE;
+static pixman_bool_t arm_has_neon = FALSE;
+static pixman_bool_t arm_has_iwmmxt = FALSE;
+static pixman_bool_t arm_tests_initialized = FALSE;
+
+static void
+pixman_arm_read_auxv ()
+{
+ int fd;
+ Elf32_auxv_t aux;
+
+ fd = open ("/proc/self/auxv", O_RDONLY);
+ if (fd >= 0)
+ {
+ while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t))
+ {
+ if (aux.a_type == AT_HWCAP)
+ {
+ uint32_t hwcap = aux.a_un.a_val;
+ /* hardcode these values to avoid depending on specific
+ * versions of the hwcap header, e.g. HWCAP_NEON
+ */
+ arm_has_vfp = (hwcap & 64) != 0;
+ arm_has_iwmmxt = (hwcap & 512) != 0;
+ /* this flag is only present on kernel 2.6.29 */
+ arm_has_neon = (hwcap & 4096) != 0;
+ }
+ else if (aux.a_type == AT_PLATFORM)
+ {
+ const char *plat = (const char*) aux.a_un.a_val;
+ if (strncmp (plat, "v7l", 3) == 0)
+ {
+ arm_has_v7 = TRUE;
+ arm_has_v6 = TRUE;
+ }
+ else if (strncmp (plat, "v6l", 3) == 0)
+ {
+ arm_has_v6 = TRUE;
+ }
+ }
+ }
+ close (fd);
+ }
+
+ arm_tests_initialized = TRUE;
+}
+
+#if defined(USE_ARM_SIMD)
+pixman_bool_t
+pixman_have_arm_simd (void)
+{
+ if (!arm_tests_initialized)
+ pixman_arm_read_auxv ();
+
+ return arm_has_v6;
+}
+
+#endif /* USE_ARM_SIMD */
+
+#if defined(USE_ARM_NEON)
+pixman_bool_t
+pixman_have_arm_neon (void)
+{
+ if (!arm_tests_initialized)
+ pixman_arm_read_auxv ();
+
+ return arm_has_neon;
+}
+
+#endif /* USE_ARM_NEON */
+
+#if defined(USE_ARM_IWMMXT)
+pixman_bool_t
+pixman_have_arm_iwmmxt (void)
+{
+ if (!arm_tests_initialized)
+ pixman_arm_read_auxv ();
+
+ return arm_has_iwmmxt;
+}
+
+#endif /* USE_ARM_IWMMXT */
+
+#else /* linux ELF */
+
+#define pixman_have_arm_simd() FALSE
+#define pixman_have_arm_neon() FALSE
+#define pixman_have_arm_iwmmxt() FALSE
+
+#endif
+
+#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */
+
+#if defined(USE_X86_MMX) || defined(USE_SSE2)
+/* The CPU detection code needs to be in a file not compiled with
+ * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
+ * that would lead to SIGILL instructions on old CPUs that don't have
+ * it.
+ */
+#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
+
+#ifdef HAVE_GETISAX
+#include <sys/auxv.h>
+#endif
+
+typedef enum
+{
+ NO_FEATURES = 0,
+ MMX = 0x1,
+ MMX_EXTENSIONS = 0x2,
+ SSE = 0x6,
+ SSE2 = 0x8,
+ CMOV = 0x10
+} cpu_features_t;
+
+
+static unsigned int
+detect_cpu_features (void)
+{
+ unsigned int features = 0;
+ unsigned int result = 0;
+
+#ifdef HAVE_GETISAX
+ if (getisax (&result, 1))
+ {
+ if (result & AV_386_CMOV)
+ features |= CMOV;
+ if (result & AV_386_MMX)
+ features |= MMX;
+ if (result & AV_386_AMD_MMX)
+ features |= MMX_EXTENSIONS;
+ if (result & AV_386_SSE)
+ features |= SSE;
+ if (result & AV_386_SSE2)
+ features |= SSE2;
+ }
+#else
+ char vendor[13];
+#ifdef _MSC_VER
+ int vendor0 = 0, vendor1, vendor2;
+#endif
+ vendor[0] = 0;
+ vendor[12] = 0;
+
+#ifdef __GNUC__
+ /* see p. 118 of amd64 instruction set manual Vol3 */
+ /* We need to be careful about the handling of %ebx and
+ * %esp here. We can't declare either one as clobbered
+ * since they are special registers (%ebx is the "PIC
+ * register" holding an offset to global data, %esp the
+ * stack pointer), so we need to make sure they have their
+ * original values when we access the output operands.
+ */
+ __asm__ (
+ "pushf\n"
+ "pop %%eax\n"
+ "mov %%eax, %%ecx\n"
+ "xor $0x00200000, %%eax\n"
+ "push %%eax\n"
+ "popf\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "mov $0x0, %%edx\n"
+ "xor %%ecx, %%eax\n"
+ "jz 1f\n"
+
+ "mov $0x00000000, %%eax\n"
+ "push %%ebx\n"
+ "cpuid\n"
+ "mov %%ebx, %%eax\n"
+ "pop %%ebx\n"
+ "mov %%eax, %1\n"
+ "mov %%edx, %2\n"
+ "mov %%ecx, %3\n"
+ "mov $0x00000001, %%eax\n"
+ "push %%ebx\n"
+ "cpuid\n"
+ "pop %%ebx\n"
+ "1:\n"
+ "mov %%edx, %0\n"
+ : "=r" (result),
+ "=m" (vendor[0]),
+ "=m" (vendor[4]),
+ "=m" (vendor[8])
+ :
+ : "%eax", "%ecx", "%edx"
+ );
+
+#elif defined (_MSC_VER)
+
+ _asm {
+ pushfd
+ pop eax
+ mov ecx, eax
+ xor eax, 00200000h
+ push eax
+ popfd
+ pushfd
+ pop eax
+ mov edx, 0
+ xor eax, ecx
+ jz nocpuid
+
+ mov eax, 0
+ push ebx
+ cpuid
+ mov eax, ebx
+ pop ebx
+ mov vendor0, eax
+ mov vendor1, edx
+ mov vendor2, ecx
+ mov eax, 1
+ push ebx
+ cpuid
+ pop ebx
+ nocpuid:
+ mov result, edx
+ }
+ memmove (vendor + 0, &vendor0, 4);
+ memmove (vendor + 4, &vendor1, 4);
+ memmove (vendor + 8, &vendor2, 4);
+
+#else
+# error unsupported compiler
+#endif
+
+ features = 0;
+ if (result)
+ {
+ /* result now contains the standard feature bits */
+ if (result & (1 << 15))
+ features |= CMOV;
+ if (result & (1 << 23))
+ features |= MMX;
+ if (result & (1 << 25))
+ features |= SSE;
+ if (result & (1 << 26))
+ features |= SSE2;
+ if ((features & MMX) && !(features & SSE) &&
+ (strcmp (vendor, "AuthenticAMD") == 0 ||
+ strcmp (vendor, "Geode by NSC") == 0))
+ {
+ /* check for AMD MMX extensions */
+#ifdef __GNUC__
+ __asm__ (
+ " push %%ebx\n"
+ " mov $0x80000000, %%eax\n"
+ " cpuid\n"
+ " xor %%edx, %%edx\n"
+ " cmp $0x1, %%eax\n"
+ " jge 2f\n"
+ " mov $0x80000001, %%eax\n"
+ " cpuid\n"
+ "2:\n"
+ " pop %%ebx\n"
+ " mov %%edx, %0\n"
+ : "=r" (result)
+ :
+ : "%eax", "%ecx", "%edx"
+ );
+#elif defined _MSC_VER
+ _asm {
+ push ebx
+ mov eax, 80000000h
+ cpuid
+ xor edx, edx
+ cmp eax, 1
+ jge notamd
+ mov eax, 80000001h
+ cpuid
+ notamd:
+ pop ebx
+ mov result, edx
+ }
+#endif
+ if (result & (1 << 22))
+ features |= MMX_EXTENSIONS;
+ }
+ }
+#endif /* HAVE_GETISAX */
+
+ return features;
+}
+
+static pixman_bool_t
+pixman_have_mmx (void)
+{
+ static pixman_bool_t initialized = FALSE;
+ static pixman_bool_t mmx_present;
+
+ if (!initialized)
+ {
+ unsigned int features = detect_cpu_features ();
+ mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS);
+ initialized = TRUE;
+ }
+
+ return mmx_present;
+}
+
+#ifdef USE_SSE2
+static pixman_bool_t
+pixman_have_sse2 (void)
+{
+ static pixman_bool_t initialized = FALSE;
+ static pixman_bool_t sse2_present;
+
+ if (!initialized)
+ {
+ unsigned int features = detect_cpu_features ();
+ sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2);
+ initialized = TRUE;
+ }
+
+ return sse2_present;
+}
+
+#endif
+
+#else /* __amd64__ */
+#ifdef USE_X86_MMX
+#define pixman_have_mmx() TRUE
+#endif
+#ifdef USE_SSE2
+#define pixman_have_sse2() TRUE
+#endif
+#endif /* __amd64__ */
+#endif
+
+pixman_implementation_t *
+_pixman_choose_implementation (void)
+{
+ pixman_implementation_t *imp;
+
+ imp = _pixman_implementation_create_general();
+ imp = _pixman_implementation_create_fast_path (imp);
+
+#ifdef USE_X86_MMX
+ if (pixman_have_mmx ())
+ imp = _pixman_implementation_create_mmx (imp);
+#endif
+
+#ifdef USE_SSE2
+ if (pixman_have_sse2 ())
+ imp = _pixman_implementation_create_sse2 (imp);
+#endif
+
+#ifdef USE_ARM_SIMD
+ if (pixman_have_arm_simd ())
+ imp = _pixman_implementation_create_arm_simd (imp);
+#endif
+
+#ifdef USE_ARM_IWMMXT
+ if (pixman_have_arm_iwmmxt ())
+ imp = _pixman_implementation_create_mmx (imp);
+#endif
+
+#ifdef USE_ARM_NEON
+ if (pixman_have_arm_neon ())
+ imp = _pixman_implementation_create_arm_neon (imp);
+#endif
+
+#ifdef USE_VMX
+ if (pixman_have_vmx ())
+ imp = _pixman_implementation_create_vmx (imp);
+#endif
+
+ imp = _pixman_implementation_create_noop (imp);
+
+ return imp;
+}
+
diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c
index 11644e87b..9c428a3de 100644
--- a/pixman/pixman/pixman-mmx.c
+++ b/pixman/pixman/pixman-mmx.c
@@ -33,7 +33,7 @@
#include <config.h>
#endif
-#ifdef USE_MMX
+#if defined USE_X86_MMX || defined USE_ARM_IWMMXT
#include <mmintrin.h>
#include "pixman-private.h"
@@ -47,6 +47,15 @@
#define CHECKPOINT()
#endif
+#ifdef USE_ARM_IWMMXT
+/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_empty (void)
+{
+
+}
+#endif
+
/* Notes about writing mmx code
*
* give memory operands as the second operand. If you give it as the
@@ -302,6 +311,39 @@ in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
#endif
+/* Elemental unaligned loads */
+
+static __inline__ uint64_t ldq_u(uint64_t *p)
+{
+#ifdef USE_X86_MMX
+ /* x86's alignment restrictions are very relaxed. */
+ return *p;
+#elif defined USE_ARM_IWMMXT
+ int align = (uintptr_t)p & 7;
+ __m64 *aligned_p;
+ if (align == 0)
+ return *p;
+ aligned_p = (__m64 *)((uintptr_t)p & ~7);
+ return _mm_align_si64 (aligned_p[0], aligned_p[1], align);
+#else
+ struct __una_u64 { uint64_t x __attribute__((packed)); };
+ const struct __una_u64 *ptr = (const struct __una_u64 *) p;
+ return ptr->x;
+#endif
+}
+
+static __inline__ uint32_t ldl_u(uint32_t *p)
+{
+#ifdef USE_X86_MMX
+ /* x86's alignment restrictions are very relaxed. */
+ return *p;
+#else
+ struct __una_u32 { uint32_t x __attribute__((packed)); };
+ const struct __una_u32 *ptr = (const struct __una_u32 *) p;
+ return ptr->x;
+#endif
+}
+
static force_inline __m64
load8888 (uint32_t v)
{
@@ -1382,7 +1424,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
while (w >= 2)
{
- __m64 vs = *(__m64 *)src;
+ __m64 vs = (__m64)ldq_u((uint64_t *)src);
__m64 vd = *(__m64 *)dst;
__m64 vsrc0 = expand8888 (vs, 0);
__m64 vsrc1 = expand8888 (vs, 1);
@@ -1463,14 +1505,14 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
__m64 vd6 = *(__m64 *)(dst + 12);
__m64 vd7 = *(__m64 *)(dst + 14);
- __m64 vs0 = *(__m64 *)(src + 0);
- __m64 vs1 = *(__m64 *)(src + 2);
- __m64 vs2 = *(__m64 *)(src + 4);
- __m64 vs3 = *(__m64 *)(src + 6);
- __m64 vs4 = *(__m64 *)(src + 8);
- __m64 vs5 = *(__m64 *)(src + 10);
- __m64 vs6 = *(__m64 *)(src + 12);
- __m64 vs7 = *(__m64 *)(src + 14);
+ __m64 vs0 = (__m64)ldq_u((uint64_t *)(src + 0));
+ __m64 vs1 = (__m64)ldq_u((uint64_t *)(src + 2));
+ __m64 vs2 = (__m64)ldq_u((uint64_t *)(src + 4));
+ __m64 vs3 = (__m64)ldq_u((uint64_t *)(src + 6));
+ __m64 vs4 = (__m64)ldq_u((uint64_t *)(src + 8));
+ __m64 vs5 = (__m64)ldq_u((uint64_t *)(src + 10));
+ __m64 vs6 = (__m64)ldq_u((uint64_t *)(src + 12));
+ __m64 vs7 = (__m64)ldq_u((uint64_t *)(src + 14));
vd0 = pack8888 (
in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
@@ -1800,7 +1842,7 @@ pixman_fill_mmx (uint32_t *bits,
uint32_t byte_width;
uint8_t *byte_line;
-#ifdef __GNUC__
+#if defined __GNUC__ && defined USE_X86_MMX
__m64 v1, v2, v3, v4, v5, v6, v7;
#endif
@@ -1834,7 +1876,7 @@ pixman_fill_mmx (uint32_t *bits,
fill = ((uint64_t)xor << 32) | xor;
vfill = to_m64 (fill);
-#ifdef __GNUC__
+#if defined __GNUC__ && defined USE_X86_MMX
__asm__ (
"movq %7, %0\n"
"movq %7, %1\n"
@@ -1880,7 +1922,7 @@ pixman_fill_mmx (uint32_t *bits,
while (w >= 64)
{
-#ifdef __GNUC__
+#if defined __GNUC__ && defined USE_X86_MMX
__asm__ (
"movq %1, (%0)\n"
"movq %2, 8(%0)\n"
@@ -2507,23 +2549,35 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
mask_line += mask_stride;
w = width;
- if ((((unsigned long)dest_image & 3) == 0) &&
- (((unsigned long)src_image & 3) == 0))
+ while (w && (unsigned long)dst & 7)
{
- while (w >= 4)
- {
- __m64 vmask;
- __m64 vdest;
+ uint16_t tmp;
+ uint8_t a;
+ uint32_t m, d;
- vmask = load8888 (*(uint32_t *)mask);
- vdest = load8888 (*(uint32_t *)dst);
+ a = *mask++;
+ d = *dst;
- *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
+ m = MUL_UN8 (sa, a, tmp);
+ d = MUL_UN8 (m, d, tmp);
- dst += 4;
- mask += 4;
- w -= 4;
- }
+ *dst++ = d;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ __m64 vmask;
+ __m64 vdest;
+
+ vmask = load8888 (ldl_u((uint32_t *)mask));
+ vdest = load8888 (*(uint32_t *)dst);
+
+ *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
+
+ dst += 4;
+ mask += 4;
+ w -= 4;
}
while (w--)
@@ -2566,20 +2620,31 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp,
src_line += src_stride;
w = width;
- if ((((unsigned long)dest_image & 3) == 0) &&
- (((unsigned long)src_image & 3) == 0))
+ while (w && (unsigned long)dst & 3)
{
- while (w >= 4)
- {
- uint32_t *s = (uint32_t *)src;
- uint32_t *d = (uint32_t *)dst;
+ uint8_t s, d;
+ uint16_t tmp;
- *d = store8888 (in (load8888 (*s), load8888 (*d)));
+ s = *src;
+ d = *dst;
- w -= 4;
- dst += 4;
- src += 4;
- }
+ *dst = MUL_UN8 (s, d, tmp);
+
+ src++;
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ uint32_t *s = (uint32_t *)src;
+ uint32_t *d = (uint32_t *)dst;
+
+ *d = store8888 (in (load8888 (ldl_u((uint32_t *)s)), load8888 (*d)));
+
+ w -= 4;
+ dst += 4;
+ src += 4;
}
while (w--)
@@ -2634,20 +2699,36 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
mask_line += mask_stride;
w = width;
- if ((((unsigned long)mask_image & 3) == 0) &&
- (((unsigned long)dest_image & 3) == 0))
+ while (w && (unsigned long)dst & 3)
{
- while (w >= 4)
- {
- __m64 vmask = load8888 (*(uint32_t *)mask);
- __m64 vdest = load8888 (*(uint32_t *)dst);
+ uint16_t tmp;
+ uint16_t a;
+ uint32_t m, d;
+ uint32_t r;
- *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+ a = *mask++;
+ d = *dst;
- w -= 4;
- dst += 4;
- mask += 4;
- }
+ m = MUL_UN8 (sa, a, tmp);
+ r = ADD_UN8 (m, d, tmp);
+
+ *dst++ = r;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ __m64 vmask;
+ __m64 vdest;
+
+ vmask = load8888 (ldl_u((uint32_t *)mask));
+ vdest = load8888 (*(uint32_t *)dst);
+
+ *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+
+ dst += 4;
+ mask += 4;
+ w -= 4;
}
while (w--)
@@ -2710,7 +2791,7 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp,
while (w >= 8)
{
- *(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
+ *(__m64*)dst = _mm_adds_pu8 ((__m64)ldq_u((uint64_t *)src), *(__m64*)dst);
dst += 8;
src += 8;
w -= 8;
@@ -2768,7 +2849,7 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
while (w >= 2)
{
- dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
+ dst64 = _mm_adds_pu8 ((__m64)ldq_u((uint64_t *)src), *(__m64*)dst);
*(uint64_t*)dst = to_uint64 (dst64);
dst += 2;
src += 2;
@@ -2841,6 +2922,14 @@ pixman_blt_mmx (uint32_t *src_bits,
dst_bytes += dst_stride;
w = byte_width;
+ while (w >= 1 && ((unsigned long)d & 1))
+ {
+ *(uint8_t *)d = *(uint8_t *)s;
+ w -= 1;
+ s += 1;
+ d += 1;
+ }
+
while (w >= 2 && ((unsigned long)d & 3))
{
*(uint16_t *)d = *(uint16_t *)s;
@@ -2851,7 +2940,7 @@ pixman_blt_mmx (uint32_t *src_bits,
while (w >= 4 && ((unsigned long)d & 7))
{
- *(uint32_t *)d = *(uint32_t *)s;
+ *(uint32_t *)d = ldl_u((uint32_t *)s);
w -= 4;
s += 4;
@@ -2860,7 +2949,7 @@ pixman_blt_mmx (uint32_t *src_bits,
while (w >= 64)
{
-#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
+#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
__asm__ (
"movq (%1), %%mm0\n"
"movq 8(%1), %%mm1\n"
@@ -2885,14 +2974,14 @@ pixman_blt_mmx (uint32_t *src_bits,
"%mm0", "%mm1", "%mm2", "%mm3",
"%mm4", "%mm5", "%mm6", "%mm7");
#else
- __m64 v0 = *(__m64 *)(s + 0);
- __m64 v1 = *(__m64 *)(s + 8);
- __m64 v2 = *(__m64 *)(s + 16);
- __m64 v3 = *(__m64 *)(s + 24);
- __m64 v4 = *(__m64 *)(s + 32);
- __m64 v5 = *(__m64 *)(s + 40);
- __m64 v6 = *(__m64 *)(s + 48);
- __m64 v7 = *(__m64 *)(s + 56);
+ __m64 v0 = ldq_u((uint64_t *)(s + 0));
+ __m64 v1 = ldq_u((uint64_t *)(s + 8));
+ __m64 v2 = ldq_u((uint64_t *)(s + 16));
+ __m64 v3 = ldq_u((uint64_t *)(s + 24));
+ __m64 v4 = ldq_u((uint64_t *)(s + 32));
+ __m64 v5 = ldq_u((uint64_t *)(s + 40));
+ __m64 v6 = ldq_u((uint64_t *)(s + 48));
+ __m64 v7 = ldq_u((uint64_t *)(s + 56));
*(__m64 *)(d + 0) = v0;
*(__m64 *)(d + 8) = v1;
*(__m64 *)(d + 16) = v2;
@@ -2909,7 +2998,7 @@ pixman_blt_mmx (uint32_t *src_bits,
}
while (w >= 4)
{
- *(uint32_t *)d = *(uint32_t *)s;
+ *(uint32_t *)d = ldl_u((uint32_t *)s);
w -= 4;
s += 4;
@@ -3161,4 +3250,4 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback)
return imp;
}
-#endif /* USE_MMX */
+#endif /* USE_X86_MMX || USE_ARM_IWMMXT */
diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h
index 6e716c6e6..4d645fe4b 100644
--- a/pixman/pixman/pixman-private.h
+++ b/pixman/pixman/pixman-private.h
@@ -545,7 +545,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback);
pixman_implementation_t *
_pixman_implementation_create_noop (pixman_implementation_t *fallback);
-#ifdef USE_MMX
+#if defined USE_X86_MMX || defined USE_ARM_IWMMXT
pixman_implementation_t *
_pixman_implementation_create_mmx (pixman_implementation_t *fallback);
#endif
diff --git a/pixman/test/Makefile.am b/pixman/test/Makefile.am
index 802d3f4fc..eeb3679f0 100644
--- a/pixman/test/Makefile.am
+++ b/pixman/test/Makefile.am
@@ -1,9 +1,9 @@
include $(top_srcdir)/test/Makefile.sources
-AM_CFLAGS = @OPENMP_CFLAGS@
-AM_LDFLAGS = @OPENMP_CFLAGS@ @TESTPROGS_EXTRA_LDFLAGS@
-LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm @PNG_LIBS@
-INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman @PNG_CFLAGS@
+AM_CFLAGS = $(OPENMP_CFLAGS)
+AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS)
+LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS)
+INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS)
libutils_la_SOURCES = $(libutils_sources) $(libutils_headers)