From c29d91cfd8df084f16d0d2dfa82c3a86f7719a73 Mon Sep 17 00:00:00 2001 From: marha Date: Mon, 9 Jul 2012 08:08:20 +0200 Subject: fontconfig libX11 mesa pixman xserver git update 9 Jul 2012 --- pixman/configure.ac | 3 +- pixman/pixman/Makefile.sources | 5 +- pixman/pixman/pixman-arm.c | 225 ++++++++++ pixman/pixman/pixman-cpu.c | 815 ---------------------------------- pixman/pixman/pixman-fast-path.c | 5 +- pixman/pixman/pixman-implementation.c | 51 +++ pixman/pixman/pixman-matrix.c | 4 +- pixman/pixman/pixman-mips-dspr2-asm.S | 704 +++++++++++++++++++++++++++++ pixman/pixman/pixman-mips-dspr2-asm.h | 34 ++ pixman/pixman/pixman-mips-dspr2.c | 51 +++ pixman/pixman/pixman-mips-dspr2.h | 49 ++ pixman/pixman/pixman-mips.c | 84 ++++ pixman/pixman/pixman-ppc.c | 155 +++++++ pixman/pixman/pixman-private.h | 17 + pixman/pixman/pixman-region.c | 14 +- pixman/pixman/pixman-x86.c | 282 ++++++++++++ pixman/test/stress-test.c | 71 ++- 17 files changed, 1724 insertions(+), 845 deletions(-) create mode 100644 pixman/pixman/pixman-arm.c delete mode 100644 pixman/pixman/pixman-cpu.c create mode 100644 pixman/pixman/pixman-mips.c create mode 100644 pixman/pixman/pixman-ppc.c create mode 100644 pixman/pixman/pixman-x86.c (limited to 'pixman') diff --git a/pixman/configure.ac b/pixman/configure.ac index 2b9d1ba20..36f423ef7 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -351,12 +351,11 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ int main () { __m64 v = _mm_cvtsi32_si64 (1); __m64 w; - signed char x = 0; /* Some versions of clang will choke on K */ asm ("pshufw %2, %1, %0\n\t" : "=y" (w) - : "y" (v), "K" (x) + : "y" (v), "K" (5) ); return _mm_cvtsi64_si32 (v); diff --git a/pixman/pixman/Makefile.sources b/pixman/pixman/Makefile.sources index 11f959d97..6472994c4 100644 --- a/pixman/pixman/Makefile.sources +++ b/pixman/pixman/Makefile.sources @@ -6,7 +6,10 @@ libpixman_sources = \ pixman-combine32.c \ pixman-combine64.c \ pixman-conical-gradient.c \ - pixman-cpu.c \ + pixman-x86.c \ + pixman-mips.c \ + pixman-arm.c \ + pixman-ppc.c \ pixman-edge.c \ pixman-edge-accessors.c \ pixman-fast-path.c \ diff --git a/pixman/pixman/pixman-arm.c b/pixman/pixman/pixman-arm.c new file mode 100644 index 000000000..23374e41c --- /dev/null +++ b/pixman/pixman/pixman-arm.c @@ -0,0 +1,225 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "pixman-private.h" + +typedef enum +{ + ARM_V7 = (1 << 0), + ARM_V6 = (1 << 1), + ARM_VFP = (1 << 2), + ARM_NEON = (1 << 3), + ARM_IWMMXT = (1 << 4) +} arm_cpu_features_t; + +#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT) + +#if defined(_MSC_VER) + +/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ +#include + +extern int pixman_msvc_try_arm_neon_op (); +extern int pixman_msvc_try_arm_simd_op (); + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + + __try + { + pixman_msvc_try_arm_simd_op (); + features |= ARM_V6; + } + __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) + { + } + + __try + { + pixman_msvc_try_arm_neon_op (); + features |= ARM_NEON; + } + __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) + { + } + + return features; +} + +#elif defined(__APPLE__) && defined(TARGET_OS_IPHONE) /* iOS */ + +#include "TargetConditionals.h" + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + + features |= ARM_V6; + + /* Detection of ARM NEON on iOS is fairly simple because iOS binaries + * contain separate executable images for each processor architecture. + * So all we have to do is detect the armv7 architecture build. The + * operating system automatically runs the armv7 binary for armv7 devices + * and the armv6 binary for armv6 devices. + */ +#if defined(__ARM_NEON__) + features |= ARM_NEON; +#endif + + return features; +} + +#elif defined(__ANDROID__) || defined(ANDROID) /* Android */ + +#include + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + AndroidCpuFamily cpu_family; + uint64_t cpu_features; + + cpu_family = android_getCpuFamily(); + cpu_features = android_getCpuFeatures(); + + if (cpu_family == ANDROID_CPU_FAMILY_ARM) + { + if (cpu_features & ANDROID_CPU_ARM_FEATURE_ARMv7) + features |= ARM_V7; + + if (cpu_features & ANDROID_CPU_ARM_FEATURE_VFPv3) + features |= ARM_VFP; + + if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) + features |= ARM_NEON; + } + + return features; +} + +#elif defined (__linux__) /* linux ELF */ + +#include +#include +#include +#include +#include +#include +#include + +static arm_cpu_features_t +detect_cpu_features (void) +{ + arm_cpu_features_t features = 0; + Elf32_auxv_t aux; + int fd; + + fd = open ("/proc/self/auxv", O_RDONLY); + if (fd >= 0) + { + while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) + { + if (aux.a_type == AT_HWCAP) + { + uint32_t hwcap = aux.a_un.a_val; + + /* hardcode these values to avoid depending on specific + * versions of the hwcap header, e.g. HWCAP_NEON + */ + if ((hwcap & 64) != 0) + features |= ARM_VFP; + if ((hwcap & 512) != 0) + features |= ARM_IWMMXT; + /* this flag is only present on kernel 2.6.29 */ + if ((hwcap & 4096) != 0) + features |= ARM_NEON; + } + else if (aux.a_type == AT_PLATFORM) + { + const char *plat = (const char*) aux.a_un.a_val; + + if (strncmp (plat, "v7l", 3) == 0) + features |= (ARM_V7 | ARM_V6); + else if (strncmp (plat, "v6l", 3) == 0) + features |= ARM_V6; + } + } + close (fd); + } + + return features; +} + +#else /* Unknown */ + +static arm_cpu_features_t +detect_cpu_features (void) +{ + return 0; +} + +#endif /* Linux elf */ + +static pixman_bool_t +have_feature (arm_cpu_features_t feature) +{ + static pixman_bool_t initialized; + static arm_cpu_features_t features; + + if (!initialized) + { + features = detect_cpu_features(); + initialized = TRUE; + } + + return (features & feature) == feature; +} + +#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */ + +pixman_implementation_t * +_pixman_arm_get_implementations (pixman_implementation_t *imp) +{ +#ifdef USE_ARM_SIMD + if (!_pixman_disabled ("arm-simd") && have_feature (ARM_V6)) + imp = _pixman_implementation_create_arm_simd (imp); +#endif + +#ifdef USE_ARM_IWMMXT + if (!_pixman_disabled ("arm-iwmmxt") && have_feature (ARM_IWMMXT)) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_ARM_NEON + if (!_pixman_disabled ("arm-neon") && have_feature (ARM_NEON)) + imp = _pixman_implementation_create_arm_neon (imp); +#endif + + return imp; +} diff --git a/pixman/pixman/pixman-cpu.c b/pixman/pixman/pixman-cpu.c deleted file mode 100644 index aa9036fa2..000000000 --- a/pixman/pixman/pixman-cpu.c +++ /dev/null @@ -1,815 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include - -#if defined(USE_ARM_SIMD) && defined(_MSC_VER) -/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ -#include -#endif - -#if defined(__APPLE__) -#include "TargetConditionals.h" -#endif - -#include "pixman-private.h" - -#ifdef USE_VMX - -/* The CPU detection code needs to be in a file not compiled with - * "-maltivec -mabi=altivec", as gcc would try to save vector register - * across function calls causing SIGILL on cpus without Altivec/vmx. - */ -static pixman_bool_t initialized = FALSE; -static volatile pixman_bool_t have_vmx = TRUE; - -#ifdef __APPLE__ -#include - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - size_t length = sizeof(have_vmx); - int error = - sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); - - if (error) - have_vmx = FALSE; - - initialized = TRUE; - } - return have_vmx; -} - -#elif defined (__OpenBSD__) -#include -#include -#include - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; - size_t length = sizeof(have_vmx); - int error = - sysctl (mib, 2, &have_vmx, &length, NULL, 0); - - if (error != 0) - have_vmx = FALSE; - - initialized = TRUE; - } - return have_vmx; -} - -#elif defined (__linux__) -#include -#include -#include -#include -#include -#include -#include - -static pixman_bool_t -pixman_have_vmx (void) -{ - if (!initialized) - { - char fname[64]; - unsigned long buf[64]; - ssize_t count = 0; - pid_t pid; - int fd, i; - - pid = getpid (); - snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid); - - fd = open (fname, O_RDONLY); - if (fd >= 0) - { - for (i = 0; i <= (count / sizeof(unsigned long)); i += 2) - { - /* Read more if buf is empty... */ - if (i == (count / sizeof(unsigned long))) - { - count = read (fd, buf, sizeof(buf)); - if (count <= 0) - break; - i = 0; - } - - if (buf[i] == AT_HWCAP) - { - have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC); - initialized = TRUE; - break; - } - else if (buf[i] == AT_NULL) - { - break; - } - } - close (fd); - } - } - if (!initialized) - { - /* Something went wrong. Assume 'no' rather than playing - fragile tricks with catching SIGILL. */ - have_vmx = FALSE; - initialized = TRUE; - } - - return have_vmx; -} - -#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */ -#include -#include - -static jmp_buf jump_env; - -static void -vmx_test (int sig, - siginfo_t *si, - void * unused) -{ - longjmp (jump_env, 1); -} - -static pixman_bool_t -pixman_have_vmx (void) -{ - struct sigaction sa, osa; - int jmp_result; - - if (!initialized) - { - sa.sa_flags = SA_SIGINFO; - sigemptyset (&sa.sa_mask); - sa.sa_sigaction = vmx_test; - sigaction (SIGILL, &sa, &osa); - jmp_result = setjmp (jump_env); - if (jmp_result == 0) - { - asm volatile ( "vor 0, 0, 0" ); - } - sigaction (SIGILL, &osa, NULL); - have_vmx = (jmp_result == 0); - initialized = TRUE; - } - return have_vmx; -} - -#endif /* __APPLE__ */ -#endif /* USE_VMX */ - -#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT) - -#if defined(_MSC_VER) - -#if defined(USE_ARM_SIMD) -extern int pixman_msvc_try_arm_simd_op (); - -pixman_bool_t -pixman_have_arm_simd (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t have_arm_simd = FALSE; - - if (!initialized) - { - __try { - pixman_msvc_try_arm_simd_op (); - have_arm_simd = TRUE; - } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { - have_arm_simd = FALSE; - } - initialized = TRUE; - } - - return have_arm_simd; -} - -#endif /* USE_ARM_SIMD */ - -#if defined(USE_ARM_NEON) -extern int pixman_msvc_try_arm_neon_op (); - -pixman_bool_t -pixman_have_arm_neon (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t have_arm_neon = FALSE; - - if (!initialized) - { - __try - { - pixman_msvc_try_arm_neon_op (); - have_arm_neon = TRUE; - } - __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) - { - have_arm_neon = FALSE; - } - initialized = TRUE; - } - - return have_arm_neon; -} - -#endif /* USE_ARM_NEON */ - -#elif (defined (__APPLE__) && defined(TARGET_OS_IPHONE)) /* iOS (iPhone/iPad/iPod touch) */ - -/* Detection of ARM NEON on iOS is fairly simple because iOS binaries - * contain separate executable images for each processor architecture. - * So all we have to do is detect the armv7 architecture build. The - * operating system automatically runs the armv7 binary for armv7 devices - * and the armv6 binary for armv6 devices. - */ - -pixman_bool_t -pixman_have_arm_simd (void) -{ -#if defined(USE_ARM_SIMD) - return TRUE; -#else - return FALSE; -#endif -} - -pixman_bool_t -pixman_have_arm_neon (void) -{ -#if defined(USE_ARM_NEON) && defined(__ARM_NEON__) - /* This is an armv7 cpu build */ - return TRUE; -#else - /* This is an armv6 cpu build */ - return FALSE; -#endif -} - -pixman_bool_t -pixman_have_arm_iwmmxt (void) -{ -#if defined(USE_ARM_IWMMXT) - return FALSE; -#else - return FALSE; -#endif -} - -#elif defined (__linux__) || defined(__ANDROID__) || defined(ANDROID) /* linux ELF or ANDROID */ - -static pixman_bool_t arm_has_v7 = FALSE; -static pixman_bool_t arm_has_v6 = FALSE; -static pixman_bool_t arm_has_vfp = FALSE; -static pixman_bool_t arm_has_neon = FALSE; -static pixman_bool_t arm_has_iwmmxt = FALSE; -static pixman_bool_t arm_tests_initialized = FALSE; - -#if defined(__ANDROID__) || defined(ANDROID) /* Android device support */ - -#include - -static void -pixman_arm_read_auxv_or_cpu_features () -{ - AndroidCpuFamily cpu_family; - uint64_t cpu_features; - - cpu_family = android_getCpuFamily(); - cpu_features = android_getCpuFeatures(); - - if (cpu_family == ANDROID_CPU_FAMILY_ARM) - { - if (cpu_features & ANDROID_CPU_ARM_FEATURE_ARMv7) - arm_has_v7 = TRUE; - - if (cpu_features & ANDROID_CPU_ARM_FEATURE_VFPv3) - arm_has_vfp = TRUE; - - if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) - arm_has_neon = TRUE; - } - - arm_tests_initialized = TRUE; -} - -#elif defined (__linux__) /* linux ELF */ - -#include -#include -#include -#include -#include -#include -#include - -static void -pixman_arm_read_auxv_or_cpu_features () -{ - int fd; - Elf32_auxv_t aux; - - fd = open ("/proc/self/auxv", O_RDONLY); - if (fd >= 0) - { - while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) - { - if (aux.a_type == AT_HWCAP) - { - uint32_t hwcap = aux.a_un.a_val; - /* hardcode these values to avoid depending on specific - * versions of the hwcap header, e.g. HWCAP_NEON - */ - arm_has_vfp = (hwcap & 64) != 0; - arm_has_iwmmxt = (hwcap & 512) != 0; - /* this flag is only present on kernel 2.6.29 */ - arm_has_neon = (hwcap & 4096) != 0; - } - else if (aux.a_type == AT_PLATFORM) - { - const char *plat = (const char*) aux.a_un.a_val; - if (strncmp (plat, "v7l", 3) == 0) - { - arm_has_v7 = TRUE; - arm_has_v6 = TRUE; - } - else if (strncmp (plat, "v6l", 3) == 0) - { - arm_has_v6 = TRUE; - } - } - } - close (fd); - } - - arm_tests_initialized = TRUE; -} - -#endif /* Linux elf */ - -#if defined(USE_ARM_SIMD) -pixman_bool_t -pixman_have_arm_simd (void) -{ - if (!arm_tests_initialized) - pixman_arm_read_auxv_or_cpu_features (); - - return arm_has_v6; -} - -#endif /* USE_ARM_SIMD */ - -#if defined(USE_ARM_NEON) -pixman_bool_t -pixman_have_arm_neon (void) -{ - if (!arm_tests_initialized) - pixman_arm_read_auxv_or_cpu_features (); - - return arm_has_neon; -} - -#endif /* USE_ARM_NEON */ - -#if defined(USE_ARM_IWMMXT) -pixman_bool_t -pixman_have_arm_iwmmxt (void) -{ - if (!arm_tests_initialized) - pixman_arm_read_auxv_or_cpu_features (); - - return arm_has_iwmmxt; -} - -#endif /* USE_ARM_IWMMXT */ - -#else /* !_MSC_VER && !Linux elf && !Android */ - -#define pixman_have_arm_simd() FALSE -#define pixman_have_arm_neon() FALSE -#define pixman_have_arm_iwmmxt() FALSE - -#endif - -#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */ - -#if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI) - -#if defined (__linux__) /* linux ELF */ - -static pixman_bool_t -pixman_have_mips_feature (const char *search_string) -{ - const char *file_name = "/proc/cpuinfo"; - /* Simple detection of MIPS features at runtime for Linux. - * It is based on /proc/cpuinfo, which reveals hardware configuration - * to user-space applications. According to MIPS (early 2010), no similar - * facility is universally available on the MIPS architectures, so it's up - * to individual OSes to provide such. - */ - - char cpuinfo_line[256]; - - FILE *f = NULL; - - if ((f = fopen (file_name, "r")) == NULL) - return FALSE; - - while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL) - { - if (strstr (cpuinfo_line, search_string) != NULL) - { - fclose (f); - return TRUE; - } - } - - fclose (f); - - /* Did not find string in the proc file. */ - return FALSE; -} - -#if defined(USE_MIPS_DSPR2) -pixman_bool_t -pixman_have_mips_dspr2 (void) -{ - /* Only currently available MIPS core that supports DSPr2 is 74K. */ - return pixman_have_mips_feature ("MIPS 74K"); -} -#endif - -#if defined(USE_LOONGSON_MMI) -pixman_bool_t -pixman_have_loongson_mmi (void) -{ - /* I really don't know if some Loongson CPUs don't have MMI. */ - return pixman_have_mips_feature ("Loongson"); -} -#endif - -#else /* linux ELF */ - -#define pixman_have_mips_dspr2() FALSE -#define pixman_have_loongson_mmi() FALSE - -#endif /* linux ELF */ - -#endif /* USE_MIPS_DSPR2 || USE_LOONGSON_MMI */ - -#if defined(USE_X86_MMX) || defined(USE_SSE2) -/* The CPU detection code needs to be in a file not compiled with - * "-mmmx -msse", as gcc would generate CMOV instructions otherwise - * that would lead to SIGILL instructions on old CPUs that don't have - * it. - */ -#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) - -#ifdef HAVE_GETISAX -#include -#endif - -typedef enum -{ - NO_FEATURES = 0, - MMX = 0x1, - MMX_EXTENSIONS = 0x2, - SSE = 0x6, - SSE2 = 0x8, - CMOV = 0x10 -} cpu_features_t; - - -static unsigned int -detect_cpu_features (void) -{ - unsigned int features = 0; - unsigned int result = 0; - -#ifdef HAVE_GETISAX - if (getisax (&result, 1)) - { - if (result & AV_386_CMOV) - features |= CMOV; - if (result & AV_386_MMX) - features |= MMX; - if (result & AV_386_AMD_MMX) - features |= MMX_EXTENSIONS; - if (result & AV_386_SSE) - features |= SSE; - if (result & AV_386_SSE2) - features |= SSE2; - } -#else - char vendor[13]; -#ifdef _MSC_VER - int vendor0 = 0, vendor1, vendor2; -#endif - vendor[0] = 0; - vendor[12] = 0; - -#ifdef __GNUC__ - /* see p. 118 of amd64 instruction set manual Vol3 */ - /* We need to be careful about the handling of %ebx and - * %esp here. We can't declare either one as clobbered - * since they are special registers (%ebx is the "PIC - * register" holding an offset to global data, %esp the - * stack pointer), so we need to make sure they have their - * original values when we access the output operands. - */ - __asm__ ( - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ecx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "mov $0x0, %%edx\n" - "xor %%ecx, %%eax\n" - "jz 1f\n" - - "mov $0x00000000, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "mov %%ebx, %%eax\n" - "pop %%ebx\n" - "mov %%eax, %1\n" - "mov %%edx, %2\n" - "mov %%ecx, %3\n" - "mov $0x00000001, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "pop %%ebx\n" - "1:\n" - "mov %%edx, %0\n" - : "=r" (result), - "=m" (vendor[0]), - "=m" (vendor[4]), - "=m" (vendor[8]) - : - : "%eax", "%ecx", "%edx" - ); - -#elif defined (_MSC_VER) - - _asm { - pushfd - pop eax - mov ecx, eax - xor eax, 00200000h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ecx - jz nocpuid - - mov eax, 0 - push ebx - cpuid - mov eax, ebx - pop ebx - mov vendor0, eax - mov vendor1, edx - mov vendor2, ecx - mov eax, 1 - push ebx - cpuid - pop ebx - nocpuid: - mov result, edx - } - memmove (vendor + 0, &vendor0, 4); - memmove (vendor + 4, &vendor1, 4); - memmove (vendor + 8, &vendor2, 4); - -#else -# error unsupported compiler -#endif - - features = 0; - if (result) - { - /* result now contains the standard feature bits */ - if (result & (1 << 15)) - features |= CMOV; - if (result & (1 << 23)) - features |= MMX; - if (result & (1 << 25)) - features |= SSE; - if (result & (1 << 26)) - features |= SSE2; - if ((features & MMX) && !(features & SSE) && - (strcmp (vendor, "AuthenticAMD") == 0 || - strcmp (vendor, "Geode by NSC") == 0)) - { - /* check for AMD MMX extensions */ -#ifdef __GNUC__ - __asm__ ( - " push %%ebx\n" - " mov $0x80000000, %%eax\n" - " cpuid\n" - " xor %%edx, %%edx\n" - " cmp $0x1, %%eax\n" - " jge 2f\n" - " mov $0x80000001, %%eax\n" - " cpuid\n" - "2:\n" - " pop %%ebx\n" - " mov %%edx, %0\n" - : "=r" (result) - : - : "%eax", "%ecx", "%edx" - ); -#elif defined _MSC_VER - _asm { - push ebx - mov eax, 80000000h - cpuid - xor edx, edx - cmp eax, 1 - jge notamd - mov eax, 80000001h - cpuid - notamd: - pop ebx - mov result, edx - } -#endif - if (result & (1 << 22)) - features |= MMX_EXTENSIONS; - } - } -#endif /* HAVE_GETISAX */ - - return features; -} - -#ifdef USE_X86_MMX -static pixman_bool_t -pixman_have_mmx (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t mmx_present; - - if (!initialized) - { - unsigned int features = detect_cpu_features (); - mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS); - initialized = TRUE; - } - - return mmx_present; -} -#endif - -#ifdef USE_SSE2 -static pixman_bool_t -pixman_have_sse2 (void) -{ - static pixman_bool_t initialized = FALSE; - static pixman_bool_t sse2_present; - - if (!initialized) - { - unsigned int features = detect_cpu_features (); - sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2); - initialized = TRUE; - } - - return sse2_present; -} - -#endif - -#else /* __amd64__ */ -#ifdef USE_X86_MMX -#define pixman_have_mmx() TRUE -#endif -#ifdef USE_SSE2 -#define pixman_have_sse2() TRUE -#endif -#endif /* __amd64__ */ -#endif - -static pixman_bool_t -disabled (const char *name) -{ - const char *env; - - if ((env = getenv ("PIXMAN_DISABLE"))) - { - do - { - const char *end; - int len; - - if ((end = strchr (env, ' '))) - len = end - env; - else - len = strlen (env); - - if (strlen (name) == len && strncmp (name, env, len) == 0) - { - printf ("pixman: Disabled %s implementation\n", name); - return TRUE; - } - - env += len; - } - while (*env++); - } - - return FALSE; -} - -pixman_implementation_t * -_pixman_choose_implementation (void) -{ - pixman_implementation_t *imp; - - imp = _pixman_implementation_create_general(); - - if (!disabled ("fast")) - imp = _pixman_implementation_create_fast_path (imp); - -#ifdef USE_X86_MMX - if (!disabled ("mmx") && pixman_have_mmx ()) - imp = _pixman_implementation_create_mmx (imp); -#endif - -#ifdef USE_SSE2 - if (!disabled ("sse2") && pixman_have_sse2 ()) - imp = _pixman_implementation_create_sse2 (imp); -#endif - -#ifdef USE_ARM_SIMD - if (!disabled ("arm-simd") && pixman_have_arm_simd ()) - imp = _pixman_implementation_create_arm_simd (imp); -#endif - -#ifdef USE_ARM_IWMMXT - if (!disabled ("arm-iwmmxt") && pixman_have_arm_iwmmxt ()) - imp = _pixman_implementation_create_mmx (imp); -#endif -#ifdef USE_LOONGSON_MMI - if (!disabled ("loongson-mmi") && pixman_have_loongson_mmi ()) - imp = _pixman_implementation_create_mmx (imp); -#endif -#ifdef USE_ARM_NEON - if (!disabled ("arm-neon") && pixman_have_arm_neon ()) - imp = _pixman_implementation_create_arm_neon (imp); -#endif - -#ifdef USE_MIPS_DSPR2 - if (!disabled ("mips-dspr2") && pixman_have_mips_dspr2 ()) - imp = _pixman_implementation_create_mips_dspr2 (imp); -#endif - -#ifdef USE_VMX - if (!disabled ("vmx") && pixman_have_vmx ()) - imp = _pixman_implementation_create_vmx (imp); -#endif - - imp = _pixman_implementation_create_noop (imp); - - return imp; -} - diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index e79b06970..9778b0cbe 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -1280,8 +1280,9 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); - if (src_image->bits.width < REPEAT_MIN_WIDTH && - (src_bpp == 32 || src_bpp == 16 || src_bpp == 8)) + if (src_image->bits.width < REPEAT_MIN_WIDTH && + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && + !src_image->bits.indexed) { sx = src_x; sx = MOD (sx, src_image->bits.width); diff --git a/pixman/pixman/pixman-implementation.c b/pixman/pixman/pixman-implementation.c index c769ab8cb..77d0906c8 100644 --- a/pixman/pixman/pixman-implementation.c +++ b/pixman/pixman/pixman-implementation.c @@ -223,3 +223,54 @@ _pixman_implementation_dest_iter_init (pixman_implementation_t *imp, (*imp->dest_iter_init) (imp, iter); } + +pixman_bool_t +_pixman_disabled (const char *name) +{ + const char *env; + + if ((env = getenv ("PIXMAN_DISABLE"))) + { + do + { + const char *end; + int len; + + if ((end = strchr (env, ' '))) + len = end - env; + else + len = strlen (env); + + if (strlen (name) == len && strncmp (name, env, len) == 0) + { + printf ("pixman: Disabled %s implementation\n", name); + return TRUE; + } + + env += len; + } + while (*env++); + } + + return FALSE; +} + +pixman_implementation_t * +_pixman_choose_implementation (void) +{ + pixman_implementation_t *imp; + + imp = _pixman_implementation_create_general(); + + if (!_pixman_disabled ("fast")) + imp = _pixman_implementation_create_fast_path (imp); + + imp = _pixman_x86_get_implementations (imp); + imp = _pixman_arm_get_implementations (imp); + imp = _pixman_ppc_get_implementations (imp); + imp = _pixman_mips_get_implementations (imp); + + imp = _pixman_implementation_create_noop (imp); + + return imp; +} diff --git a/pixman/pixman/pixman-matrix.c b/pixman/pixman/pixman-matrix.c index 8d0d97325..6d215ff10 100644 --- a/pixman/pixman/pixman-matrix.c +++ b/pixman/pixman/pixman-matrix.c @@ -471,8 +471,8 @@ pixman_f_transform_invert (struct pixman_f_transform * dst, { double det; int i, j; - static int a[3] = { 2, 2, 1 }; - static int b[3] = { 1, 0, 0 }; + static const int a[3] = { 2, 2, 1 }; + static const int b[3] = { 1, 0, 0 }; det = 0; for (i = 0; i < 3; i++) diff --git a/pixman/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman/pixman-mips-dspr2-asm.S index 48f108ed9..a8fccd5ad 100644 --- a/pixman/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman/pixman-mips-dspr2-asm.S @@ -749,6 +749,645 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips) END(pixman_composite_over_n_8_0565_asm_mips) +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s0, 36(sp) /* s0 = wt */ + lw s1, 40(sp) /* s1 = wb */ + lw s2, 44(sp) /* s2 = vx */ + lw s3, 48(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s0, 36(sp) /* s0 = wt */ + lw s1, 40(sp) /* s1 = wb */ + lw s2, 44(sp) /* s2 = vx */ + lw s3, 48(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + addu s2, s2, s3 /* vx += unit_x; */ + sh t1, 0(a0) + bnez a3, 0b + addiu a0, a0, 2 + + RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li v1, 0x07e007e0 + li s8, 0x001f001f + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 1 + addiu t8, t9, 2 + lhx t0, t9(a1) /* t0 = tl */ + lhx t1, t8(a1) /* t1 = tr */ + andi t1, t1, 0xffff + addiu a3, a3, -1 + lhx t2, t9(a2) /* t2 = bl */ + lhx t3, t8(a2) /* t3 = br */ + andi t3, t3, 0xffff + + CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li v1, 0x07e007e0 + li s8, 0x001f001f + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 1 + addiu t8, t9, 2 + lhx t0, t9(a1) /* t0 = tl */ + lhx t1, t8(a1) /* t1 = tr */ + andi t1, t1, 0xffff + addiu a3, a3, -1 + lhx t2, t9(a2) /* t2 = bl */ + lhx t3, t8(a2) /* t3 = br */ + andi t3, t3, 0xffff + + CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + addu s2, s2, s3 /* vx += unit_x; */ + sh t1, 0(a0) + bnez a3, 0b + addiu a0, a0, 2 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 40(sp) /* s0 = wt */ + lw s1, 44(sp) /* s1 = wb */ + lw s2, 48(sp) /* s2 = vx */ + lw s3, 52(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lw t1, 0(a0) /* t1 = dest */ + OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t2, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s0, 36(sp) /* s0 = wt */ + lw s1, 40(sp) /* s1 = wb */ + lw s2, 44(sp) /* s2 = vx */ + lw s3, 48(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lw t1, 0(a0) + addu_s.qb t2, t0, t1 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t2, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *mask + * a2 - *src_top + * a3 - *src_bottom + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + lw v1, 32(sp) + beqz v1, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a2) /* t0 = tl */ + lwx t1, t8(a2) /* t1 = tr */ + addiu v1, v1, -1 + lwx t2, t9(a3) /* t2 = bl */ + lwx t3, t8(a3) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + addiu a1, a1, 1 + MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez v1, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *mask + * a2 - *src_top + * a3 - *src_bottom + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + lw v1, 32(sp) + beqz v1, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a2) /* t0 = tl */ + lwx t1, t8(a2) /* t1 = tr */ + addiu v1, v1, -1 + lwx t2, t9(a3) /* t2 = bl */ + lwx t3, t8(a3) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + addiu a1, a1, 1 + MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4 + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + addu s2, s2, s3 /* vx += unit_x; */ + sh t1, 0(a0) + bnez v1, 0b + addiu a0, a0, 2 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *mask + * a2 - *src_top + * a3 - *src_bottom + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + lw t0, 32(sp) + beqz t0, 1f + nop + + SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra + + lw s0, 48(sp) /* s0 = wt */ + lw s1, 52(sp) /* s1 = wb */ + lw s2, 56(sp) /* s2 = vx */ + lw s3, 60(sp) /* s3 = unit_x */ + lw ra, 64(sp) /* ra = w */ + li v0, 0x00ff00ff + li v1, 0x07e007e0 + li s8, 0x001f001f + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + li t5, BILINEAR_INTERPOLATION_RANGE + subu t5, t5, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 1 + addiu t8, t9, 2 + lhx t0, t9(a2) /* t0 = tl */ + lhx t1, t8(a2) /* t1 = tr */ + andi t1, t1, 0xffff + addiu ra, ra, -1 + lhx t2, t9(a3) /* t2 = bl */ + lhx t3, t8(a3) /* t3 = br */ + andi t3, t3, 0xffff + + CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + addiu a1, a1, 1 + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez ra, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *mask + * a2 - *src_top + * a3 - *src_bottom + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + lw t0, 32(sp) + beqz t0, 1f + nop + + SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra + + lw s0, 48(sp) /* s0 = wt */ + lw s1, 52(sp) /* s1 = wb */ + lw s2, 56(sp) /* s2 = vx */ + lw s3, 60(sp) /* s3 = unit_x */ + lw ra, 64(sp) /* ra = w */ + li v0, 0x00ff00ff + li v1, 0x07e007e0 + li s8, 0x001f001f + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + li t5, BILINEAR_INTERPOLATION_RANGE + subu t5, t5, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 1 + addiu t8, t9, 2 + lhx t0, t9(a2) /* t0 = tl */ + lhx t1, t8(a2) /* t1 = tr */ + andi t1, t1, 0xffff + addiu ra, ra, -1 + lhx t2, t9(a3) /* t2 = bl */ + lhx t3, t8(a3) /* t3 = br */ + andi t3, t3, 0xffff + + CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + addiu a1, a1, 1 + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4 + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + addu s2, s2, s3 /* vx += unit_x; */ + sh t1, 0(a0) + bnez ra, 0b + addiu a0, a0, 2 + + RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips) + LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips) /* * a0 - dst (a8r8g8b8) @@ -815,3 +1454,68 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips) nop END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips) +/* + * a0 - *dst + * a1 - *mask + * a2 - *src_top + * a3 - *src_bottom + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + * 32(sp) - w + */ + + lw v1, 32(sp) + beqz v1, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a2) /* t0 = tl */ + lwx t1, t8(a2) /* t1 = tr */ + addiu v1, v1, -1 + lwx t2, t9(a3) /* t2 = bl */ + lwx t3, t8(a3) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lbu t1, 0(a1) /* t1 = mask */ + lw t2, 0(a0) /* t2 = dst */ + addiu a1, a1, 1 + MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez v1, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips) diff --git a/pixman/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman/pixman-mips-dspr2-asm.h index 7cf3281ed..7327dc6dc 100644 --- a/pixman/pixman/pixman-mips-dspr2-asm.h +++ b/pixman/pixman/pixman-mips-dspr2-asm.h @@ -566,6 +566,40 @@ LEAF_MIPS32R2(symbol) \ addu_s.qb \out2_8888, \d2_8888, \scratch2 .endm +/* + * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8 + * destination pixel (d_8888). It also requires maskLSR needed for rounding + * process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_8888_8888 s_8888, \ + d_8888, \ + out_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, scratch4 + not \scratch1, \s_8888 + srl \scratch1, \scratch1, 24 + + MIPS_UN8x4_MUL_UN8 \d_8888, \scratch1, \ + \out_8888, \maskLSR, \ + \scratch2, \scratch3, \scratch4 + + addu_s.qb \out_8888, \out_8888, \s_8888 +.endm + +.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \ + m_8, \ + d_8888, \ + out_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3 + MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \ + \out_8888, \maskLSR, \ + \scratch1, \scratch2, \scratch3 + + addu_s.qb \out_8888, \out_8888, \d_8888 +.endm + .macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br, \ scratch1, scratch2, \ alpha, red, green, blue \ diff --git a/pixman/pixman/pixman-mips-dspr2.c b/pixman/pixman/pixman-mips-dspr2.c index 06d4335dd..63a0225a2 100644 --- a/pixman/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman/pixman-mips-dspr2.c @@ -58,8 +58,31 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888, PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565, uint8_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_8888, SRC, + uint16_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_0565, SRC, + uint16_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD, + uint32_t, uint32_t) + +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC, + uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_x888, SRC, + uint16_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_0565, SRC, + uint16_t, uint16_t) PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, OVER, uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, ADD, + uint32_t, uint32_t) static pixman_bool_t pixman_fill_mips (uint32_t *bits, @@ -209,9 +232,37 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565), PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_0565), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_0565), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_8_0565), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_8_0565), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8_x888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_8_0565), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8_8888), { PIXMAN_OP_NONE }, }; diff --git a/pixman/pixman/pixman-mips-dspr2.h b/pixman/pixman/pixman-mips-dspr2.h index 5036938c1..a3d774fcb 100644 --- a/pixman/pixman/pixman-mips-dspr2.h +++ b/pixman/pixman/pixman-mips-dspr2.h @@ -127,6 +127,55 @@ mips_composite_##name (pixman_implementation_t *imp, \ } \ } +/****************************************************************************/ + +#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips( \ + dst_type * dst, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ +static force_inline void \ +scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \ + const uint32_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (dst, src_top, \ + src_bottom, w, \ + wt, wb, \ + vx, unit_x); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, COVER, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, NONE, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, PAD, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, NORMAL, \ + FLAG_NONE) + /*****************************************************************************/ #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op, \ diff --git a/pixman/pixman/pixman-mips.c b/pixman/pixman/pixman-mips.c new file mode 100644 index 000000000..2b280c6f1 --- /dev/null +++ b/pixman/pixman/pixman-mips.c @@ -0,0 +1,84 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "pixman-private.h" + +#if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI) + +#include +#include + +static pixman_bool_t +have_feature (const char *search_string) +{ +#if defined (__linux__) /* linux ELF */ + /* Simple detection of MIPS features at runtime for Linux. + * It is based on /proc/cpuinfo, which reveals hardware configuration + * to user-space applications. According to MIPS (early 2010), no similar + * facility is universally available on the MIPS architectures, so it's up + * to individual OSes to provide such. + */ + const char *file_name = "/proc/cpuinfo"; + char cpuinfo_line[256]; + FILE *f = NULL; + + if ((f = fopen (file_name, "r")) == NULL) + return FALSE; + + while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL) + { + if (strstr (cpuinfo_line, search_string) != NULL) + { + fclose (f); + return TRUE; + } + } + + fclose (f); +#endif + + /* Did not find string in the proc file, or not Linux ELF. */ + return FALSE; +} + +#endif + +pixman_implementation_t * +_pixman_mips_get_implementations (pixman_implementation_t *imp) +{ +#ifdef USE_LOONGSON_MMI + /* I really don't know if some Loongson CPUs don't have MMI. */ + if (!_pixman_disabled ("loongson-mmi") && have_feature ("Loongson")) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_MIPS_DSPR2 + /* Only currently available MIPS core that supports DSPr2 is 74K. */ + if (!_pixman_disabled ("mips-dspr2") && have_feature ("MIPS 74K")) + imp = _pixman_implementation_create_mips_dspr2 (imp); +#endif + + return imp; +} diff --git a/pixman/pixman/pixman-ppc.c b/pixman/pixman/pixman-ppc.c new file mode 100644 index 000000000..f1bea1eac --- /dev/null +++ b/pixman/pixman/pixman-ppc.c @@ -0,0 +1,155 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "pixman-private.h" + +#ifdef USE_VMX + +/* The CPU detection code needs to be in a file not compiled with + * "-maltivec -mabi=altivec", as gcc would try to save vector register + * across function calls causing SIGILL on cpus without Altivec/vmx. + */ +#ifdef __APPLE__ +#include + +static pixman_bool_t +pixman_have_vmx (void) +{ + size_t length = sizeof(have_vmx); + int error, have_mmx; + + sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); + + if (error) + return FALSE; + + return have_vmx; +} + +#elif defined (__OpenBSD__) +#include +#include +#include + +static pixman_bool_t +pixman_have_vmx (void) +{ + int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; + size_t length = sizeof(have_vmx); + int error, have_vmx; + + error = sysctl (mib, 2, &have_vmx, &length, NULL, 0); + + if (error != 0) + return FALSE; + + return have_vmx; +} + +#elif defined (__linux__) + +#include +#include +#include +#include +#include +#include +#include + +static pixman_bool_t +pixman_have_vmx (void) +{ + int have_vmx = FALSE; + int fd; + struct + { + unsigned long type; + unsigned long value; + } aux; + + fd = open ("/proc/self/auxv", O_RDONLY); + if (fd >= 0) + { + while (read (fd, &aux, sizeof (aux)) == sizeof (aux)) + { + if (aux.type == AT_HWCAP && (aux.value & PPC_FEATURE_HAS_ALTIVEC)) + { + have_vmx = TRUE; + break; + } + } + + close (fd); + } + + return have_vmx; +} + +#else /* !__APPLE__ && !__OpenBSD__ && !__linux__ */ +#include +#include + +static jmp_buf jump_env; + +static void +vmx_test (int sig, + siginfo_t *si, + void * unused) +{ + longjmp (jump_env, 1); +} + +static pixman_bool_t +pixman_have_vmx (void) +{ + struct sigaction sa, osa; + int jmp_result; + + sa.sa_flags = SA_SIGINFO; + sigemptyset (&sa.sa_mask); + sa.sa_sigaction = vmx_test; + sigaction (SIGILL, &sa, &osa); + jmp_result = setjmp (jump_env); + if (jmp_result == 0) + { + asm volatile ( "vor 0, 0, 0" ); + } + sigaction (SIGILL, &osa, NULL); + return (jmp_result == 0); +} + +#endif /* __APPLE__ */ +#endif /* USE_VMX */ + +pixman_implementation_t * +_pixman_ppc_get_implementations (pixman_implementation_t *imp) +{ +#ifdef USE_VMX + if (!_pixman_disabled ("vmx") && pixman_have_vmx ()) + imp = _pixman_implementation_create_vmx (imp); +#endif + + return imp; +} diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index 0c27798b0..4d8f64d3b 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -585,9 +585,26 @@ pixman_implementation_t * _pixman_implementation_create_vmx (pixman_implementation_t *fallback); #endif +pixman_bool_t +_pixman_implementation_disabled (const char *name); + +pixman_implementation_t * +_pixman_x86_get_implementations (pixman_implementation_t *imp); + +pixman_implementation_t * +_pixman_arm_get_implementations (pixman_implementation_t *imp); + +pixman_implementation_t * +_pixman_ppc_get_implementations (pixman_implementation_t *imp); + +pixman_implementation_t * +_pixman_mips_get_implementations (pixman_implementation_t *imp); + pixman_implementation_t * _pixman_choose_implementation (void); +pixman_bool_t +_pixman_disabled (const char *name); /* diff --git a/pixman/pixman/pixman-region.c b/pixman/pixman/pixman-region.c index 70c282d40..4626f9cc6 100644 --- a/pixman/pixman/pixman-region.c +++ b/pixman/pixman/pixman-region.c @@ -2041,10 +2041,10 @@ PREFIX (_subtract) (region_type_t *reg_d, * *----------------------------------------------------------------------- */ -pixman_bool_t -PIXMAN_EXPORT PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ - region_type_t *reg1, /* Region to invert */ - box_type_t * inv_rect) /* Bounding box for inversion */ +PIXMAN_EXPORT pixman_bool_t +PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ + region_type_t *reg1, /* Region to invert */ + box_type_t * inv_rect) /* Bounding box for inversion */ { region_type_t inv_reg; /* Quick and dirty region made from the * bounding box */ @@ -2137,9 +2137,9 @@ find_box_for_y (box_type_t *begin, box_type_t *end, int y) * partially in the region) or is outside the region (we reached a band * that doesn't overlap the box at all and part_in is false) */ -pixman_region_overlap_t -PIXMAN_EXPORT PREFIX (_contains_rectangle) (region_type_t * region, - box_type_t * prect) +PIXMAN_EXPORT pixman_region_overlap_t +PREFIX (_contains_rectangle) (region_type_t * region, + box_type_t * prect) { box_type_t * pbox; box_type_t * pbox_end; diff --git a/pixman/pixman/pixman-x86.c b/pixman/pixman/pixman-x86.c new file mode 100644 index 000000000..52ad3dfd5 --- /dev/null +++ b/pixman/pixman/pixman-x86.c @@ -0,0 +1,282 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "pixman-private.h" + +#if defined(USE_X86_MMX) || defined (USE_SSE2) + +/* The CPU detection code needs to be in a file not compiled with + * "-mmmx -msse", as gcc would generate CMOV instructions otherwise + * that would lead to SIGILL instructions on old CPUs that don't have + * it. + */ +#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) + +#ifdef HAVE_GETISAX +#include +#endif + +typedef enum +{ + NO_FEATURES = 0, + MMX = 0x1, + MMX_EXTENSIONS = 0x2, + SSE = 0x6, + SSE2 = 0x8, + CMOV = 0x10 +} cpu_features_t; + + +static unsigned int +detect_cpu_features (void) +{ + unsigned int features = 0; + unsigned int result = 0; + +#ifdef HAVE_GETISAX + if (getisax (&result, 1)) + { + if (result & AV_386_CMOV) + features |= CMOV; + if (result & AV_386_MMX) + features |= MMX; + if (result & AV_386_AMD_MMX) + features |= MMX_EXTENSIONS; + if (result & AV_386_SSE) + features |= SSE; + if (result & AV_386_SSE2) + features |= SSE2; + } +#else + char vendor[13]; +#ifdef _MSC_VER + int vendor0 = 0, vendor1, vendor2; +#endif + vendor[0] = 0; + vendor[12] = 0; + +#ifdef __GNUC__ + /* see p. 118 of amd64 instruction set manual Vol3 */ + /* We need to be careful about the handling of %ebx and + * %esp here. We can't declare either one as clobbered + * since they are special registers (%ebx is the "PIC + * register" holding an offset to global data, %esp the + * stack pointer), so we need to make sure they have their + * original values when we access the output operands. + */ + __asm__ ( + "pushf\n" + "pop %%eax\n" + "mov %%eax, %%ecx\n" + "xor $0x00200000, %%eax\n" + "push %%eax\n" + "popf\n" + "pushf\n" + "pop %%eax\n" + "mov $0x0, %%edx\n" + "xor %%ecx, %%eax\n" + "jz 1f\n" + + "mov $0x00000000, %%eax\n" + "push %%ebx\n" + "cpuid\n" + "mov %%ebx, %%eax\n" + "pop %%ebx\n" + "mov %%eax, %1\n" + "mov %%edx, %2\n" + "mov %%ecx, %3\n" + "mov $0x00000001, %%eax\n" + "push %%ebx\n" + "cpuid\n" + "pop %%ebx\n" + "1:\n" + "mov %%edx, %0\n" + : "=r" (result), + "=m" (vendor[0]), + "=m" (vendor[4]), + "=m" (vendor[8]) + : + : "%eax", "%ecx", "%edx" + ); + +#elif defined (_MSC_VER) + + _asm { + pushfd + pop eax + mov ecx, eax + xor eax, 00200000h + push eax + popfd + pushfd + pop eax + mov edx, 0 + xor eax, ecx + jz nocpuid + + mov eax, 0 + push ebx + cpuid + mov eax, ebx + pop ebx + mov vendor0, eax + mov vendor1, edx + mov vendor2, ecx + mov eax, 1 + push ebx + cpuid + pop ebx + nocpuid: + mov result, edx + } + memmove (vendor + 0, &vendor0, 4); + memmove (vendor + 4, &vendor1, 4); + memmove (vendor + 8, &vendor2, 4); + +#else +# error unsupported compiler +#endif + + features = 0; + if (result) + { + /* result now contains the standard feature bits */ + if (result & (1 << 15)) + features |= CMOV; + if (result & (1 << 23)) + features |= MMX; + if (result & (1 << 25)) + features |= SSE; + if (result & (1 << 26)) + features |= SSE2; + if ((features & MMX) && !(features & SSE) && + (strcmp (vendor, "AuthenticAMD") == 0 || + strcmp (vendor, "Geode by NSC") == 0)) + { + /* check for AMD MMX extensions */ +#ifdef __GNUC__ + __asm__ ( + " push %%ebx\n" + " mov $0x80000000, %%eax\n" + " cpuid\n" + " xor %%edx, %%edx\n" + " cmp $0x1, %%eax\n" + " jge 2f\n" + " mov $0x80000001, %%eax\n" + " cpuid\n" + "2:\n" + " pop %%ebx\n" + " mov %%edx, %0\n" + : "=r" (result) + : + : "%eax", "%ecx", "%edx" + ); +#elif defined _MSC_VER + _asm { + push ebx + mov eax, 80000000h + cpuid + xor edx, edx + cmp eax, 1 + jge notamd + mov eax, 80000001h + cpuid + notamd: + pop ebx + mov result, edx + } +#endif + if (result & (1 << 22)) + features |= MMX_EXTENSIONS; + } + } +#endif /* HAVE_GETISAX */ + + return features; +} + +#ifdef USE_X86_MMX +static pixman_bool_t +pixman_have_mmx (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t mmx_present; + + if (!initialized) + { + unsigned int features = detect_cpu_features (); + mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS); + initialized = TRUE; + } + + return mmx_present; +} +#endif + +#ifdef USE_SSE2 +static pixman_bool_t +pixman_have_sse2 (void) +{ + static pixman_bool_t initialized = FALSE; + static pixman_bool_t sse2_present; + + if (!initialized) + { + unsigned int features = detect_cpu_features (); + sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2); + initialized = TRUE; + } + + return sse2_present; +} + +#endif + +#else /* __amd64__ */ +#ifdef USE_X86_MMX +#define pixman_have_mmx() TRUE +#endif +#ifdef USE_SSE2 +#define pixman_have_sse2() TRUE +#endif +#endif /* __amd64__ */ + +#endif + +pixman_implementation_t * +_pixman_x86_get_implementations (pixman_implementation_t *imp) +{ +#ifdef USE_X86_MMX + if (!_pixman_disabled ("mmx") && pixman_have_mmx()) + imp = _pixman_implementation_create_mmx (imp); +#endif + +#ifdef USE_SSE2 + if (!_pixman_disabled ("sse2") && pixman_have_sse2()) + imp = _pixman_implementation_create_sse2 (imp); +#endif + + return imp; +} diff --git a/pixman/test/stress-test.c b/pixman/test/stress-test.c index 7fc067e82..9280802bd 100644 --- a/pixman/test/stress-test.c +++ b/pixman/test/stress-test.c @@ -83,7 +83,7 @@ get_size (void) default: case 2: - return lcg_rand_n (200); + return lcg_rand_n (100); case 4: return lcg_rand_n (2000) + 1000; @@ -181,9 +181,27 @@ log_rand (void) { uint32_t mask; - mask = (1 << lcg_rand_n (31)) - 1; + mask = (1 << lcg_rand_n (10)) - 1; - return (lcg_rand () & mask) - (mask >> 1); + return (lcg_rand_u32 () & mask) - (mask >> 1); +} + +static int32_t +rand_x (pixman_image_t *image) +{ + if (image->type == BITS) + return lcg_rand_n (image->bits.width); + else + return log_rand (); +} + +static int32_t +rand_y (pixman_image_t *image) +{ + if (image->type == BITS) + return lcg_rand_n (image->bits.height); + else + return log_rand (); } static pixman_image_t * @@ -225,7 +243,7 @@ create_random_bits_image (void) width = get_size (); height = get_size (); - if ((uint64_t)width * height > 200000) + while ((uint64_t)width * height > 200000) { if (lcg_rand_n(2) == 0) height = 200000 / width; @@ -304,8 +322,8 @@ create_random_bits_image (void) filter = filters[lcg_rand_n (ARRAY_LENGTH (filters))]; if (filter == PIXMAN_FILTER_CONVOLUTION) { - int width = lcg_rand_n (17); - int height = lcg_rand_n (19); + int width = lcg_rand_n (3); + int height = lcg_rand_n (4); n_coefficients = width * height + 2; coefficients = malloc (n_coefficients * sizeof (pixman_fixed_t)); @@ -365,7 +383,7 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) pixman_image_set_repeat (image, repeat); /* Alpha map */ - if (allow_alpha_map && lcg_rand_n (3) == 0) + if (allow_alpha_map && lcg_rand_n (4) == 0) { pixman_image_t *alpha_map; int16_t x, y; @@ -376,8 +394,8 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) { set_general_properties (alpha_map, FALSE); - x = lcg_rand_N (100000) - 65536; - y = lcg_rand_N (100000) - 65536; + x = rand_x (image) - image->bits.width / 2; + y = rand_y (image) - image->bits.height / 2; pixman_image_set_alpha_map (image, alpha_map, x, y); @@ -389,14 +407,14 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) pixman_image_set_component_alpha (image, lcg_rand_n (3) == 0); /* Clip region */ - if (lcg_rand_n (8) != 0) + if (lcg_rand_n (8) < 2) { pixman_region32_t region; int i, n_rects; pixman_region32_init (®ion); - switch (lcg_rand_n (10)) + switch (lcg_rand_n (12)) { case 0: n_rects = 0; @@ -433,6 +451,27 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) ®ion, ®ion, x, y, width, height); } + if (image->type == BITS && lcg_rand_n (8) != 0) + { + uint32_t width, height; + int x, y; + int i; + + /* Also add a couple of clip rectangles inside the image + * so that compositing will actually take place. + */ + for (i = 0; i < 5; ++i) + { + x = lcg_rand_n (2 * image->bits.width) - image->bits.width; + y = lcg_rand_n (2 * image->bits.height) - image->bits.height; + width = lcg_rand_n (image->bits.width) - x + 10; + height = lcg_rand_n (image->bits.height) - y + 10; + + pixman_region32_union_rect ( + ®ion, ®ion, x, y, width, height); + } + } + pixman_image_set_clip_region32 (image, ®ion); pixman_region32_fini (®ion); @@ -762,11 +801,11 @@ run_test (uint32_t seed, pixman_bool_t verbose, uint32_t mod) pixman_image_composite32 (op, source, mask, dest, - log_rand(), log_rand(), - log_rand(), log_rand(), - log_rand(), log_rand(), - absolute (log_rand()), - absolute (log_rand())); + rand_x (source), rand_y (source), + rand_x (mask), rand_y (mask), + 0, 0, + dest->bits.width, + dest->bits.height); } if (source) pixman_image_unref (source); -- cgit v1.2.3