aboutsummaryrefslogtreecommitdiff
path: root/pixman
diff options
context:
space:
mode:
Diffstat (limited to 'pixman')
-rw-r--r--pixman/configure.ac42
-rw-r--r--pixman/pixman/pixman-compiler.h4
-rw-r--r--pixman/pixman/pixman-cpu.c2
-rw-r--r--pixman/pixman/pixman-mips-dspr2-asm.S219
-rw-r--r--pixman/pixman/pixman-mips-dspr2-asm.h296
-rw-r--r--pixman/pixman/pixman-mips-dspr2.c12
-rw-r--r--pixman/pixman/pixman-mips-dspr2.h42
-rw-r--r--pixman/pixman/pixman-mmx.c4
8 files changed, 601 insertions, 20 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 17e30f5e4..1cf9eb4e5 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -294,12 +294,18 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
#error "Need GCC >= 3.4 for MMX intrinsics"
#endif
-#if defined(__clang__)
-#error "clang chokes on the inline assembly in pixman-mmx.c"
-#endif
#include <mmintrin.h>
int main () {
__m64 v = _mm_cvtsi32_si64 (1);
+ __m64 w;
+ signed char x = 0;
+
+ /* Some versions of clang will choke on K */
+ asm ("pshufw %2, %1, %0\n\t"
+ : "=y" (w)
+ : "y" (v), "K" (x)
+ );
+
return _mm_cvtsi64_si32 (v);
}]])], have_mmx_intrinsics=yes)
CFLAGS=$xserver_save_CFLAGS
@@ -603,7 +609,7 @@ AC_MSG_CHECKING(whether to use MIPS DSPr2 assembler)
xserver_save_CFLAGS=$CFLAGS
CFLAGS="-mdspr2 $CFLAGS"
-AC_COMPILE_IFELSE([[
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
#if !(defined(__mips__) && __mips_isa_rev >= 2)
#error MIPS DSPr2 is currently only available on MIPS32r2 platforms.
#endif
@@ -617,7 +623,7 @@ main ()
: [a] "r" (a), [b] "r" (b)
);
return c;
-}]], have_mips_dspr2=yes)
+}]])], have_mips_dspr2=yes)
CFLAGS=$xserver_save_CFLAGS
AC_ARG_ENABLE(mips-dspr2,
@@ -782,25 +788,27 @@ fi
dnl =====================================
dnl Thread local storage
-support_for__thread=no
-
-AC_MSG_CHECKING(for __thread)
-AC_LINK_IFELSE([AC_LANG_SOURCE([[
+AC_MSG_CHECKING(for thread local storage (TLS) support)
+AC_CACHE_VAL(ac_cv_tls, [
+ ac_cv_tls=none
+ keywords="__thread __declspec(thread)"
+ for kw in $keywords ; do
+ AC_TRY_COMPILE([
#if defined(__MINGW32__) && !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
#error This MinGW version has broken __thread support
#endif
#ifdef __OpenBSD__
#error OpenBSD has broken __thread support
#endif
-static __thread int x ;
-int main () { x = 123; return x; }
-]])], support_for__thread=yes)
-if test $support_for__thread = yes; then
- AC_DEFINE([TOOLCHAIN_SUPPORTS__THREAD],[],[Whether the tool chain supports __thread])
-fi
+int $kw test;], [], ac_cv_tls=$kw)
+ done
+])
+AC_MSG_RESULT($ac_cv_tls)
-AC_MSG_RESULT($support_for__thread)
+if test "$ac_cv_tls" != "none"; then
+ AC_DEFINE_UNQUOTED([TLS], $ac_cv_tls, [The compiler supported TLS storage class])
+fi
dnl
dnl posix tls
@@ -852,7 +860,7 @@ AC_DEFUN([PIXMAN_CHECK_PTHREAD],[dnl
fi
])
-if test $support_for__thread = no; then
+if test $ac_cv_tls = none ; then
support_for_pthread_setspecific=no
AC_MSG_CHECKING(for pthread_setspecific)
diff --git a/pixman/pixman/pixman-compiler.h b/pixman/pixman/pixman-compiler.h
index 5b568e114..ffd51720e 100644
--- a/pixman/pixman/pixman-compiler.h
+++ b/pixman/pixman/pixman-compiler.h
@@ -97,10 +97,10 @@
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
-#elif defined(TOOLCHAIN_SUPPORTS__THREAD)
+#elif defined(TLS)
# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
- static __thread type name
+ static TLS type name
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
diff --git a/pixman/pixman/pixman-cpu.c b/pixman/pixman/pixman-cpu.c
index bb97ae3e6..1060f4776 100644
--- a/pixman/pixman/pixman-cpu.c
+++ b/pixman/pixman/pixman-cpu.c
@@ -666,6 +666,7 @@ detect_cpu_features (void)
return features;
}
+#ifdef USE_X86_MMX
static pixman_bool_t
pixman_have_mmx (void)
{
@@ -681,6 +682,7 @@ pixman_have_mmx (void)
return mmx_present;
}
+#endif
#ifdef USE_SSE2
static pixman_bool_t
diff --git a/pixman/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman/pixman-mips-dspr2-asm.S
index f1087a776..6a0fc1803 100644
--- a/pixman/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman/pixman-mips-dspr2-asm.S
@@ -308,3 +308,222 @@ LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
nop
END(pixman_composite_src_x888_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (32bit constant)
+ * a2 - mask (a8r8g8b8)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 16, s0, s1, s2, s3, s4, s5, s6, s7
+ beqz a3, 4f
+ nop
+ li t6, 0xff
+ addiu t7, zero, -1 /* t7 = 0xffffffff */
+ srl t8, a1, 24 /* t8 = srca */
+ li t9, 0x00ff00ff
+ addiu t1, a3, -1
+ beqz t1, 3f /* last pixel */
+ nop
+ beq t8, t6, 2f /* if (srca == 0xff) */
+ nop
+1:
+ /* a1 = src */
+ lw t0, 0(a2) /* t0 = mask */
+ lw t1, 4(a2) /* t1 = mask */
+ or t2, t0, t1
+ beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */
+ addiu a2, a2, 8
+ and t3, t0, t1
+ move s0, t8 /* s0 = srca */
+ move s1, t8 /* s1 = srca */
+ move t4, a1 /* t4 = src */
+ move t5, a1 /* t5 = src */
+ lw t2, 0(a0) /* t2 = dst */
+ beq t3, t7, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
+ lw t3, 4(a0) /* t0 = dst */
+ MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
+ MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, s0, s1, t9, s2, s3, s4, s5, s6, s7
+11:
+ not s0, s0
+ not s1, s1
+ MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, s0, s1, s2, s3, t9, t0, t1, s4, s5, s6, s7
+ addu_s.qb t0, t4, s2
+ addu_s.qb t1, t5, s3
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+12:
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 1b
+ addiu a0, a0, 8
+ b 3f
+ nop
+2:
+ /* a1 = src */
+ lw t0, 0(a2) /* t0 = mask */
+ lw t1, 4(a2) /* t1 = mask */
+ or t2, t0, t1
+ beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */
+ addiu a2, a2, 8
+ and t2, t0, t1
+ move s0, a1
+ beq t2, t7, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
+ move s1, a1
+ lw t2, 0(a0) /* t2 = dst */
+ lw t3, 4(a0) /* t3 = dst */
+ MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
+ not t0, t0
+ not t1, t1
+ MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, s0, s1, t9, s2, s3, s4, s5, s6, s7
+ addu_s.qb s0, t4, s0
+ addu_s.qb s1, t5, s1
+21:
+ sw s0, 0(a0)
+ sw s1, 4(a0)
+22:
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 2b
+ addiu a0, a0, 8
+3:
+ blez a3, 4f
+ nop
+ /* a1 = src */
+ lw t1, 0(a2) /* t1 = mask */
+ beqz t1, 4f
+ nop
+ move s0, t8 /* s0 = srca */
+ move t2, a1 /* t2 = src */
+ beq t1, t7, 31f
+ lw t0, 0(a0) /* t0 = dst */
+
+ MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6
+ MIPS_UN8x4_MUL_UN8 t1, t8, s0, t9, t3, t4, t5
+31:
+ not s0, s0
+ MIPS_UN8x4_MUL_UN8x4 t0, s0, t3, t9, t4, t5, t6, t1
+ addu_s.qb t0, t2, t3
+ sw t0, 0(a0)
+4:
+ RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3, s4, s5, s6, s7
+ j ra
+ nop
+
+END(pixman_composite_over_n_8888_8888_ca_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
+/*
+ * a0 - dst (r5g6b5)
+ * a1 - src (32bit constant)
+ * a2 - mask (a8r8g8b8)
+ * a3 - w
+ */
+
+ SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ beqz a3, 4f
+ nop
+ li t5, 0xf800f800
+ li t6, 0x07e007e0
+ li t7, 0x001F001F
+ li t9, 0x00ff00ff
+
+ srl t8, a1, 24 /* t8 = srca */
+ addiu t1, a3, -1
+ beqz t1, 3f /* last pixel */
+ nop
+ li s0, 0xff /* s0 = 0xff */
+ addiu s1, zero, -1 /* s1 = 0xffffffff */
+
+ beq t8, s0, 2f /* if (srca == 0xff) */
+ nop
+1:
+ /* a1 = src */
+ lw t0, 0(a2) /* t0 = mask */
+ lw t1, 4(a2) /* t1 = mask */
+ or t2, t0, t1
+ beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */
+ addiu a2, a2, 8
+ and t3, t0, t1
+ move t0, t8
+ move t1, a1
+ lhu t2, 0(a0) /* t2 = dst */
+ beq t3, s1, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
+ lhu t3, 2(a0) /* t3 = dst */
+ MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
+ MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8
+11:
+ not t0, t0
+ not t1, t1
+ CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
+ MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1
+ addu_s.qb s2, s2, s4
+ addu_s.qb s3, s3, s5
+ CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s1, s2
+ sh t2, 0(a0)
+ sh t3, 2(a0)
+12:
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 1b
+ addiu a0, a0, 4
+ b 3f
+ nop
+2:
+ /* a1 = src */
+ lw t0, 0(a2) /* t0 = mask */
+ lw t1, 4(a2) /* t1 = mask */
+ or t2, t0, t1
+ beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */
+ addiu a2, a2, 8
+ and t3, t0, t1
+ move t2, a1
+ beq t3, s1, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
+ move t3, a1
+ lhu t2, 0(a0) /* t2 = dst */
+ lhu t3, 2(a0) /* t3 = dst */
+ MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
+ not t0, t0
+ not t1, t1
+ CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
+ MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t2, t3
+ addu_s.qb t2, s2, s4
+ addu_s.qb t3, s3, s5
+21:
+ CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3
+ sh t0, 0(a0)
+ sh t1, 2(a0)
+22:
+ addiu a3, a3, -2
+ addiu t1, a3, -1
+ bgtz t1, 2b
+ addiu a0, a0, 4
+3:
+ blez a3, 4f
+ nop
+ /* a1 = src */
+ lw t1, 0(a2) /* t1 = mask */
+ beqz t1, 4f
+ nop
+ move s0, t8 /* s0 = srca */
+ move t2, a1 /* t2 = src */
+ beq t1, t7, 31f
+ lhu t0, 0(a0) /* t0 = dst */
+
+ MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6
+ MIPS_UN8x4_MUL_UN8 t1, t8, s0, t9, t3, t4, t5
+31:
+ not s0, s0
+ CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3
+ MIPS_UN8x4_MUL_UN8x4 s1, s0, t3, t9, t4, t5, t6, t1
+ addu_s.qb t0, t2, t3
+ CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3
+ sh s1, 0(a0)
+4:
+ RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ j ra
+ nop
+
+END(pixman_composite_over_n_8888_0565_ca_asm_mips)
diff --git a/pixman/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman/pixman-mips-dspr2-asm.h
index e07cda470..12ff42c57 100644
--- a/pixman/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman/pixman-mips-dspr2-asm.h
@@ -96,6 +96,170 @@ LEAF_MIPS32R2(symbol) \
.size function,.-function
/*
+ * Checks if stack offset is big enough for storing/restoring regs_num
+ * number of register to/from stack. Stack offset must be greater than
+ * or equal to the number of bytes needed for storing registers (regs_num*4).
+ * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is
+ * preserved for input arguments of the functions, already stored in a0-a3),
+ * stack size can be further optimized by utilizing this space.
+ */
+.macro CHECK_STACK_OFFSET regs_num, stack_offset
+.if \stack_offset < \regs_num * 4 - 16
+.error "Stack offset too small."
+.endif
+.endm
+
+/*
+ * Saves set of registers on stack. Maximum number of registers that
+ * can be saved on stack is limitted to 14 (a0-a3, v0-v1 and s0-s7).
+ * Stack offset is number of bytes that are added to stack pointer (sp)
+ * before registers are pushed in order to provide enough space on stack
+ * (offset must be multiple of 4, and must be big enough, as described by
+ * CHECK_STACK_OFFSET macro). This macro is intended to be used in
+ * combination with RESTORE_REGS_FROM_STACK macro. Example:
+ * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
+ * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
+ r2 = 0, r3 = 0, r4 = 0, \
+ r5 = 0, r6 = 0, r7 = 0, \
+ r8 = 0, r9 = 0, r10 = 0, \
+ r11 = 0, r12 = 0, r13 = 0, \
+ r14 = 0
+ .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
+ .error "Stack offset must be pozitive and multiple of 4."
+ .endif
+ .if \stack_offset != 0
+ addiu sp, sp, -\stack_offset
+ .endif
+ sw \r1, 0(sp)
+ .if \r2 != 0
+ sw \r2, 4(sp)
+ .endif
+ .if \r3 != 0
+ sw \r3, 8(sp)
+ .endif
+ .if \r4 != 0
+ sw \r4, 12(sp)
+ .endif
+ .if \r5 != 0
+ CHECK_STACK_OFFSET 5, \stack_offset
+ sw \r5, 16(sp)
+ .endif
+ .if \r6 != 0
+ CHECK_STACK_OFFSET 6, \stack_offset
+ sw \r6, 20(sp)
+ .endif
+ .if \r7 != 0
+ CHECK_STACK_OFFSET 7, \stack_offset
+ sw \r7, 24(sp)
+ .endif
+ .if \r8 != 0
+ CHECK_STACK_OFFSET 8, \stack_offset
+ sw \r8, 28(sp)
+ .endif
+ .if \r9 != 0
+ CHECK_STACK_OFFSET 9, \stack_offset
+ sw \r9, 32(sp)
+ .endif
+ .if \r10 != 0
+ CHECK_STACK_OFFSET 10, \stack_offset
+ sw \r10, 36(sp)
+ .endif
+ .if \r11 != 0
+ CHECK_STACK_OFFSET 11, \stack_offset
+ sw \r11, 40(sp)
+ .endif
+ .if \r12 != 0
+ CHECK_STACK_OFFSET 12, \stack_offset
+ sw \r12, 44(sp)
+ .endif
+ .if \r13 != 0
+ CHECK_STACK_OFFSET 13, \stack_offset
+ sw \r13, 48(sp)
+ .endif
+ .if \r14 != 0
+ CHECK_STACK_OFFSET 14, \stack_offset
+ sw \r14, 52(sp)
+ .endif
+.endm
+
+/*
+ * Restores set of registers from stack. Maximum number of registers that
+ * can be restored from stack is limitted to 14 (a0-a3, v0-v1 and s0-s7).
+ * Stack offset is number of bytes that are added to stack pointer (sp)
+ * after registers are restored (offset must be multiple of 4, and must
+ * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
+ * intended to be used in combination with RESTORE_REGS_FROM_STACK macro.
+ * Example:
+ * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
+ * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
+ r2 = 0, r3 = 0, r4 = 0, \
+ r5 = 0, r6 = 0, r7 = 0, \
+ r8 = 0, r9 = 0, r10 = 0, \
+ r11 = 0, r12 = 0, r13 = 0, \
+ r14 = 0
+ .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
+ .error "Stack offset must be pozitive and multiple of 4."
+ .endif
+ lw \r1, 0(sp)
+ .if \r2 != 0
+ lw \r2, 4(sp)
+ .endif
+ .if \r3 != 0
+ lw \r3, 8(sp)
+ .endif
+ .if \r4 != 0
+ lw \r4, 12(sp)
+ .endif
+ .if \r5 != 0
+ CHECK_STACK_OFFSET 5, \stack_offset
+ lw \r5, 16(sp)
+ .endif
+ .if \r6 != 0
+ CHECK_STACK_OFFSET 6, \stack_offset
+ lw \r6, 20(sp)
+ .endif
+ .if \r7 != 0
+ CHECK_STACK_OFFSET 7, \stack_offset
+ lw \r7, 24(sp)
+ .endif
+ .if \r8 != 0
+ CHECK_STACK_OFFSET 8, \stack_offset
+ lw \r8, 28(sp)
+ .endif
+ .if \r9 != 0
+ CHECK_STACK_OFFSET 9, \stack_offset
+ lw \r9, 32(sp)
+ .endif
+ .if \r10 != 0
+ CHECK_STACK_OFFSET 10, \stack_offset
+ lw \r10, 36(sp)
+ .endif
+ .if \r11 != 0
+ CHECK_STACK_OFFSET 11, \stack_offset
+ lw \r11, 40(sp)
+ .endif
+ .if \r12 != 0
+ CHECK_STACK_OFFSET 12, \stack_offset
+ lw \r12, 44(sp)
+ .endif
+ .if \r13 != 0
+ CHECK_STACK_OFFSET 13, \stack_offset
+ lw \r13, 48(sp)
+ .endif
+ .if \r14 != 0
+ CHECK_STACK_OFFSET 14, \stack_offset
+ lw \r14, 52(sp)
+ .endif
+ .if \stack_offset != 0
+ addiu sp, sp, \stack_offset
+ .endif
+.endm
+
+/*
* Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
* returned in (out_8888) register. Requires two temporary registers
* (scratch1 and scratch2).
@@ -203,4 +367,136 @@ LEAF_MIPS32R2(symbol) \
srl \out2_565, \out1_565, 16
.endm
+/*
+ * Multiply pixel (a8) with single pixel (a8r8g8b8). It requires maskLSR needed
+ * for rounding process. maskLSR must have following value:
+ * li maskLSR, 0x00ff00ff
+ */
+.macro MIPS_UN8x4_MUL_UN8 s_8888, \
+ m_8, \
+ d_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3
+ replv.ph \m_8, \m_8 /* 0 | M | 0 | M */
+ muleu_s.ph.qbl \scratch1, \s_8888, \m_8 /* A*M | R*M */
+ muleu_s.ph.qbr \scratch2, \s_8888, \m_8 /* G*M | B*M */
+ shra_r.ph \scratch3, \scratch1, 8
+ shra_r.ph \d_8888, \scratch2, 8
+ and \scratch3, \scratch3, \maskLSR /* 0 |A*M| 0 |R*M */
+ and \d_8888, \d_8888, \maskLSR /* 0 |G*M| 0 |B*M */
+ addq.ph \scratch1, \scratch1, \scratch3 /* A*M+A*M | R*M+R*M */
+ addq.ph \scratch2, \scratch2, \d_8888 /* G*M+G*M | B*M+B*M */
+ shra_r.ph \scratch1, \scratch1, 8
+ shra_r.ph \scratch2, \scratch2, 8
+ precr.qb.ph \d_8888, \scratch1, \scratch2
+.endm
+
+/*
+ * Multiply two pixels (a8) with two pixels (a8r8g8b8). It requires maskLSR
+ * needed for rounding process. maskLSR must have following value:
+ * li maskLSR, 0x00ff00ff
+ */
+.macro MIPS_2xUN8x4_MUL_2xUN8 s1_8888, \
+ s2_8888, \
+ m1_8, \
+ m2_8, \
+ d1_8888, \
+ d2_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3, \
+ scratch4, scratch5, scratch6
+ replv.ph \m1_8, \m1_8 /* 0 | M1 | 0 | M1 */
+ replv.ph \m2_8, \m2_8 /* 0 | M2 | 0 | M2 */
+ muleu_s.ph.qbl \scratch1, \s1_8888, \m1_8 /* A1*M1 | R1*M1 */
+ muleu_s.ph.qbr \scratch2, \s1_8888, \m1_8 /* G1*M1 | B1*M1 */
+ muleu_s.ph.qbl \scratch3, \s2_8888, \m2_8 /* A2*M2 | R2*M2 */
+ muleu_s.ph.qbr \scratch4, \s2_8888, \m2_8 /* G2*M2 | B2*M2 */
+ shra_r.ph \scratch5, \scratch1, 8
+ shra_r.ph \d1_8888, \scratch2, 8
+ shra_r.ph \scratch6, \scratch3, 8
+ shra_r.ph \d2_8888, \scratch4, 8
+ and \scratch5, \scratch5, \maskLSR /* 0 |A1*M1| 0 |R1*M1 */
+ and \d1_8888, \d1_8888, \maskLSR /* 0 |G1*M1| 0 |B1*M1 */
+ and \scratch6, \scratch6, \maskLSR /* 0 |A2*M2| 0 |R2*M2 */
+ and \d2_8888, \d2_8888, \maskLSR /* 0 |G2*M2| 0 |B2*M2 */
+ addq.ph \scratch1, \scratch1, \scratch5
+ addq.ph \scratch2, \scratch2, \d1_8888
+ addq.ph \scratch3, \scratch3, \scratch6
+ addq.ph \scratch4, \scratch4, \d2_8888
+ shra_r.ph \scratch1, \scratch1, 8
+ shra_r.ph \scratch2, \scratch2, 8
+ shra_r.ph \scratch3, \scratch3, 8
+ shra_r.ph \scratch4, \scratch4, 8
+ precr.qb.ph \d1_8888, \scratch1, \scratch2
+ precr.qb.ph \d2_8888, \scratch3, \scratch4
+.endm
+
+/*
+ * Multiply pixel (a8r8g8b8) with single pixel (a8r8g8b8). It requires maskLSR
+ * needed for rounding process. maskLSR must have following value:
+ * li maskLSR, 0x00ff00ff
+ */
+.macro MIPS_UN8x4_MUL_UN8x4 s_8888, \
+ m_8888, \
+ d_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3, scratch4
+ preceu.ph.qbl \scratch1, \m_8888 /* 0 | A | 0 | R */
+ preceu.ph.qbr \scratch2, \m_8888 /* 0 | G | 0 | B */
+ muleu_s.ph.qbl \scratch3, \s_8888, \scratch1 /* A*A | R*R */
+ muleu_s.ph.qbr \scratch4, \s_8888, \scratch2 /* G*G | B*B */
+ shra_r.ph \scratch1, \scratch3, 8
+ shra_r.ph \scratch2, \scratch4, 8
+ and \scratch1, \scratch1, \maskLSR /* 0 |A*A| 0 |R*R */
+ and \scratch2, \scratch2, \maskLSR /* 0 |G*G| 0 |B*B */
+ addq.ph \scratch1, \scratch1, \scratch3
+ addq.ph \scratch2, \scratch2, \scratch4
+ shra_r.ph \scratch1, \scratch1, 8
+ shra_r.ph \scratch2, \scratch2, 8
+ precr.qb.ph \d_8888, \scratch1, \scratch2
+.endm
+
+/*
+ * Multiply two pixels (a8r8g8b8) with two pixels (a8r8g8b8). It requires
+ * maskLSR needed for rounding process. maskLSR must have following value:
+ * li maskLSR, 0x00ff00ff
+ */
+
+.macro MIPS_2xUN8x4_MUL_2xUN8x4 s1_8888, \
+ s2_8888, \
+ m1_8888, \
+ m2_8888, \
+ d1_8888, \
+ d2_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3, \
+ scratch4, scratch5, scratch6
+ preceu.ph.qbl \scratch1, \m1_8888 /* 0 | A | 0 | R */
+ preceu.ph.qbr \scratch2, \m1_8888 /* 0 | G | 0 | B */
+ preceu.ph.qbl \scratch3, \m2_8888 /* 0 | A | 0 | R */
+ preceu.ph.qbr \scratch4, \m2_8888 /* 0 | G | 0 | B */
+ muleu_s.ph.qbl \scratch5, \s1_8888, \scratch1 /* A*A | R*R */
+ muleu_s.ph.qbr \scratch6, \s1_8888, \scratch2 /* G*G | B*B */
+ muleu_s.ph.qbl \scratch1, \s2_8888, \scratch3 /* A*A | R*R */
+ muleu_s.ph.qbr \scratch2, \s2_8888, \scratch4 /* G*G | B*B */
+ shra_r.ph \scratch3, \scratch5, 8
+ shra_r.ph \scratch4, \scratch6, 8
+ shra_r.ph \d1_8888, \scratch1, 8
+ shra_r.ph \d2_8888, \scratch2, 8
+ and \scratch3, \scratch3, \maskLSR /* 0 |A*A| 0 |R*R */
+ and \scratch4, \scratch4, \maskLSR /* 0 |G*G| 0 |B*B */
+ and \d1_8888, \d1_8888, \maskLSR /* 0 |A*A| 0 |R*R */
+ and \d2_8888, \d2_8888, \maskLSR /* 0 |G*G| 0 |B*B */
+ addq.ph \scratch3, \scratch3, \scratch5
+ addq.ph \scratch4, \scratch4, \scratch6
+ addq.ph \d1_8888, \d1_8888, \scratch1
+ addq.ph \d2_8888, \d2_8888, \scratch2
+ shra_r.ph \scratch3, \scratch3, 8
+ shra_r.ph \scratch4, \scratch4, 8
+ shra_r.ph \scratch5, \d1_8888, 8
+ shra_r.ph \scratch6, \d2_8888, 8
+ precr.qb.ph \d1_8888, \scratch3, \scratch4
+ precr.qb.ph \d2_8888, \scratch5, \scratch6
+.endm
+
#endif //PIXMAN_MIPS_DSPR2_ASM_H
diff --git a/pixman/pixman/pixman-mips-dspr2.c b/pixman/pixman/pixman-mips-dspr2.c
index 2beada390..018770a4a 100644
--- a/pixman/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman/pixman-mips-dspr2.c
@@ -49,6 +49,11 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
uint8_t, 3, uint8_t, 3)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
+ uint32_t, 1, uint16_t, 1)
+
static pixman_bool_t
pixman_fill_mips (uint32_t *bits,
int stride,
@@ -184,6 +189,13 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mips_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mips_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, mips_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mips_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mips_composite_over_n_8888_0565_ca),
+
{ PIXMAN_OP_NONE },
};
diff --git a/pixman/pixman/pixman-mips-dspr2.h b/pixman/pixman/pixman-mips-dspr2.h
index a40e7c87a..cc35d02ed 100644
--- a/pixman/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman/pixman-mips-dspr2.h
@@ -85,4 +85,46 @@ mips_composite_##name (pixman_implementation_t *imp, \
} \
}
+/*******************************************************************/
+
+#define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_mips (dst_type *dst, \
+ uint32_t src, \
+ mask_type *mask, \
+ int32_t w); \
+ \
+static void \
+mips_composite_##name (pixman_implementation_t *imp, \
+ pixman_composite_info_t *info) \
+{ \
+ PIXMAN_COMPOSITE_ARGS (info); \
+ dst_type *dst_line, *dst; \
+ mask_type *mask_line, *mask; \
+ int32_t dst_stride, mask_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid ( \
+ imp, src_image, dest_image->bits.format); \
+ \
+ if ((flags & SKIP_ZERO_SRC) && src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ while (height--) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ mask = mask_line; \
+ mask_line += mask_stride; \
+ pixman_composite_##name##_asm_mips (dst, src, mask, width); \
+ } \
+}
+
#endif //PIXMAN_MIPS_DSPR2_H
diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c
index f9efd73d3..9d1f6afd4 100644
--- a/pixman/pixman/pixman-mmx.c
+++ b/pixman/pixman/pixman-mmx.c
@@ -57,7 +57,7 @@ _mm_empty (void)
#endif
#ifdef USE_X86_MMX
-# ifdef __SUNPRO_C
+# if (defined(__SUNPRO_C) || defined(_MSC_VER))
# include <xmmintrin.h>
# else
/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
@@ -88,8 +88,10 @@ _mm_shuffle_pi16 (__m64 __A, int8_t const __N)
# endif
#endif
+#ifndef _MSC_VER
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
+#endif
/* Notes about writing mmx code
*