aboutsummaryrefslogtreecommitdiff
path: root/pixman
diff options
context:
space:
mode:
authormarha <marha@users.sourceforge.net>2012-05-15 15:07:22 +0200
committermarha <marha@users.sourceforge.net>2012-05-15 15:07:22 +0200
commit9818207986d5db9831e43eb2a640be68f54bb2ef (patch)
treee1ac6d17b2da887a2110c1337f01fc6fbdfa68fd /pixman
parente6ff1fe09702cb307729b3208175c84f623f2968 (diff)
parent062c45ff0df6a52080dcd74433710d47127cbe29 (diff)
downloadvcxsrv-9818207986d5db9831e43eb2a640be68f54bb2ef.tar.gz
vcxsrv-9818207986d5db9831e43eb2a640be68f54bb2ef.tar.bz2
vcxsrv-9818207986d5db9831e43eb2a640be68f54bb2ef.zip
Merge remote-tracking branch 'origin/released'
Diffstat (limited to 'pixman')
-rw-r--r--pixman/Makefile.am4
-rw-r--r--pixman/configure.ac16
-rw-r--r--pixman/pixman/pixman-mips-dspr2-asm.S224
-rw-r--r--pixman/pixman/pixman-mips-dspr2-asm.h67
-rw-r--r--pixman/pixman/pixman-mips-dspr2.c10
-rw-r--r--pixman/pixman/pixman-mmx.c107
6 files changed, 365 insertions, 63 deletions
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index df8677a2f..88ff897be 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -21,6 +21,10 @@ RELEASE_XORG_HOST = $(USERNAME)@xorg.freedesktop.org
RELEASE_XORG_DIR = /srv/xorg.freedesktop.org/archive/individual/lib
RELEASE_ANNOUNCE_LIST = cairo-announce@cairographics.org, xorg-announce@lists.freedesktop.org, pixman@lists.freedesktop.org
+EXTRA_DIST = \
+ Makefile.win32 \
+ Makefile.win32.common
+
tar_gz = $(PACKAGE)-$(VERSION).tar.gz
tar_bz2 = $(PACKAGE)-$(VERSION).tar.bz2
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 54787342e..d9498397b 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -278,10 +278,10 @@ if test "x$LS_CFLAGS" = "x" ; then
fi
have_loongson_mmi=no
-AC_MSG_CHECKING(whether to use Loongson MMI)
+AC_MSG_CHECKING(whether to use Loongson MMI assembler)
xserver_save_CFLAGS=$CFLAGS
-CFLAGS=" $CFLAGS $LS_CFLAGS"
+CFLAGS=" $LS_CFLAGS $CFLAGS -I$srcdir"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
#ifndef __mips_loongson_vector_rev
#error "Loongson Multimedia Instructions are only available on Loongson"
@@ -301,12 +301,12 @@ int main () {
}]])], have_loongson_mmi=yes)
CFLAGS=$xserver_save_CFLAGS
-AC_ARG_ENABLE(loongson,
- [AC_HELP_STRING([--disable-loongson],
- [disable Loongson fast paths])],
- [enable_loongson=$enableval], [enable_loongson=auto])
+AC_ARG_ENABLE(loongson-mmi,
+ [AC_HELP_STRING([--disable-loongson-mmi],
+ [disable Loongson MMI fast paths])],
+ [enable_loongson_mmi=$enableval], [enable_loongson_mmi=auto])
-if test $enable_loongson = no ; then
+if test $enable_loongson_mmi = no ; then
have_loongson_mmi=disabled
fi
@@ -317,7 +317,7 @@ else
fi
AC_MSG_RESULT($have_loongson_mmi)
-if test $enable_loongson = yes && test $have_loongson_mmi = no ; then
+if test $enable_loongson_mmi = yes && test $have_loongson_mmi = no ; then
AC_MSG_ERROR([Loongson MMI not detected])
fi
diff --git a/pixman/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman/pixman-mips-dspr2-asm.S
index 6a0fc1803..68ad33f7c 100644
--- a/pixman/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman/pixman-mips-dspr2-asm.S
@@ -527,3 +527,227 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
nop
END(pixman_composite_over_n_8888_0565_ca_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
+/*
+ * OVER of a solid source through an a8 mask onto a run of 32-bit
+ * destination pixels.
+ *
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ *
+ * Layout of the routine:
+ *   1:  two pixels per iteration, taken when srca != 0xff
+ *   2:  two pixels per iteration, taken when srca == 0xff
+ *   3:  tail for the single pixel left when w is odd (also used for w == 1)
+ *   4:  register restore and return; also the exit for w == 0
+ *
+ * In both loops a pixel pair whose two mask bytes are both zero is skipped
+ * without storing to dst. t4 holds 0x00ff00ff, which is passed in the
+ * maskLSR position of the multiply/OVER macros, and t5 holds 0xff for the
+ * mask and alpha comparisons. s4 holds (~src) >> 24 for use at label 11.
+ *
+ * NOTE(review): an instruction placed directly after a branch is treated
+ * here as a delay-slot instruction, i.e. it runs whether or not the branch
+ * is taken. That relies on ".set noreorder" being in effect, which is not
+ * visible in this hunk - confirm against the full file.
+ */
+
+ SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4
+ beqz a3, 4f /* w == 0: nothing to do */
+ nop
+ li t4, 0x00ff00ff
+ li t5, 0xff
+ addiu t0, a3, -1
+ beqz t0, 3f /* w == 1: only the last pixel */
+ srl t6, a1, 24 /* t6 = srca */
+ not s4, a1
+ beq t5, t6, 2f /* if (srca == 0xff) */
+ srl s4, s4, 24 /* s4 = (~src) >> 24 */
+1:
+ /* a1 = src */
+ lbu t0, 0(a2) /* t0 = mask of first pixel */
+ lbu t1, 1(a2) /* t1 = mask of second pixel */
+ or t2, t0, t1
+ beqz t2, 111f /* if (t0 == 0) && (t1 == 0) */
+ addiu a2, a2, 2 /* advance mask by two bytes */
+ and t3, t0, t1
+
+ lw t2, 0(a0) /* t2 = first dst pixel */
+ beq t3, t5, 11f /* if (t0 == 0xff) && (t1 == 0xff) */
+ lw t3, 4(a0) /* t3 = second dst pixel */
+
+ MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3
+ not s2, s0
+ not s3, s1
+ srl s2, s2, 24 /* s2 = (~s0) >> 24 */
+ srl s3, s3, 24 /* s3 = (~s1) >> 24 */
+ MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9
+ addu_s.qb s2, t2, s0
+ addu_s.qb s3, t3, s1
+ sw s2, 0(a0)
+ b 111f
+ sw s3, 4(a0) /* second store sits after the branch */
+11:
+ MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9
+ addu_s.qb s2, t2, a1
+ addu_s.qb s3, t3, a1
+ sw s2, 0(a0)
+ sw s3, 4(a0)
+
+111:
+ addiu a3, a3, -2 /* two pixels consumed */
+ addiu t0, a3, -1
+ bgtz t0, 1b /* loop while at least two pixels remain */
+ addiu a0, a0, 8 /* advance dst by two 32-bit pixels */
+ b 3f
+ nop
+2:
+ /* a1 = src */
+ lbu t0, 0(a2) /* t0 = mask of first pixel */
+ lbu t1, 1(a2) /* t1 = mask of second pixel */
+ or t2, t0, t1
+ beqz t2, 222f /* if (t0 == 0) && (t1 == 0) */
+ addiu a2, a2, 2 /* advance mask by two bytes */
+ and t3, t0, t1
+ beq t3, t5, 22f /* if (t0 == 0xff) && (t1 == 0xff) */
+ nop
+ lw t2, 0(a0) /* t2 = first dst pixel */
+ lw t3, 4(a0) /* t3 = second dst pixel */
+
+ OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \
+ t6, t7, t4, t8, t9, s0, s1, s2, s3
+ sw t6, 0(a0)
+ b 222f
+ sw t7, 4(a0) /* second store sits after the branch */
+22:
+ sw a1, 0(a0) /* srca and both masks are 0xff: store src as is */
+ sw a1, 4(a0)
+222:
+ addiu a3, a3, -2 /* two pixels consumed */
+ addiu t0, a3, -1
+ bgtz t0, 2b /* loop while at least two pixels remain */
+ addiu a0, a0, 8 /* advance dst by two 32-bit pixels */
+3:
+ blez a3, 4f /* no pixel left over */
+ nop
+ /* a1 = src */
+ lbu t0, 0(a2) /* t0 = mask */
+ beqz t0, 4f /* if (t0 == 0) */
+ addiu a2, a2, 1
+ move t3, a1 /* t3 = src, kept when mask == 0xff */
+ beq t0, t5, 31f /* if (t0 == 0xff) */
+ lw t1, 0(a0) /* t1 = dst */
+
+ MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8
+31:
+ not t2, t3
+ srl t2, t2, 24 /* t2 = (~t3) >> 24 */
+ MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8
+ addu_s.qb t2, t1, t3
+ sw t2, 0(a0)
+4:
+ RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4
+ j ra
+ nop
+
+END(pixman_composite_over_n_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
+/*
+ * OVER of a solid source through an a8 mask onto a run of 16-bit
+ * destination pixels.
+ *
+ * a0 - dst (r5g6b5)
+ * a1 - src (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ *
+ * Layout of the routine:
+ *   1:   two pixels per iteration, taken when srca != 0xff
+ *   2:   one-time conversion of src to 0565 (result in s0), then
+ *   21:  two pixels per iteration, taken when srca == 0xff
+ *   3:   tail for the single pixel left when w is odd (also used for w == 1)
+ *   4:   register restore and return; also the exit for w == 0
+ *
+ * In both loops a pixel pair whose two mask bytes are both zero is skipped
+ * without storing to dst. t4 holds 0x00ff00ff, which is passed in the
+ * maskLSR position of the multiply/OVER macros, and t5 holds 0xff for the
+ * mask and alpha comparisons. t6/t7/t8 hold the 5-, 6- and 5-bit field
+ * patterns of a 565 pixel replicated in both halfwords; they are handed to
+ * the CONVERT_2x* macros. v0 holds (~src) >> 24 for use at label 11.
+ *
+ * The tail at label 3 passes t7, t8 and t9 as scratch arguments, so the
+ * constants in t7/t8 are not relied upon after that point.
+ *
+ * NOTE(review): an instruction placed directly after a branch is treated
+ * here as a delay-slot instruction, i.e. it runs whether or not the branch
+ * is taken. That relies on ".set noreorder" being in effect, which is not
+ * visible in this hunk - confirm against the full file.
+ */
+ SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ beqz a3, 4f /* w == 0: nothing to do */
+ nop
+ li t4, 0x00ff00ff
+ li t5, 0xff
+ li t6, 0xf800f800
+ li t7, 0x07e007e0
+ li t8, 0x001F001F
+ addiu t1, a3, -1
+ beqz t1, 3f /* w == 1: only the last pixel */
+ srl t0, a1, 24 /* t0 = srca */
+ not v0, a1
+ beq t0, t5, 2f /* if (srca == 0xff) */
+ srl v0, v0, 24 /* v0 = (~src) >> 24 */
+1:
+ /* a1 = src */
+ lbu t0, 0(a2) /* t0 = mask of first pixel */
+ lbu t1, 1(a2) /* t1 = mask of second pixel */
+ or t2, t0, t1
+ beqz t2, 111f /* if (t0 == 0) && (t1 == 0) */
+ addiu a2, a2, 2 /* advance mask by two bytes */
+ lhu t2, 0(a0) /* t2 = first dst pixel */
+ lhu t3, 2(a0) /* t3 = second dst pixel */
+ CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4
+ and t9, t0, t1
+ beq t9, t5, 11f /* if (t0 == 0xff) && (t1 == 0xff) */
+ nop
+
+ MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8
+ not s4, s2
+ not s5, s3
+ srl s4, s4, 24 /* s4 = (~s2) >> 24 */
+ srl s5, s5, 24 /* s5 = (~s3) >> 24 */
+ MIPS_2xUN8x4_MUL_2xUN8 s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8
+ addu_s.qb s4, s2, s0
+ addu_s.qb s5, s3, s1
+ CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
+ sh t2, 0(a0)
+ b 111f
+ sh t3, 2(a0) /* second store sits after the branch */
+11:
+ MIPS_2xUN8x4_MUL_2xUN8 s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8
+ addu_s.qb s4, a1, s0
+ addu_s.qb s5, a1, s1
+ CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
+ sh t2, 0(a0)
+ sh t3, 2(a0)
+111:
+ addiu a3, a3, -2 /* two pixels consumed */
+ addiu t0, a3, -1
+ bgtz t0, 1b /* loop while at least two pixels remain */
+ addiu a0, a0, 4 /* advance dst by two 16-bit pixels */
+ b 3f
+ nop
+2:
+ CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2
+21:
+ /* a1 = src */
+ lbu t0, 0(a2) /* t0 = mask of first pixel */
+ lbu t1, 1(a2) /* t1 = mask of second pixel */
+ or t2, t0, t1
+ beqz t2, 222f /* if (t0 == 0) && (t1 == 0) */
+ addiu a2, a2, 2 /* advance mask by two bytes */
+ and t9, t0, t1
+ move s2, s0 /* preload 0565 src for the direct-store case */
+ beq t9, t5, 22f /* if (t0 == 0xff) && (t1 == 0xff) */
+ move s3, s0
+ lhu t2, 0(a0) /* t2 = first dst pixel */
+ lhu t3, 2(a0) /* t3 = second dst pixel */
+
+ CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7
+ OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, s2, s3, \
+ t2, t3, t4, t9, s4, s5, s6, s7, s8
+ CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5
+22:
+ sh s2, 0(a0)
+ sh s3, 2(a0)
+222:
+ addiu a3, a3, -2 /* two pixels consumed */
+ addiu t0, a3, -1
+ bgtz t0, 21b /* loop while at least two pixels remain */
+ addiu a0, a0, 4 /* advance dst by two 16-bit pixels */
+3:
+ blez a3, 4f /* no pixel left over */
+ nop
+ /* a1 = src */
+ lbu t0, 0(a2) /* t0 = mask */
+ beqz t0, 4f /* if (t0 == 0) */
+ nop
+ lhu t1, 0(a0) /* t1 = dst */
+ CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7
+ beq t0, t5, 31f /* if (t0 == 0xff) */
+ move t3, a1 /* t3 = src, kept when mask == 0xff */
+
+ MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t7, t8, t9
+31:
+ not t6, t3
+ srl t6, t6, 24 /* t6 = (~t3) >> 24 */
+ MIPS_UN8x4_MUL_UN8 t2, t6, t2, t4, t7, t8, t9
+ addu_s.qb t1, t2, t3
+ CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7
+ sh t2, 0(a0)
+4:
+ RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ j ra
+ nop
+
+END(pixman_composite_over_n_8_0565_asm_mips)
diff --git a/pixman/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman/pixman-mips-dspr2-asm.h
index 12ff42c57..838306043 100644
--- a/pixman/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman/pixman-mips-dspr2-asm.h
@@ -499,4 +499,71 @@ LEAF_MIPS32R2(symbol) \
precr.qb.ph \d2_8888, \scratch5, \scratch6
.endm
+/*
+ * OVER operation on a single a8r8g8b8 source pixel (s_8888) and a single
+ * a8r8g8b8 destination pixel (d_8888) using an a8 mask (m_8). The result is
+ * left in out_8888. The macro also needs maskLSR, which is used by the
+ * rounding step of the multiply macro and must be loaded beforehand with:
+ * li maskLSR, 0x00ff00ff
+ *
+ * Steps, as written below:
+ *   scratch1 = s_8888 multiplied by m_8   (first MIPS_UN8x4_MUL_UN8)
+ *   scratch2 = (~scratch1) >> 24
+ *   d_8888   = d_8888 multiplied by scratch2 (second MIPS_UN8x4_MUL_UN8)
+ *   out_8888 = addu_s.qb of d_8888 and scratch1
+ *
+ * Side effects: d_8888 is overwritten, scratch1..scratch4 are clobbered,
+ * and out_8888 is handed to the second multiply as a scratch register
+ * before it receives the result. out_8888 should therefore presumably be a
+ * different register from d_8888 and scratch1 - TODO confirm against the
+ * definition of MIPS_UN8x4_MUL_UN8, which is outside this hunk.
+ */
+.macro OVER_8888_8_8888 s_8888, \
+ m_8, \
+ d_8888, \
+ out_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3, scratch4
+ MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \
+ \scratch1, \maskLSR, \
+ \scratch2, \scratch3, \scratch4
+
+ not \scratch2, \scratch1
+ srl \scratch2, \scratch2, 24 /* scratch2 = (~scratch1) >> 24 */
+
+ MIPS_UN8x4_MUL_UN8 \d_8888, \scratch2, \
+ \d_8888, \maskLSR, \
+ \scratch3, \scratch4, \out_8888
+
+ addu_s.qb \out_8888, \d_8888, \scratch1
+.endm
+
+/*
+ * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
+ * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and
+ * m2_8). The results are left in out1_8888 and out2_8888. The macro also
+ * needs maskLSR, which is used by the rounding step of the multiply macro
+ * and must be loaded beforehand with:
+ * li maskLSR, 0x00ff00ff
+ *
+ * Steps, as written below (N = 1, 2):
+ *   scratchN     = sN_8888 multiplied by mN_8 (first MIPS_2xUN8x4_MUL_2xUN8)
+ *   scratch(N+2) = (~scratchN) >> 24
+ *   dN_8888      = dN_8888 multiplied by scratch(N+2) (second multiply)
+ *   outN_8888    = addu_s.qb of dN_8888 and scratchN
+ *
+ * Side effects: d1_8888 and d2_8888 are overwritten, scratch1..scratch6 are
+ * clobbered, and out1_8888/out2_8888 are handed to both multiplies as
+ * scratch registers before they receive the results. The out registers
+ * should therefore presumably differ from the d and scratch1/scratch2
+ * registers - TODO confirm against the definition of
+ * MIPS_2xUN8x4_MUL_2xUN8, which is outside this hunk.
+ */
+.macro OVER_2x8888_2x8_2x8888 s1_8888, \
+ s2_8888, \
+ m1_8, \
+ m2_8, \
+ d1_8888, \
+ d2_8888, \
+ out1_8888, \
+ out2_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3, \
+ scratch4, scratch5, scratch6
+ MIPS_2xUN8x4_MUL_2xUN8 \s1_8888, \s2_8888, \
+ \m1_8, \m2_8, \
+ \scratch1, \scratch2, \
+ \maskLSR, \
+ \scratch3, \scratch4, \out1_8888, \
+ \out2_8888, \scratch5, \scratch6
+
+ not \scratch3, \scratch1
+ srl \scratch3, \scratch3, 24 /* scratch3 = (~scratch1) >> 24 */
+ not \scratch4, \scratch2
+ srl \scratch4, \scratch4, 24 /* scratch4 = (~scratch2) >> 24 */
+
+ MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \
+ \scratch3, \scratch4, \
+ \d1_8888, \d2_8888, \
+ \maskLSR, \
+ \scratch5, \scratch6, \out1_8888, \
+ \out2_8888, \scratch3, \scratch4
+
+ addu_s.qb \out1_8888, \d1_8888, \scratch1
+ addu_s.qb \out2_8888, \d2_8888, \scratch2
+.endm
+
#endif //PIXMAN_MIPS_DSPR2_ASM_H
diff --git a/pixman/pixman/pixman-mips-dspr2.c b/pixman/pixman/pixman-mips-dspr2.c
index 018770a4a..7081734c0 100644
--- a/pixman/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman/pixman-mips-dspr2.c
@@ -53,6 +53,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
uint32_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
+ uint8_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
+ uint8_t, 1, uint16_t, 1)
static pixman_bool_t
pixman_fill_mips (uint32_t *bits,
@@ -195,6 +199,12 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mips_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mips_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mips_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mips_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mips_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565),
{ PIXMAN_OP_NONE },
};
diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c
index 8abf62469..54d5b274b 100644
--- a/pixman/pixman/pixman-mmx.c
+++ b/pixman/pixman/pixman-mmx.c
@@ -620,6 +620,12 @@ pack_4xpacked565 (__m64 a, __m64 b)
#endif
}
+static force_inline __m64
+pack_4x565 (__m64 v0, __m64 v1, __m64 v2, __m64 v3)
+{ /* Combine v0/v1 and v2/v3 with pack8888, pass both results to
+   * pack_4xpacked565 and return what it yields. The callers added in this
+   * patch store that value straight through the destination pointer,
+   * replacing four chained pack_565 calls on the same vdest. */
+ return pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3));
+}
+
#ifndef _MSC_VER
static force_inline __m64
@@ -1418,16 +1424,14 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
while (w >= 4)
{
- __m64 vdest;
+ __m64 vdest = *(__m64 *)dst;
- vdest = *(__m64 *)dst;
-
- vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 0)), vdest, 0);
- vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 1)), vdest, 1);
- vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 2)), vdest, 2);
- vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 3)), vdest, 3);
+ __m64 v0 = over (vsrc, vsrca, expand565 (vdest, 0));
+ __m64 v1 = over (vsrc, vsrca, expand565 (vdest, 1));
+ __m64 v2 = over (vsrc, vsrca, expand565 (vdest, 2));
+ __m64 v3 = over (vsrc, vsrca, expand565 (vdest, 3));
- *(__m64 *)dst = vdest;
+ *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
dst += 4;
w -= 4;
@@ -1840,22 +1844,19 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
while (w >= 4)
{
- __m64 vsrc0, vsrc1, vsrc2, vsrc3;
- __m64 vdest;
+ __m64 vdest = *(__m64 *)dst;
- vsrc0 = load8888 ((src + 0));
- vsrc1 = load8888 ((src + 1));
- vsrc2 = load8888 ((src + 2));
- vsrc3 = load8888 ((src + 3));
+ __m64 vsrc0 = load8888 ((src + 0));
+ __m64 vsrc1 = load8888 ((src + 1));
+ __m64 vsrc2 = load8888 ((src + 2));
+ __m64 vsrc3 = load8888 ((src + 3));
- vdest = *(__m64 *)dst;
-
- vdest = pack_565 (over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)), vdest, 0);
- vdest = pack_565 (over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)), vdest, 1);
- vdest = pack_565 (over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)), vdest, 2);
- vdest = pack_565 (over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)), vdest, 3);
+ __m64 v0 = over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0));
+ __m64 v1 = over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1));
+ __m64 v2 = over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2));
+ __m64 v3 = over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3));
- *(__m64 *)dst = vdest;
+ *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
w -= 4;
dst += 4;
@@ -2391,25 +2392,22 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
}
else if (m0 | m1 | m2 | m3)
{
- __m64 vdest;
- __m64 vm0, vm1, vm2, vm3;
-
- vdest = *(__m64 *)dst;
+ __m64 vdest = *(__m64 *)dst;
- vm0 = to_m64 (m0);
- vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm0),
- expand565 (vdest, 0)), vdest, 0);
- vm1 = to_m64 (m1);
- vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm1),
- expand565 (vdest, 1)), vdest, 1);
- vm2 = to_m64 (m2);
- vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm2),
- expand565 (vdest, 2)), vdest, 2);
- vm3 = to_m64 (m3);
- vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm3),
- expand565 (vdest, 3)), vdest, 3);
-
- *(__m64 *)dst = vdest;
+ __m64 vm0 = to_m64 (m0);
+ __m64 v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0),
+ expand565 (vdest, 0));
+ __m64 vm1 = to_m64 (m1);
+ __m64 v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1),
+ expand565 (vdest, 1));
+ __m64 vm2 = to_m64 (m2);
+ __m64 v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2),
+ expand565 (vdest, 2));
+ __m64 vm3 = to_m64 (m3);
+ __m64 v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3),
+ expand565 (vdest, 3));
+
+ *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);;
}
w -= 4;
@@ -2506,24 +2504,23 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
if ((a0 & a1 & a2 & a3) == 0xFF)
{
- __m64 vdest;
- vdest = pack_565 (invert_colors (load8888 (&s0)), _mm_setzero_si64 (), 0);
- vdest = pack_565 (invert_colors (load8888 (&s1)), vdest, 1);
- vdest = pack_565 (invert_colors (load8888 (&s2)), vdest, 2);
- vdest = pack_565 (invert_colors (load8888 (&s3)), vdest, 3);
+ __m64 v0 = invert_colors (load8888 (&s0));
+ __m64 v1 = invert_colors (load8888 (&s1));
+ __m64 v2 = invert_colors (load8888 (&s2));
+ __m64 v3 = invert_colors (load8888 (&s3));
- *(__m64 *)dst = vdest;
+ *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
}
else if (s0 | s1 | s2 | s3)
{
__m64 vdest = *(__m64 *)dst;
- vdest = pack_565 (over_rev_non_pre (load8888 (&s0), expand565 (vdest, 0)), vdest, 0);
- vdest = pack_565 (over_rev_non_pre (load8888 (&s1), expand565 (vdest, 1)), vdest, 1);
- vdest = pack_565 (over_rev_non_pre (load8888 (&s2), expand565 (vdest, 2)), vdest, 2);
- vdest = pack_565 (over_rev_non_pre (load8888 (&s3), expand565 (vdest, 3)), vdest, 3);
+ __m64 v0 = over_rev_non_pre (load8888 (&s0), expand565 (vdest, 0));
+ __m64 v1 = over_rev_non_pre (load8888 (&s1), expand565 (vdest, 1));
+ __m64 v2 = over_rev_non_pre (load8888 (&s2), expand565 (vdest, 2));
+ __m64 v3 = over_rev_non_pre (load8888 (&s3), expand565 (vdest, 3));
- *(__m64 *)dst = vdest;
+ *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
}
w -= 4;
@@ -2698,12 +2695,12 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
{
__m64 vdest = *(__m64 *)q;
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m0), expand565 (vdest, 0)), vdest, 0);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m1), expand565 (vdest, 1)), vdest, 1);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m2), expand565 (vdest, 2)), vdest, 2);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m3), expand565 (vdest, 3)), vdest, 3);
+ __m64 v0 = in_over (vsrc, vsrca, load8888 (&m0), expand565 (vdest, 0));
+ __m64 v1 = in_over (vsrc, vsrca, load8888 (&m1), expand565 (vdest, 1));
+ __m64 v2 = in_over (vsrc, vsrca, load8888 (&m2), expand565 (vdest, 2));
+ __m64 v3 = in_over (vsrc, vsrca, load8888 (&m3), expand565 (vdest, 3));
- *(__m64 *)q = vdest;
+ *(__m64 *)q = pack_4x565 (v0, v1, v2, v3);
}
twidth -= 4;
p += 4;