diff options
Diffstat (limited to 'pixman')
-rw-r--r-- | pixman/Makefile.am | 4 | ||||
-rw-r--r-- | pixman/configure.ac | 16 | ||||
-rw-r--r-- | pixman/pixman/pixman-mips-dspr2-asm.S | 224 | ||||
-rw-r--r-- | pixman/pixman/pixman-mips-dspr2-asm.h | 67 | ||||
-rw-r--r-- | pixman/pixman/pixman-mips-dspr2.c | 10 | ||||
-rw-r--r-- | pixman/pixman/pixman-mmx.c | 107 |
6 files changed, 365 insertions, 63 deletions
diff --git a/pixman/Makefile.am b/pixman/Makefile.am index df8677a2f..88ff897be 100644 --- a/pixman/Makefile.am +++ b/pixman/Makefile.am @@ -21,6 +21,10 @@ RELEASE_XORG_HOST = $(USERNAME)@xorg.freedesktop.org RELEASE_XORG_DIR = /srv/xorg.freedesktop.org/archive/individual/lib RELEASE_ANNOUNCE_LIST = cairo-announce@cairographics.org, xorg-announce@lists.freedesktop.org, pixman@lists.freedesktop.org +EXTRA_DIST = \ + Makefile.win32 \ + Makefile.win32.common + tar_gz = $(PACKAGE)-$(VERSION).tar.gz tar_bz2 = $(PACKAGE)-$(VERSION).tar.bz2 diff --git a/pixman/configure.ac b/pixman/configure.ac index 54787342e..d9498397b 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -278,10 +278,10 @@ if test "x$LS_CFLAGS" = "x" ; then fi have_loongson_mmi=no -AC_MSG_CHECKING(whether to use Loongson MMI) +AC_MSG_CHECKING(whether to use Loongson MMI assembler) xserver_save_CFLAGS=$CFLAGS -CFLAGS=" $CFLAGS $LS_CFLAGS" +CFLAGS=" $LS_CFLAGS $CFLAGS -I$srcdir" AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ #ifndef __mips_loongson_vector_rev #error "Loongson Multimedia Instructions are only available on Loongson" @@ -301,12 +301,12 @@ int main () { }]])], have_loongson_mmi=yes) CFLAGS=$xserver_save_CFLAGS -AC_ARG_ENABLE(loongson, - [AC_HELP_STRING([--disable-loongson], - [disable Loongson fast paths])], - [enable_loongson=$enableval], [enable_loongson=auto]) +AC_ARG_ENABLE(loongson-mmi, + [AC_HELP_STRING([--disable-loongson-mmi], + [disable Loongson MMI fast paths])], + [enable_loongson_mmi=$enableval], [enable_loongson_mmi=auto]) -if test $enable_loongson = no ; then +if test $enable_loongson_mmi = no ; then have_loongson_mmi=disabled fi @@ -317,7 +317,7 @@ else fi AC_MSG_RESULT($have_loongson_mmi) -if test $enable_loongson = yes && test $have_loongson_mmi = no ; then +if test $enable_loongson_mmi = yes && test $have_loongson_mmi = no ; then AC_MSG_ERROR([Loongson MMI not detected]) fi diff --git a/pixman/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman/pixman-mips-dspr2-asm.S index 
6a0fc1803..68ad33f7c 100644 --- a/pixman/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman/pixman-mips-dspr2-asm.S @@ -527,3 +527,227 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips) nop END(pixman_composite_over_n_8888_0565_ca_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4 + beqz a3, 4f + nop + li t4, 0x00ff00ff + li t5, 0xff + addiu t0, a3, -1 + beqz t0, 3f /* last pixel */ + srl t6, a1, 24 /* t6 = srca */ + not s4, a1 + beq t5, t6, 2f /* if (srca == 0xff) */ + srl s4, s4, 24 +1: + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + lbu t1, 1(a2) /* t1 = mask */ + or t2, t0, t1 + beqz t2, 111f /* if (t0 == 0) && (t1 == 0) */ + addiu a2, a2, 2 + and t3, t0, t1 + + lw t2, 0(a0) /* t2 = dst */ + beq t3, t5, 11f /* if (t0 == 0xff) && (t1 == 0xff) */ + lw t3, 4(a0) /* t3 = dst */ + + MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3 + not s2, s0 + not s3, s1 + srl s2, s2, 24 + srl s3, s3, 24 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9 + addu_s.qb s2, t2, s0 + addu_s.qb s3, t3, s1 + sw s2, 0(a0) + b 111f + sw s3, 4(a0) +11: + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9 + addu_s.qb s2, t2, a1 + addu_s.qb s3, t3, a1 + sw s2, 0(a0) + sw s3, 4(a0) + +111: + addiu a3, a3, -2 + addiu t0, a3, -1 + bgtz t0, 1b + addiu a0, a0, 8 + b 3f + nop +2: + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + lbu t1, 1(a2) /* t1 = mask */ + or t2, t0, t1 + beqz t2, 222f /* if (t0 == 0) && (t1 == 0) */ + addiu a2, a2, 2 + and t3, t0, t1 + beq t3, t5, 22f /* if (t0 == 0xff) && (t1 == 0xff) */ + nop + lw t2, 0(a0) /* t2 = dst */ + lw t3, 4(a0) /* t3 = dst */ + + OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \ + t6, t7, t4, t8, t9, s0, s1, s2, s3 + sw t6, 0(a0) + b 222f + sw t7, 4(a0) +22: + sw a1, 0(a0) + sw a1, 4(a0) +222: + addiu a3, 
a3, -2 + addiu t0, a3, -1 + bgtz t0, 2b + addiu a0, a0, 8 +3: + blez a3, 4f + nop + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + beqz t0, 4f /* if (t0 == 0) */ + addiu a2, a2, 1 + move t3, a1 + beq t0, t5, 31f /* if (t0 == 0xff) */ + lw t1, 0(a0) /* t1 = dst */ + + MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8 +31: + not t2, t3 + srl t2, t2, 24 + MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8 + addu_s.qb t2, t1, t3 + sw t2, 0(a0) +4: + RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4 + j ra + nop + +END(pixman_composite_over_n_8_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 + beqz a3, 4f + nop + li t4, 0x00ff00ff + li t5, 0xff + li t6, 0xf800f800 + li t7, 0x07e007e0 + li t8, 0x001F001F + addiu t1, a3, -1 + beqz t1, 3f /* last pixel */ + srl t0, a1, 24 /* t0 = srca */ + not v0, a1 + beq t0, t5, 2f /* if (srca == 0xff) */ + srl v0, v0, 24 +1: + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + lbu t1, 1(a2) /* t1 = mask */ + or t2, t0, t1 + beqz t2, 111f /* if (t0 == 0) && (t1 == 0) */ + addiu a2, a2, 2 + lhu t2, 0(a0) /* t2 = dst */ + lhu t3, 2(a0) /* t3 = dst */ + CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4 + and t9, t0, t1 + beq t9, t5, 11f /* if (t0 == 0xff) && (t1 == 0xff) */ + nop + + MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8 + not s4, s2 + not s5, s3 + srl s4, s4, 24 + srl s5, s5, 24 + MIPS_2xUN8x4_MUL_2xUN8 s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8 + addu_s.qb s4, s2, s0 + addu_s.qb s5, s3, s1 + CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1 + sh t2, 0(a0) + b 111f + sh t3, 2(a0) +11: + MIPS_2xUN8x4_MUL_2xUN8 s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8 + addu_s.qb s4, a1, s0 + addu_s.qb s5, a1, s1 + CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1 + sh t2, 0(a0) + sh t3, 2(a0) +111: + 
addiu a3, a3, -2 + addiu t0, a3, -1 + bgtz t0, 1b + addiu a0, a0, 4 + b 3f + nop +2: + CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2 +21: + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + lbu t1, 1(a2) /* t1 = mask */ + or t2, t0, t1 + beqz t2, 222f /* if (t0 == 0) && (t1 == 0) */ + addiu a2, a2, 2 + and t9, t0, t1 + move s2, s0 + beq t9, t5, 22f /* if (t0 == 0xff) && (t1 == 0xff) */ + move s3, s0 + lhu t2, 0(a0) /* t2 = dst */ + lhu t3, 2(a0) /* t3 = dst */ + + CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7 + OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, s2, s3, \ + t2, t3, t4, t9, s4, s5, s6, s7, s8 + CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5 +22: + sh s2, 0(a0) + sh s3, 2(a0) +222: + addiu a3, a3, -2 + addiu t0, a3, -1 + bgtz t0, 21b + addiu a0, a0, 4 +3: + blez a3, 4f + nop + /* a1 = src */ + lbu t0, 0(a2) /* t0 = mask */ + beqz t0, 4f /* if (t0 == 0) */ + nop + lhu t1, 0(a0) /* t1 = dst */ + CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7 + beq t0, t5, 31f /* if (t0 == 0xff) */ + move t3, a1 + + MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t7, t8, t9 +31: + not t6, t3 + srl t6, t6, 24 + MIPS_UN8x4_MUL_UN8 t2, t6, t2, t4, t7, t8, t9 + addu_s.qb t1, t2, t3 + CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7 + sh t2, 0(a0) +4: + RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop + +END(pixman_composite_over_n_8_0565_asm_mips) diff --git a/pixman/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman/pixman-mips-dspr2-asm.h index 12ff42c57..838306043 100644 --- a/pixman/pixman/pixman-mips-dspr2-asm.h +++ b/pixman/pixman/pixman-mips-dspr2-asm.h @@ -499,4 +499,71 @@ LEAF_MIPS32R2(symbol) \ precr.qb.ph \d2_8888, \scratch5, \scratch6 .endm +/* + * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8 + * destination pixel (d_8888) using a8 mask (m_8). It also requires maskLSR + * needed for rounding process. 
maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_8888_8_8888 s_8888, \ + m_8, \ + d_8888, \ + out_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, scratch4 + MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \ + \scratch1, \maskLSR, \ + \scratch2, \scratch3, \scratch4 + + not \scratch2, \scratch1 + srl \scratch2, \scratch2, 24 + + MIPS_UN8x4_MUL_UN8 \d_8888, \scratch2, \ + \d_8888, \maskLSR, \ + \scratch3, \scratch4, \out_8888 + + addu_s.qb \out_8888, \d_8888, \scratch1 +.endm + +/* + * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two + * a8r8g8b8 destination pixels (d1_8888 and d2_8888) using a8 masks (m1_8 and + * m2_8). It also requires maskLSR needed for rounding process. maskLSR must + * have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_2x8888_2x8_2x8888 s1_8888, \ + s2_8888, \ + m1_8, \ + m2_8, \ + d1_8888, \ + d2_8888, \ + out1_8888, \ + out2_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, \ + scratch4, scratch5, scratch6 + MIPS_2xUN8x4_MUL_2xUN8 \s1_8888, \s2_8888, \ + \m1_8, \m2_8, \ + \scratch1, \scratch2, \ + \maskLSR, \ + \scratch3, \scratch4, \out1_8888, \ + \out2_8888, \scratch5, \scratch6 + + not \scratch3, \scratch1 + srl \scratch3, \scratch3, 24 + not \scratch4, \scratch2 + srl \scratch4, \scratch4, 24 + + MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \ + \scratch3, \scratch4, \ + \d1_8888, \d2_8888, \ + \maskLSR, \ + \scratch5, \scratch6, \out1_8888, \ + \out2_8888, \scratch3, \scratch4 + + addu_s.qb \out1_8888, \d1_8888, \scratch1 + addu_s.qb \out2_8888, \d2_8888, \scratch2 +.endm + #endif //PIXMAN_MIPS_DSPR2_ASM_H diff --git a/pixman/pixman/pixman-mips-dspr2.c b/pixman/pixman/pixman-mips-dspr2.c index 018770a4a..7081734c0 100644 --- a/pixman/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman/pixman-mips-dspr2.c @@ -53,6 +53,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca, uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, 
over_n_8888_0565_ca, uint32_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565, + uint8_t, 1, uint16_t, 1) static pixman_bool_t pixman_fill_mips (uint32_t *bits, @@ -195,6 +199,12 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mips_composite_over_n_8888_0565_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mips_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mips_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mips_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mips_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565), { PIXMAN_OP_NONE }, }; diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c index b14201a4e..01a2bc93b 100644 --- a/pixman/pixman/pixman-mmx.c +++ b/pixman/pixman/pixman-mmx.c @@ -598,6 +598,12 @@ pack_4xpacked565 (__m64 a, __m64 b) #endif } +static force_inline __m64 +pack_4x565 (__m64 v0, __m64 v1, __m64 v2, __m64 v3) +{ + return pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3)); +} + #ifndef _MSC_VER static force_inline __m64 @@ -1396,16 +1402,14 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp, while (w >= 4) { - __m64 vdest; + __m64 vdest = *(__m64 *)dst; - vdest = *(__m64 *)dst; - - vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 2)), vdest, 2); - vdest = 
pack_565 (over (vsrc, vsrca, expand565 (vdest, 3)), vdest, 3); + __m64 v0 = over (vsrc, vsrca, expand565 (vdest, 0)); + __m64 v1 = over (vsrc, vsrca, expand565 (vdest, 1)); + __m64 v2 = over (vsrc, vsrca, expand565 (vdest, 2)); + __m64 v3 = over (vsrc, vsrca, expand565 (vdest, 3)); - *(__m64 *)dst = vdest; + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); dst += 4; w -= 4; @@ -1818,22 +1822,19 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, while (w >= 4) { - __m64 vsrc0, vsrc1, vsrc2, vsrc3; - __m64 vdest; + __m64 vdest = *(__m64 *)dst; - vsrc0 = load8888 ((src + 0)); - vsrc1 = load8888 ((src + 1)); - vsrc2 = load8888 ((src + 2)); - vsrc3 = load8888 ((src + 3)); + __m64 vsrc0 = load8888 ((src + 0)); + __m64 vsrc1 = load8888 ((src + 1)); + __m64 vsrc2 = load8888 ((src + 2)); + __m64 vsrc3 = load8888 ((src + 3)); - vdest = *(__m64 *)dst; - - vdest = pack_565 (over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)), vdest, 3); + __m64 v0 = over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)); + __m64 v1 = over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)); + __m64 v2 = over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)); + __m64 v3 = over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)); - *(__m64 *)dst = vdest; + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); w -= 4; dst += 4; @@ -2368,25 +2369,22 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, } else if (m0 | m1 | m2 | m3) { - __m64 vdest; - __m64 vm0, vm1, vm2, vm3; - - vdest = *(__m64 *)dst; + __m64 vdest = *(__m64 *)dst; - vm0 = to_m64 (m0); - vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm0), - expand565 (vdest, 0)), vdest, 0); - vm1 = to_m64 (m1); - vdest = pack_565 (in_over (vsrc, vsrca, 
expand_alpha_rev (vm1), - expand565 (vdest, 1)), vdest, 1); - vm2 = to_m64 (m2); - vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm2), - expand565 (vdest, 2)), vdest, 2); - vm3 = to_m64 (m3); - vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm3), - expand565 (vdest, 3)), vdest, 3); - - *(__m64 *)dst = vdest; + __m64 vm0 = to_m64 (m0); + __m64 v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0), + expand565 (vdest, 0)); + __m64 vm1 = to_m64 (m1); + __m64 v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1), + expand565 (vdest, 1)); + __m64 vm2 = to_m64 (m2); + __m64 v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2), + expand565 (vdest, 2)); + __m64 vm3 = to_m64 (m3); + __m64 v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3), + expand565 (vdest, 3)); + + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);; } w -= 4; @@ -2483,24 +2481,23 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, if ((a0 & a1 & a2 & a3) == 0xFF) { - __m64 vdest; - vdest = pack_565 (invert_colors (load8888 (&s0)), _mm_setzero_si64 (), 0); - vdest = pack_565 (invert_colors (load8888 (&s1)), vdest, 1); - vdest = pack_565 (invert_colors (load8888 (&s2)), vdest, 2); - vdest = pack_565 (invert_colors (load8888 (&s3)), vdest, 3); + __m64 v0 = invert_colors (load8888 (&s0)); + __m64 v1 = invert_colors (load8888 (&s1)); + __m64 v2 = invert_colors (load8888 (&s2)); + __m64 v3 = invert_colors (load8888 (&s3)); - *(__m64 *)dst = vdest; + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); } else if (s0 | s1 | s2 | s3) { __m64 vdest = *(__m64 *)dst; - vdest = pack_565 (over_rev_non_pre (load8888 (&s0), expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (over_rev_non_pre (load8888 (&s1), expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (over_rev_non_pre (load8888 (&s2), expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (over_rev_non_pre (load8888 (&s3), expand565 (vdest, 3)), vdest, 3); + __m64 v0 = over_rev_non_pre (load8888 (&s0), expand565 (vdest, 0)); + __m64 v1 = 
over_rev_non_pre (load8888 (&s1), expand565 (vdest, 1)); + __m64 v2 = over_rev_non_pre (load8888 (&s2), expand565 (vdest, 2)); + __m64 v3 = over_rev_non_pre (load8888 (&s3), expand565 (vdest, 3)); - *(__m64 *)dst = vdest; + *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3); } w -= 4; @@ -2675,12 +2672,12 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)q; - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m0), expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m1), expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m2), expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m3), expand565 (vdest, 3)), vdest, 3); + __m64 v0 = in_over (vsrc, vsrca, load8888 (&m0), expand565 (vdest, 0)); + __m64 v1 = in_over (vsrc, vsrca, load8888 (&m1), expand565 (vdest, 1)); + __m64 v2 = in_over (vsrc, vsrca, load8888 (&m2), expand565 (vdest, 2)); + __m64 v3 = in_over (vsrc, vsrca, load8888 (&m3), expand565 (vdest, 3)); - *(__m64 *)q = vdest; + *(__m64 *)q = pack_4x565 (v0, v1, v2, v3); } twidth -= 4; p += 4; |