diff options
Diffstat (limited to 'pixman/pixman/pixman-sse2.c')
-rw-r--r-- | pixman/pixman/pixman-sse2.c | 589 |
1 files changed, 383 insertions, 206 deletions
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index bb74882b2..78b0ad185 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -368,6 +368,22 @@ cache_prefetch_next (__m128i* addr) _mm_prefetch ((void const *)(addr + 4), _MM_HINT_T0); /* 64 bytes ahead */ } +/* prefetching NULL is very slow on some systems. don't do that. */ + +static force_inline void +maybe_prefetch (__m128i* addr) +{ + if (addr) + cache_prefetch (addr); +} + +static force_inline void +maybe_prefetch_next (__m128i* addr) +{ + if (addr) + cache_prefetch_next (addr); +} + /* load 4 pixels from a 16-byte boundary aligned address */ static force_inline __m128i load_128_aligned (__m128i* src) @@ -413,9 +429,15 @@ save_128_unaligned (__m128i* dst, */ static force_inline __m64 +load_32_1x64 (uint32_t data) +{ + return _mm_cvtsi32_si64 (data); +} + +static force_inline __m64 unpack_32_1x64 (uint32_t data) { - return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (data), _mm_setzero_si64 ()); + return _mm_unpacklo_pi8 (load_32_1x64 (data), _mm_setzero_si64 ()); } static force_inline __m64 @@ -629,7 +651,7 @@ core_combine_over_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); /* Align dst on a 16-byte boundary */ while (w && ((unsigned long)pd & 15)) @@ -647,14 +669,14 @@ core_combine_over_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); /* I'm loading unaligned because I'm not sure about * the address alignment. @@ -720,7 +742,7 @@ core_combine_over_reverse_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); /* Align dst on a 16-byte boundary */ while (w && @@ -739,14 +761,14 @@ core_combine_over_reverse_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); /* I'm loading unaligned because I'm not sure * about the address alignment. @@ -822,7 +844,7 @@ core_combine_in_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { @@ -839,14 +861,14 @@ core_combine_in_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); xmm_dst_hi = load_128_aligned ((__m128i*) pd); xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm); @@ -896,7 +918,7 @@ core_combine_reverse_in_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { @@ -913,14 +935,14 @@ core_combine_reverse_in_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); xmm_dst_hi = load_128_aligned ((__m128i*) pd); xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); @@ -965,7 +987,7 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { @@ -986,7 +1008,7 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { @@ -996,7 +1018,7 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd, /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); xmm_dst_hi = load_128_aligned ((__m128i*) pd); @@ -1047,7 +1069,7 @@ core_combine_out_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { @@ -1067,7 +1089,7 @@ core_combine_out_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { @@ -1077,7 +1099,7 @@ core_combine_out_u_sse2 (uint32_t* pd, /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); xmm_dst_hi = load_128_aligned ((__m128i*) pd); @@ -1147,7 +1169,7 @@ core_combine_atop_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { @@ -1164,14 +1186,14 @@ core_combine_atop_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); xmm_dst_hi = load_128_aligned ((__m128i*) pd); @@ -1244,7 +1266,7 @@ core_combine_reverse_atop_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { @@ -1261,14 +1283,14 @@ core_combine_reverse_atop_u_sse2 (uint32_t* pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); xmm_dst_hi = load_128_aligned ((__m128i*) pd); @@ -1345,7 +1367,7 @@ core_combine_xor_u_sse2 (uint32_t* dst, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { @@ -1362,14 +1384,14 @@ core_combine_xor_u_sse2 (uint32_t* dst, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm); xmm_dst = load_128_aligned ((__m128i*) pd); @@ -1430,7 +1452,7 @@ core_combine_add_u_sse2 (uint32_t* dst, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { @@ -1448,7 +1470,7 @@ core_combine_add_u_sse2 (uint32_t* dst, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { @@ -1457,7 +1479,7 @@ core_combine_add_u_sse2 (uint32_t* dst, /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); s = combine4 ((__m128i*)ps, (__m128i*)pm); @@ -1516,7 +1538,7 @@ core_combine_saturate_u_sse2 (uint32_t * pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { @@ -1533,14 +1555,14 @@ core_combine_saturate_u_sse2 (uint32_t * pd, /* call prefetch hint to optimize cache load*/ cache_prefetch ((__m128i*)ps); cache_prefetch ((__m128i*)pd); - cache_prefetch ((__m128i*)pm); + maybe_prefetch ((__m128i*)pm); while (w >= 4) { /* fill cache line with next memory */ cache_prefetch_next ((__m128i*)ps); cache_prefetch_next ((__m128i*)pd); - cache_prefetch_next ((__m128i*)pm); + maybe_prefetch_next ((__m128i*)pm); xmm_dst = load_128_aligned ((__m128i*)pd); xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm); @@ -3221,7 +3243,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, if (m) { d = *pd; - + mmx_mask = unpack_32_1x64 (m); mmx_dest = unpack_32_1x64 (d); @@ -3412,7 +3434,7 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE ( src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); + mask = _pixman_image_get_solid (mask_image, PIXMAN_a8r8g8b8); xmm_mask = create_mask_16_128 (mask >> 24); @@ -3528,7 +3550,7 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE ( src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); + mask = _pixman_image_get_solid (mask_image, PIXMAN_a8r8g8b8); xmm_mask = create_mask_16_128 (mask >> 24); xmm_alpha = mask_00ff; @@ -5033,23 +5055,23 @@ sse2_composite_in_8_8 (pixman_implementation_t *imp, } /* ------------------------------------------------------------------------- - * composite_add_8888_8_8 + * composite_add_n_8_8 */ static void -sse2_composite_add_8888_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +sse2_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { uint8_t *dst_line, *dst; uint8_t *mask_line, *mask; @@ -5428,9 +5450,7 @@ sse2_composite_copy_area (pixman_implementation_t *imp, src_x, src_y, dest_x, dest_y, width, height); } -#if 0 -/* This code are buggy in MMX version, now the bug was translated to SSE2 version */ -void +static void sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, pixman_op_t op, pixman_image_t * src_image, @@ -5451,6 +5471,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, uint32_t m; int src_stride, mask_stride, dst_stride; uint16_t w; + __m64 ms; __m128i xmm_src, xmm_src_lo, xmm_src_hi; __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; @@ -5465,199 +5486,355 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, while (height--) { - src = src_line; - src_line += src_stride; - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + cache_prefetch ((__m128i*)mask); + + while (w && (unsigned long)dst & 15) + { + s = 0xff000000 | *src++; + m = (uint32_t) *mask++; + d = *dst; + ms = unpack_32_1x64 (s); + + if (m != 0xff) + { + __m64 ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m)); + __m64 md = unpack_32_1x64 (d); + + ms = in_over_1x64 (&ms, &mask_x00ff, &ma, &md); + } + + *dst++ = pack_1x64_32 (ms); + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + cache_prefetch ((__m128i*)mask); + + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)src); + cache_prefetch_next ((__m128i*)dst); + cache_prefetch_next ((__m128i*)mask); + + m = *(uint32_t*) mask; + xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000); + + if (m == 0xffffffff) + { + save_128_aligned ((__m128i*)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + src += 4; + dst += 4; + mask += 4; + w -= 4; + } + + while (w) + { + m = (uint32_t) *mask++; + + if (m) + { + s = 0xff000000 | *src; + + if (m == 0xff) + { + *dst = s; + } + else + { + __m64 ma, md, ms; + + d = *dst; + + ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m)); + md = unpack_32_1x64 (d); + ms = unpack_32_1x64 (s); + + *dst = pack_1x64_32 (in_over_1x64 (&ms, &mask_x00ff, &ma, &md)); + } + + } + + src++; + dst++; + w--; + } + } - w = width; + _mm_empty (); +} - /* call prefetch hint to optimize cache load*/ - cache_prefetch ((__m128i*)src); - cache_prefetch ((__m128i*)dst); - cache_prefetch ((__m128i*)mask); +static void +sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *src, *src_line, s; + uint32_t *dst, *dst_line, d; + uint8_t *mask, *mask_line; + uint32_t m; + int src_stride, mask_stride, dst_stride; + uint16_t w; - while (w && (unsigned long)dst & 15) - { - s = 0xff000000 | *src++; - m = (uint32_t) *mask++; - d = *dst; + __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - __m64 ms = unpack_32_1x64 (s); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - if (m != 0xff) - { - ms = in_over_1x64 (ms, - mask_x00ff, - expand_alpha_rev_1x64 (unpack_32_1x64 (m)), - unpack_32_1x64 (d)); - } + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; - *dst++ = pack_1x64_32 (ms); - w--; - } + w = width; - /* call prefetch hint to optimize cache load*/ - cache_prefetch ((__m128i*)src); - cache_prefetch ((__m128i*)dst); - cache_prefetch ((__m128i*)mask); + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i *)src); + cache_prefetch ((__m128i *)dst); + cache_prefetch ((__m128i *)mask); - while (w >= 4) - { - /* fill cache line with next memory */ - cache_prefetch_next ((__m128i*)src); - cache_prefetch_next ((__m128i*)dst); - cache_prefetch_next ((__m128i*)mask); + while (w && (unsigned long)dst & 15) + { + uint32_t sa; - m = *(uint32_t*) mask; - xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000); + s = *src++; + m = (uint32_t) *mask++; + d = *dst; - if (m == 0xffffffff) + sa = s >> 24; + + if (m) { - save_128_aligned ((__m128i*)dst, xmm_src); + if (sa == 0xff && m == 0xff) + { + *dst = s; + } + else + { + __m64 ms, md, ma, msa; + + ma = expand_alpha_rev_1x64 (load_32_1x64 (m)); + ms = unpack_32_1x64 (s); + md = unpack_32_1x64 (d); + + msa = expand_alpha_rev_1x64 (load_32_1x64 (sa)); + + *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md)); + } } - else + + dst++; + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i *)src); + cache_prefetch ((__m128i *)dst); + cache_prefetch ((__m128i *)mask); + + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i *)src); + cache_prefetch_next ((__m128i *)dst); + cache_prefetch_next ((__m128i *)mask); + + m = *(uint32_t *) mask; + + if (m) { - xmm_dst = load_128_aligned ((__m128i*)dst); + xmm_src = load_128_unaligned ((__m128i*)src); - xmm_mask = _mm_unpacklo_epi16 ( - unpack_32_1x128 (m), _mm_setzero_si128 ()); + if (m == 0xffffffff && is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i *)dst); - unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); - expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, - &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - in_over_2x128 (xmm_src_lo, xmm_src_hi, - mask_00ff, mask_00ff, - xmm_mask_lo, xmm_mask_hi, - &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, + &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); - save_128_aligned ( - (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } } - src += 4; - dst += 4; - mask += 4; - w -= 4; - } + src += 4; + dst += 4; + mask += 4; + w -= 4; + } - while (w) - { - m = (uint32_t) *mask++; + while (w) + { + uint32_t sa; + + s = *src++; + m = (uint32_t) *mask++; + d = *dst; + + sa = s >> 24; if (m) { - s = 0xff000000 | *src; - - if (m == 0xff) + if (sa == 0xff && m == 0xff) { *dst = s; } else { - d = *dst; + __m64 ms, md, ma, msa; - *dst = pack_1x64_32 ( - in_over_1x64 ( - unpack_32_1x64 (s), - mask_x00ff, - expand_alpha_rev_1x64 (unpack_32_1x64 (m)), - unpack_32_1x64 (d))); - } + ma = expand_alpha_rev_1x64 (load_32_1x64 (m)); + ms = unpack_32_1x64 (s); + md = unpack_32_1x64 (d); + msa = expand_alpha_rev_1x64 (load_32_1x64 (sa)); + + *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md)); + } } - src++; dst++; - w--; - } + w--; + } } _mm_empty (); } -#endif - static const pixman_fast_path_t sse2_fast_paths[] = { - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, sse2_composite_over_n_8_0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, sse2_composite_over_n_8_0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_over_n_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_over_n_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_over_n_0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_over_8888_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_over_8888_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_over_8888_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_over_8888_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_over_8888_0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, sse2_composite_over_8888_0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_n_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_n_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_n_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_n_8_8888, 0 }, -#if 0 - /* FIXME: This code are buggy in MMX version, now the bug was translated to SSE2 version */ - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_x888_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_x888_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_x888_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_x888_8_8888, 0 }, -#endif - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, sse2_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, sse2_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area, 0 }, - - { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_add_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, sse2_composite_add_8000_8000, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_add_8888_8888, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_add_8888_8888, 0 }, - { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, sse2_composite_add_8888_8_8, 0 }, - - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_src_n_8_8888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_src_n_8_8888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_src_n_8_8888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_src_n_8_8888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_copy_area, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_copy_area, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area, 0 }, - { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_copy_area, 0 }, - { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, sse2_composite_copy_area, 0 }, - - { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, sse2_composite_in_8_8, 0 }, - { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, sse2_composite_in_n_8_8, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, sse2_composite_over_n_8_0565 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, sse2_composite_over_n_8_0565 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_over_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_over_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_over_n_0565 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_over_8888_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_over_8888_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_over_8888_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_over_8888_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_over_8888_0565 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, sse2_composite_over_8888_0565 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_n_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_n_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_n_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_n_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_8888_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_8888_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_8888_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_8888_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_x888_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_x888_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_x888_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_x888_8_8888 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_solid, PIXMAN_a8r8g8b8, sse2_composite_over_x888_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_solid, PIXMAN_x8r8g8b8, sse2_composite_over_x888_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_solid, PIXMAN_a8b8g8r8, sse2_composite_over_x888_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_solid, PIXMAN_x8b8g8r8, sse2_composite_over_x888_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_a8r8g8b8, sse2_composite_over_8888_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_x8r8g8b8, sse2_composite_over_8888_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_solid, PIXMAN_a8b8g8r8, sse2_composite_over_8888_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_solid, PIXMAN_x8b8g8r8, sse2_composite_over_8888_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8_ca, PIXMAN_a8r8g8b8, sse2_composite_over_n_8888_8888_ca }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8_ca, PIXMAN_x8r8g8b8, sse2_composite_over_n_8888_8888_ca }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8_ca, PIXMAN_a8b8g8r8, sse2_composite_over_n_8888_8888_ca }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8_ca, PIXMAN_x8b8g8r8, sse2_composite_over_n_8888_8888_ca }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8_ca, PIXMAN_r5g6b5, sse2_composite_over_n_8888_0565_ca }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8_ca, PIXMAN_b5g6r5, sse2_composite_over_n_8888_0565_ca }, + { PIXMAN_OP_OVER, PIXMAN_pixbuf, PIXMAN_pixbuf, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888 }, + { PIXMAN_OP_OVER, PIXMAN_pixbuf, PIXMAN_pixbuf, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888 }, + { PIXMAN_OP_OVER, PIXMAN_rpixbuf, PIXMAN_rpixbuf, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888 }, + { PIXMAN_OP_OVER, PIXMAN_rpixbuf, PIXMAN_rpixbuf, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888 }, + { PIXMAN_OP_OVER, PIXMAN_pixbuf, PIXMAN_pixbuf, PIXMAN_r5g6b5, sse2_composite_over_pixbuf_0565 }, + { PIXMAN_OP_OVER, PIXMAN_rpixbuf, PIXMAN_rpixbuf, PIXMAN_b5g6r5, sse2_composite_over_pixbuf_0565 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area }, + + { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8r8g8b8_ca, PIXMAN_a8r8g8b8, sse2_composite_add_n_8888_8888_ca }, + { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, sse2_composite_add_8000_8000 }, + { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_add_8888_8888 }, + { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_add_8888_8888 }, + { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, sse2_composite_add_n_8_8 }, + + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_src_n_8_8888 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_src_n_8_8888 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_src_n_8_8888 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_src_n_8_8888 }, + { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_copy_area }, + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_copy_area }, + { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area }, + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area }, + { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area }, + { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area }, + { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_copy_area }, + { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, sse2_composite_copy_area }, + + { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, sse2_composite_in_8_8 }, + { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, sse2_composite_in_n_8_8 }, { PIXMAN_OP_NONE }, }; |