aboutsummaryrefslogtreecommitdiff
path: root/pixman
diff options
context:
space:
mode:
Diffstat (limited to 'pixman')
-rw-r--r--pixman/configure.ac2
-rw-r--r--pixman/pixman/pixman-sse2.c1601
2 files changed, 572 insertions, 1031 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 5242799bb..8d96647f9 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -326,7 +326,7 @@ if test "x$SSE2_CFLAGS" = "x" ; then
SSE2_CFLAGS="-xarch=sse2"
fi
else
- SSE2_CFLAGS="-mmmx -msse2 -Winline"
+ SSE2_CFLAGS="-msse2 -Winline"
fi
fi
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index 2e135e2fe..88287b453 100644
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -30,36 +30,12 @@
#include <config.h>
#endif
-#include <mmintrin.h>
#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
#include <emmintrin.h> /* for SSE2 intrinsics */
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-fast-path.h"
-#if defined(_MSC_VER) && defined(_M_AMD64)
-/* Windows 64 doesn't allow MMX to be used, so
- * the pixman-x64-mmx-emulation.h file contains
- * implementations of those MMX intrinsics that
- * are used in the SSE2 implementation.
- */
-# include "pixman-x64-mmx-emulation.h"
-#endif
-
-#ifdef USE_SSE2
-
-/* --------------------------------------------------------------------
- * Locals
- */
-
-static __m64 mask_x0080;
-static __m64 mask_x00ff;
-static __m64 mask_x0101;
-static __m64 mask_x_alpha;
-
-static __m64 mask_x565_rgb;
-static __m64 mask_x565_unpack;
-
static __m128i mask_0080;
static __m128i mask_00ff;
static __m128i mask_0101;
@@ -77,9 +53,6 @@ static __m128i mask_blue;
static __m128i mask_565_fix_rb;
static __m128i mask_565_fix_g;
-/* ----------------------------------------------------------------------
- * SSE2 Inlines
- */
static force_inline __m128i
unpack_32_1x128 (uint32_t data)
{
@@ -397,146 +370,104 @@ save_128_unaligned (__m128i* dst,
_mm_storeu_si128 (dst, data);
}
-/* ------------------------------------------------------------------
- * MMX inlines
- */
-
-static force_inline __m64
-load_32_1x64 (uint32_t data)
-{
- return _mm_cvtsi32_si64 (data);
-}
-
-static force_inline __m64
-unpack_32_1x64 (uint32_t data)
-{
- return _mm_unpacklo_pi8 (load_32_1x64 (data), _mm_setzero_si64 ());
-}
-
-static force_inline __m64
-expand_alpha_1x64 (__m64 data)
+static force_inline __m128i
+load_32_1x128 (uint32_t data)
{
- return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 3, 3, 3));
+ return _mm_cvtsi32_si128 (data);
}
-static force_inline __m64
-expand_alpha_rev_1x64 (__m64 data)
+static force_inline __m128i
+expand_alpha_rev_1x128 (__m128i data)
{
- return _mm_shuffle_pi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
+ return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
}
-static force_inline __m64
-expand_pixel_8_1x64 (uint8_t data)
+static force_inline __m128i
+expand_pixel_8_1x128 (uint8_t data)
{
- return _mm_shuffle_pi16 (
- unpack_32_1x64 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
+ return _mm_shufflelo_epi16 (
+ unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
}
-static force_inline __m64
-pix_multiply_1x64 (__m64 data,
- __m64 alpha)
+static force_inline __m128i
+pix_multiply_1x128 (__m128i data,
+ __m128i alpha)
{
- return _mm_mulhi_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (data, alpha),
- mask_x0080),
- mask_x0101);
+ return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha),
+ mask_0080),
+ mask_0101);
}
-static force_inline __m64
-pix_add_multiply_1x64 (__m64* src,
- __m64* alpha_dst,
- __m64* dst,
- __m64* alpha_src)
+static force_inline __m128i
+pix_add_multiply_1x128 (__m128i* src,
+ __m128i* alpha_dst,
+ __m128i* dst,
+ __m128i* alpha_src)
{
- __m64 t1 = pix_multiply_1x64 (*src, *alpha_dst);
- __m64 t2 = pix_multiply_1x64 (*dst, *alpha_src);
+ __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst);
+ __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src);
- return _mm_adds_pu8 (t1, t2);
+ return _mm_adds_epu8 (t1, t2);
}
-static force_inline __m64
-negate_1x64 (__m64 data)
+static force_inline __m128i
+negate_1x128 (__m128i data)
{
- return _mm_xor_si64 (data, mask_x00ff);
+ return _mm_xor_si128 (data, mask_00ff);
}
-static force_inline __m64
-invert_colors_1x64 (__m64 data)
+static force_inline __m128i
+invert_colors_1x128 (__m128i data)
{
- return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
+ return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
}
-static force_inline __m64
-over_1x64 (__m64 src, __m64 alpha, __m64 dst)
+static force_inline __m128i
+over_1x128 (__m128i src, __m128i alpha, __m128i dst)
{
- return _mm_adds_pu8 (src, pix_multiply_1x64 (dst, negate_1x64 (alpha)));
+ return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
}
-static force_inline __m64
-in_over_1x64 (__m64* src, __m64* alpha, __m64* mask, __m64* dst)
+static force_inline __m128i
+in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
{
- return over_1x64 (pix_multiply_1x64 (*src, *mask),
- pix_multiply_1x64 (*alpha, *mask),
- *dst);
+ return over_1x128 (pix_multiply_1x128 (*src, *mask),
+ pix_multiply_1x128 (*alpha, *mask),
+ *dst);
}
-static force_inline __m64
-over_rev_non_pre_1x64 (__m64 src, __m64 dst)
+static force_inline __m128i
+over_rev_non_pre_1x128 (__m128i src, __m128i dst)
{
- __m64 alpha = expand_alpha_1x64 (src);
+ __m128i alpha = expand_alpha_1x128 (src);
- return over_1x64 (pix_multiply_1x64 (invert_colors_1x64 (src),
- _mm_or_si64 (alpha, mask_x_alpha)),
- alpha,
- dst);
+ return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src),
+ _mm_or_si128 (alpha, mask_alpha)),
+ alpha,
+ dst);
}
static force_inline uint32_t
-pack_1x64_32 (__m64 data)
+pack_1x128_32 (__m128i data)
{
- return _mm_cvtsi64_si32 (_mm_packs_pu16 (data, _mm_setzero_si64 ()));
+ return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
}
-/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
- *
- * 00RR00GG00BB
- *
- * --- Expanding 565 in the low word ---
- *
- * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
- * m = m & (01f0003f001f);
- * m = m * (008404100840);
- * m = m >> 8;
- *
- * Note the trick here - the top word is shifted by another nibble to
- * avoid it bumping into the middle word
- */
-static force_inline __m64
-expand565_16_1x64 (uint16_t pixel)
+static force_inline __m128i
+expand565_16_1x128 (uint16_t pixel)
{
- __m64 p;
- __m64 t1, t2;
+ __m128i m = _mm_cvtsi32_si128 (pixel);
- p = _mm_cvtsi32_si64 ((uint32_t) pixel);
+ m = unpack_565_to_8888 (m);
- t1 = _mm_slli_si64 (p, 36 - 11);
- t2 = _mm_slli_si64 (p, 16 - 5);
-
- p = _mm_or_si64 (t1, p);
- p = _mm_or_si64 (t2, p);
- p = _mm_and_si64 (p, mask_x565_rgb);
- p = _mm_mullo_pi16 (p, mask_x565_unpack);
-
- return _mm_srli_pi16 (p, 8);
+ return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
}
-/* ----------------------------------------------------------------------------
- * Compose Core transformations
- */
static force_inline uint32_t
core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
{
uint8_t a;
- __m64 ms;
+ __m128i xmms;
a = src >> 24;
@@ -546,9 +477,10 @@ core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
}
else if (src)
{
- ms = unpack_32_1x64 (src);
- return pack_1x64_32 (
- over_1x64 (ms, expand_alpha_1x64 (ms), unpack_32_1x64 (dst)));
+ xmms = unpack_32_1x128 (src);
+ return pack_1x128_32 (
+ over_1x128 (xmms, expand_alpha_1x128 (xmms),
+ unpack_32_1x128 (dst)));
}
return dst;
@@ -561,15 +493,15 @@ combine1 (const uint32_t *ps, const uint32_t *pm)
if (pm)
{
- __m64 ms, mm;
+ __m128i ms, mm;
- mm = unpack_32_1x64 (*pm);
- mm = expand_alpha_1x64 (mm);
+ mm = unpack_32_1x128 (*pm);
+ mm = expand_alpha_1x128 (mm);
- ms = unpack_32_1x64 (s);
- ms = pix_multiply_1x64 (ms, mm);
+ ms = unpack_32_1x128 (s);
+ ms = pix_multiply_1x128 (ms, mm);
- s = pack_1x64_32 (ms);
+ s = pack_1x128_32 (ms);
}
return s;
@@ -766,10 +698,12 @@ core_combine_over_u_sse2_no_mask (uint32_t * pd,
}
static force_inline void
-core_combine_over_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+sse2_combine_over_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
if (pm)
core_combine_over_u_sse2_mask (pd, ps, pm, w);
@@ -777,11 +711,13 @@ core_combine_over_u_sse2 (uint32_t* pd,
core_combine_over_u_sse2_no_mask (pd, ps, w);
}
-static force_inline void
-core_combine_over_reverse_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_over_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
@@ -847,7 +783,7 @@ core_combine_over_reverse_u_sse2 (uint32_t* pd,
}
static force_inline uint32_t
-core_combine_in_u_pixelsse2 (uint32_t src, uint32_t dst)
+core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst)
{
uint32_t maska = src >> 24;
@@ -857,19 +793,21 @@ core_combine_in_u_pixelsse2 (uint32_t src, uint32_t dst)
}
else if (maska != 0xff)
{
- return pack_1x64_32 (
- pix_multiply_1x64 (unpack_32_1x64 (dst),
- expand_alpha_1x64 (unpack_32_1x64 (src))));
+ return pack_1x128_32 (
+ pix_multiply_1x128 (unpack_32_1x128 (dst),
+ expand_alpha_1x128 (unpack_32_1x128 (src))));
}
return dst;
}
-static force_inline void
-core_combine_in_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_in_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
@@ -881,7 +819,7 @@ core_combine_in_u_sse2 (uint32_t* pd,
s = combine1 (ps, pm);
d = *pd;
- *pd++ = core_combine_in_u_pixelsse2 (d, s);
+ *pd++ = core_combine_in_u_pixel_sse2 (d, s);
w--;
ps++;
if (pm)
@@ -916,7 +854,7 @@ core_combine_in_u_sse2 (uint32_t* pd,
s = combine1 (ps, pm);
d = *pd;
- *pd++ = core_combine_in_u_pixelsse2 (d, s);
+ *pd++ = core_combine_in_u_pixel_sse2 (d, s);
w--;
ps++;
if (pm)
@@ -924,11 +862,13 @@ core_combine_in_u_sse2 (uint32_t* pd,
}
}
-static force_inline void
-core_combine_reverse_in_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_in_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
@@ -940,7 +880,7 @@ core_combine_reverse_in_u_sse2 (uint32_t* pd,
s = combine1 (ps, pm);
d = *pd;
- *pd++ = core_combine_in_u_pixelsse2 (s, d);
+ *pd++ = core_combine_in_u_pixel_sse2 (s, d);
ps++;
w--;
if (pm)
@@ -975,7 +915,7 @@ core_combine_reverse_in_u_sse2 (uint32_t* pd,
s = combine1 (ps, pm);
d = *pd;
- *pd++ = core_combine_in_u_pixelsse2 (s, d);
+ *pd++ = core_combine_in_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
@@ -983,21 +923,23 @@ core_combine_reverse_in_u_sse2 (uint32_t* pd,
}
}
-static force_inline void
-core_combine_reverse_out_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_out_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
while (w && ((unsigned long) pd & 15))
{
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- unpack_32_1x64 (d), negate_1x64 (
- expand_alpha_1x64 (unpack_32_1x64 (s)))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d), negate_1x128 (
+ expand_alpha_1x128 (unpack_32_1x128 (s)))));
if (pm)
pm++;
@@ -1039,10 +981,10 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd,
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- unpack_32_1x64 (d), negate_1x64 (
- expand_alpha_1x64 (unpack_32_1x64 (s)))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d), negate_1x128 (
+ expand_alpha_1x128 (unpack_32_1x128 (s)))));
ps++;
if (pm)
pm++;
@@ -1050,21 +992,23 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd,
}
}
-static force_inline void
-core_combine_out_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_out_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
while (w && ((unsigned long) pd & 15))
{
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- unpack_32_1x64 (s), negate_1x64 (
- expand_alpha_1x64 (unpack_32_1x64 (d)))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), negate_1x128 (
+ expand_alpha_1x128 (unpack_32_1x128 (d)))));
w--;
ps++;
if (pm)
@@ -1104,10 +1048,10 @@ core_combine_out_u_sse2 (uint32_t* pd,
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- unpack_32_1x64 (s), negate_1x64 (
- expand_alpha_1x64 (unpack_32_1x64 (d)))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), negate_1x128 (
+ expand_alpha_1x128 (unpack_32_1x128 (d)))));
w--;
ps++;
if (pm)
@@ -1119,20 +1063,22 @@ static force_inline uint32_t
core_combine_atop_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
- __m64 s = unpack_32_1x64 (src);
- __m64 d = unpack_32_1x64 (dst);
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
- __m64 sa = negate_1x64 (expand_alpha_1x64 (s));
- __m64 da = expand_alpha_1x64 (d);
+ __m128i sa = negate_1x128 (expand_alpha_1x128 (s));
+ __m128i da = expand_alpha_1x128 (d);
- return pack_1x64_32 (pix_add_multiply_1x64 (&s, &da, &d, &sa));
+ return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
}
-static force_inline void
-core_combine_atop_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_atop_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
@@ -1201,20 +1147,22 @@ static force_inline uint32_t
core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
- __m64 s = unpack_32_1x64 (src);
- __m64 d = unpack_32_1x64 (dst);
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
- __m64 sa = expand_alpha_1x64 (s);
- __m64 da = negate_1x64 (expand_alpha_1x64 (d));
+ __m128i sa = expand_alpha_1x128 (s);
+ __m128i da = negate_1x128 (expand_alpha_1x128 (d));
- return pack_1x64_32 (pix_add_multiply_1x64 (&s, &da, &d, &sa));
+ return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
}
-static force_inline void
-core_combine_reverse_atop_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
@@ -1283,20 +1231,22 @@ static force_inline uint32_t
core_combine_xor_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
- __m64 s = unpack_32_1x64 (src);
- __m64 d = unpack_32_1x64 (dst);
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
- __m64 neg_d = negate_1x64 (expand_alpha_1x64 (d));
- __m64 neg_s = negate_1x64 (expand_alpha_1x64 (s));
+ __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d));
+ __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s));
- return pack_1x64_32 (pix_add_multiply_1x64 (&s, &neg_d, &d, &neg_s));
+ return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
}
-static force_inline void
-core_combine_xor_u_sse2 (uint32_t* dst,
- const uint32_t* src,
- const uint32_t *mask,
- int width)
+static void
+sse2_combine_xor_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dst,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
{
int w = width;
uint32_t s, d;
@@ -1368,10 +1318,12 @@ core_combine_xor_u_sse2 (uint32_t* dst,
}
static force_inline void
-core_combine_add_u_sse2 (uint32_t* dst,
- const uint32_t* src,
- const uint32_t* mask,
- int width)
+sse2_combine_add_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dst,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
{
int w = width;
uint32_t s, d;
@@ -1387,8 +1339,8 @@ core_combine_add_u_sse2 (uint32_t* dst,
ps++;
if (pm)
pm++;
- *pd++ = _mm_cvtsi64_si32 (
- _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
+ *pd++ = _mm_cvtsi128_si32 (
+ _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
w--;
}
@@ -1414,8 +1366,8 @@ core_combine_add_u_sse2 (uint32_t* dst,
d = *pd;
ps++;
- *pd++ = _mm_cvtsi64_si32 (
- _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
+ *pd++ = _mm_cvtsi128_si32 (
+ _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
if (pm)
pm++;
}
@@ -1425,25 +1377,27 @@ static force_inline uint32_t
core_combine_saturate_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
- __m64 ms = unpack_32_1x64 (src);
- __m64 md = unpack_32_1x64 (dst);
+ __m128i ms = unpack_32_1x128 (src);
+ __m128i md = unpack_32_1x128 (dst);
uint32_t sa = src >> 24;
uint32_t da = ~dst >> 24;
if (sa > da)
{
- ms = pix_multiply_1x64 (
- ms, expand_alpha_1x64 (unpack_32_1x64 (DIV_UN8 (da, sa) << 24)));
+ ms = pix_multiply_1x128 (
+ ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24)));
}
- return pack_1x64_32 (_mm_adds_pu16 (md, ms));
+ return pack_1x128_32 (_mm_adds_epu16 (md, ms));
}
-static force_inline void
-core_combine_saturate_u_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_saturate_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
@@ -1524,11 +1478,13 @@ core_combine_saturate_u_sse2 (uint32_t * pd,
}
}
-static force_inline void
-core_combine_src_ca_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_src_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m;
@@ -1540,8 +1496,8 @@ core_combine_src_ca_sse2 (uint32_t* pd,
{
s = *ps++;
m = *pm++;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
w--;
}
@@ -1570,8 +1526,8 @@ core_combine_src_ca_sse2 (uint32_t* pd,
{
s = *ps++;
m = *pm++;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
w--;
}
}
@@ -1581,19 +1537,21 @@ core_combine_over_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
- __m64 s = unpack_32_1x64 (src);
- __m64 expAlpha = expand_alpha_1x64 (s);
- __m64 unpk_mask = unpack_32_1x64 (mask);
- __m64 unpk_dst = unpack_32_1x64 (dst);
+ __m128i s = unpack_32_1x128 (src);
+ __m128i expAlpha = expand_alpha_1x128 (s);
+ __m128i unpk_mask = unpack_32_1x128 (mask);
+ __m128i unpk_dst = unpack_32_1x128 (dst);
- return pack_1x64_32 (in_over_1x64 (&s, &expAlpha, &unpk_mask, &unpk_dst));
+ return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst));
}
-static force_inline void
-core_combine_over_ca_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_over_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
@@ -1655,19 +1613,21 @@ core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
- __m64 d = unpack_32_1x64 (dst);
+ __m128i d = unpack_32_1x128 (dst);
- return pack_1x64_32 (
- over_1x64 (d, expand_alpha_1x64 (d),
- pix_multiply_1x64 (unpack_32_1x64 (src),
- unpack_32_1x64 (mask))));
+ return pack_1x128_32 (
+ over_1x128 (d, expand_alpha_1x128 (d),
+ pix_multiply_1x128 (unpack_32_1x128 (src),
+ unpack_32_1x128 (mask))));
}
-static force_inline void
-core_combine_over_reverse_ca_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
@@ -1726,11 +1686,13 @@ core_combine_over_reverse_ca_sse2 (uint32_t* pd,
}
}
-static force_inline void
-core_combine_in_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_in_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
@@ -1745,10 +1707,10 @@ core_combine_in_ca_sse2 (uint32_t * pd,
m = *pm++;
d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)),
- expand_alpha_1x64 (unpack_32_1x64 (d))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)),
+ expand_alpha_1x128 (unpack_32_1x128 (d))));
w--;
}
@@ -1789,21 +1751,23 @@ core_combine_in_ca_sse2 (uint32_t * pd,
m = *pm++;
d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- pix_multiply_1x64 (
- unpack_32_1x64 (s), unpack_32_1x64 (m)),
- expand_alpha_1x64 (unpack_32_1x64 (d))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (m)),
+ expand_alpha_1x128 (unpack_32_1x128 (d))));
w--;
}
}
-static force_inline void
-core_combine_in_reverse_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
@@ -1818,11 +1782,11 @@ core_combine_in_reverse_ca_sse2 (uint32_t * pd,
m = *pm++;
d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- unpack_32_1x64 (d),
- pix_multiply_1x64 (unpack_32_1x64 (m),
- expand_alpha_1x64 (unpack_32_1x64 (s)))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d),
+ pix_multiply_1x128 (unpack_32_1x128 (m),
+ expand_alpha_1x128 (unpack_32_1x128 (s)))));
w--;
}
@@ -1861,20 +1825,22 @@ core_combine_in_reverse_ca_sse2 (uint32_t * pd,
m = *pm++;
d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- unpack_32_1x64 (d),
- pix_multiply_1x64 (unpack_32_1x64 (m),
- expand_alpha_1x64 (unpack_32_1x64 (s)))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d),
+ pix_multiply_1x128 (unpack_32_1x128 (m),
+ expand_alpha_1x128 (unpack_32_1x128 (s)))));
w--;
}
}
-static force_inline void
-core_combine_out_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_out_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
@@ -1889,11 +1855,11 @@ core_combine_out_ca_sse2 (uint32_t * pd,
m = *pm++;
d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- pix_multiply_1x64 (
- unpack_32_1x64 (s), unpack_32_1x64 (m)),
- negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (m)),
+ negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
w--;
}
@@ -1934,21 +1900,23 @@ core_combine_out_ca_sse2 (uint32_t * pd,
m = *pm++;
d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- pix_multiply_1x64 (
- unpack_32_1x64 (s), unpack_32_1x64 (m)),
- negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (m)),
+ negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
w--;
}
}
-static force_inline void
-core_combine_out_reverse_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
@@ -1963,12 +1931,12 @@ core_combine_out_reverse_ca_sse2 (uint32_t * pd,
m = *pm++;
d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- unpack_32_1x64 (d),
- negate_1x64 (pix_multiply_1x64 (
- unpack_32_1x64 (m),
- expand_alpha_1x64 (unpack_32_1x64 (s))))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d),
+ negate_1x128 (pix_multiply_1x128 (
+ unpack_32_1x128 (m),
+ expand_alpha_1x128 (unpack_32_1x128 (s))))));
w--;
}
@@ -2011,12 +1979,12 @@ core_combine_out_reverse_ca_sse2 (uint32_t * pd,
m = *pm++;
d = *pd;
- *pd++ = pack_1x64_32 (
- pix_multiply_1x64 (
- unpack_32_1x64 (d),
- negate_1x64 (pix_multiply_1x64 (
- unpack_32_1x64 (m),
- expand_alpha_1x64 (unpack_32_1x64 (s))))));
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d),
+ negate_1x128 (pix_multiply_1x128 (
+ unpack_32_1x128 (m),
+ expand_alpha_1x128 (unpack_32_1x128 (s))))));
w--;
}
}
@@ -2026,23 +1994,25 @@ core_combine_atop_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
- __m64 m = unpack_32_1x64 (mask);
- __m64 s = unpack_32_1x64 (src);
- __m64 d = unpack_32_1x64 (dst);
- __m64 sa = expand_alpha_1x64 (s);
- __m64 da = expand_alpha_1x64 (d);
+ __m128i m = unpack_32_1x128 (mask);
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
+ __m128i sa = expand_alpha_1x128 (s);
+ __m128i da = expand_alpha_1x128 (d);
- s = pix_multiply_1x64 (s, m);
- m = negate_1x64 (pix_multiply_1x64 (m, sa));
+ s = pix_multiply_1x128 (s, m);
+ m = negate_1x128 (pix_multiply_1x128 (m, sa));
- return pack_1x64_32 (pix_add_multiply_1x64 (&d, &m, &s, &da));
+ return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
}
-static force_inline void
-core_combine_atop_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_atop_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
@@ -2116,24 +2086,26 @@ core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
- __m64 m = unpack_32_1x64 (mask);
- __m64 s = unpack_32_1x64 (src);
- __m64 d = unpack_32_1x64 (dst);
+ __m128i m = unpack_32_1x128 (mask);
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
- __m64 da = negate_1x64 (expand_alpha_1x64 (d));
- __m64 sa = expand_alpha_1x64 (s);
+ __m128i da = negate_1x128 (expand_alpha_1x128 (d));
+ __m128i sa = expand_alpha_1x128 (s);
- s = pix_multiply_1x64 (s, m);
- m = pix_multiply_1x64 (m, sa);
+ s = pix_multiply_1x128 (s, m);
+ m = pix_multiply_1x128 (m, sa);
- return pack_1x64_32 (pix_add_multiply_1x64 (&d, &m, &s, &da));
+ return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
}
-static force_inline void
-core_combine_reverse_atop_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
@@ -2208,26 +2180,28 @@ core_combine_xor_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
- __m64 a = unpack_32_1x64 (mask);
- __m64 s = unpack_32_1x64 (src);
- __m64 d = unpack_32_1x64 (dst);
+ __m128i a = unpack_32_1x128 (mask);
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
- __m64 alpha_dst = negate_1x64 (pix_multiply_1x64 (
- a, expand_alpha_1x64 (s)));
- __m64 dest = pix_multiply_1x64 (s, a);
- __m64 alpha_src = negate_1x64 (expand_alpha_1x64 (d));
+ __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 (
+ a, expand_alpha_1x128 (s)));
+ __m128i dest = pix_multiply_1x128 (s, a);
+ __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d));
- return pack_1x64_32 (pix_add_multiply_1x64 (&d,
+ return pack_1x128_32 (pix_add_multiply_1x128 (&d,
&alpha_dst,
&dest,
&alpha_src));
}
-static force_inline void
-core_combine_xor_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_xor_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
@@ -2299,11 +2273,13 @@ core_combine_xor_ca_sse2 (uint32_t * pd,
}
}
-static force_inline void
-core_combine_add_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_add_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
@@ -2317,10 +2293,10 @@ core_combine_add_ca_sse2 (uint32_t * pd,
m = *pm++;
d = *pd;
- *pd++ = pack_1x64_32 (
- _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
- unpack_32_1x64 (m)),
- unpack_32_1x64 (d)));
+ *pd++ = pack_1x128_32 (
+ _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
+ unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
w--;
}
@@ -2355,36 +2331,20 @@ core_combine_add_ca_sse2 (uint32_t * pd,
m = *pm++;
d = *pd;
- *pd++ = pack_1x64_32 (
- _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
- unpack_32_1x64 (m)),
- unpack_32_1x64 (d)));
+ *pd++ = pack_1x128_32 (
+ _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
+ unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
w--;
}
}
-/* ---------------------------------------------------
- * fb_compose_setup_sSE2
- */
-static force_inline __m64
-create_mask_16_64 (uint16_t mask)
-{
- return _mm_set1_pi16 (mask);
-}
-
static force_inline __m128i
create_mask_16_128 (uint16_t mask)
{
return _mm_set1_epi16 (mask);
}
-static force_inline __m64
-create_mask_2x32_64 (uint32_t mask0,
- uint32_t mask1)
-{
- return _mm_set_pi32 (mask0, mask1);
-}
-
/* Work around a code generation bug in Sun Studio 12. */
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
# define create_mask_2x32_128(mask0, mask1) \
@@ -2398,276 +2358,6 @@ create_mask_2x32_128 (uint32_t mask0,
}
#endif
-/* SSE2 code patch for fbcompose.c */
-
-static void
-sse2_combine_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_over_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_over_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_over_reverse_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_in_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_reverse_in_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_out_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_reverse_out_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_atop_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_reverse_atop_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_xor_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_add_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_add_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_saturate_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_saturate_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_src_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_src_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_over_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_over_reverse_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_in_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_in_reverse_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_out_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_out_reverse_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_atop_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_reverse_atop_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_xor_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_add_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_add_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-/* -------------------------------------------------------------------
- * composite_over_n_8888
- */
-
static void
sse2_composite_over_n_8888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -2711,9 +2401,9 @@ sse2_composite_over_n_8888 (pixman_implementation_t *imp,
while (w && (unsigned long)dst & 15)
{
d = *dst;
- *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
- _mm_movepi64_pi64 (xmm_alpha),
- unpack_32_1x64 (d)));
+ *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
+ xmm_alpha,
+ unpack_32_1x128 (d)));
w--;
}
@@ -2738,19 +2428,15 @@ sse2_composite_over_n_8888 (pixman_implementation_t *imp,
while (w)
{
d = *dst;
- *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
- _mm_movepi64_pi64 (xmm_alpha),
- unpack_32_1x64 (d)));
+ *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
+ xmm_alpha,
+ unpack_32_1x128 (d)));
w--;
}
}
- _mm_empty ();
}
-/* ---------------------------------------------------------------------
- * composite_over_n_0565
- */
static void
sse2_composite_over_n_0565 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -2796,9 +2482,9 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp,
d = *dst;
*dst++ = pack_565_32_16 (
- pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
- _mm_movepi64_pi64 (xmm_alpha),
- expand565_16_1x64 (d))));
+ pack_1x128_32 (over_1x128 (xmm_src,
+ xmm_alpha,
+ expand565_16_1x128 (d))));
w--;
}
@@ -2829,18 +2515,13 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp,
{
d = *dst;
*dst++ = pack_565_32_16 (
- pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
- _mm_movepi64_pi64 (xmm_alpha),
- expand565_16_1x64 (d))));
+ pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha,
+ expand565_16_1x128 (d))));
}
}
- _mm_empty ();
}
-/* ------------------------------
- * composite_add_n_8888_8888_ca
- */
static void
sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_op_t op,
@@ -2866,7 +2547,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
__m128i xmm_dst;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+ __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
srca = src >> 24;
@@ -2882,8 +2563,8 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
xmm_src = _mm_unpacklo_epi8 (
create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
xmm_alpha = expand_alpha_1x128 (xmm_src);
- mmx_src = _mm_movepi64_pi64 (xmm_src);
- mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
+ mmx_src = xmm_src;
+ mmx_alpha = xmm_alpha;
while (height--)
{
@@ -2902,11 +2583,12 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
{
d = *pd;
- mmx_mask = unpack_32_1x64 (m);
- mmx_dest = unpack_32_1x64 (d);
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
- *pd = pack_1x64_32 (
- _mm_adds_pu8 (pix_multiply_1x64 (mmx_mask, mmx_src), mmx_dest));
+ *pd = pack_1x128_32 (
+ _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
+ mmx_dest));
}
pd++;
@@ -2950,11 +2632,12 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
{
d = *pd;
- mmx_mask = unpack_32_1x64 (m);
- mmx_dest = unpack_32_1x64 (d);
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
- *pd = pack_1x64_32 (
- _mm_adds_pu8 (pix_multiply_1x64 (mmx_mask, mmx_src), mmx_dest));
+ *pd = pack_1x128_32 (
+ _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
+ mmx_dest));
}
pd++;
@@ -2962,13 +2645,8 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/* ---------------------------------------------------------------------------
- * composite_over_n_8888_8888_ca
- */
-
static void
sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_op_t op,
@@ -2994,7 +2672,7 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+ __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
@@ -3009,8 +2687,8 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
xmm_src = _mm_unpacklo_epi8 (
create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
xmm_alpha = expand_alpha_1x128 (xmm_src);
- mmx_src = _mm_movepi64_pi64 (xmm_src);
- mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
+ mmx_src = xmm_src;
+ mmx_alpha = xmm_alpha;
while (height--)
{
@@ -3028,10 +2706,10 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
if (m)
{
d = *pd;
- mmx_mask = unpack_32_1x64 (m);
- mmx_dest = unpack_32_1x64 (d);
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
- *pd = pack_1x64_32 (in_over_1x64 (&mmx_src,
+ *pd = pack_1x128_32 (in_over_1x128 (&mmx_src,
&mmx_alpha,
&mmx_mask,
&mmx_dest));
@@ -3078,11 +2756,11 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
if (m)
{
d = *pd;
- mmx_mask = unpack_32_1x64 (m);
- mmx_dest = unpack_32_1x64 (d);
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
- *pd = pack_1x64_32 (
- in_over_1x64 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
+ *pd = pack_1x128_32 (
+ in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
}
pd++;
@@ -3090,13 +2768,8 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/*---------------------------------------------------------------------
- * composite_over_8888_n_8888
- */
-
static void
sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -3148,13 +2821,13 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
{
uint32_t d = *dst;
- __m64 ms = unpack_32_1x64 (s);
- __m64 alpha = expand_alpha_1x64 (ms);
- __m64 dest = _mm_movepi64_pi64 (xmm_mask);
- __m64 alpha_dst = unpack_32_1x64 (d);
+ __m128i ms = unpack_32_1x128 (s);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i dest = xmm_mask;
+ __m128i alpha_dst = unpack_32_1x128 (d);
- *dst = pack_1x64_32 (
- in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
+ *dst = pack_1x128_32 (
+ in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
}
dst++;
w--;
@@ -3195,13 +2868,13 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
{
uint32_t d = *dst;
- __m64 ms = unpack_32_1x64 (s);
- __m64 alpha = expand_alpha_1x64 (ms);
- __m64 mask = _mm_movepi64_pi64 (xmm_mask);
- __m64 dest = unpack_32_1x64 (d);
+ __m128i ms = unpack_32_1x128 (s);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i mask = xmm_mask;
+ __m128i dest = unpack_32_1x128 (d);
- *dst = pack_1x64_32 (
- in_over_1x64 (&ms, &alpha, &mask, &dest));
+ *dst = pack_1x128_32 (
+ in_over_1x128 (&ms, &alpha, &mask, &dest));
}
dst++;
@@ -3209,13 +2882,8 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/*---------------------------------------------------------------------
- * composite_over_8888_n_8888
- */
-
static void
sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -3282,12 +2950,8 @@ sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/* ---------------------------------------------------------------------
- * composite_over_x888_n_8888
- */
static void
sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -3336,13 +3000,13 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
uint32_t s = (*src++) | 0xff000000;
uint32_t d = *dst;
- __m64 src = unpack_32_1x64 (s);
- __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
- __m64 mask = _mm_movepi64_pi64 (xmm_mask);
- __m64 dest = unpack_32_1x64 (d);
+ __m128i src = unpack_32_1x128 (s);
+ __m128i alpha = xmm_alpha;
+ __m128i mask = xmm_mask;
+ __m128i dest = unpack_32_1x128 (d);
- *dst++ = pack_1x64_32 (
- in_over_1x64 (&src, &alpha, &mask, &dest));
+ *dst++ = pack_1x128_32 (
+ in_over_1x128 (&src, &alpha, &mask, &dest));
w--;
}
@@ -3375,24 +3039,20 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
uint32_t s = (*src++) | 0xff000000;
uint32_t d = *dst;
- __m64 src = unpack_32_1x64 (s);
- __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
- __m64 mask = _mm_movepi64_pi64 (xmm_mask);
- __m64 dest = unpack_32_1x64 (d);
+ __m128i src = unpack_32_1x128 (s);
+ __m128i alpha = xmm_alpha;
+ __m128i mask = xmm_mask;
+ __m128i dest = unpack_32_1x128 (d);
- *dst++ = pack_1x64_32 (
- in_over_1x64 (&src, &alpha, &mask, &dest));
+ *dst++ = pack_1x128_32 (
+ in_over_1x128 (&src, &alpha, &mask, &dest));
w--;
}
}
- _mm_empty ();
}
-/* --------------------------------------------------------------------
- * composite_over_8888_8888
- */
static void
sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -3422,27 +3082,23 @@ sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
while (height--)
{
- core_combine_over_u_sse2 (dst, src, NULL, width);
+ sse2_combine_over_u (imp, op, dst, src, NULL, width);
dst += dst_stride;
src += src_stride;
}
- _mm_empty ();
}
-/* ------------------------------------------------------------------
- * composite_over_8888_0565
- */
static force_inline uint16_t
composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
{
- __m64 ms;
+ __m128i ms;
- ms = unpack_32_1x64 (src);
+ ms = unpack_32_1x128 (src);
return pack_565_32_16 (
- pack_1x64_32 (
- over_1x64 (
- ms, expand_alpha_1x64 (ms), expand565_16_1x64 (dst))));
+ pack_1x128_32 (
+ over_1x128 (
+ ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst))));
}
static void
@@ -3474,15 +3130,6 @@ sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-#if 0
- /* FIXME
- *
- * I copy the code from MMX one and keep the fixme.
- * If it's a problem there, probably is a problem here.
- */
- assert (src_image->drawable == mask_image->drawable);
-#endif
-
while (height--)
{
dst = dst_line;
@@ -3555,13 +3202,8 @@ sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/* -----------------------------------------------------------------
- * composite_over_n_8_8888
- */
-
static void
sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -3588,7 +3230,7 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+ __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
@@ -3604,8 +3246,8 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
xmm_def = create_mask_2x32_128 (src, src);
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
- mmx_src = _mm_movepi64_pi64 (xmm_src);
- mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
+ mmx_src = xmm_src;
+ mmx_alpha = xmm_alpha;
while (height--)
{
@@ -3622,10 +3264,10 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
if (m)
{
d = *dst;
- mmx_mask = expand_pixel_8_1x64 (m);
- mmx_dest = unpack_32_1x64 (d);
+ mmx_mask = expand_pixel_8_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
- *dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
+ *dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
&mmx_alpha,
&mmx_mask,
&mmx_dest));
@@ -3677,10 +3319,10 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
if (m)
{
d = *dst;
- mmx_mask = expand_pixel_8_1x64 (m);
- mmx_dest = unpack_32_1x64 (d);
+ mmx_mask = expand_pixel_8_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
- *dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
+ *dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
&mmx_alpha,
&mmx_mask,
&mmx_dest));
@@ -3691,14 +3333,9 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/* ----------------------------------------------------------------
- * composite_over_n_8_8888
- */
-
-pixman_bool_t
+static pixman_bool_t
pixman_fill_sse2 (uint32_t *bits,
int stride,
int bpp,
@@ -3845,7 +3482,6 @@ pixman_fill_sse2 (uint32_t *bits,
}
}
- _mm_empty ();
return TRUE;
}
@@ -3907,9 +3543,8 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
if (m)
{
- *dst = pack_1x64_32 (
- pix_multiply_1x64 (
- _mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
+ *dst = pack_1x128_32 (
+ pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)));
}
else
{
@@ -3962,9 +3597,9 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
if (m)
{
- *dst = pack_1x64_32 (
- pix_multiply_1x64 (
- _mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
+ *dst = pack_1x128_32 (
+ pix_multiply_1x128 (
+ xmm_src, expand_pixel_8_1x128 (m)));
}
else
{
@@ -3976,13 +3611,8 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/*-----------------------------------------------------------------------
- * composite_over_n_8_0565
- */
-
static void
sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -4004,7 +3634,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
int dst_stride, mask_stride;
int32_t w;
uint32_t m;
- __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+ __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
__m128i xmm_src, xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
@@ -4023,8 +3653,8 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
- mmx_src = _mm_movepi64_pi64 (xmm_src);
- mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
+ mmx_src = xmm_src;
+ mmx_alpha = xmm_alpha;
while (height--)
{
@@ -4041,12 +3671,12 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
if (m)
{
d = *dst;
- mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
- mmx_dest = expand565_16_1x64 (d);
+ mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+ mmx_dest = expand565_16_1x128 (d);
*dst = pack_565_32_16 (
- pack_1x64_32 (
- in_over_1x64 (
+ pack_1x128_32 (
+ in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
@@ -4114,12 +3744,12 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
if (m)
{
d = *dst;
- mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
- mmx_dest = expand565_16_1x64 (d);
+ mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+ mmx_dest = expand565_16_1x128 (d);
*dst = pack_565_32_16 (
- pack_1x64_32 (
- in_over_1x64 (
+ pack_1x128_32 (
+ in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
@@ -4128,13 +3758,8 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/* -----------------------------------------------------------------------
- * composite_over_pixbuf_0565
- */
-
static void
sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -4156,7 +3781,7 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
int32_t w;
uint32_t opaque, zero;
- __m64 ms;
+ __m128i ms;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
@@ -4165,15 +3790,6 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-#if 0
- /* FIXME
- *
- * I copy the code from MMX one and keep the fixme.
- * If it's a problem there, probably is a problem here.
- */
- assert (src_image->drawable == mask_image->drawable);
-#endif
-
while (height--)
{
dst = dst_line;
@@ -4187,11 +3803,11 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
s = *src++;
d = *dst;
- ms = unpack_32_1x64 (s);
+ ms = unpack_32_1x128 (s);
*dst++ = pack_565_32_16 (
- pack_1x64_32 (
- over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d))));
+ pack_1x128_32 (
+ over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
w--;
}
@@ -4253,22 +3869,17 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
s = *src++;
d = *dst;
- ms = unpack_32_1x64 (s);
+ ms = unpack_32_1x128 (s);
*dst++ = pack_565_32_16 (
- pack_1x64_32 (
- over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d))));
+ pack_1x128_32 (
+ over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
w--;
}
}
- _mm_empty ();
}
-/* -------------------------------------------------------------------------
- * composite_over_pixbuf_8888
- */
-
static void
sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -4298,15 +3909,6 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-#if 0
- /* FIXME
- *
- * I copy the code from MMX one and keep the fixme.
- * If it's a problem there, probably is a problem here.
- */
- assert (src_image->drawable == mask_image->drawable);
-#endif
-
while (height--)
{
dst = dst_line;
@@ -4320,9 +3922,9 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
s = *src++;
d = *dst;
- *dst++ = pack_1x64_32 (
- over_rev_non_pre_1x64 (
- unpack_32_1x64 (s), unpack_32_1x64 (d)));
+ *dst++ = pack_1x128_32 (
+ over_rev_non_pre_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (d)));
w--;
}
@@ -4367,21 +3969,16 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
s = *src++;
d = *dst;
- *dst++ = pack_1x64_32 (
- over_rev_non_pre_1x64 (
- unpack_32_1x64 (s), unpack_32_1x64 (d)));
+ *dst++ = pack_1x128_32 (
+ over_rev_non_pre_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (d)));
w--;
}
}
- _mm_empty ();
}
-/* -------------------------------------------------------------------------------------------------
- * composite_over_n_8888_0565_ca
- */
-
static void
sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
pixman_op_t op,
@@ -4408,7 +4005,7 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
- __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+ __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
@@ -4422,8 +4019,8 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
- mmx_src = _mm_movepi64_pi64 (xmm_src);
- mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
+ mmx_src = xmm_src;
+ mmx_alpha = xmm_alpha;
while (height--)
{
@@ -4440,12 +4037,12 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
if (m)
{
d = *dst;
- mmx_mask = unpack_32_1x64 (m);
- mmx_dest = expand565_16_1x64 (d);
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = expand565_16_1x128 (d);
*dst = pack_565_32_16 (
- pack_1x64_32 (
- in_over_1x64 (
+ pack_1x128_32 (
+ in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
@@ -4509,12 +4106,12 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
if (m)
{
d = *dst;
- mmx_mask = unpack_32_1x64 (m);
- mmx_dest = expand565_16_1x64 (d);
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = expand565_16_1x128 (d);
*dst = pack_565_32_16 (
- pack_1x64_32 (
- in_over_1x64 (
+ pack_1x128_32 (
+ in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
@@ -4524,13 +4121,8 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/* -----------------------------------------------------------------------
- * composite_in_n_8_8
- */
-
static void
sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -4582,11 +4174,11 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
- *dst++ = (uint8_t) pack_1x64_32 (
- pix_multiply_1x64 (
- pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_alpha),
- unpack_32_1x64 (m)),
- unpack_32_1x64 (d)));
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (xmm_alpha,
+ unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
w--;
}
@@ -4619,22 +4211,17 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
- *dst++ = (uint8_t) pack_1x64_32 (
- pix_multiply_1x64 (
- pix_multiply_1x64 (
- _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
- unpack_32_1x64 (d)));
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (
+ xmm_alpha, unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
w--;
}
}
- _mm_empty ();
}
-/* -----------------------------------------------------------------------
- * composite_in_n_8
- */
-
static void
sse2_composite_in_n_8 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -4689,10 +4276,10 @@ sse2_composite_in_n_8 (pixman_implementation_t *imp,
{
d = (uint32_t) *dst;
- *dst++ = (uint8_t) pack_1x64_32 (
- pix_multiply_1x64 (
- _mm_movepi64_pi64 (xmm_alpha),
- unpack_32_1x64 (d)));
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (
+ xmm_alpha,
+ unpack_32_1x128 (d)));
w--;
}
@@ -4717,21 +4304,16 @@ sse2_composite_in_n_8 (pixman_implementation_t *imp,
{
d = (uint32_t) *dst;
- *dst++ = (uint8_t) pack_1x64_32 (
- pix_multiply_1x64 (
- _mm_movepi64_pi64 (xmm_alpha),
- unpack_32_1x64 (d)));
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (
+ xmm_alpha,
+ unpack_32_1x128 (d)));
w--;
}
}
- _mm_empty ();
}
-/* ---------------------------------------------------------------------------
- * composite_in_8_8
- */
-
static void
sse2_composite_in_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -4774,9 +4356,9 @@ sse2_composite_in_8_8 (pixman_implementation_t *imp,
s = (uint32_t) *src++;
d = (uint32_t) *dst;
- *dst++ = (uint8_t) pack_1x64_32 (
- pix_multiply_1x64 (
- unpack_32_1x64 (s), unpack_32_1x64 (d)));
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (d)));
w--;
}
@@ -4805,19 +4387,14 @@ sse2_composite_in_8_8 (pixman_implementation_t *imp,
s = (uint32_t) *src++;
d = (uint32_t) *dst;
- *dst++ = (uint8_t) pack_1x64_32 (
- pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (d)));
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d)));
w--;
}
}
- _mm_empty ();
}
-/* -------------------------------------------------------------------------
- * composite_add_n_8_8
- */
-
static void
sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -4869,11 +4446,11 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
- *dst++ = (uint8_t) pack_1x64_32 (
- _mm_adds_pu16 (
- pix_multiply_1x64 (
- _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
- unpack_32_1x64 (d)));
+ *dst++ = (uint8_t) pack_1x128_32 (
+ _mm_adds_epu16 (
+ pix_multiply_1x128 (
+ xmm_alpha, unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
w--;
}
@@ -4905,23 +4482,18 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
- *dst++ = (uint8_t) pack_1x64_32 (
- _mm_adds_pu16 (
- pix_multiply_1x64 (
- _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
- unpack_32_1x64 (d)));
+ *dst++ = (uint8_t) pack_1x128_32 (
+ _mm_adds_epu16 (
+ pix_multiply_1x128 (
+ xmm_alpha, unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
w--;
}
}
- _mm_empty ();
}
-/* -------------------------------------------------------------------------
- * composite_add_n_8_8
- */
-
static void
sse2_composite_add_n_8 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -4973,10 +4545,10 @@ sse2_composite_add_n_8 (pixman_implementation_t *imp,
while (w && ((unsigned long)dst & 15))
{
- *dst = (uint8_t)_mm_cvtsi64_si32 (
- _mm_adds_pu8 (
- _mm_movepi64_pi64 (xmm_src),
- _mm_cvtsi32_si64 (*dst)));
+ *dst = (uint8_t)_mm_cvtsi128_si32 (
+ _mm_adds_epu8 (
+ xmm_src,
+ _mm_cvtsi32_si128 (*dst)));
w--;
dst++;
@@ -4993,23 +4565,18 @@ sse2_composite_add_n_8 (pixman_implementation_t *imp,
while (w)
{
- *dst = (uint8_t)_mm_cvtsi64_si32 (
- _mm_adds_pu8 (
- _mm_movepi64_pi64 (xmm_src),
- _mm_cvtsi32_si64 (*dst)));
+ *dst = (uint8_t)_mm_cvtsi128_si32 (
+ _mm_adds_epu8 (
+ xmm_src,
+ _mm_cvtsi32_si128 (*dst)));
w--;
dst++;
}
}
- _mm_empty ();
}
-/* ----------------------------------------------------------------------
- * composite_add_8_8
- */
-
static void
sse2_composite_add_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -5053,7 +4620,8 @@ sse2_composite_add_8_8 (pixman_implementation_t *imp,
w--;
}
- core_combine_add_u_sse2 ((uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
+ sse2_combine_add_u (imp, op,
+ (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
/* Small tail */
dst += w & 0xfffc;
@@ -5069,12 +4637,8 @@ sse2_composite_add_8_8 (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/* ---------------------------------------------------------------------
- * composite_add_8888_8888
- */
static void
sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -5106,16 +4670,11 @@ sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
src = src_line;
src_line += src_stride;
- core_combine_add_u_sse2 (dst, src, NULL, width);
+ sse2_combine_add_u (imp, op, dst, src, NULL, width);
}
- _mm_empty ();
}
-/* -------------------------------------------------------------------------------------------------
- * sse2_composite_copy_area
- */
-
static pixman_bool_t
pixman_blt_sse2 (uint32_t *src_bits,
uint32_t *dst_bits,
@@ -5234,7 +4793,6 @@ pixman_blt_sse2 (uint32_t *src_bits,
}
}
- _mm_empty ();
return TRUE;
}
@@ -5284,7 +4842,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
uint32_t m;
int src_stride, mask_stride, dst_stride;
int32_t w;
- __m64 ms;
+ __m128i ms;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
@@ -5313,24 +4871,25 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
s = 0xff000000 | *src++;
m = (uint32_t) *mask++;
d = *dst;
- ms = unpack_32_1x64 (s);
+ ms = unpack_32_1x128 (s);
if (m != 0xff)
{
- __m64 ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
- __m64 md = unpack_32_1x64 (d);
+ __m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+ __m128i md = unpack_32_1x128 (d);
- ms = in_over_1x64 (&ms, &mask_x00ff, &ma, &md);
+ ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md);
}
- *dst++ = pack_1x64_32 (ms);
+ *dst++ = pack_1x128_32 (ms);
w--;
}
while (w >= 4)
{
m = *(uint32_t*) mask;
- xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
+ xmm_src = _mm_or_si128 (
+ load_128_unaligned ((__m128i*)src), mask_ff000000);
if (m == 0xffffffff)
{
@@ -5346,9 +4905,12 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
- expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+ expand_alpha_rev_2x128 (
+ xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
- in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
@@ -5373,15 +4935,15 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
}
else
{
- __m64 ma, md, ms;
+ __m128i ma, md, ms;
d = *dst;
- ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
- md = unpack_32_1x64 (d);
- ms = unpack_32_1x64 (s);
+ ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+ md = unpack_32_1x128 (d);
+ ms = unpack_32_1x128 (s);
- *dst = pack_1x64_32 (in_over_1x64 (&ms, &mask_x00ff, &ma, &md));
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md));
}
}
@@ -5392,7 +4954,6 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
static void
@@ -5457,15 +5018,15 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
}
else
{
- __m64 ms, md, ma, msa;
+ __m128i ms, md, ma, msa;
- ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
- ms = unpack_32_1x64 (s);
- md = unpack_32_1x64 (d);
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (s);
+ md = unpack_32_1x128 (d);
- msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
- *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
@@ -5529,15 +5090,15 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
}
else
{
- __m64 ms, md, ma, msa;
+ __m128i ms, md, ma, msa;
- ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
- ms = unpack_32_1x64 (s);
- md = unpack_32_1x64 (d);
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (s);
+ md = unpack_32_1x128 (d);
- msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
- *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
@@ -5546,7 +5107,6 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
static void
@@ -5591,12 +5151,12 @@ sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
while (w && (unsigned long)dst & 15)
{
- __m64 vd;
+ __m128i vd;
- vd = unpack_32_1x64 (*dst);
+ vd = unpack_32_1x128 (*dst);
- *dst = pack_1x64_32 (over_1x64 (vd, expand_alpha_1x64 (vd),
- _mm_movepi64_pi64 (xmm_src)));
+ *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
+ xmm_src));
w--;
dst++;
}
@@ -5626,19 +5186,18 @@ sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
while (w)
{
- __m64 vd;
+ __m128i vd;
- vd = unpack_32_1x64 (*dst);
+ vd = unpack_32_1x128 (*dst);
- *dst = pack_1x64_32 (over_1x64 (vd, expand_alpha_1x64 (vd),
- _mm_movepi64_pi64 (xmm_src)));
+ *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
+ xmm_src));
w--;
dst++;
}
}
- _mm_empty ();
}
static void
@@ -5703,15 +5262,15 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
}
else
{
- __m64 ms, md, ma, msa;
+ __m128i ms, md, ma, msa;
- ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
- ms = unpack_32_1x64 (s);
- md = unpack_32_1x64 (d);
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (s);
+ md = unpack_32_1x128 (d);
- msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
- *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
@@ -5773,15 +5332,15 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
}
else
{
- __m64 ms, md, ma, msa;
+ __m128i ms, md, ma, msa;
- ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
- ms = unpack_32_1x64 (s);
- md = unpack_32_1x64 (d);
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (s);
+ md = unpack_32_1x128 (d);
- msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
- *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
@@ -5790,10 +5349,9 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
}
}
- _mm_empty ();
}
-/* A variant of 'core_combine_over_u_sse2' with minor tweaks */
+/* A variant of 'sse2_combine_over_u' with minor tweaks */
static force_inline void
scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
const uint32_t* ps,
@@ -5885,7 +5443,6 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
w--;
}
- _mm_empty ();
}
FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
@@ -5927,13 +5484,13 @@ scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
{
uint32_t d = *dst;
- __m64 ms = unpack_32_1x64 (s);
- __m64 alpha = expand_alpha_1x64 (ms);
- __m64 dest = _mm_movepi64_pi64 (xmm_mask);
- __m64 alpha_dst = unpack_32_1x64 (d);
+ __m128i ms = unpack_32_1x128 (s);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i dest = xmm_mask;
+ __m128i alpha_dst = unpack_32_1x128 (d);
- *dst = pack_1x64_32 (
- in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
+ *dst = pack_1x128_32 (
+ in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
}
dst++;
w--;
@@ -5985,20 +5542,19 @@ scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
{
uint32_t d = *dst;
- __m64 ms = unpack_32_1x64 (s);
- __m64 alpha = expand_alpha_1x64 (ms);
- __m64 mask = _mm_movepi64_pi64 (xmm_mask);
- __m64 dest = unpack_32_1x64 (d);
+ __m128i ms = unpack_32_1x128 (s);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i mask = xmm_mask;
+ __m128i dest = unpack_32_1x128 (d);
- *dst = pack_1x64_32 (
- in_over_1x64 (&ms, &alpha, &mask, &dest));
+ *dst = pack_1x128_32 (
+ in_over_1x128 (&ms, &alpha, &mask, &dest));
}
dst++;
w--;
}
- _mm_empty ();
}
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
@@ -6374,20 +5930,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
- /* MMX constants */
- mask_x565_rgb = create_mask_2x32_64 (0x000001f0, 0x003f001f);
- mask_x565_unpack = create_mask_2x32_64 (0x00000084, 0x04100840);
-
- mask_x0080 = create_mask_16_64 (0x0080);
- mask_x00ff = create_mask_16_64 (0x00ff);
- mask_x0101 = create_mask_16_64 (0x0101);
- mask_x_alpha = create_mask_2x32_64 (0x00ff0000, 0x00000000);
-
- _mm_empty ();
-
/* Set up function pointers */
-
- /* SSE code patch for fbcompose.c */
imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
@@ -6420,5 +5963,3 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
return imp;
}
-
-#endif /* USE_SSE2 */