diff options
Diffstat (limited to 'pixman/pixman/pixman-mmx.c')
-rw-r--r-- | pixman/pixman/pixman-mmx.c | 370 |
1 files changed, 189 insertions, 181 deletions
diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c index 8fd85776d..bcfeb56f0 100644 --- a/pixman/pixman/pixman-mmx.c +++ b/pixman/pixman/pixman-mmx.c @@ -353,9 +353,16 @@ static __inline__ uint32_t ldl_u(uint32_t *p) } static force_inline __m64 -load8888 (uint32_t v) +load8888 (const uint32_t *v) { - return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ()); + return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (*v), _mm_setzero_si64 ()); +} + +static force_inline __m64 +load8888u (const uint32_t *v) +{ + uint32_t l = ldl_u(v); + return load8888(&l); } static force_inline __m64 @@ -364,15 +371,12 @@ pack8888 (__m64 lo, __m64 hi) return _mm_packs_pu16 (lo, hi); } -#ifdef _MSC_VER -#define store8888(v) _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ())) -#else -static force_inline uint32_t -store8888 (__m64 v) +static force_inline void +store8888 (uint32_t *dest, __m64 v) { - return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ())); + v = pack8888 (v, _mm_setzero_si64()); + *dest = _mm_cvtsi64_si32 (v); } -#endif /* Expand 16 bits positioned at @pos (0-3) of a mmx register into * @@ -475,13 +479,6 @@ pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b) /* --------------- MMX code patch for fbcompose.c --------------------- */ -#ifdef _MSC_VER -#define combine(src, mask) \ - ((mask) ? 
\ - store8888 (pix_multiply (load8888 (*src), expand_alpha (load8888 (*mask)))) \ - : \ - *src) -#else static force_inline uint32_t combine (const uint32_t *src, const uint32_t *mask) { @@ -489,18 +486,17 @@ combine (const uint32_t *src, const uint32_t *mask) if (mask) { - __m64 m = load8888 (*mask); - __m64 s = load8888 (ssrc); + __m64 m = load8888 (mask); + __m64 s = load8888 (&ssrc); m = expand_alpha (m); s = pix_multiply (s, m); - ssrc = store8888 (s); + store8888 (&ssrc, s); } return ssrc; } -#endif static void mmx_combine_over_u (pixman_implementation_t *imp, @@ -524,9 +520,9 @@ mmx_combine_over_u (pixman_implementation_t *imp, else if (ssrc) { __m64 s, sa; - s = load8888 (ssrc); + s = load8888 (&ssrc); sa = expand_alpha (s); - *dest = store8888 (over (s, sa, load8888 (*dest))); + store8888 (dest, over (s, sa, load8888 (dest))); } ++dest; @@ -552,9 +548,9 @@ mmx_combine_over_reverse_u (pixman_implementation_t *imp, __m64 d, da; uint32_t s = combine (src, mask); - d = load8888 (*dest); + d = load8888 (dest); da = expand_alpha (d); - *dest = store8888 (over (d, da, load8888 (s))); + store8888 (dest, over (d, da, load8888 (&s))); ++dest; ++src; @@ -577,13 +573,14 @@ mmx_combine_in_u (pixman_implementation_t *imp, while (dest < end) { __m64 x, a; + uint32_t ssrc = combine (src, mask); - x = load8888 (combine (src, mask)); - a = load8888 (*dest); + x = load8888 (&ssrc); + a = load8888 (dest); a = expand_alpha (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -606,12 +603,13 @@ mmx_combine_in_reverse_u (pixman_implementation_t *imp, while (dest < end) { __m64 x, a; + uint32_t ssrc = combine (src, mask); - x = load8888 (*dest); - a = load8888 (combine (src, mask)); + x = load8888 (dest); + a = load8888 (&ssrc); a = expand_alpha (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -634,13 +632,14 @@ mmx_combine_out_u (pixman_implementation_t *imp, while (dest < end) { __m64 x, a; 
+ uint32_t ssrc = combine (src, mask); - x = load8888 (combine (src, mask)); - a = load8888 (*dest); + x = load8888 (&ssrc); + a = load8888 (dest); a = expand_alpha (a); a = negate (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -663,14 +662,15 @@ mmx_combine_out_reverse_u (pixman_implementation_t *imp, while (dest < end) { __m64 x, a; + uint32_t ssrc = combine (src, mask); - x = load8888 (*dest); - a = load8888 (combine (src, mask)); + x = load8888 (dest); + a = load8888 (&ssrc); a = expand_alpha (a); a = negate (a); x = pix_multiply (x, a); - *dest = store8888 (x); + store8888 (dest, x); ++dest; ++src; @@ -693,14 +693,15 @@ mmx_combine_atop_u (pixman_implementation_t *imp, while (dest < end) { __m64 s, da, d, sia; + uint32_t ssrc = combine (src, mask); - s = load8888 (combine (src, mask)); - d = load8888 (*dest); + s = load8888 (&ssrc); + d = load8888 (dest); sia = expand_alpha (s); sia = negate (sia); da = expand_alpha (d); s = pix_add_mul (s, da, d, sia); - *dest = store8888 (s); + store8888 (dest, s); ++dest; ++src; @@ -725,14 +726,15 @@ mmx_combine_atop_reverse_u (pixman_implementation_t *imp, while (dest < end) { __m64 s, dia, d, sa; + uint32_t ssrc = combine (src, mask); - s = load8888 (combine (src, mask)); - d = load8888 (*dest); + s = load8888 (&ssrc); + d = load8888 (dest); sa = expand_alpha (s); dia = expand_alpha (d); dia = negate (dia); s = pix_add_mul (s, dia, d, sa); - *dest = store8888 (s); + store8888 (dest, s); ++dest; ++src; @@ -755,15 +757,16 @@ mmx_combine_xor_u (pixman_implementation_t *imp, while (dest < end) { __m64 s, dia, d, sia; + uint32_t ssrc = combine (src, mask); - s = load8888 (combine (src, mask)); - d = load8888 (*dest); + s = load8888 (&ssrc); + d = load8888 (dest); sia = expand_alpha (s); dia = expand_alpha (d); sia = negate (sia); dia = negate (dia); s = pix_add_mul (s, dia, d, sia); - *dest = store8888 (s); + store8888 (dest, s); ++dest; ++src; @@ -786,11 +789,12 @@ 
mmx_combine_add_u (pixman_implementation_t *imp, while (dest < end) { __m64 s, d; + uint32_t ssrc = combine (src, mask); - s = load8888 (combine (src, mask)); - d = load8888 (*dest); + s = load8888 (&ssrc); + d = load8888 (dest); s = pix_add (s, d); - *dest = store8888 (s); + store8888 (dest, s); ++dest; ++src; @@ -814,20 +818,21 @@ mmx_combine_saturate_u (pixman_implementation_t *imp, { uint32_t s = combine (src, mask); uint32_t d = *dest; - __m64 ms = load8888 (s); - __m64 md = load8888 (d); + __m64 ms = load8888 (&s); + __m64 md = load8888 (&d); uint32_t sa = s >> 24; uint32_t da = ~d >> 24; if (sa > da) { - __m64 msa = load8888 (DIV_UN8 (da, sa) << 24); + uint32_t quot = DIV_UN8 (da, sa) << 24; + __m64 msa = load8888 (&quot); msa = expand_alpha (msa); ms = pix_multiply (ms, msa); } md = pix_add (md, ms); - *dest = store8888 (md); + store8888 (dest, md); ++src; ++dest; @@ -849,11 +854,11 @@ mmx_combine_src_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); s = pix_multiply (s, a); - *dest = store8888 (s); + store8888 (dest, s); ++src; ++mask; @@ -874,12 +879,12 @@ mmx_combine_over_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 sa = expand_alpha (s); - *dest = store8888 (in_over (s, sa, a, d)); + store8888 (dest, in_over (s, sa, a, d)); ++src; ++dest; @@ -900,12 +905,12 @@ mmx_combine_over_reverse_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); - *dest = store8888 (over (d, da, in (s, a))); + store8888 (dest, over (d, da, in (s, a))); ++src; ++dest; @@ -926,14 
+931,14 @@ mmx_combine_in_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); s = pix_multiply (s, a); s = pix_multiply (s, da); - *dest = store8888 (s); + store8888 (dest, s); ++src; ++dest; @@ -954,14 +959,14 @@ mmx_combine_in_reverse_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 sa = expand_alpha (s); a = pix_multiply (a, sa); d = pix_multiply (d, a); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -982,15 +987,15 @@ mmx_combine_out_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); da = negate (da); s = pix_multiply (s, a); s = pix_multiply (s, da); - *dest = store8888 (s); + store8888 (dest, s); ++src; ++dest; @@ -1011,15 +1016,15 @@ mmx_combine_out_reverse_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 sa = expand_alpha (s); a = pix_multiply (a, sa); a = negate (a); d = pix_multiply (d, a); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1040,9 +1045,9 @@ mmx_combine_atop_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); __m64 sa = expand_alpha 
(s); @@ -1050,7 +1055,7 @@ mmx_combine_atop_ca (pixman_implementation_t *imp, a = pix_multiply (a, sa); a = negate (a); d = pix_add_mul (d, a, s, da); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1071,9 +1076,9 @@ mmx_combine_atop_reverse_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); __m64 sa = expand_alpha (s); @@ -1081,7 +1086,7 @@ mmx_combine_atop_reverse_ca (pixman_implementation_t *imp, a = pix_multiply (a, sa); da = negate (da); d = pix_add_mul (d, a, s, da); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1102,9 +1107,9 @@ mmx_combine_xor_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); __m64 da = expand_alpha (d); __m64 sa = expand_alpha (s); @@ -1113,7 +1118,7 @@ mmx_combine_xor_ca (pixman_implementation_t *imp, da = negate (da); a = negate (a); d = pix_add_mul (d, a, s, da); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1134,13 +1139,13 @@ mmx_combine_add_ca (pixman_implementation_t *imp, while (src < end) { - __m64 a = load8888 (*mask); - __m64 s = load8888 (*src); - __m64 d = load8888 (*dest); + __m64 a = load8888 (mask); + __m64 s = load8888 (src); + __m64 d = load8888 (dest); s = pix_multiply (s, a); d = pix_add (s, d); - *dest = store8888 (d); + store8888 (dest, d); ++src; ++dest; @@ -1171,7 +1176,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1184,7 +1189,7 @@ mmx_composite_over_n_8888 
(pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - *dst = store8888 (over (vsrc, vsrca, load8888 (*dst))); + store8888 (dst, over (vsrc, vsrca, load8888 (dst))); w--; dst++; @@ -1210,7 +1215,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp, if (w) { - *dst = store8888 (over (vsrc, vsrca, load8888 (*dst))); + store8888 (dst, over (vsrc, vsrca, load8888 (dst))); } } @@ -1237,7 +1242,7 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1316,7 +1321,7 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1331,9 +1336,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, if (m) { - __m64 vdest = load8888 (*q); - vdest = in_over (vsrc, vsrca, load8888 (m), vdest); - *q = store8888 (vdest); + __m64 vdest = load8888 (q); + vdest = in_over (vsrc, vsrca, load8888 (&m), vdest); + store8888 (q, vdest); } twidth--; @@ -1352,9 +1357,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, __m64 dest0, dest1; __m64 vdest = *(__m64 *)q; - dest0 = in_over (vsrc, vsrca, load8888 (m0), + dest0 = in_over (vsrc, vsrca, load8888 (&m0), expand8888 (vdest, 0)); - dest1 = in_over (vsrc, vsrca, load8888 (m1), + dest1 = in_over (vsrc, vsrca, load8888 (&m1), expand8888 (vdest, 1)); *(__m64 *)q = pack8888 (dest0, dest1); @@ -1371,9 +1376,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, if (m) { - __m64 vdest = load8888 (*q); - vdest = in_over (vsrc, vsrca, load8888 (m), vdest); - *q = store8888 (vdest); + __m64 vdest = load8888 (q); + 
vdest = in_over (vsrc, vsrca, load8888 (&m), vdest); + store8888 (q, vdest); } twidth--; @@ -1408,7 +1413,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask &= 0xff000000; mask = mask | mask >> 8 | mask >> 16 | mask >> 24; - vmask = load8888 (mask); + vmask = load8888 (&mask); while (height--) { @@ -1420,10 +1425,10 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (in_over (s, expand_alpha (s), vmask, d)); + store8888 (dst, in_over (s, expand_alpha (s), vmask, d)); w--; dst++; @@ -1448,10 +1453,10 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, if (w) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (in_over (s, expand_alpha (s), vmask, d)); + store8888 (dst, in_over (s, expand_alpha (s), vmask, d)); } } @@ -1479,7 +1484,7 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, mask &= 0xff000000; mask = mask | mask >> 8 | mask >> 16 | mask >> 24; - vmask = load8888 (mask); + vmask = load8888 (&mask); srca = MC (4x00ff); while (height--) @@ -1492,10 +1497,11 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - __m64 s = load8888 (*src | 0xff000000); - __m64 d = load8888 (*dst); + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); + __m64 d = load8888 (dst); - *dst = store8888 (in_over (s, srca, vmask, d)); + store8888 (dst, in_over (s, srca, vmask, d)); w--; dst++; @@ -1570,10 +1576,11 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, while (w) { - __m64 s = load8888 (*src | 0xff000000); - __m64 d = load8888 (*dst); + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); + __m64 d = 
load8888 (dst); - *dst = store8888 (in_over (s, srca, vmask, d)); + store8888 (dst, in_over (s, srca, vmask, d)); w--; dst++; @@ -1621,9 +1628,9 @@ mmx_composite_over_8888_8888 (pixman_implementation_t *imp, else if (s) { __m64 ms, sa; - ms = load8888 (s); + ms = load8888 (&s); sa = expand_alpha (ms); - *dst = store8888 (over (ms, sa, load8888 (*dst))); + store8888 (dst, over (ms, sa, load8888 (dst))); } dst++; @@ -1664,7 +1671,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -1685,10 +1692,10 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, __m64 vsrc0, vsrc1, vsrc2, vsrc3; __m64 vdest; - vsrc0 = load8888 (*(src + 0)); - vsrc1 = load8888 (*(src + 1)); - vsrc2 = load8888 (*(src + 2)); - vsrc3 = load8888 (*(src + 3)); + vsrc0 = load8888 ((src + 0)); + vsrc1 = load8888 ((src + 1)); + vsrc2 = load8888 ((src + 2)); + vsrc3 = load8888 ((src + 3)); vdest = *(__m64 *)dst; @@ -1708,7 +1715,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, while (w) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -1751,7 +1758,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -1772,9 +1779,9 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, { __m64 vdest = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m)), - load8888 (*dst)); + load8888 (dst)); - *dst = store8888 (vdest); + store8888 (dst, vdest); } w--; @@ -1823,11 +1830,11 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, if (m) { - __m64 vdest = 
load8888 (*dst); + __m64 vdest = load8888 (dst); vdest = in_over ( vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest); - *dst = store8888 (vdest); + store8888 (dst, vdest); } } } @@ -2016,7 +2023,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); while (height--) { @@ -2036,7 +2043,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, { __m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m))); - *dst = store8888 (vdest); + store8888 (dst, vdest); } else { @@ -2087,10 +2094,10 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, if (m) { - __m64 vdest = load8888 (*dst); + __m64 vdest = load8888 (dst); vdest = in (vsrc, expand_alpha_rev (to_m64 (m))); - *dst = store8888 (vdest); + store8888 (dst, vdest); } else { @@ -2126,7 +2133,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0); @@ -2265,7 +2272,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -2298,10 +2305,10 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, if ((a0 & a1 & a2 & a3) == 0xFF) { __m64 vdest; - vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0); - vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1); - vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2); - vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3); + 
vdest = pack_565 (invert_colors (load8888 (&s0)), _mm_setzero_si64 (), 0); + vdest = pack_565 (invert_colors (load8888 (&s1)), vdest, 1); + vdest = pack_565 (invert_colors (load8888 (&s2)), vdest, 2); + vdest = pack_565 (invert_colors (load8888 (&s3)), vdest, 3); *(__m64 *)dst = vdest; } @@ -2309,10 +2316,10 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)dst; - vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (over_rev_non_pre (load8888 (s2), expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3); + vdest = pack_565 (over_rev_non_pre (load8888 (&s0), expand565 (vdest, 0)), vdest, 0); + vdest = pack_565 (over_rev_non_pre (load8888 (&s1), expand565 (vdest, 1)), vdest, 1); + vdest = pack_565 (over_rev_non_pre (load8888 (&s2), expand565 (vdest, 2)), vdest, 2); + vdest = pack_565 (over_rev_non_pre (load8888 (&s3), expand565 (vdest, 3)), vdest, 3); *(__m64 *)dst = vdest; } @@ -2326,7 +2333,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, while (w) { - __m64 vsrc = load8888 (*src); + __m64 vsrc = load8888 (src); uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -2373,10 +2380,10 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, while (w && (unsigned long)dst & 7) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (over_rev_non_pre (s, d)); + store8888 (dst, over_rev_non_pre (s, d)); w--; dst++; @@ -2385,7 +2392,7 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, while (w >= 2) { - uint64_t s0, s1; + uint32_t s0, s1; unsigned char a0, a1; __m64 d0, d1; @@ -2397,8 +2404,8 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, if ((a0 & a1) == 0xFF) { - d0 = invert_colors 
(load8888 (s0)); - d1 = invert_colors (load8888 (s1)); + d0 = invert_colors (load8888 (&s0)); + d1 = invert_colors (load8888 (&s1)); *(__m64 *)dst = pack8888 (d0, d1); } @@ -2406,8 +2413,8 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)dst; - d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0)); - d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1)); + d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0)); + d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1)); *(__m64 *)dst = pack8888 (d0, d1); } @@ -2419,10 +2426,10 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, if (w) { - __m64 s = load8888 (*src); - __m64 d = load8888 (*dst); + __m64 s = load8888 (src); + __m64 d = load8888 (dst); - *dst = store8888 (over_rev_non_pre (s, d)); + store8888 (dst, over_rev_non_pre (s, d)); } } @@ -2450,7 +2457,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -2467,7 +2474,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, { uint64_t d = *q; __m64 vdest = expand565 (to_m64 (d), 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0); *q = to_uint64 (vdest); } @@ -2489,10 +2496,10 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)q; - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2); - vdest = pack_565 (in_over (vsrc, vsrca, 
load8888 (m3), expand565 (vdest, 3)), vdest, 3); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m0), expand565 (vdest, 0)), vdest, 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m1), expand565 (vdest, 1)), vdest, 1); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m2), expand565 (vdest, 2)), vdest, 2); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m3), expand565 (vdest, 3)), vdest, 3); *(__m64 *)q = vdest; } @@ -2510,7 +2517,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, { uint64_t d = *q; __m64 vdest = expand565 (to_m64 (d), 0); - vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0); *q = to_uint64 (vdest); } @@ -2546,7 +2553,7 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp, sa = src >> 24; - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -2578,10 +2585,10 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp, __m64 vmask; __m64 vdest; - vmask = load8888 (ldl_u((uint32_t *)mask)); - vdest = load8888 (*(uint32_t *)dst); + vmask = load8888u ((uint32_t *)mask); + vdest = load8888 ((uint32_t *)dst); - *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest)); + store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest)); dst += 4; mask += 4; @@ -2648,7 +2655,7 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp, uint32_t *s = (uint32_t *)src; uint32_t *d = (uint32_t *)dst; - *d = store8888 (in (load8888 (ldl_u((uint32_t *)s)), load8888 (*d))); + store8888 (d, in (load8888u (s), load8888 (d))); w -= 4; dst += 4; @@ -2696,7 +2703,7 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp, if (src == 0) return; - vsrc = load8888 (src); + vsrc = load8888 (&src); vsrca = expand_alpha (vsrc); while (height--) @@ -2729,10 +2736,10 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp, __m64 vmask; __m64 vdest; - vmask = load8888 (ldl_u((uint32_t *)mask)); - vdest 
= load8888 (*(uint32_t *)dst); + vmask = load8888u ((uint32_t *)mask); + vdest = load8888 ((uint32_t *)dst); - *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest)); + store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest)); dst += 4; mask += 4; @@ -3073,19 +3080,20 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp, if (m) { - __m64 s = load8888 (*src | 0xff000000); + uint32_t ssrc = *src | 0xff000000; + __m64 s = load8888 (&ssrc); if (m == 0xff) { - *dst = store8888 (s); + store8888 (dst, s); } else { __m64 sa = expand_alpha (s); __m64 vm = expand_alpha_rev (to_m64 (m)); - __m64 vdest = in_over (s, sa, vm, load8888 (*dst)); + __m64 vdest = in_over (s, sa, vm, load8888 (dst)); - *dst = store8888 (vdest); + store8888 (dst, vdest); } } |