Diffstat (limited to 'pixman/pixman/pixman-mmx.c')
-rw-r--r--  pixman/pixman/pixman-mmx.c  370
1 file changed, 189 insertions(+), 181 deletions(-)
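
The hunks below convert load8888() and store8888() from pass-by-value to pointer arguments and add load8888u() for unaligned loads, removing the #ifdef _MSC_VER macro fallbacks in the process. A minimal standalone sketch of the new calling convention, assuming only <mmintrin.h>: the helper bodies mirror the hunks below, while main() and the _mm_empty() call are illustrative additions and not part of the patch.

#include <mmintrin.h>
#include <stdint.h>

/* load an a8r8g8b8 pixel through a pointer and widen each channel to 16 bits */
static __m64
load8888 (const uint32_t *v)
{
    return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (*v), _mm_setzero_si64 ());
}

/* pack four 16-bit channels back to 8 bits and write through the pointer */
static void
store8888 (uint32_t *dest, __m64 v)
{
    v = _mm_packs_pu16 (v, _mm_setzero_si64 ());
    *dest = _mm_cvtsi64_si32 (v);
}

int
main (void)
{
    uint32_t src = 0x80402010, dst = 0;

    store8888 (&dst, load8888 (&src)); /* callers now pass &pixel instead of the value */
    _mm_empty ();                      /* leave MMX state before returning */

    return dst == src ? 0 : 1;
}
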
diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c
index 8fd85776d..bcfeb56f0 100644
--- a/pixman/pixman/pixman-mmx.c
+++ b/pixman/pixman/pixman-mmx.c
@@ -353,9 +353,16 @@ static __inline__ uint32_t ldl_u(uint32_t *p)
}
static force_inline __m64
-load8888 (uint32_t v)
+load8888 (const uint32_t *v)
{
- return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ());
+ return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (*v), _mm_setzero_si64 ());
+}
+
+static force_inline __m64
+load8888u (const uint32_t *v)
+{
+ uint32_t l = ldl_u(v);
+ return load8888(&l);
}
static force_inline __m64
@@ -364,15 +371,12 @@ pack8888 (__m64 lo, __m64 hi)
return _mm_packs_pu16 (lo, hi);
}
-#ifdef _MSC_VER
-#define store8888(v) _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()))
-#else
-static force_inline uint32_t
-store8888 (__m64 v)
+static force_inline void
+store8888 (uint32_t *dest, __m64 v)
{
- return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()));
+ v = pack8888 (v, _mm_setzero_si64());
+ *dest = _mm_cvtsi64_si32 (v);
}
-#endif
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
*
@@ -475,13 +479,6 @@ pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
/* --------------- MMX code patch for fbcompose.c --------------------- */
-#ifdef _MSC_VER
-#define combine(src, mask) \
- ((mask) ? \
- store8888 (pix_multiply (load8888 (*src), expand_alpha (load8888 (*mask)))) \
- : \
- *src)
-#else
static force_inline uint32_t
combine (const uint32_t *src, const uint32_t *mask)
{
@@ -489,18 +486,17 @@ combine (const uint32_t *src, const uint32_t *mask)
if (mask)
{
- __m64 m = load8888 (*mask);
- __m64 s = load8888 (ssrc);
+ __m64 m = load8888 (mask);
+ __m64 s = load8888 (&ssrc);
m = expand_alpha (m);
s = pix_multiply (s, m);
- ssrc = store8888 (s);
+ store8888 (&ssrc, s);
}
return ssrc;
}
-#endif
static void
mmx_combine_over_u (pixman_implementation_t *imp,
@@ -524,9 +520,9 @@ mmx_combine_over_u (pixman_implementation_t *imp,
else if (ssrc)
{
__m64 s, sa;
- s = load8888 (ssrc);
+ s = load8888 (&ssrc);
sa = expand_alpha (s);
- *dest = store8888 (over (s, sa, load8888 (*dest)));
+ store8888 (dest, over (s, sa, load8888 (dest)));
}
++dest;
@@ -552,9 +548,9 @@ mmx_combine_over_reverse_u (pixman_implementation_t *imp,
__m64 d, da;
uint32_t s = combine (src, mask);
- d = load8888 (*dest);
+ d = load8888 (dest);
da = expand_alpha (d);
- *dest = store8888 (over (d, da, load8888 (s)));
+ store8888 (dest, over (d, da, load8888 (&s)));
++dest;
++src;
@@ -577,13 +573,14 @@ mmx_combine_in_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 x, a;
+ uint32_t ssrc = combine (src, mask);
- x = load8888 (combine (src, mask));
- a = load8888 (*dest);
+ x = load8888 (&ssrc);
+ a = load8888 (dest);
a = expand_alpha (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
@@ -606,12 +603,13 @@ mmx_combine_in_reverse_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 x, a;
+ uint32_t ssrc = combine (src, mask);
- x = load8888 (*dest);
- a = load8888 (combine (src, mask));
+ x = load8888 (dest);
+ a = load8888 (&ssrc);
a = expand_alpha (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
@@ -634,13 +632,14 @@ mmx_combine_out_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 x, a;
+ uint32_t ssrc = combine (src, mask);
- x = load8888 (combine (src, mask));
- a = load8888 (*dest);
+ x = load8888 (&ssrc);
+ a = load8888 (dest);
a = expand_alpha (a);
a = negate (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
@@ -663,14 +662,15 @@ mmx_combine_out_reverse_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 x, a;
+ uint32_t ssrc = combine (src, mask);
- x = load8888 (*dest);
- a = load8888 (combine (src, mask));
+ x = load8888 (dest);
+ a = load8888 (&ssrc);
a = expand_alpha (a);
a = negate (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
@@ -693,14 +693,15 @@ mmx_combine_atop_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 s, da, d, sia;
+ uint32_t ssrc = combine (src, mask);
- s = load8888 (combine (src, mask));
- d = load8888 (*dest);
+ s = load8888 (&ssrc);
+ d = load8888 (dest);
sia = expand_alpha (s);
sia = negate (sia);
da = expand_alpha (d);
s = pix_add_mul (s, da, d, sia);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
@@ -725,14 +726,15 @@ mmx_combine_atop_reverse_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 s, dia, d, sa;
+ uint32_t ssrc = combine (src, mask);
- s = load8888 (combine (src, mask));
- d = load8888 (*dest);
+ s = load8888 (&ssrc);
+ d = load8888 (dest);
sa = expand_alpha (s);
dia = expand_alpha (d);
dia = negate (dia);
s = pix_add_mul (s, dia, d, sa);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
@@ -755,15 +757,16 @@ mmx_combine_xor_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 s, dia, d, sia;
+ uint32_t ssrc = combine (src, mask);
- s = load8888 (combine (src, mask));
- d = load8888 (*dest);
+ s = load8888 (&ssrc);
+ d = load8888 (dest);
sia = expand_alpha (s);
dia = expand_alpha (d);
sia = negate (sia);
dia = negate (dia);
s = pix_add_mul (s, dia, d, sia);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
@@ -786,11 +789,12 @@ mmx_combine_add_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 s, d;
+ uint32_t ssrc = combine (src, mask);
- s = load8888 (combine (src, mask));
- d = load8888 (*dest);
+ s = load8888 (&ssrc);
+ d = load8888 (dest);
s = pix_add (s, d);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
@@ -814,20 +818,21 @@ mmx_combine_saturate_u (pixman_implementation_t *imp,
{
uint32_t s = combine (src, mask);
uint32_t d = *dest;
- __m64 ms = load8888 (s);
- __m64 md = load8888 (d);
+ __m64 ms = load8888 (&s);
+ __m64 md = load8888 (&d);
uint32_t sa = s >> 24;
uint32_t da = ~d >> 24;
if (sa > da)
{
- __m64 msa = load8888 (DIV_UN8 (da, sa) << 24);
+ uint32_t quot = DIV_UN8 (da, sa) << 24;
+ __m64 msa = load8888 (&quot);
msa = expand_alpha (msa);
ms = pix_multiply (ms, msa);
}
md = pix_add (md, ms);
- *dest = store8888 (md);
+ store8888 (dest, md);
++src;
++dest;
@@ -849,11 +854,11 @@ mmx_combine_src_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
s = pix_multiply (s, a);
- *dest = store8888 (s);
+ store8888 (dest, s);
++src;
++mask;
@@ -874,12 +879,12 @@ mmx_combine_over_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
- *dest = store8888 (in_over (s, sa, a, d));
+ store8888 (dest, in_over (s, sa, a, d));
++src;
++dest;
@@ -900,12 +905,12 @@ mmx_combine_over_reverse_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
- *dest = store8888 (over (d, da, in (s, a)));
+ store8888 (dest, over (d, da, in (s, a)));
++src;
++dest;
@@ -926,14 +931,14 @@ mmx_combine_in_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
s = pix_multiply (s, a);
s = pix_multiply (s, da);
- *dest = store8888 (s);
+ store8888 (dest, s);
++src;
++dest;
@@ -954,14 +959,14 @@ mmx_combine_in_reverse_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
a = pix_multiply (a, sa);
d = pix_multiply (d, a);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
@@ -982,15 +987,15 @@ mmx_combine_out_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
da = negate (da);
s = pix_multiply (s, a);
s = pix_multiply (s, da);
- *dest = store8888 (s);
+ store8888 (dest, s);
++src;
++dest;
@@ -1011,15 +1016,15 @@ mmx_combine_out_reverse_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
a = pix_multiply (a, sa);
a = negate (a);
d = pix_multiply (d, a);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
@@ -1040,9 +1045,9 @@ mmx_combine_atop_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
@@ -1050,7 +1055,7 @@ mmx_combine_atop_ca (pixman_implementation_t *imp,
a = pix_multiply (a, sa);
a = negate (a);
d = pix_add_mul (d, a, s, da);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
@@ -1071,9 +1076,9 @@ mmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
@@ -1081,7 +1086,7 @@ mmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
a = pix_multiply (a, sa);
da = negate (da);
d = pix_add_mul (d, a, s, da);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
@@ -1102,9 +1107,9 @@ mmx_combine_xor_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
@@ -1113,7 +1118,7 @@ mmx_combine_xor_ca (pixman_implementation_t *imp,
da = negate (da);
a = negate (a);
d = pix_add_mul (d, a, s, da);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
@@ -1134,13 +1139,13 @@ mmx_combine_add_ca (pixman_implementation_t *imp,
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
s = pix_multiply (s, a);
d = pix_add (s, d);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
@@ -1171,7 +1176,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
@@ -1184,7 +1189,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp,
while (w && (unsigned long)dst & 7)
{
- *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
+ store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
w--;
dst++;
@@ -1210,7 +1215,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp,
if (w)
{
- *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
+ store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
}
}
@@ -1237,7 +1242,7 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
@@ -1316,7 +1321,7 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
@@ -1331,9 +1336,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
if (m)
{
- __m64 vdest = load8888 (*q);
- vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
- *q = store8888 (vdest);
+ __m64 vdest = load8888 (q);
+ vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
+ store8888 (q, vdest);
}
twidth--;
@@ -1352,9 +1357,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
__m64 dest0, dest1;
__m64 vdest = *(__m64 *)q;
- dest0 = in_over (vsrc, vsrca, load8888 (m0),
+ dest0 = in_over (vsrc, vsrca, load8888 (&m0),
expand8888 (vdest, 0));
- dest1 = in_over (vsrc, vsrca, load8888 (m1),
+ dest1 = in_over (vsrc, vsrca, load8888 (&m1),
expand8888 (vdest, 1));
*(__m64 *)q = pack8888 (dest0, dest1);
@@ -1371,9 +1376,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
if (m)
{
- __m64 vdest = load8888 (*q);
- vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
- *q = store8888 (vdest);
+ __m64 vdest = load8888 (q);
+ vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
+ store8888 (q, vdest);
}
twidth--;
@@ -1408,7 +1413,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
mask &= 0xff000000;
mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
- vmask = load8888 (mask);
+ vmask = load8888 (&mask);
while (height--)
{
@@ -1420,10 +1425,10 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
while (w && (unsigned long)dst & 7)
{
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dst);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dst);
- *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
+ store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
w--;
dst++;
@@ -1448,10 +1453,10 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
if (w)
{
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dst);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dst);
- *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
+ store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
}
}
@@ -1479,7 +1484,7 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
mask &= 0xff000000;
mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
- vmask = load8888 (mask);
+ vmask = load8888 (&mask);
srca = MC (4x00ff);
while (height--)
@@ -1492,10 +1497,11 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
while (w && (unsigned long)dst & 7)
{
- __m64 s = load8888 (*src | 0xff000000);
- __m64 d = load8888 (*dst);
+ uint32_t ssrc = *src | 0xff000000;
+ __m64 s = load8888 (&ssrc);
+ __m64 d = load8888 (dst);
- *dst = store8888 (in_over (s, srca, vmask, d));
+ store8888 (dst, in_over (s, srca, vmask, d));
w--;
dst++;
@@ -1570,10 +1576,11 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
while (w)
{
- __m64 s = load8888 (*src | 0xff000000);
- __m64 d = load8888 (*dst);
+ uint32_t ssrc = *src | 0xff000000;
+ __m64 s = load8888 (&ssrc);
+ __m64 d = load8888 (dst);
- *dst = store8888 (in_over (s, srca, vmask, d));
+ store8888 (dst, in_over (s, srca, vmask, d));
w--;
dst++;
@@ -1621,9 +1628,9 @@ mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
else if (s)
{
__m64 ms, sa;
- ms = load8888 (s);
+ ms = load8888 (&s);
sa = expand_alpha (ms);
- *dst = store8888 (over (ms, sa, load8888 (*dst)));
+ store8888 (dst, over (ms, sa, load8888 (dst)));
}
dst++;
@@ -1664,7 +1671,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
while (w && (unsigned long)dst & 7)
{
- __m64 vsrc = load8888 (*src);
+ __m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
@@ -1685,10 +1692,10 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
__m64 vsrc0, vsrc1, vsrc2, vsrc3;
__m64 vdest;
- vsrc0 = load8888 (*(src + 0));
- vsrc1 = load8888 (*(src + 1));
- vsrc2 = load8888 (*(src + 2));
- vsrc3 = load8888 (*(src + 3));
+ vsrc0 = load8888 ((src + 0));
+ vsrc1 = load8888 ((src + 1));
+ vsrc2 = load8888 ((src + 2));
+ vsrc3 = load8888 ((src + 3));
vdest = *(__m64 *)dst;
@@ -1708,7 +1715,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
while (w)
{
- __m64 vsrc = load8888 (*src);
+ __m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
@@ -1751,7 +1758,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
@@ -1772,9 +1779,9 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
{
__m64 vdest = in_over (vsrc, vsrca,
expand_alpha_rev (to_m64 (m)),
- load8888 (*dst));
+ load8888 (dst));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
w--;
@@ -1823,11 +1830,11 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
if (m)
{
- __m64 vdest = load8888 (*dst);
+ __m64 vdest = load8888 (dst);
vdest = in_over (
vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest);
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
}
}
@@ -2016,7 +2023,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
while (height--)
{
@@ -2036,7 +2043,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
{
__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
else
{
@@ -2087,10 +2094,10 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
if (m)
{
- __m64 vdest = load8888 (*dst);
+ __m64 vdest = load8888 (dst);
vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
else
{
@@ -2126,7 +2133,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0);
@@ -2265,7 +2272,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
while (w && (unsigned long)dst & 7)
{
- __m64 vsrc = load8888 (*src);
+ __m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
@@ -2298,10 +2305,10 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
if ((a0 & a1 & a2 & a3) == 0xFF)
{
__m64 vdest;
- vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0);
- vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1);
- vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2);
- vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3);
+ vdest = pack_565 (invert_colors (load8888 (&s0)), _mm_setzero_si64 (), 0);
+ vdest = pack_565 (invert_colors (load8888 (&s1)), vdest, 1);
+ vdest = pack_565 (invert_colors (load8888 (&s2)), vdest, 2);
+ vdest = pack_565 (invert_colors (load8888 (&s3)), vdest, 3);
*(__m64 *)dst = vdest;
}
@@ -2309,10 +2316,10 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
{
__m64 vdest = *(__m64 *)dst;
- vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0);
- vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1);
- vdest = pack_565 (over_rev_non_pre (load8888 (s2), expand565 (vdest, 2)), vdest, 2);
- vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3);
+ vdest = pack_565 (over_rev_non_pre (load8888 (&s0), expand565 (vdest, 0)), vdest, 0);
+ vdest = pack_565 (over_rev_non_pre (load8888 (&s1), expand565 (vdest, 1)), vdest, 1);
+ vdest = pack_565 (over_rev_non_pre (load8888 (&s2), expand565 (vdest, 2)), vdest, 2);
+ vdest = pack_565 (over_rev_non_pre (load8888 (&s3), expand565 (vdest, 3)), vdest, 3);
*(__m64 *)dst = vdest;
}
@@ -2326,7 +2333,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
while (w)
{
- __m64 vsrc = load8888 (*src);
+ __m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
@@ -2373,10 +2380,10 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
while (w && (unsigned long)dst & 7)
{
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dst);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dst);
- *dst = store8888 (over_rev_non_pre (s, d));
+ store8888 (dst, over_rev_non_pre (s, d));
w--;
dst++;
@@ -2385,7 +2392,7 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
while (w >= 2)
{
- uint64_t s0, s1;
+ uint32_t s0, s1;
unsigned char a0, a1;
__m64 d0, d1;
@@ -2397,8 +2404,8 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
if ((a0 & a1) == 0xFF)
{
- d0 = invert_colors (load8888 (s0));
- d1 = invert_colors (load8888 (s1));
+ d0 = invert_colors (load8888 (&s0));
+ d1 = invert_colors (load8888 (&s1));
*(__m64 *)dst = pack8888 (d0, d1);
}
@@ -2406,8 +2413,8 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
{
__m64 vdest = *(__m64 *)dst;
- d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0));
- d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1));
+ d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0));
+ d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1));
*(__m64 *)dst = pack8888 (d0, d1);
}
@@ -2419,10 +2426,10 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
if (w)
{
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dst);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dst);
- *dst = store8888 (over_rev_non_pre (s, d));
+ store8888 (dst, over_rev_non_pre (s, d));
}
}
@@ -2450,7 +2457,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
@@ -2467,7 +2474,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
{
uint64_t d = *q;
__m64 vdest = expand565 (to_m64 (d), 0);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
*q = to_uint64 (vdest);
}
@@ -2489,10 +2496,10 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
{
__m64 vdest = *(__m64 *)q;
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m3), expand565 (vdest, 3)), vdest, 3);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m0), expand565 (vdest, 0)), vdest, 0);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m1), expand565 (vdest, 1)), vdest, 1);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m2), expand565 (vdest, 2)), vdest, 2);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m3), expand565 (vdest, 3)), vdest, 3);
*(__m64 *)q = vdest;
}
@@ -2510,7 +2517,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
{
uint64_t d = *q;
__m64 vdest = expand565 (to_m64 (d), 0);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
*q = to_uint64 (vdest);
}
@@ -2546,7 +2553,7 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
sa = src >> 24;
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
@@ -2578,10 +2585,10 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
__m64 vmask;
__m64 vdest;
- vmask = load8888 (ldl_u((uint32_t *)mask));
- vdest = load8888 (*(uint32_t *)dst);
+ vmask = load8888u ((uint32_t *)mask);
+ vdest = load8888 ((uint32_t *)dst);
- *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
+ store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest));
dst += 4;
mask += 4;
@@ -2648,7 +2655,7 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp,
uint32_t *s = (uint32_t *)src;
uint32_t *d = (uint32_t *)dst;
- *d = store8888 (in (load8888 (ldl_u((uint32_t *)s)), load8888 (*d)));
+ store8888 (d, in (load8888u (s), load8888 (d)));
w -= 4;
dst += 4;
@@ -2696,7 +2703,7 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
if (src == 0)
return;
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
@@ -2729,10 +2736,10 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
__m64 vmask;
__m64 vdest;
- vmask = load8888 (ldl_u((uint32_t *)mask));
- vdest = load8888 (*(uint32_t *)dst);
+ vmask = load8888u ((uint32_t *)mask);
+ vdest = load8888 ((uint32_t *)dst);
- *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+ store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest));
dst += 4;
mask += 4;
@@ -3073,19 +3080,20 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
if (m)
{
- __m64 s = load8888 (*src | 0xff000000);
+ uint32_t ssrc = *src | 0xff000000;
+ __m64 s = load8888 (&ssrc);
if (m == 0xff)
{
- *dst = store8888 (s);
+ store8888 (dst, s);
}
else
{
__m64 sa = expand_alpha (s);
__m64 vm = expand_alpha_rev (to_m64 (m));
- __m64 vdest = in_over (s, sa, vm, load8888 (*dst));
+ __m64 vdest = in_over (s, sa, vm, load8888 (dst));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
}