Diffstat (limited to 'pixman/pixman/pixman-mmx.c')
-rw-r--r--  pixman/pixman/pixman-mmx.c  62
1 file changed, 44 insertions, 18 deletions
diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c
index 5441d6bc2..b3ef2495b 100644
--- a/pixman/pixman/pixman-mmx.c
+++ b/pixman/pixman/pixman-mmx.c
@@ -61,7 +61,7 @@ _mm_empty (void)
}
#endif
-#ifdef USE_X86_MMX
+#if defined __GNUC__ && defined USE_X86_MMX
# if (defined(__SUNPRO_C) || defined(_MSC_VER))
# include <xmmintrin.h>
# else
@@ -109,13 +109,17 @@ _mm_shuffle_pi16 (__m64 __A, int8_t const __N)
((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
# endif
# endif
-#endif
#ifndef _MSC_VER
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
#endif
+#else
+#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
+#include <emmintrin.h> /* for SSE2 intrinsics */
+#endif
+
/* Notes about writing mmx code
*
* give memory operands as the second operand. If you give it as the
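Net effect of the hunk above: GCC builds with USE_X86_MMX keep the hand-rolled _mm_shuffle_pi16 and _MM_SHUFFLE definitions, while every other compiler now falls through to the vendor intrinsics headers. A simplified sketch of the resulting preprocessor shape (the GCC branch keeps its existing sub-cases):

    /* Simplified shape of the conditional after this change. */
    #if defined __GNUC__ && defined USE_X86_MMX
    /* existing hand-rolled _mm_shuffle_pi16 / _MM_SHUFFLE definitions */
    #else
    #include <xmmintrin.h>   /* _mm_shuffle_pi16, _MM_SHUFFLE */
    #include <emmintrin.h>   /* SSE2 intrinsics */
    #endif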
@@ -261,6 +265,9 @@ to_m64 (uint64_t x)
#endif
}
+#ifdef _MSC_VER
+#define to_uint64(arg) arg.M64_MEMBER
+#else
static force_inline uint64_t
to_uint64 (__m64 x)
{
@@ -275,6 +282,7 @@ to_uint64 (__m64 x)
return (uint64_t)x;
#endif
}
+#endif
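Under MSVC, __m64 is declared as a union rather than a GCC-style vector type, so the inline version's (uint64_t) cast does not compile there; the macro reads the union's 64-bit member directly instead, assuming M64_MEMBER names that member as it does elsewhere in this file. A consolidated sketch of the two shapes:

    /* Sketch only: how to_uint64 () resolves per compiler.  M64_MEMBER is
     * assumed to expand to the union's 64-bit field (m64_u64 under MSVC). */
    #ifdef _MSC_VER
    #define to_uint64(arg) arg.M64_MEMBER      /* read the union field directly */
    #else
    static force_inline uint64_t
    to_uint64 (__m64 x)
    {
        return (uint64_t)x;                    /* GCC permits the cast */
    }
    #endif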
static force_inline __m64
shift (__m64 v,
@@ -438,6 +446,11 @@ pack8888 (__m64 lo, __m64 hi)
return _mm_packs_pu16 (lo, hi);
}
+#ifdef _MSC_VER
+#define store8888(dest,v) *(dest) = _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()))
+#define store(dest,v) *(dest) = _mm_cvtsi64_si32 (v)
+#else
+
static force_inline void
store (uint32_t *dest, __m64 v)
{
@@ -458,6 +471,7 @@ store8888 (uint32_t *dest, __m64 v)
v = pack8888 (v, _mm_setzero_si64 ());
store (dest, v);
}
+#endif
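store () and store8888 () likewise become macros for MSVC, built only from standard MMX intrinsics: pack8888 () (defined just above) collapses the 16-bit-per-channel pixel back to a8r8g8b8, and _mm_cvtsi64_si32 () extracts the low 32 bits for the write. A rough, function-shaped sketch of what the store8888 macro expands to, reusing this file's pack8888 helper:

    /* Sketch: the MSVC store8888 (dest, v) macro amounts to this sequence. */
    static void
    store8888_sketch (uint32_t *dest, __m64 v)
    {
        __m64 packed = pack8888 (v, _mm_setzero_si64 ()); /* 4x16 -> 4x8 packing */
        *dest = _mm_cvtsi64_si32 (packed);                /* write the low dword */
    }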
static force_inline pixman_bool_t
is_equal (__m64 a, __m64 b)
@@ -466,7 +480,9 @@ is_equal (__m64 a, __m64 b)
/* __m64 is double, we can compare directly. */
return a == b;
#else
- return _mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xff;
+ pixman_bool_t ret = _mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xff;
+ _mm_empty();
+ return ret;
#endif
}
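The _mm_empty () calls added here and in the helpers below issue EMMS after touching the MMX registers, presumably so that callers which go on to use x87 floating point see a clean register state even when these small predicates are used outside the usual composite fast paths. A self-contained illustration of the pattern (the function name is made up):

    #include <mmintrin.h>
    #include <xmmintrin.h>

    /* Compute an MMX-based predicate, then issue EMMS via _mm_empty ()
     * before returning, so the MMX/x87 state is clean for the caller. */
    static int
    all_bytes_equal (__m64 a, __m64 b)
    {
        int ret = _mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xff;
        _mm_empty ();
        return ret;
    }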
@@ -477,15 +493,21 @@ is_opaque (__m64 v)
return is_equal (_mm_and_si64 (v, MC (full_alpha)), MC (full_alpha));
#else
__m64 ffs = _mm_cmpeq_pi8 (v, v);
- return (_mm_movemask_pi8 (_mm_cmpeq_pi8 (v, ffs)) & 0x40);
+ pixman_bool_t ret = (_mm_movemask_pi8 (_mm_cmpeq_pi8 (v, ffs)) & 0x40);
+ _mm_empty();
+ return ret;
#endif
}
+#ifdef _MSC_VER
+#define is_zero(v) is_equal (v, _mm_setzero_si64 ())
+#else
static force_inline pixman_bool_t
is_zero (__m64 v)
{
return is_equal (v, _mm_setzero_si64 ());
}
+#endif
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
*
@@ -532,7 +554,7 @@ expand565 (__m64 pixel, int pos)
static force_inline void
expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1, int full_alpha)
{
- __m64 t0, t1, alpha = _mm_setzero_si64 ();;
+ __m64 t0, t1, alpha = _mm_setzero_si64 ();
__m64 r = _mm_and_si64 (vin, MC (expand_565_r));
__m64 g = _mm_and_si64 (vin, MC (expand_565_g));
__m64 b = _mm_and_si64 (vin, MC (expand_565_b));
@@ -553,6 +575,7 @@ expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1, int full_alpha)
*vout0 = _mm_unpacklo_pi16 (t0, t1); /* A1 R1 G1 B1 A0 R0 G0 B0 */
*vout1 = _mm_unpackhi_pi16 (t0, t1); /* A3 R3 G3 B3 A2 R2 G2 B2 */
+ _mm_empty();
}
static force_inline __m64
@@ -579,6 +602,7 @@ expand_4x565 (__m64 vin, __m64 *vout0, __m64 *vout1, __m64 *vout2, __m64 *vout3,
*vout1 = expand8888 (v0, 1);
*vout2 = expand8888 (v1, 0);
*vout3 = expand8888 (v1, 1);
+ _mm_empty();
}
static force_inline __m64
@@ -1881,7 +1905,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
vdest = pack_565 (
over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
- *dst = to_uint64 (vdest);
+ *dst = to_uint64 (vdest) & 0xffff;
w--;
dst++;
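dst points at 16-bit r5g6b5 pixels in this loop, while to_uint64 () hands back the whole 64-bit register, so the added & 0xffff makes the narrowing explicit, presumably to keep stricter compilers (MSVC in particular) from complaining about implicit 64-to-16-bit truncation. The same masking recurs in the second single-pixel loop and in pixman_fill_mmx () below. In isolation:

    #include <stdint.h>

    /* Sketch of the narrowing made explicit above (name is illustrative). */
    static uint16_t
    store_low_word (uint64_t v)
    {
        return (uint16_t) (v & 0xffff);   /* only the low r5g6b5 word is wanted */
    }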
@@ -1894,13 +1918,14 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
{
__m64 vdest = *(__m64 *)dst;
__m64 v0, v1, v2, v3;
+ __m64 vsrc0, vsrc1, vsrc2, vsrc3;
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
- __m64 vsrc0 = load8888 ((src + 0));
- __m64 vsrc1 = load8888 ((src + 1));
- __m64 vsrc2 = load8888 ((src + 2));
- __m64 vsrc3 = load8888 ((src + 3));
+ vsrc0 = load8888 ((src + 0));
+ vsrc1 = load8888 ((src + 1));
+ vsrc2 = load8888 ((src + 2));
+ vsrc3 = load8888 ((src + 3));
v0 = over (vsrc0, expand_alpha (vsrc0), v0);
v1 = over (vsrc1, expand_alpha (vsrc1), v1);
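The declarations of vsrc0..vsrc3 move to the top of the block here (and vm0..vm3 get the same treatment in mmx_composite_over_n_8_0565 further down), most likely because MSVC's C compiler historically accepted only C89, which forbids declarations after the first statement in a block. A tiny compilable illustration of the constraint, with made-up names:

    /* C89-friendly shape: declarations first, assignments after statements. */
    static int
    c89_block (int m0, int m1)
    {
        int vm0, vm1;        /* declared up front ...          */

        m0 += 1;             /* a statement runs ...           */
        vm0 = m0;            /* ... assignments follow it      */
        vm1 = m1;
        return vm0 + vm1;
    }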
@@ -1924,7 +1949,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
vdest = pack_565 (over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
- *dst = to_uint64 (vdest);
+ *dst = to_uint64 (vdest) & 0xffff;
w--;
dst++;
@@ -2125,7 +2150,7 @@ pixman_fill_mmx (uint32_t *bits,
if (w >= 2 && ((unsigned long)d & 3))
{
- *(uint16_t *)d = xor;
+ *(uint16_t *)d = (xor & 0xffff);
w -= 2;
d += 2;
}
@@ -2178,7 +2203,7 @@ pixman_fill_mmx (uint32_t *bits,
}
if (w >= 2)
{
- *(uint16_t *)d = xor;
+ *(uint16_t *)d = (xor & 0xffff);
w -= 2;
d += 2;
}
@@ -2446,22 +2471,23 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
{
__m64 vdest = *(__m64 *)dst;
__m64 v0, v1, v2, v3;
+ __m64 vm0, vm1, vm2, vm3;
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
- __m64 vm0 = to_m64 (m0);
+ vm0 = to_m64 (m0);
v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0), v0);
- __m64 vm1 = to_m64 (m1);
+ vm1 = to_m64 (m1);
v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1), v1);
- __m64 vm2 = to_m64 (m2);
+ vm2 = to_m64 (m2);
v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2), v2);
- __m64 vm3 = to_m64 (m3);
+ vm3 = to_m64 (m3);
v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3), v3);
- *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);;
+ *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
}
w -= 4;