Diffstat (limited to 'pixman/pixman/pixman-vmx.c')
-rw-r--r--  pixman/pixman/pixman-vmx.c  707
1 file changed, 299 insertions(+), 408 deletions(-)
diff --git a/pixman/pixman/pixman-vmx.c b/pixman/pixman/pixman-vmx.c
index 6fc3cdea5..06325a7c0 100644
--- a/pixman/pixman/pixman-vmx.c
+++ b/pixman/pixman/pixman-vmx.c
@@ -45,24 +45,24 @@ static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
vector unsigned short hi, lo, mod;
-
+
/* unpack to short */
hi = (vector unsigned short)
vec_mergeh ((vector unsigned char)AVV (0),
(vector unsigned char)p);
-
+
mod = (vector unsigned short)
vec_mergeh ((vector unsigned char)AVV (0),
(vector unsigned char)a);
-
+
hi = vec_mladd (hi, mod, (vector unsigned short)
AVV (0x0080, 0x0080, 0x0080, 0x0080,
0x0080, 0x0080, 0x0080, 0x0080));
-
+
hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
-
+
hi = vec_sr (hi, vec_splat_u16 (8));
-
+
/* unpack to short */
lo = (vector unsigned short)
vec_mergel ((vector unsigned char)AVV (0),
@@ -70,15 +70,15 @@ pix_multiply (vector unsigned int p, vector unsigned int a)
mod = (vector unsigned short)
vec_mergel ((vector unsigned char)AVV (0),
(vector unsigned char)a);
-
+
lo = vec_mladd (lo, mod, (vector unsigned short)
AVV (0x0080, 0x0080, 0x0080, 0x0080,
0x0080, 0x0080, 0x0080, 0x0080));
-
+
lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
-
+
lo = vec_sr (lo, vec_splat_u16 (8));
-
+
return (vector unsigned int)vec_packsu (hi, lo);
}
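
Per 8-bit lane, pix_multiply is the standard pixman rounded multiply: add the 0x0080 bias visible in the hunk, fold the high byte back in, and two 8-bit shifts become an exact rounded division by 255. A minimal scalar sketch of one lane (the helper name mul_un8 is ours, mirroring pixman's MUL_UN8 macro):

    #include <stdint.h>

    /* One lane of pix_multiply: (p * a) / 255, rounded to nearest. */
    static inline uint8_t
    mul_un8 (uint8_t p, uint8_t a)
    {
        uint16_t t = (uint16_t) (p * a + 0x80);  /* product + rounding bias */
        return (uint8_t) ((t + (t >> 8)) >> 8);  /* exact rounded /255      */
    }

The vector code does exactly this on eight shorts at a time, once for the merged high halves and once for the low halves.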
@@ -95,56 +95,12 @@ pix_add_mul (vector unsigned int x,
vector unsigned int y,
vector unsigned int b)
{
- vector unsigned short hi, lo, mod, hiy, loy, mody;
-
- hi = (vector unsigned short)
- vec_mergeh ((vector unsigned char)AVV (0),
- (vector unsigned char)x);
- mod = (vector unsigned short)
- vec_mergeh ((vector unsigned char)AVV (0),
- (vector unsigned char)a);
- hiy = (vector unsigned short)
- vec_mergeh ((vector unsigned char)AVV (0),
- (vector unsigned char)y);
- mody = (vector unsigned short)
- vec_mergeh ((vector unsigned char)AVV (0),
- (vector unsigned char)b);
-
- hi = vec_mladd (hi, mod, (vector unsigned short)
- AVV (0x0080, 0x0080, 0x0080, 0x0080,
- 0x0080, 0x0080, 0x0080, 0x0080));
-
- hi = vec_mladd (hiy, mody, hi);
-
- hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
-
- hi = vec_sr (hi, vec_splat_u16 (8));
-
- lo = (vector unsigned short)
- vec_mergel ((vector unsigned char)AVV (0),
- (vector unsigned char)x);
- mod = (vector unsigned short)
- vec_mergel ((vector unsigned char)AVV (0),
- (vector unsigned char)a);
-
- loy = (vector unsigned short)
- vec_mergel ((vector unsigned char)AVV (0),
- (vector unsigned char)y);
- mody = (vector unsigned short)
- vec_mergel ((vector unsigned char)AVV (0),
- (vector unsigned char)b);
-
- lo = vec_mladd (lo, mod, (vector unsigned short)
- AVV (0x0080, 0x0080, 0x0080, 0x0080,
- 0x0080, 0x0080, 0x0080, 0x0080));
-
- lo = vec_mladd (loy, mody, lo);
-
- lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
-
- lo = vec_sr (lo, vec_splat_u16 (8));
-
- return (vector unsigned int)vec_packsu (hi, lo);
+ vector unsigned int t1, t2;
+
+ t1 = pix_multiply (x, a);
+ t2 = pix_multiply (y, b);
+
+ return pix_add (t1, t2);
}
static force_inline vector unsigned int
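
The fused pix_add_mul body is replaced by two pix_multiply calls and a saturating pix_add. One plausible motivation (our reading, not stated in the diff): the fused form accumulated two full products in unsigned short lanes, and 2 * 255 * 255 + 0x80 overflows 16 bits, which vec_mladd wraps silently; splitting the work keeps every intermediate in range. Per lane the new code computes, reusing mul_un8 from the sketch above:

    /* One lane of the rewritten pix_add_mul:
     * clamp (x*a/255 + y*b/255) to 8 bits, as the saturating
     * byte add in pix_add does. */
    static inline uint8_t
    add_mul_un8 (uint8_t x, uint8_t a, uint8_t y, uint8_t b)
    {
        uint16_t t = (uint16_t) (mul_un8 (x, a) + mul_un8 (y, b));
        return t > 0xFF ? 0xFF : (uint8_t) t;
    }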
@@ -161,7 +117,7 @@ over (vector unsigned int src,
{
vector unsigned char tmp = (vector unsigned char)
pix_multiply (dest, negate (srca));
-
+
tmp = vec_adds ((vector unsigned char)src, tmp);
return (vector unsigned int)tmp;
}
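
over() is the premultiplied OVER operator: dest scaled by the complement of the source alpha, then a saturating byte add (vec_adds) of the source. One lane, as a sketch reusing mul_un8:

    /* One lane of over(): s + d * (255 - srca) / 255, saturated. */
    static inline uint8_t
    over_un8 (uint8_t s, uint8_t d, uint8_t srca)
    {
        uint16_t t = (uint16_t) (s + mul_un8 (d, 255 - srca));
        return t > 0xFF ? 0xFF : (uint8_t) t;
    }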
@@ -235,31 +191,31 @@ vmx_combine_over_u_no_mask (uint32_t * dest,
vector unsigned int vdest, vsrc;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKS (dest, src);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
+
LOAD_VECTORS (dest, src);
-
+
vdest = over (vsrc, splat_alpha (vsrc), vdest);
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t ia = ALPHA_8 (~s);
-
+
UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-
+
dest[i] = d;
}
}
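
Every combiner in this file has the same shape, visible in the hunk above: a vector loop over groups of four 32-bit pixels, then a scalar tail of up to three pixels using the generic UN8x4_* macros. Schematically:

    /*
     * Common combiner skeleton (schematic, macros elided):
     *
     *     for (i = width / 4; i > 0; i--)    -- 4 pixels per 128-bit vector
     *         LOAD_VECTORS* (...); combine; STORE_VECTOR (dest);
     *         src += 4; dest += 4;           -- mask += 4 in *_mask variants
     *
     *     for (i = width % 4; --i >= 0;)     -- scalar tail, 0..3 pixels
     *         the same operation per pixel via UN8x4_* macros
     */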
@@ -274,35 +230,34 @@ vmx_combine_over_u_mask (uint32_t * dest,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSM (dest, src, mask);
-
+
vdest = over (vsrc, splat_alpha (vsrc), vdest);
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t m = ALPHA_8 (mask[i]);
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t ia;
-
+
UN8x4_MUL_UN8 (s, m);
-
+
ia = ALPHA_8 (~s);
-
+
UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
dest[i] = d;
}
@@ -331,29 +286,29 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
vector unsigned int vdest, vsrc;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKS (dest, src);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
+
LOAD_VECTORS (dest, src);
-
+
vdest = over (vdest, splat_alpha (vdest), vsrc);
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t ia = ALPHA_8 (~dest[i]);
-
+
UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
dest[i] = s;
}
@@ -369,33 +324,33 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
+
LOAD_VECTORSM (dest, src, mask);
-
+
vdest = over (vdest, splat_alpha (vdest), vsrc);
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t m = ALPHA_8 (mask[i]);
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t ia = ALPHA_8 (~dest[i]);
-
+
UN8x4_MUL_UN8 (s, m);
-
+
UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
dest[i] = s;
}
@@ -424,28 +379,27 @@ vmx_combine_in_u_no_mask (uint32_t * dest,
vector unsigned int vdest, vsrc;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKS (dest, src);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORS (dest, src);
-
+
vdest = pix_multiply (vsrc, splat_alpha (vdest));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
-
uint32_t s = src[i];
uint32_t a = ALPHA_8 (dest[i]);
+
UN8x4_MUL_UN8 (s, a);
dest[i] = s;
}
@@ -461,33 +415,32 @@ vmx_combine_in_u_mask (uint32_t * dest,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSM (dest, src, mask);
-
+
vdest = pix_multiply (vsrc, splat_alpha (vdest));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t m = ALPHA_8 (mask[i]);
uint32_t s = src[i];
uint32_t a = ALPHA_8 (dest[i]);
-
+
UN8x4_MUL_UN8 (s, m);
-
UN8x4_MUL_UN8 (s, a);
+
dest[i] = s;
}
}
@@ -515,28 +468,29 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
vector unsigned int vdest, vsrc;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKS (dest, src);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORS (dest, src);
-
+
vdest = pix_multiply (vdest, splat_alpha (vsrc));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t d = dest[i];
uint32_t a = ALPHA_8 (src[i]);
+
UN8x4_MUL_UN8 (d, a);
+
dest[i] = d;
}
}
@@ -551,34 +505,33 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSM (dest, src, mask);
-
+
vdest = pix_multiply (vdest, splat_alpha (vsrc));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t m = ALPHA_8 (mask[i]);
uint32_t d = dest[i];
uint32_t a = src[i];
-
+
UN8x4_MUL_UN8 (a, m);
-
a = ALPHA_8 (a);
UN8x4_MUL_UN8 (d, a);
+
dest[i] = d;
}
}
@@ -606,28 +559,29 @@ vmx_combine_out_u_no_mask (uint32_t * dest,
vector unsigned int vdest, vsrc;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKS (dest, src);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORS (dest, src);
-
+
vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t s = src[i];
uint32_t a = ALPHA_8 (~dest[i]);
+
UN8x4_MUL_UN8 (s, a);
+
dest[i] = s;
}
}
@@ -642,33 +596,32 @@ vmx_combine_out_u_mask (uint32_t * dest,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSM (dest, src, mask);
-
+
vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t m = ALPHA_8 (mask[i]);
uint32_t s = src[i];
uint32_t a = ALPHA_8 (~dest[i]);
-
+
UN8x4_MUL_UN8 (s, m);
-
UN8x4_MUL_UN8 (s, a);
+
dest[i] = s;
}
}
@@ -696,28 +649,30 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
vector unsigned int vdest, vsrc;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKS (dest, src);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
+
LOAD_VECTORS (dest, src);
-
+
vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t d = dest[i];
uint32_t a = ALPHA_8 (~src[i]);
+
UN8x4_MUL_UN8 (d, a);
+
dest[i] = d;
}
}
@@ -732,34 +687,33 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSM (dest, src, mask);
-
+
vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t m = ALPHA_8 (mask[i]);
uint32_t d = dest[i];
uint32_t a = src[i];
-
+
UN8x4_MUL_UN8 (a, m);
-
a = ALPHA_8 (~a);
UN8x4_MUL_UN8 (d, a);
+
dest[i] = d;
}
}
@@ -787,32 +741,32 @@ vmx_combine_atop_u_no_mask (uint32_t * dest,
vector unsigned int vdest, vsrc;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKS (dest, src);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORS (dest, src);
-
+
vdest = pix_add_mul (vsrc, splat_alpha (vdest),
vdest, splat_alpha (negate (vsrc)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t dest_a = ALPHA_8 (d);
uint32_t src_ia = ALPHA_8 (~s);
-
+
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+
dest[i] = s;
}
}
@@ -827,25 +781,24 @@ vmx_combine_atop_u_mask (uint32_t * dest,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSM (dest, src, mask);
-
+
vdest = pix_add_mul (vsrc, splat_alpha (vdest),
vdest, splat_alpha (negate (vsrc)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t m = ALPHA_8 (mask[i]);
@@ -853,12 +806,13 @@ vmx_combine_atop_u_mask (uint32_t * dest,
uint32_t d = dest[i];
uint32_t dest_a = ALPHA_8 (d);
uint32_t src_ia;
-
+
UN8x4_MUL_UN8 (s, m);
-
+
src_ia = ALPHA_8 (~s);
-
+
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+
dest[i] = s;
}
}
@@ -886,32 +840,32 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
vector unsigned int vdest, vsrc;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKS (dest, src);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORS (dest, src);
-
+
vdest = pix_add_mul (vdest, splat_alpha (vsrc),
vsrc, splat_alpha (negate (vdest)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t src_a = ALPHA_8 (s);
uint32_t dest_ia = ALPHA_8 (~d);
-
+
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+
dest[i] = s;
}
}
@@ -926,25 +880,24 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSM (dest, src, mask);
-
+
vdest = pix_add_mul (vdest, splat_alpha (vsrc),
vsrc, splat_alpha (negate (vdest)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t m = ALPHA_8 (mask[i]);
@@ -952,12 +905,13 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest,
uint32_t d = dest[i];
uint32_t src_a;
uint32_t dest_ia = ALPHA_8 (~d);
-
+
UN8x4_MUL_UN8 (s, m);
-
+
src_a = ALPHA_8 (s);
-
+
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+
dest[i] = s;
}
}
@@ -985,32 +939,32 @@ vmx_combine_xor_u_no_mask (uint32_t * dest,
vector unsigned int vdest, vsrc;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKS (dest, src);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORS (dest, src);
-
+
vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
vdest, splat_alpha (negate (vsrc)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t src_ia = ALPHA_8 (~s);
uint32_t dest_ia = ALPHA_8 (~d);
-
+
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+
dest[i] = s;
}
}
@@ -1025,25 +979,24 @@ vmx_combine_xor_u_mask (uint32_t * dest,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSM (dest, src, mask);
-
+
vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
vdest, splat_alpha (negate (vsrc)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t m = ALPHA_8 (mask[i]);
@@ -1051,12 +1004,13 @@ vmx_combine_xor_u_mask (uint32_t * dest,
uint32_t d = dest[i];
uint32_t src_ia;
uint32_t dest_ia = ALPHA_8 (~d);
-
+
UN8x4_MUL_UN8 (s, m);
-
+
src_ia = ALPHA_8 (~s);
-
+
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+
dest[i] = s;
}
}
@@ -1084,27 +1038,28 @@ vmx_combine_add_u_no_mask (uint32_t * dest,
vector unsigned int vdest, vsrc;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORS (dest, src);
-
+
vdest = pix_add (vsrc, vdest);
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t s = src[i];
uint32_t d = dest[i];
+
UN8x4_ADD_UN8x4 (d, s);
+
dest[i] = d;
}
}
@@ -1119,33 +1074,32 @@ vmx_combine_add_u_mask (uint32_t * dest,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSM (dest, src, mask);
-
+
vdest = pix_add (vsrc, vdest);
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t m = ALPHA_8 (mask[i]);
uint32_t s = src[i];
uint32_t d = dest[i];
-
+
UN8x4_MUL_UN8 (s, m);
-
UN8x4_ADD_UN8x4 (d, s);
+
dest[i] = d;
}
}
@@ -1176,28 +1130,30 @@ vmx_combine_src_ca (pixman_implementation_t *imp,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
+
vdest = pix_multiply (vsrc, vmask);
-
+
STORE_VECTOR (dest);
-
+
mask += 4;
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t s = src[i];
+
UN8x4_MUL_UN8x4 (s, a);
+
dest[i] = s;
}
}
@@ -1214,30 +1170,34 @@ vmx_combine_over_ca (pixman_implementation_t *imp,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
+
vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);
-
+
STORE_VECTOR (dest);
-
+
mask += 4;
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t s = src[i];
uint32_t d = dest[i];
+ uint32_t sa = ALPHA_8 (s);
+
UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
+
dest[i] = d;
}
}
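
The scalar tail of vmx_combine_over_ca gains the missing mask-times-source-alpha step, bringing it in line with the in_over used by the vector loop; note that sa is read before s is scaled by the mask. Per pixel, the fixed sequence is:

    /* Component-alpha OVER, one pixel (the fixed tail above):
     *   s' = s * m / 255            -- UN8x4_MUL_UN8x4 (s, a)
     *   m' = m * alpha(s) / 255     -- UN8x4_MUL_UN8 (a, sa)
     *   d  = d * ~m' / 255 + s'     -- UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s)
     * The old tail complemented the raw mask, skipping the m' step. */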
@@ -1254,32 +1214,33 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
+
vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));
-
+
STORE_VECTOR (dest);
-
+
mask += 4;
src += 4;
dest += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t s = src[i];
uint32_t d = dest[i];
- uint32_t da = ALPHA_8 (d);
+ uint32_t ida = ALPHA_8 (~d);
+
UN8x4_MUL_UN8x4 (s, a);
- UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ~da, d);
+ UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
+
dest[i] = s;
}
}
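
Two bugs in one line of the over-reverse tail: ~da complements all 32 bits of a variable holding an 8-bit alpha, and UN8x4_MUL_UN8x4 then multiplies each channel by a different (mostly 0xFF) byte. Extracting the alpha from the complemented pixel and switching to the scalar-multiplier macro fixes both. With an example pixel (ALPHA_8 (x) is x >> 24):

    uint32_t d   = 0x80402010;   /* example a8r8g8b8 pixel              */
    uint32_t da  = d >> 24;      /* ALPHA_8 (d)  == 0x80                */
    uint32_t bad = ~da;          /* 0xFFFFFF7F: channel-wise junk       */
    uint32_t ida = (~d) >> 24;   /* ALPHA_8 (~d) == 0x7F == 255 - alpha */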
@@ -1296,31 +1257,32 @@ vmx_combine_in_ca (pixman_implementation_t *imp,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
+
vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t s = src[i];
uint32_t da = ALPHA_8 (dest[i]);
- UN8x4_MUL_UN8 (s, a);
+
+ UN8x4_MUL_UN8x4 (s, a);
UN8x4_MUL_UN8 (s, da);
+
dest[i] = s;
}
}
@@ -1337,31 +1299,33 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
+
LOAD_VECTORSC (dest, src, mask);
-
+
vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t d = dest[i];
uint32_t sa = ALPHA_8 (src[i]);
+
UN8x4_MUL_UN8 (a, sa);
UN8x4_MUL_UN8x4 (d, a);
+
dest[i] = d;
}
}
@@ -1378,32 +1342,34 @@ vmx_combine_out_ca (pixman_implementation_t *imp,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
- vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
-
+
+ vdest = pix_multiply (
+ pix_multiply (vsrc, vmask), splat_alpha (negate (vdest)));
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t da = ALPHA_8 (~d);
+
UN8x4_MUL_UN8x4 (s, a);
- UN8x4_MUL_UN8x4 (s, da);
+ UN8x4_MUL_UN8 (s, da);
+
dest[i] = s;
}
}
@@ -1420,33 +1386,34 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
+
vdest = pix_multiply (
vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t sa = ALPHA_8 (s);
- UN8x4_MUL_UN8x4 (a, sa);
+
+ UN8x4_MUL_UN8 (a, sa);
UN8x4_MUL_UN8x4 (d, ~a);
+
dest[i] = d;
}
}
@@ -1460,30 +1427,32 @@ vmx_combine_atop_ca (pixman_implementation_t *imp,
int width)
{
int i;
- vector unsigned int vdest, vsrc, vmask;
+ vector unsigned int vdest, vsrc, vmask, vsrca;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
- vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest),
- vdest,
- negate (pix_multiply (vmask,
- splat_alpha (vmask))));
-
+
+ vsrca = splat_alpha (vsrc);
+
+ vsrc = pix_multiply (vsrc, vmask);
+ vmask = pix_multiply (vmask, vsrca);
+
+ vdest = pix_add_mul (vsrc, splat_alpha (vdest),
+ negate (vmask), vdest);
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
@@ -1491,10 +1460,11 @@ vmx_combine_atop_ca (pixman_implementation_t *imp,
uint32_t d = dest[i];
uint32_t sa = ALPHA_8 (s);
uint32_t da = ALPHA_8 (d);
-
+
UN8x4_MUL_UN8x4 (s, a);
UN8x4_MUL_UN8 (a, sa);
UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+
dest[i] = d;
}
}
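
vmx_combine_atop_ca now factors out the source alpha and, more importantly, builds the complemented term from the right product: the old code negated pix_multiply (vmask, splat_alpha (vmask)), the mask scaled by its own alpha, where component-alpha ATOP calls for the mask scaled by the source alpha. Per channel, the rewritten loop computes:

    /* Component-alpha ATOP, one channel (what the rewritten loop does):
     *   s' = s * m / 255                         -- pix_multiply (vsrc, vmask)
     *   m' = m * alpha(s) / 255                  -- pix_multiply (vmask, vsrca)
     *   d  = s' * alpha(d) / 255 + d * ~m' / 255 -- pix_add_mul (...)
     */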
@@ -1511,38 +1481,38 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
+
vdest = pix_add_mul (vdest,
pix_multiply (vmask, splat_alpha (vsrc)),
pix_multiply (vsrc, vmask),
negate (splat_alpha (vdest)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t sa = ALPHA_8 (s);
- uint32_t da = ALPHA_8 (d);
-
+ uint32_t da = ALPHA_8 (~d);
+
UN8x4_MUL_UN8x4 (s, a);
UN8x4_MUL_UN8 (a, sa);
- UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, ~da);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);
+
dest[i] = d;
}
}
@@ -1559,38 +1529,38 @@ vmx_combine_xor_ca (pixman_implementation_t *imp,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
+
vdest = pix_add_mul (vdest,
negate (pix_multiply (vmask, splat_alpha (vsrc))),
pix_multiply (vsrc, vmask),
negate (splat_alpha (vdest)));
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t s = src[i];
uint32_t d = dest[i];
uint32_t sa = ALPHA_8 (s);
- uint32_t da = ALPHA_8 (d);
-
+ uint32_t da = ALPHA_8 (~d);
+
UN8x4_MUL_UN8x4 (s, a);
UN8x4_MUL_UN8 (a, sa);
- UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, ~da);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+
dest[i] = d;
}
}
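
The same alpha-complement fix as in over-reverse recurs in the atop-reverse and xor tails: da is now ALPHA_8 (~d), that is 255 - alpha(d), rather than a 32-bit ~ of the extracted alpha. For XOR the per-channel result is:

    /* Component-alpha XOR, one channel (the fixed tail above):
     *   s' = s * m / 255                   -- UN8x4_MUL_UN8x4 (s, a)
     *   m' = m * alpha(s) / 255            -- UN8x4_MUL_UN8 (a, sa)
     *   d  = d * ~m'/255 + s' * da / 255   -- ..._ADD_UN8x4_MUL_UN8 (d, ~a, s, da)
     * with da = ALPHA_8 (~d) == 255 - alpha(d). */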
@@ -1607,122 +1577,44 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
+
vdest = pix_add (pix_multiply (vsrc, vmask), vdest);
-
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t s = src[i];
uint32_t d = dest[i];
-
+
UN8x4_MUL_UN8x4 (s, a);
UN8x4_ADD_UN8x4 (s, d);
- dest[i] = s;
- }
-}
-
-#if 0
-void
-vmx_composite_over_n_8888 (pixman_operator_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int16_t src_x,
- int16_t src_y,
- int16_t mask_x,
- int16_t mask_y,
- int16_t dest_x,
- int16_t dest_y,
- uint16_t width,
- uint16_t height)
-{
- uint32_t src;
- uint32_t *dst_line, *dst;
- int dst_stride;
-
- _pixman_image_get_solid (src_image, dst_image, src);
-
- if (src >> 24 == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- /* XXX vmx_combine_over_u (dst, src, width); */
- }
-}
-void
-vmx_composite_over_n_0565 (pixman_operator_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int16_t src_x,
- int16_t src_y,
- int16_t mask_x,
- int16_t mask_y,
- int16_t dest_x,
- int16_t dest_y,
- uint16_t width,
- uint16_t height)
-{
- uint32_t src;
- uint16_t *dst_line, *dst;
- uint16_t w;
- int dst_stride;
-
- _pixman_image_get_solid (src_image, dst_image, src);
-
- if (src >> 24 == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- vmx_combine_over_u565 (dst, src, width);
+ dest[i] = s;
}
}
-static const pixman_fast_path_t vmx_fast_path_array[] =
-{
- { PIXMAN_OP_NONE },
-};
-
-const pixman_fast_path_t *const vmx_fast_paths = vmx_fast_path_array;
-
-#endif
-
pixman_implementation_t *
_pixman_implementation_create_vmx (void)
{
pixman_implementation_t *fast = _pixman_implementation_create_fast_path ();
pixman_implementation_t *imp = _pixman_implementation_create (fast);
-
+
/* Set up function pointers */
-
- /* SSE code patch for fbcompose.c */
+
imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
@@ -1732,9 +1624,9 @@ _pixman_implementation_create_vmx (void)
imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;
-
+
imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;
-
+
imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
@@ -1746,7 +1638,6 @@ _pixman_implementation_create_vmx (void)
imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
-
+
return imp;
}
-