diff options
Diffstat (limited to 'pixman/pixman/pixman-vmx.c')
-rw-r--r-- | pixman/pixman/pixman-vmx.c | 707 |
1 files changed, 299 insertions, 408 deletions
diff --git a/pixman/pixman/pixman-vmx.c b/pixman/pixman/pixman-vmx.c index 6fc3cdea5..06325a7c0 100644 --- a/pixman/pixman/pixman-vmx.c +++ b/pixman/pixman/pixman-vmx.c @@ -45,24 +45,24 @@ static force_inline vector unsigned int pix_multiply (vector unsigned int p, vector unsigned int a) { vector unsigned short hi, lo, mod; - + /* unpack to short */ hi = (vector unsigned short) vec_mergeh ((vector unsigned char)AVV (0), (vector unsigned char)p); - + mod = (vector unsigned short) vec_mergeh ((vector unsigned char)AVV (0), (vector unsigned char)a); - + hi = vec_mladd (hi, mod, (vector unsigned short) AVV (0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080)); - + hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8))); - + hi = vec_sr (hi, vec_splat_u16 (8)); - + /* unpack to short */ lo = (vector unsigned short) vec_mergel ((vector unsigned char)AVV (0), @@ -70,15 +70,15 @@ pix_multiply (vector unsigned int p, vector unsigned int a) mod = (vector unsigned short) vec_mergel ((vector unsigned char)AVV (0), (vector unsigned char)a); - + lo = vec_mladd (lo, mod, (vector unsigned short) AVV (0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080)); - + lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8))); - + lo = vec_sr (lo, vec_splat_u16 (8)); - + return (vector unsigned int)vec_packsu (hi, lo); } @@ -95,56 +95,12 @@ pix_add_mul (vector unsigned int x, vector unsigned int y, vector unsigned int b) { - vector unsigned short hi, lo, mod, hiy, loy, mody; - - hi = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV (0), - (vector unsigned char)x); - mod = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV (0), - (vector unsigned char)a); - hiy = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV (0), - (vector unsigned char)y); - mody = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV (0), - (vector unsigned char)b); - - hi = vec_mladd (hi, mod, (vector unsigned short) - AVV (0x0080, 0x0080, 0x0080, 0x0080, - 0x0080, 0x0080, 0x0080, 0x0080)); - - hi = vec_mladd (hiy, mody, hi); - - hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8))); - - hi = vec_sr (hi, vec_splat_u16 (8)); - - lo = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV (0), - (vector unsigned char)x); - mod = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV (0), - (vector unsigned char)a); - - loy = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV (0), - (vector unsigned char)y); - mody = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV (0), - (vector unsigned char)b); - - lo = vec_mladd (lo, mod, (vector unsigned short) - AVV (0x0080, 0x0080, 0x0080, 0x0080, - 0x0080, 0x0080, 0x0080, 0x0080)); - - lo = vec_mladd (loy, mody, lo); - - lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8))); - - lo = vec_sr (lo, vec_splat_u16 (8)); - - return (vector unsigned int)vec_packsu (hi, lo); + vector unsigned int t1, t2; + + t1 = pix_multiply (x, a); + t2 = pix_multiply (y, b); + + return pix_add (t1, t2); } static force_inline vector unsigned int @@ -161,7 +117,7 @@ over (vector unsigned int src, { vector unsigned char tmp = (vector unsigned char) pix_multiply (dest, negate (srca)); - + tmp = vec_adds ((vector unsigned char)src, tmp); return (vector unsigned int)tmp; } @@ -235,31 +191,31 @@ vmx_combine_over_u_no_mask (uint32_t * dest, vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKS (dest, src); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - + LOAD_VECTORS (dest, src); - + vdest = over (vsrc, splat_alpha (vsrc), vdest); - + STORE_VECTOR (dest); - + src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t ia = ALPHA_8 (~s); - + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - + dest[i] = d; } } @@ -274,35 +230,34 @@ vmx_combine_over_u_mask (uint32_t * dest, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, mask_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSM (dest, src, mask); - + vdest = over (vsrc, splat_alpha (vsrc), vdest); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t ia; - + UN8x4_MUL_UN8 (s, m); - + ia = ALPHA_8 (~s); - + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); dest[i] = d; } @@ -331,29 +286,29 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest, vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKS (dest, src); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - + LOAD_VECTORS (dest, src); - + vdest = over (vdest, splat_alpha (vdest), vsrc); - + STORE_VECTOR (dest); - + src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t ia = ALPHA_8 (~dest[i]); - + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); dest[i] = s; } @@ -369,33 +324,33 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, mask_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - + LOAD_VECTORSM (dest, src, mask); - + vdest = over (vdest, splat_alpha (vdest), vsrc); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t ia = ALPHA_8 (~dest[i]); - + UN8x4_MUL_UN8 (s, m); - + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); dest[i] = s; } @@ -424,28 +379,27 @@ vmx_combine_in_u_no_mask (uint32_t * dest, vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKS (dest, src); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORS (dest, src); - + vdest = pix_multiply (vsrc, splat_alpha (vdest)); - + STORE_VECTOR (dest); - + src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { - uint32_t s = src[i]; uint32_t a = ALPHA_8 (dest[i]); + UN8x4_MUL_UN8 (s, a); dest[i] = s; } @@ -461,33 +415,32 @@ vmx_combine_in_u_mask (uint32_t * dest, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, mask_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSM (dest, src, mask); - + vdest = pix_multiply (vsrc, splat_alpha (vdest)); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; uint32_t a = ALPHA_8 (dest[i]); - + UN8x4_MUL_UN8 (s, m); - UN8x4_MUL_UN8 (s, a); + dest[i] = s; } } @@ -515,28 +468,29 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest, vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKS (dest, src); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORS (dest, src); - + vdest = pix_multiply (vdest, splat_alpha (vsrc)); - + STORE_VECTOR (dest); - + src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t d = dest[i]; uint32_t a = ALPHA_8 (src[i]); + UN8x4_MUL_UN8 (d, a); + dest[i] = d; } } @@ -551,34 +505,33 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, mask_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSM (dest, src, mask); - + vdest = pix_multiply (vdest, splat_alpha (vsrc)); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t d = dest[i]; uint32_t a = src[i]; - + UN8x4_MUL_UN8 (a, m); - a = ALPHA_8 (a); UN8x4_MUL_UN8 (d, a); + dest[i] = d; } } @@ -606,28 +559,29 @@ vmx_combine_out_u_no_mask (uint32_t * dest, vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKS (dest, src); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORS (dest, src); - + vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t s = src[i]; uint32_t a = ALPHA_8 (~dest[i]); + UN8x4_MUL_UN8 (s, a); + dest[i] = s; } } @@ -642,33 +596,32 @@ vmx_combine_out_u_mask (uint32_t * dest, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, mask_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSM (dest, src, mask); - + vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; uint32_t a = ALPHA_8 (~dest[i]); - + UN8x4_MUL_UN8 (s, m); - UN8x4_MUL_UN8 (s, a); + dest[i] = s; } } @@ -696,28 +649,30 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest, vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKS (dest, src); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - + LOAD_VECTORS (dest, src); - + vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t d = dest[i]; uint32_t a = ALPHA_8 (~src[i]); + UN8x4_MUL_UN8 (d, a); + dest[i] = d; } } @@ -732,34 +687,33 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, mask_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSM (dest, src, mask); - + vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t d = dest[i]; uint32_t a = src[i]; - + UN8x4_MUL_UN8 (a, m); - a = ALPHA_8 (~a); UN8x4_MUL_UN8 (d, a); + dest[i] = d; } } @@ -787,32 +741,32 @@ vmx_combine_atop_u_no_mask (uint32_t * dest, vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKS (dest, src); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORS (dest, src); - + vdest = pix_add_mul (vsrc, splat_alpha (vdest), vdest, splat_alpha (negate (vsrc))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t dest_a = ALPHA_8 (d); uint32_t src_ia = ALPHA_8 (~s); - + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + dest[i] = s; } } @@ -827,25 +781,24 @@ vmx_combine_atop_u_mask (uint32_t * dest, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, mask_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSM (dest, src, mask); - + vdest = pix_add_mul (vsrc, splat_alpha (vdest), vdest, splat_alpha (negate (vsrc))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); @@ -853,12 +806,13 @@ vmx_combine_atop_u_mask (uint32_t * dest, uint32_t d = dest[i]; uint32_t dest_a = ALPHA_8 (d); uint32_t src_ia; - + UN8x4_MUL_UN8 (s, m); - + src_ia = ALPHA_8 (~s); - + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + dest[i] = s; } } @@ -886,32 +840,32 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest, vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKS (dest, src); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORS (dest, src); - + vdest = pix_add_mul (vdest, splat_alpha (vsrc), vsrc, splat_alpha (negate (vdest))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t src_a = ALPHA_8 (s); uint32_t dest_ia = ALPHA_8 (~d); - + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + dest[i] = s; } } @@ -926,25 +880,24 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, mask_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSM (dest, src, mask); - + vdest = pix_add_mul (vdest, splat_alpha (vsrc), vsrc, splat_alpha (negate (vdest))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); @@ -952,12 +905,13 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest, uint32_t d = dest[i]; uint32_t src_a; uint32_t dest_ia = ALPHA_8 (~d); - + UN8x4_MUL_UN8 (s, m); - + src_a = ALPHA_8 (s); - + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + dest[i] = s; } } @@ -985,32 +939,32 @@ vmx_combine_xor_u_no_mask (uint32_t * dest, vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKS (dest, src); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORS (dest, src); - + vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), vdest, splat_alpha (negate (vsrc))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t src_ia = ALPHA_8 (~s); uint32_t dest_ia = ALPHA_8 (~d); - + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + dest[i] = s; } } @@ -1025,25 +979,24 @@ vmx_combine_xor_u_mask (uint32_t * dest, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, mask_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSM (dest, src, mask); - + vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), vdest, splat_alpha (negate (vsrc))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); @@ -1051,12 +1004,13 @@ vmx_combine_xor_u_mask (uint32_t * dest, uint32_t d = dest[i]; uint32_t src_ia; uint32_t dest_ia = ALPHA_8 (~d); - + UN8x4_MUL_UN8 (s, m); - + src_ia = ALPHA_8 (~s); - + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + dest[i] = s; } } @@ -1084,27 +1038,28 @@ vmx_combine_add_u_no_mask (uint32_t * dest, vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORS (dest, src); - + vdest = pix_add (vsrc, vdest); - + STORE_VECTOR (dest); - + src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t s = src[i]; uint32_t d = dest[i]; + UN8x4_ADD_UN8x4 (d, s); + dest[i] = d; } } @@ -1119,33 +1074,32 @@ vmx_combine_add_u_mask (uint32_t * dest, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, src_mask, mask_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSM (dest, src, mask); - + vdest = pix_add (vsrc, vdest); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t m = ALPHA_8 (mask[i]); uint32_t s = src[i]; uint32_t d = dest[i]; - + UN8x4_MUL_UN8 (s, m); - UN8x4_ADD_UN8x4 (d, s); + dest[i] = d; } } @@ -1176,28 +1130,30 @@ vmx_combine_src_ca (pixman_implementation_t *imp, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSC (dest, src, mask); - + vdest = pix_multiply (vsrc, vmask); - + STORE_VECTOR (dest); - + mask += 4; src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t s = src[i]; + UN8x4_MUL_UN8x4 (s, a); + dest[i] = s; } } @@ -1214,30 +1170,34 @@ vmx_combine_over_ca (pixman_implementation_t *imp, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSC (dest, src, mask); - + vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest); - + STORE_VECTOR (dest); - + mask += 4; src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t s = src[i]; uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s); + dest[i] = d; } } @@ -1254,32 +1214,33 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSC (dest, src, mask); - + vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask)); - + STORE_VECTOR (dest); - + mask += 4; src += 4; dest += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t s = src[i]; uint32_t d = dest[i]; - uint32_t da = ALPHA_8 (d); + uint32_t ida = ALPHA_8 (~d); + UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ~da, d); + UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d); + dest[i] = s; } } @@ -1296,31 +1257,32 @@ vmx_combine_in_ca (pixman_implementation_t *imp, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSC (dest, src, mask); - + vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t s = src[i]; uint32_t da = ALPHA_8 (dest[i]); - UN8x4_MUL_UN8 (s, a); + + UN8x4_MUL_UN8x4 (s, a); UN8x4_MUL_UN8 (s, da); + dest[i] = s; } } @@ -1337,31 +1299,33 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - + LOAD_VECTORSC (dest, src, mask); - + vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (src[i]); + UN8x4_MUL_UN8 (a, sa); UN8x4_MUL_UN8x4 (d, a); + dest[i] = d; } } @@ -1378,32 +1342,34 @@ vmx_combine_out_ca (pixman_implementation_t *imp, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSC (dest, src, mask); - - vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); - + + vdest = pix_multiply ( + pix_multiply (vsrc, vmask), splat_alpha (negate (vdest))); + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t da = ALPHA_8 (~d); + UN8x4_MUL_UN8x4 (s, a); - UN8x4_MUL_UN8x4 (s, da); + UN8x4_MUL_UN8 (s, da); + dest[i] = s; } } @@ -1420,33 +1386,34 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSC (dest, src, mask); - + vdest = pix_multiply ( vdest, negate (pix_multiply (vmask, splat_alpha (vsrc)))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (s); - UN8x4_MUL_UN8x4 (a, sa); + + UN8x4_MUL_UN8 (a, sa); UN8x4_MUL_UN8x4 (d, ~a); + dest[i] = d; } } @@ -1460,30 +1427,32 @@ vmx_combine_atop_ca (pixman_implementation_t *imp, int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask, vsrca; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSC (dest, src, mask); - - vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest), - vdest, - negate (pix_multiply (vmask, - splat_alpha (vmask)))); - + + vsrca = splat_alpha (vsrc); + + vsrc = pix_multiply (vsrc, vmask); + vmask = pix_multiply (vmask, vsrca); + + vdest = pix_add_mul (vsrc, splat_alpha (vdest), + negate (vmask), vdest); + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; @@ -1491,10 +1460,11 @@ vmx_combine_atop_ca (pixman_implementation_t *imp, uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (s); uint32_t da = ALPHA_8 (d); - + UN8x4_MUL_UN8x4 (s, a); UN8x4_MUL_UN8 (a, sa); UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + dest[i] = d; } } @@ -1511,38 +1481,38 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSC (dest, src, mask); - + vdest = pix_add_mul (vdest, pix_multiply (vmask, splat_alpha (vsrc)), pix_multiply (vsrc, vmask), negate (splat_alpha (vdest))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (s); - uint32_t da = ALPHA_8 (d); - + uint32_t da = ALPHA_8 (~d); + UN8x4_MUL_UN8x4 (s, a); UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, ~da); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da); + dest[i] = d; } } @@ -1559,38 +1529,38 @@ vmx_combine_xor_ca (pixman_implementation_t *imp, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSC (dest, src, mask); - + vdest = pix_add_mul (vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))), pix_multiply (vsrc, vmask), negate (splat_alpha (vdest))); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t s = src[i]; uint32_t d = dest[i]; uint32_t sa = ALPHA_8 (s); - uint32_t da = ALPHA_8 (d); - + uint32_t da = ALPHA_8 (~d); + UN8x4_MUL_UN8x4 (s, a); UN8x4_MUL_UN8 (a, sa); - UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, ~da); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + dest[i] = d; } } @@ -1607,122 +1577,44 @@ vmx_combine_add_ca (pixman_implementation_t *imp, vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, dest_mask, mask_mask, src_mask, store_mask; - + COMPUTE_SHIFT_MASKC (dest, src, mask); - + /* printf ("%s\n",__PRETTY_FUNCTION__); */ for (i = width / 4; i > 0; i--) { - LOAD_VECTORSC (dest, src, mask); - + vdest = pix_add (pix_multiply (vsrc, vmask), vdest); - + STORE_VECTOR (dest); - + src += 4; dest += 4; mask += 4; } - + for (i = width % 4; --i >= 0;) { uint32_t a = mask[i]; uint32_t s = src[i]; uint32_t d = dest[i]; - + UN8x4_MUL_UN8x4 (s, a); UN8x4_ADD_UN8x4 (s, d); - dest[i] = s; - } -} - -#if 0 -void -vmx_composite_over_n_8888 (pixman_operator_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int16_t src_x, - int16_t src_y, - int16_t mask_x, - int16_t mask_y, - int16_t dest_x, - int16_t dest_y, - uint16_t width, - uint16_t height) -{ - uint32_t src; - uint32_t *dst_line, *dst; - int dst_stride; - - _pixman_image_get_solid (src_image, dst_image, src); - - if (src >> 24 == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - /* XXX vmx_combine_over_u (dst, src, width); */ - } -} -void -vmx_composite_over_n_0565 (pixman_operator_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int16_t src_x, - int16_t src_y, - int16_t mask_x, - int16_t mask_y, - int16_t dest_x, - int16_t dest_y, - uint16_t width, - uint16_t height) -{ - uint32_t src; - uint16_t *dst_line, *dst; - uint16_t w; - int dst_stride; - - _pixman_image_get_solid (src_image, dst_image, src); - - if (src >> 24 == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - vmx_combine_over_u565 (dst, src, width); + dest[i] = s; } } -static const pixman_fast_path_t vmx_fast_path_array[] = -{ - { PIXMAN_OP_NONE }, -}; - -const pixman_fast_path_t *const vmx_fast_paths = vmx_fast_path_array; - -#endif - pixman_implementation_t * _pixman_implementation_create_vmx (void) { pixman_implementation_t *fast = _pixman_implementation_create_fast_path (); pixman_implementation_t *imp = _pixman_implementation_create (fast); - + /* Set up function pointers */ - - /* SSE code patch for fbcompose.c */ + imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u; imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u; imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u; @@ -1732,9 +1624,9 @@ _pixman_implementation_create_vmx (void) imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u; imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u; imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u; - + imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u; - + imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca; imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca; imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca; @@ -1746,7 +1638,6 @@ _pixman_implementation_create_vmx (void) imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca; imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca; imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca; - + return imp; } - |