Diffstat (limited to 'pixman/pixman/pixman-vmx.c')
-rw-r--r-- | pixman/pixman/pixman-vmx.c | 2174
1 file changed, 1429 insertions(+), 745 deletions(-)
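
The patch below reworks pixman's AltiVec (VMX) combiners: the fbCompose-era entry points are renamed into the pixman_implementation_t framework, each unified combiner is split into _mask/_no_mask variants dispatched on whether a mask pointer is present, and the Fb* byte macros give way to the UN8x4_* family. The per-channel arithmetic is the same throughout: an exact round(x * a / 255) computed with shifts and adds, which the vector code spells as vec_mladd followed by vec_adds and vec_sr. A minimal scalar sketch of that trick (mul_un8 is an illustrative name, not a pixman identifier):

    #include <stdint.h>

    /* Exact round(x * a / 255) without a division: bias by 0x80, then
     * fold the high byte back in before the final shift. */
    static uint8_t
    mul_un8 (uint8_t x, uint8_t a)
    {
        uint16_t t = (uint16_t)x * a + 0x80;

        return (uint8_t)((t + (t >> 8)) >> 8);
    }

This is the scalar counterpart of the hi/lo halves of pix_multiply in the diff: vec_mladd adds the 0x0080 bias, vec_adds folds the high byte back in, and the final vec_sr by 8 yields the rounded quotient.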
diff --git a/pixman/pixman/pixman-vmx.c b/pixman/pixman/pixman-vmx.c index 8c8a2a364..6fc3cdea5 100644 --- a/pixman/pixman/pixman-vmx.c +++ b/pixman/pixman/pixman-vmx.c @@ -26,1043 +26,1727 @@ */ #include <config.h> -#include "pixman-vmx.h" +#include "pixman-private.h" #include "pixman-combine32.h" #include <altivec.h> -#ifdef __GNUC__ -# define inline __inline__ __attribute__ ((__always_inline__)) -#endif +#define AVV(x...) {x} -static inline vector unsigned int -splat_alpha (vector unsigned int pix) { +static force_inline vector unsigned int +splat_alpha (vector unsigned int pix) +{ return vec_perm (pix, pix, - (vector unsigned char)AVV(0x00,0x00,0x00,0x00, 0x04,0x04,0x04,0x04, - 0x08,0x08,0x08,0x08, 0x0C,0x0C,0x0C,0x0C)); + (vector unsigned char)AVV ( + 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04, + 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C)); } -static inline vector unsigned int +static force_inline vector unsigned int pix_multiply (vector unsigned int p, vector unsigned int a) { vector unsigned short hi, lo, mod; + /* unpack to short */ hi = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)p); + vec_mergeh ((vector unsigned char)AVV (0), + (vector unsigned char)p); + mod = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)a); - + vec_mergeh ((vector unsigned char)AVV (0), + (vector unsigned char)a); + hi = vec_mladd (hi, mod, (vector unsigned short) - AVV(0x0080,0x0080,0x0080,0x0080, - 0x0080,0x0080,0x0080,0x0080)); - + AVV (0x0080, 0x0080, 0x0080, 0x0080, + 0x0080, 0x0080, 0x0080, 0x0080)); + hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8))); - + hi = vec_sr (hi, vec_splat_u16 (8)); - + /* unpack to short */ lo = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)p); + vec_mergel ((vector unsigned char)AVV (0), + (vector unsigned char)p); mod = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)a); - + vec_mergel ((vector unsigned char)AVV (0), + (vector unsigned char)a); + lo = vec_mladd (lo, mod, (vector unsigned short) - AVV(0x0080,0x0080,0x0080,0x0080, - 0x0080,0x0080,0x0080,0x0080)); - + AVV (0x0080, 0x0080, 0x0080, 0x0080, + 0x0080, 0x0080, 0x0080, 0x0080)); + lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8))); - + lo = vec_sr (lo, vec_splat_u16 (8)); - + return (vector unsigned int)vec_packsu (hi, lo); } -static inline vector unsigned int +static force_inline vector unsigned int pix_add (vector unsigned int a, vector unsigned int b) { return (vector unsigned int)vec_adds ((vector unsigned char)a, - (vector unsigned char)b); + (vector unsigned char)b); } -static inline vector unsigned int -pix_add_mul (vector unsigned int x, vector unsigned int a, - vector unsigned int y, vector unsigned int b) +static force_inline vector unsigned int +pix_add_mul (vector unsigned int x, + vector unsigned int a, + vector unsigned int y, + vector unsigned int b) { vector unsigned short hi, lo, mod, hiy, loy, mody; - + hi = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)x); + vec_mergeh ((vector unsigned char)AVV (0), + (vector unsigned char)x); mod = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)a); + vec_mergeh ((vector unsigned char)AVV (0), + (vector unsigned char)a); hiy = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)y); + vec_mergeh ((vector unsigned char)AVV (0), + (vector unsigned 
char)y); mody = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)b); - + vec_mergeh ((vector unsigned char)AVV (0), + (vector unsigned char)b); + hi = vec_mladd (hi, mod, (vector unsigned short) - AVV(0x0080,0x0080,0x0080,0x0080, - 0x0080,0x0080,0x0080,0x0080)); - + AVV (0x0080, 0x0080, 0x0080, 0x0080, + 0x0080, 0x0080, 0x0080, 0x0080)); + hi = vec_mladd (hiy, mody, hi); - + hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8))); - + hi = vec_sr (hi, vec_splat_u16 (8)); - + lo = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)x); + vec_mergel ((vector unsigned char)AVV (0), + (vector unsigned char)x); mod = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)a); - + vec_mergel ((vector unsigned char)AVV (0), + (vector unsigned char)a); + loy = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)y); + vec_mergel ((vector unsigned char)AVV (0), + (vector unsigned char)y); mody = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)b); - + vec_mergel ((vector unsigned char)AVV (0), + (vector unsigned char)b); + lo = vec_mladd (lo, mod, (vector unsigned short) - AVV(0x0080,0x0080,0x0080,0x0080, - 0x0080,0x0080,0x0080,0x0080)); - + AVV (0x0080, 0x0080, 0x0080, 0x0080, + 0x0080, 0x0080, 0x0080, 0x0080)); + lo = vec_mladd (loy, mody, lo); - + lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8))); - + lo = vec_sr (lo, vec_splat_u16 (8)); - + return (vector unsigned int)vec_packsu (hi, lo); } -static inline vector unsigned int +static force_inline vector unsigned int negate (vector unsigned int src) { return vec_nor (src, src); } + /* dest*~srca + src */ -static inline vector unsigned int -over (vector unsigned int src, vector unsigned int srca, +static force_inline vector unsigned int +over (vector unsigned int src, + vector unsigned int srca, vector unsigned int dest) { vector unsigned char tmp = (vector unsigned char) - pix_multiply (dest, negate (srca)); + pix_multiply (dest, negate (srca)); + tmp = vec_adds ((vector unsigned char)src, tmp); return (vector unsigned int)tmp; } /* in == pix_multiply */ -#define in_over(src, srca, mask, dest) over (pix_multiply (src, mask),\ - pix_multiply (srca, mask), dest) +#define in_over(src, srca, mask, dest) \ + over (pix_multiply (src, mask), \ + pix_multiply (srca, mask), dest) -#define COMPUTE_SHIFT_MASK(source) \ +#define COMPUTE_SHIFT_MASK(source) \ source ## _mask = vec_lvsl (0, source); -#define COMPUTE_SHIFT_MASKS(dest, source) \ - dest ## _mask = vec_lvsl (0, dest); \ - source ## _mask = vec_lvsl (0, source); \ +#define COMPUTE_SHIFT_MASKS(dest, source) \ + dest ## _mask = vec_lvsl (0, dest); \ + source ## _mask = vec_lvsl (0, source); \ store_mask = vec_lvsr (0, dest); -#define COMPUTE_SHIFT_MASKC(dest, source, mask) \ - mask ## _mask = vec_lvsl (0, mask); \ - dest ## _mask = vec_lvsl (0, dest); \ - source ## _mask = vec_lvsl (0, source); \ +#define COMPUTE_SHIFT_MASKC(dest, source, mask) \ + mask ## _mask = vec_lvsl (0, mask); \ + dest ## _mask = vec_lvsl (0, dest); \ + source ## _mask = vec_lvsl (0, source); \ store_mask = vec_lvsr (0, dest); /* notice you have to declare temp vars... * Note: tmp3 and tmp4 must remain untouched! 
*/ -#define LOAD_VECTORS(dest, source) \ - tmp1 = (typeof(tmp1))vec_ld(0, source); \ - tmp2 = (typeof(tmp2))vec_ld(15, source); \ - tmp3 = (typeof(tmp3))vec_ld(0, dest); \ - v ## source = (typeof(v ## source)) \ - vec_perm(tmp1, tmp2, source ## _mask); \ - tmp4 = (typeof(tmp4))vec_ld(15, dest); \ - v ## dest = (typeof(v ## dest)) \ - vec_perm(tmp3, tmp4, dest ## _mask); - -#define LOAD_VECTORSC(dest, source, mask) \ - tmp1 = (typeof(tmp1))vec_ld(0, source); \ - tmp2 = (typeof(tmp2))vec_ld(15, source); \ - tmp3 = (typeof(tmp3))vec_ld(0, dest); \ - v ## source = (typeof(v ## source)) \ - vec_perm(tmp1, tmp2, source ## _mask); \ - tmp4 = (typeof(tmp4))vec_ld(15, dest); \ - tmp1 = (typeof(tmp1))vec_ld(0, mask); \ - v ## dest = (typeof(v ## dest)) \ - vec_perm(tmp3, tmp4, dest ## _mask); \ - tmp2 = (typeof(tmp2))vec_ld(15, mask); \ - v ## mask = (typeof(v ## mask)) \ - vec_perm(tmp1, tmp2, mask ## _mask); -#define STORE_VECTOR(dest) \ - edges = vec_perm (tmp4, tmp3, dest ## _mask); \ - tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \ - tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \ - vec_st ((vector unsigned int) tmp3, 15, dest ); \ - vec_st ((vector unsigned int) tmp1, 0, dest ); - -static FASTCALL void -vmxCombineMaskU (uint32_t *src, const uint32_t *msk, int width) +#define LOAD_VECTORS(dest, source) \ + tmp1 = (typeof(tmp1))vec_ld (0, source); \ + tmp2 = (typeof(tmp2))vec_ld (15, source); \ + tmp3 = (typeof(tmp3))vec_ld (0, dest); \ + v ## source = (typeof(v ## source)) \ + vec_perm (tmp1, tmp2, source ## _mask); \ + tmp4 = (typeof(tmp4))vec_ld (15, dest); \ + v ## dest = (typeof(v ## dest)) \ + vec_perm (tmp3, tmp4, dest ## _mask); + +#define LOAD_VECTORSC(dest, source, mask) \ + tmp1 = (typeof(tmp1))vec_ld (0, source); \ + tmp2 = (typeof(tmp2))vec_ld (15, source); \ + tmp3 = (typeof(tmp3))vec_ld (0, dest); \ + v ## source = (typeof(v ## source)) \ + vec_perm (tmp1, tmp2, source ## _mask); \ + tmp4 = (typeof(tmp4))vec_ld (15, dest); \ + tmp1 = (typeof(tmp1))vec_ld (0, mask); \ + v ## dest = (typeof(v ## dest)) \ + vec_perm (tmp3, tmp4, dest ## _mask); \ + tmp2 = (typeof(tmp2))vec_ld (15, mask); \ + v ## mask = (typeof(v ## mask)) \ + vec_perm (tmp1, tmp2, mask ## _mask); + +#define LOAD_VECTORSM(dest, source, mask) \ + LOAD_VECTORSC (dest, source, mask) \ + v ## source = pix_multiply (v ## source, \ + splat_alpha (v ## mask)); + +#define STORE_VECTOR(dest) \ + edges = vec_perm (tmp4, tmp3, dest ## _mask); \ + tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \ + tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \ + vec_st ((vector unsigned int) tmp3, 15, dest); \ + vec_st ((vector unsigned int) tmp1, 0, dest); + +static void +vmx_combine_over_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vsrc, vmsk; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - src_mask, msk_mask, store_mask; - - COMPUTE_SHIFT_MASKS(src, msk) - + dest_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKS (dest, src); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(src, msk) - - vsrc = pix_multiply (vsrc, splat_alpha (vmsk)); - - STORE_VECTOR(src) - - msk+=4; - src+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = over (vsrc, splat_alpha (vsrc), vdest); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; } - - for (i = width%4; --i >= 0;) { - uint32_t a = msk[i] >> 24; 
- uint32_t s = src[i]; - FbByteMul (s, a); - src[i] = s; + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + + dest[i] = d; } } -static FASTCALL void -vmxCombineOverU (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_over_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKS(dest, src) - + dest_mask, src_mask, mask_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) - - vdest = over (vsrc, splat_alpha (vsrc), vdest); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = over (vsrc, splat_alpha (vsrc), vdest); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } - - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ia = Alpha (~s); - - FbByteMulAdd (d, ia, s); - dest[i] = d; + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia; + + UN8x4_MUL_UN8 (s, m); + + ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + dest[i] = d; } } +static void +vmx_combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_over_u_mask (dest, src, mask, width); + else + vmx_combine_over_u_no_mask (dest, src, width); +} -static FASTCALL void -vmxCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_over_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKS(dest, src) - + dest_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKS (dest, src); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) - - vdest = over (vdest, splat_alpha (vdest) , vsrc); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = over (vdest, splat_alpha (vdest), vsrc); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; } - - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ia = Alpha (~dest[i]); - - FbByteMulAdd (s, ia, d); - dest[i] = s; + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia = ALPHA_8 (~dest[i]); + + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + dest[i] = s; } } -static FASTCALL void -vmxCombineInU (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_over_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKS(dest, src) - + dest_mask, src_mask, mask_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf 
("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) - - vdest = pix_multiply (vsrc, splat_alpha (vdest)); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = over (vdest, splat_alpha (vdest), vsrc); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia = ALPHA_8 (~dest[i]); + + UN8x4_MUL_UN8 (s, m); + + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + dest[i] = s; + } +} - for (i = width%4; --i >=0;) { +static void +vmx_combine_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_over_reverse_u_mask (dest, src, mask, width); + else + vmx_combine_over_reverse_u_no_mask (dest, src, width); +} - uint32_t s = src[i]; - uint32_t a = Alpha (dest[i]); - FbByteMul (s, a); - dest[i] = s; +static void +vmx_combine_in_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = pix_multiply (vsrc, splat_alpha (vdest)); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (dest[i]); + UN8x4_MUL_UN8 (s, a); + dest[i] = s; } } -static FASTCALL void -vmxCombineInReverseU (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_in_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKS(dest, src) - + dest_mask, src_mask, mask_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) - - vdest = pix_multiply (vdest, splat_alpha (vsrc)); + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_multiply (vsrc, splat_alpha (vdest)); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (dest[i]); + + UN8x4_MUL_UN8 (s, m); + + UN8x4_MUL_UN8 (s, a); + dest[i] = s; + } +} - STORE_VECTOR(dest) +static void +vmx_combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_in_u_mask (dest, src, mask, width); + else + vmx_combine_in_u_no_mask (dest, src, width); +} - src+=4; - dest+=4; +static void +vmx_combine_in_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = pix_multiply (vdest, splat_alpha (vsrc)); + + STORE_VECTOR 
(dest); + + src += 4; + dest += 4; } - - for (i = width%4; --i >=0;) { - uint32_t d = dest[i]; - uint32_t a = Alpha (src[i]); - FbByteMul (d, a); - dest[i] = d; + + for (i = width % 4; --i >= 0;) + { + uint32_t d = dest[i]; + uint32_t a = ALPHA_8 (src[i]); + UN8x4_MUL_UN8 (d, a); + dest[i] = d; } } -static FASTCALL void -vmxCombineOutU (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_in_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKS(dest, src) - + dest_mask, src_mask, mask_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) - - vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_multiply (vdest, splat_alpha (vsrc)); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t d = dest[i]; + uint32_t a = src[i]; + + UN8x4_MUL_UN8 (a, m); + + a = ALPHA_8 (a); + UN8x4_MUL_UN8 (d, a); + dest[i] = d; + } +} - STORE_VECTOR(dest) +static void +vmx_combine_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_in_reverse_u_mask (dest, src, mask, width); + else + vmx_combine_in_reverse_u_no_mask (dest, src, width); +} - src+=4; - dest+=4; +static void +vmx_combine_out_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; } - - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t a = Alpha (~dest[i]); - FbByteMul (s, a); - dest[i] = s; + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (~dest[i]); + UN8x4_MUL_UN8 (s, a); + dest[i] = s; } } -static FASTCALL void -vmxCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_out_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKS(dest, src) - + dest_mask, src_mask, mask_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) - - vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (~dest[i]); + + UN8x4_MUL_UN8 (s, m); + + 
UN8x4_MUL_UN8 (s, a); + dest[i] = s; + } +} - STORE_VECTOR(dest) +static void +vmx_combine_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_out_u_mask (dest, src, mask, width); + else + vmx_combine_out_u_no_mask (dest, src, width); +} - src+=4; - dest+=4; +static void +vmx_combine_out_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; } - - for (i = width%4; --i >=0;) { - uint32_t d = dest[i]; - uint32_t a = Alpha (~src[i]); - FbByteMul (d, a); - dest[i] = d; + + for (i = width % 4; --i >= 0;) + { + uint32_t d = dest[i]; + uint32_t a = ALPHA_8 (~src[i]); + UN8x4_MUL_UN8 (d, a); + dest[i] = d; } } -static FASTCALL void -vmxCombineAtopU (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_out_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKS(dest, src) - + dest_mask, src_mask, mask_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) - - vdest = pix_add_mul (vsrc, splat_alpha (vdest), - vdest, splat_alpha (negate (vsrc))); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t d = dest[i]; + uint32_t a = src[i]; + + UN8x4_MUL_UN8 (a, m); + + a = ALPHA_8 (~a); + UN8x4_MUL_UN8 (d, a); + dest[i] = d; + } +} - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t dest_a = Alpha (d); - uint32_t src_ia = Alpha (~s); +static void +vmx_combine_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_out_reverse_u_mask (dest, src, mask, width); + else + vmx_combine_out_reverse_u_no_mask (dest, src, width); +} - FbByteAddMul (s, dest_a, d, src_ia); - dest[i] = s; +static void +vmx_combine_atop_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = pix_add_mul (vsrc, splat_alpha (vdest), + vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, 
d, src_ia); + dest[i] = s; } } -static FASTCALL void -vmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_atop_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKS(dest, src) - + dest_mask, src_mask, mask_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) - - vdest = pix_add_mul (vdest, splat_alpha (vsrc), - vsrc, splat_alpha (negate (vdest))); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_add_mul (vsrc, splat_alpha (vdest), + vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia; + + UN8x4_MUL_UN8 (s, m); + + src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + dest[i] = s; } +} - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_a = Alpha (s); - uint32_t dest_ia = Alpha (~d); +static void +vmx_combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_atop_u_mask (dest, src, mask, width); + else + vmx_combine_atop_u_no_mask (dest, src, width); +} - FbByteAddMul (s, dest_ia, d, src_a); - dest[i] = s; +static void +vmx_combine_atop_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = pix_add_mul (vdest, splat_alpha (vsrc), + vsrc, splat_alpha (negate (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_a = ALPHA_8 (s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + dest[i] = s; } } -static FASTCALL void -vmxCombineXorU (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_atop_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKS(dest, src) - + dest_mask, src_mask, mask_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS (dest, src) - - vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), - vdest, splat_alpha (negate (vsrc))); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_add_mul (vdest, splat_alpha (vsrc), + vsrc, splat_alpha (negate (vdest))); + + STORE_VECTOR (dest); + + src += 4; + 
dest += 4; + mask += 4; } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_a; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + src_a = ALPHA_8 (s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + dest[i] = s; + } +} - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_ia = Alpha (~s); - uint32_t dest_ia = Alpha (~d); +static void +vmx_combine_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_atop_reverse_u_mask (dest, src, mask, width); + else + vmx_combine_atop_reverse_u_no_mask (dest, src, width); +} - FbByteAddMul (s, dest_ia, d, src_ia); - dest[i] = s; +static void +vmx_combine_xor_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKS (dest, src); + + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), + vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_ia = ALPHA_8 (~s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + dest[i] = s; } } -static FASTCALL void -vmxCombineAddU (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_xor_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKS(dest, src) + dest_mask, src_mask, mask_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) - - vdest = pix_add (vsrc, vdest); + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), + vdest, splat_alpha (negate (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_ia; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + dest[i] = s; + } +} - STORE_VECTOR(dest) +static void +vmx_combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_xor_u_mask (dest, src, mask, width); + else + vmx_combine_xor_u_no_mask (dest, src, width); +} - src+=4; - dest+=4; +static void +vmx_combine_add_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) +{ + int i; + vector unsigned int vdest, vsrc; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKS (dest, src); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORS (dest, src); + + vdest = pix_add (vsrc, vdest); 
+ + STORE_VECTOR (dest); + + src += 4; + dest += 4; } - - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - FbByteAdd (d, s); - dest[i] = d; + + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + UN8x4_ADD_UN8x4 (d, s); + dest[i] = d; } } -static FASTCALL void -vmxCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_add_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKC(dest, src, mask); + dest_mask, src_mask, mask_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = pix_multiply (vsrc, vmask); + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSM (dest, src, mask); + + vdest = pix_add (vsrc, vdest); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; + } + + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + + UN8x4_MUL_UN8 (s, m); + + UN8x4_ADD_UN8x4 (d, s); + dest[i] = d; + } +} - STORE_VECTOR(dest) +static void +vmx_combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + if (mask) + vmx_combine_add_u_mask (dest, src, mask, width); + else + vmx_combine_add_u_no_mask (dest, src, width); +} - mask+=4; - src+=4; - dest+=4; +static void +vmx_combine_src_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + int i; + vector unsigned int vdest, vsrc, vmask; + vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_multiply (vsrc, vmask); + + STORE_VECTOR (dest); + + mask += 4; + src += 4; + dest += 4; } - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - FbByteMulC (s, a); - dest[i] = s; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + UN8x4_MUL_UN8x4 (s, a); + dest[i] = s; } } -static FASTCALL void -vmxCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKC(dest, src, mask); + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest); - - STORE_VECTOR(dest) - - mask+=4; - src+=4; - dest+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest); + + STORE_VECTOR (dest); + + mask += 4; + src += 4; + dest += 4; 
} - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - FbByteMulC (s, a); - FbByteMulAddC (d, ~a, s); - dest[i] = d; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s); + dest[i] = d; } } -static FASTCALL void -vmxCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); - COMPUTE_SHIFT_MASKC(dest, src, mask); /* printf("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC (dest, src, mask) - - vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask)); - - STORE_VECTOR(dest) - - mask+=4; - src+=4; - dest+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask)); + + STORE_VECTOR (dest); + + mask += 4; + src += 4; + dest += 4; } - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t da = Alpha (d); - FbByteMulC (s, a); - FbByteMulAddC (s, ~da, d); - dest[i] = s; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t da = ALPHA_8 (d); + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ~da, d); + dest[i] = s; } } -static FASTCALL void -vmxCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKC(dest, src, mask) - + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; - mask+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t da = Alpha (dest[i]); - FbByteMul (s, a); - FbByteMul (s, da); - dest[i] = s; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t da = ALPHA_8 (dest[i]); + UN8x4_MUL_UN8 (s, a); + UN8x4_MUL_UN8 (s, da); + dest[i] = s; } } -static FASTCALL void -vmxCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int 
vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKC(dest, src, mask) - + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc))); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; - mask+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t d = dest[i]; - uint32_t sa = Alpha (src[i]); - FbByteMul (a, sa); - FbByteMulC (d, a); - dest[i] = d; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (src[i]); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, a); + dest[i] = d; } } -static FASTCALL void -vmxCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKC(dest, src, mask) - + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; - mask+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t da = Alpha (~d); - FbByteMulC (s, a); - FbByteMulC (s, da); - dest[i] = s; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t da = ALPHA_8 (~d); + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8x4 (s, da); + dest[i] = s; } } -static FASTCALL void -vmxCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKC(dest, src, mask) - + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = pix_multiply (vdest, - negate (pix_multiply (vmask, splat_alpha (vsrc)))); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; - mask+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_multiply ( + vdest, negate 
(pix_multiply (vmask, splat_alpha (vsrc)))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = Alpha (s); - FbByteMulC (a, sa); - FbByteMulC (d, ~a); - dest[i] = d; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + UN8x4_MUL_UN8x4 (a, sa); + UN8x4_MUL_UN8x4 (d, ~a); + dest[i] = d; } } -static FASTCALL void -vmxCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKC(dest, src, mask) - + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest), - vdest, - negate (pix_multiply (vmask, - splat_alpha (vmask)))); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; - mask+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest), + vdest, + negate (pix_multiply (vmask, + splat_alpha (vmask)))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = Alpha (s); - uint32_t da = Alpha (d); - - FbByteMulC (s, a); - FbByteMul (a, sa); - FbByteAddMulC (d, ~a, s, da); - dest[i] = d; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + dest[i] = d; } } -static FASTCALL void -vmxCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKC(dest, src, mask) - + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = pix_add_mul (vdest, - pix_multiply (vmask, splat_alpha (vsrc)), - pix_multiply (vsrc, vmask), - negate (splat_alpha (vdest))); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; - mask+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_add_mul (vdest, + pix_multiply (vmask, splat_alpha (vsrc)), + pix_multiply (vsrc, vmask), + negate (splat_alpha (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = Alpha 
(s); - uint32_t da = Alpha (d); - - FbByteMulC (s, a); - FbByteMul (a, sa); - FbByteAddMulC (d, a, s, ~da); - dest[i] = d; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, ~da); + dest[i] = d; } } -static FASTCALL void -vmxCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKC(dest, src, mask) - + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = pix_add_mul (vdest, - negate (pix_multiply (vmask, splat_alpha (vsrc))), - pix_multiply (vsrc, vmask), - negate (splat_alpha (vdest))); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; - mask+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_add_mul (vdest, + negate (pix_multiply (vmask, splat_alpha (vsrc))), + pix_multiply (vsrc, vmask), + negate (splat_alpha (vdest))); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = Alpha (s); - uint32_t da = Alpha (d); - - FbByteMulC (s, a); - FbByteMul (a, sa); - FbByteAddMulC (d, ~a, s, ~da); - dest[i] = d; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, ~da); + dest[i] = d; } } -static FASTCALL void -vmxCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width) +static void +vmx_combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; - - COMPUTE_SHIFT_MASKC(dest, src, mask) - + dest_mask, mask_mask, src_mask, store_mask; + + COMPUTE_SHIFT_MASKC (dest, src, mask); + /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = pix_add (pix_multiply (vsrc, vmask), vdest); - - STORE_VECTOR(dest) - - src+=4; - dest+=4; - mask+=4; + for (i = width / 4; i > 0; i--) + { + + LOAD_VECTORSC (dest, src, mask); + + vdest = pix_add (pix_multiply (vsrc, vmask), vdest); + + STORE_VECTOR (dest); + + src += 4; + dest += 4; + mask += 4; } - - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - - FbByteMulC (s, a); - FbByteAdd (s, d); - dest[i] = s; + + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_ADD_UN8x4 (s, d); + dest[i] = s; } } - #if 0 void -fbCompositeSolid_nx8888vmx 
(pixman_operator_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height) +vmx_composite_over_n_8888 (pixman_operator_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height) { - uint32_t src; - uint32_t *dstLine, *dst; - int dstStride; - - fbComposeGetSolid (pSrc, pDst, src); - + uint32_t src; + uint32_t *dst_line, *dst; + int dst_stride; + + _pixman_image_get_solid (src_image, dst_image, src); + if (src >> 24 == 0) return; - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + while (height--) { - dst = dstLine; - dstLine += dstStride; - /* XXX vmxCombineOverU (dst, src, width); */ + dst = dst_line; + dst_line += dst_stride; + /* XXX vmx_combine_over_u (dst, src, width); */ } } void -fbCompositeSolid_nx0565vmx (pixman_operator_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height) +vmx_composite_over_n_0565 (pixman_operator_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height) { - uint32_t src; - uint16_t *dstLine, *dst; - uint16_t w; - int dstStride; - - fbComposeGetSolid (pSrc, pDst, src); - + uint32_t src; + uint16_t *dst_line, *dst; + uint16_t w; + int dst_stride; + + _pixman_image_get_solid (src_image, dst_image, src); + if (src >> 24 == 0) return; - - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + while (height--) { - dst = dstLine; - dstLine += dstStride; - vmxCombineOverU565(dst, src, width); + dst = dst_line; + dst_line += dst_stride; + vmx_combine_over_u565 (dst, src, width); } } +static const pixman_fast_path_t vmx_fast_path_array[] = +{ + { PIXMAN_OP_NONE }, +}; + +const pixman_fast_path_t *const vmx_fast_paths = vmx_fast_path_array; + #endif -void fbComposeSetupVMX (void) +pixman_implementation_t * +_pixman_implementation_create_vmx (void) { - /* check if we have VMX support and initialize accordingly */ - if (pixman_have_vmx ()) { - pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = vmxCombineOverU; - pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseU; - pixman_composeFunctions.combineU[PIXMAN_OP_IN] = vmxCombineInU; - pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseU; - pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = vmxCombineOutU; - pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseU; - pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = vmxCombineAtopU; - pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseU; - pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = vmxCombineXorU; - pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = vmxCombineAddU; - - pixman_composeFunctions.combineC[PIXMAN_OP_SRC] = vmxCombineSrcC; - 
pixman_composeFunctions.combineC[PIXMAN_OP_OVER] = vmxCombineOverC; - pixman_composeFunctions.combineC[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseC; - pixman_composeFunctions.combineC[PIXMAN_OP_IN] = vmxCombineInC; - pixman_composeFunctions.combineC[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseC; - pixman_composeFunctions.combineC[PIXMAN_OP_OUT] = vmxCombineOutC; - pixman_composeFunctions.combineC[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseC; - pixman_composeFunctions.combineC[PIXMAN_OP_ATOP] = vmxCombineAtopC; - pixman_composeFunctions.combineC[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseC; - pixman_composeFunctions.combineC[PIXMAN_OP_XOR] = vmxCombineXorC; - pixman_composeFunctions.combineC[PIXMAN_OP_ADD] = vmxCombineAddC; - - pixman_composeFunctions.combineMaskU = vmxCombineMaskU; - } + pixman_implementation_t *fast = _pixman_implementation_create_fast_path (); + pixman_implementation_t *imp = _pixman_implementation_create (fast); + + /* Set up function pointers */ + + /* SSE code patch for fbcompose.c */ + imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u; + imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u; + imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u; + imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u; + imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u; + imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u; + imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u; + imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u; + + imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u; + + imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca; + imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca; + imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca; + + return imp; } + |
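
The LOAD_VECTORS/STORE_VECTOR macros in the patch rely on AltiVec's alignment-ignoring loads: vec_ld masks off the low four address bits, so an unaligned 16-byte read is assembled from the two aligned vectors that straddle it plus a permute vector from vec_lvsl. A self-contained sketch of the load half, under the same assumption the macros make (load_unaligned is an illustrative name):

    #include <altivec.h>
    #include <stdint.h>

    static vector unsigned int
    load_unaligned (const uint32_t *p)
    {
        vector unsigned char shift = vec_lvsl (0, p); /* permute derived from p's misalignment */
        vector unsigned int  lo    = vec_ld (0, p);   /* aligned vector covering the start */
        vector unsigned int  hi    = vec_ld (15, p);  /* aligned vector covering the end   */

        return (vector unsigned int)
            vec_perm ((vector unsigned char)lo, (vector unsigned char)hi, shift);
    }

STORE_VECTOR runs the same idea in reverse with vec_lvsr, permuting the result against the edge bytes saved in tmp3/tmp4 so that the two aligned vec_st calls leave neighbouring memory untouched.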
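Each combiner also carries a scalar tail loop for the width % 4 leftover pixels; for OVER it computes dest = src + dest * (255 - src_alpha) / 255 per channel of a premultiplied ARGB word, saturating the add the way vec_adds does. A hedged, self-contained sketch (over_un8x4 is an illustrative name, not the UN8x4_MUL_UN8_ADD_UN8x4 macro itself):

    #include <stdint.h>

    static uint32_t
    over_un8x4 (uint32_t s, uint32_t d)
    {
        uint8_t  ia = (uint8_t)~(s >> 24); /* inverse source alpha */
        uint32_t r  = 0;
        int      shift;

        for (shift = 0; shift < 32; shift += 8)
        {
            uint16_t t = (uint16_t)((d >> shift) & 0xff) * ia + 0x80;
            uint32_t c = ((t + (t >> 8)) >> 8)    /* round(d * ia / 255) */
                         + ((s >> shift) & 0xff);

            if (c > 0xff)
                c = 0xff;                         /* saturate like vec_adds */

            r |= c << shift;
        }

        return r;
    }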