Diffstat (limited to 'pixman/pixman/pixman-vmx.c')
-rw-r--r--   pixman/pixman/pixman-vmx.c   539
1 file changed, 458 insertions, 81 deletions
diff --git a/pixman/pixman/pixman-vmx.c b/pixman/pixman/pixman-vmx.c
index f629003ab..c33631c0e 100644
--- a/pixman/pixman/pixman-vmx.c
+++ b/pixman/pixman/pixman-vmx.c
@@ -134,15 +134,11 @@ over (vector unsigned int src,
     source ## _mask = vec_lvsl (0, source);
 
 #define COMPUTE_SHIFT_MASKS(dest, source) \
-    dest ## _mask = vec_lvsl (0, dest); \
-    source ## _mask = vec_lvsl (0, source); \
-    store_mask = vec_lvsr (0, dest);
+    source ## _mask = vec_lvsl (0, source);
 
 #define COMPUTE_SHIFT_MASKC(dest, source, mask) \
     mask ## _mask = vec_lvsl (0, mask); \
-    dest ## _mask = vec_lvsl (0, dest); \
-    source ## _mask = vec_lvsl (0, source); \
-    store_mask = vec_lvsr (0, dest);
+    source ## _mask = vec_lvsl (0, source);
 
 /* notice you have to declare temp vars...
  * Note: tmp3 and tmp4 must remain untouched!
@@ -151,23 +147,17 @@ over (vector unsigned int src,
 #define LOAD_VECTORS(dest, source) \
     tmp1 = (typeof(tmp1))vec_ld (0, source); \
     tmp2 = (typeof(tmp2))vec_ld (15, source); \
-    tmp3 = (typeof(tmp3))vec_ld (0, dest); \
     v ## source = (typeof(v ## source)) \
         vec_perm (tmp1, tmp2, source ## _mask); \
-    tmp4 = (typeof(tmp4))vec_ld (15, dest); \
-    v ## dest = (typeof(v ## dest)) \
-        vec_perm (tmp3, tmp4, dest ## _mask);
+    v ## dest = (typeof(v ## dest))vec_ld (0, dest);
 
 #define LOAD_VECTORSC(dest, source, mask) \
     tmp1 = (typeof(tmp1))vec_ld (0, source); \
     tmp2 = (typeof(tmp2))vec_ld (15, source); \
-    tmp3 = (typeof(tmp3))vec_ld (0, dest); \
     v ## source = (typeof(v ## source)) \
         vec_perm (tmp1, tmp2, source ## _mask); \
-    tmp4 = (typeof(tmp4))vec_ld (15, dest); \
     tmp1 = (typeof(tmp1))vec_ld (0, mask); \
-    v ## dest = (typeof(v ## dest)) \
-        vec_perm (tmp3, tmp4, dest ## _mask); \
+    v ## dest = (typeof(v ## dest))vec_ld (0, dest); \
     tmp2 = (typeof(tmp2))vec_ld (15, mask); \
     v ## mask = (typeof(v ## mask)) \
         vec_perm (tmp1, tmp2, mask ## _mask);
@@ -178,11 +168,7 @@ over (vector unsigned int src,
                                 splat_alpha (v ## mask));
 
 #define STORE_VECTOR(dest) \
-    edges = vec_perm (tmp4, tmp3, dest ## _mask); \
-    tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
-    tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
-    vec_st ((vector unsigned int) tmp3, 15, dest); \
-    vec_st ((vector unsigned int) tmp1, 0, dest);
+    vec_st ((vector unsigned int) v ## dest, 0, dest);
 
 static void
 vmx_combine_over_u_no_mask (uint32_t * dest,
@@ -191,8 +177,19 @@ vmx_combine_over_u_no_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t ia = ALPHA_8 (~s);
+
+        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKS (dest, src);
 
@@ -230,8 +227,23 @@ vmx_combine_over_u_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t m = ALPHA_8 (*mask++);
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t ia;
+
+        UN8x4_MUL_UN8 (s, m);
+
+        ia = ALPHA_8 (~s);
+
+        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -286,8 +298,18 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t ia = ALPHA_8 (~d);
+
+        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKS (dest, src);
 
@@ -324,8 +346,21 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t m = ALPHA_8 (*mask++);
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t ia = ALPHA_8 (~d);
+
+        UN8x4_MUL_UN8 (s, m);
+
+        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -379,8 +414,17 @@ vmx_combine_in_u_no_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t s = *src++;
+        uint32_t a = ALPHA_8 (*dest);
+
+        UN8x4_MUL_UN8 (s, a);
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKS (dest, src);
 
@@ -415,8 +459,20 @@ vmx_combine_in_u_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t m = ALPHA_8 (*mask++);
+        uint32_t s = *src++;
+        uint32_t a = ALPHA_8 (*dest);
+
+        UN8x4_MUL_UN8 (s, m);
+        UN8x4_MUL_UN8 (s, a);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -468,8 +524,18 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t d = *dest;
+        uint32_t a = ALPHA_8 (*src++);
+
+        UN8x4_MUL_UN8 (d, a);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKS (dest, src);
 
@@ -505,8 +571,21 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t m = ALPHA_8 (*mask++);
+        uint32_t d = *dest;
+        uint32_t a = *src++;
+
+        UN8x4_MUL_UN8 (a, m);
+        a = ALPHA_8 (a);
+        UN8x4_MUL_UN8 (d, a);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -559,8 +638,18 @@ vmx_combine_out_u_no_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t s = *src++;
+        uint32_t a = ALPHA_8 (~(*dest));
+
+        UN8x4_MUL_UN8 (s, a);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKS (dest, src);
 
@@ -596,8 +685,20 @@ vmx_combine_out_u_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t m = ALPHA_8 (*mask++);
+        uint32_t s = *src++;
+        uint32_t a = ALPHA_8 (~(*dest));
+
+        UN8x4_MUL_UN8 (s, m);
+        UN8x4_MUL_UN8 (s, a);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -649,8 +750,18 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t d = *dest;
+        uint32_t a = ALPHA_8 (~(*src++));
+
+        UN8x4_MUL_UN8 (d, a);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKS (dest, src);
 
@@ -687,8 +798,21 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t m = ALPHA_8 (*mask++);
+        uint32_t d = *dest;
+        uint32_t a = *src++;
+
+        UN8x4_MUL_UN8 (a, m);
+        a = ALPHA_8 (~a);
+        UN8x4_MUL_UN8 (d, a);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -741,8 +865,20 @@ vmx_combine_atop_u_no_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t dest_a = ALPHA_8 (d);
+        uint32_t src_ia = ALPHA_8 (~s);
+
+        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKS (dest, src);
 
@@ -781,8 +917,25 @@ vmx_combine_atop_u_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t m = ALPHA_8 (*mask++);
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t dest_a = ALPHA_8 (d);
+        uint32_t src_ia;
+
+        UN8x4_MUL_UN8 (s, m);
+
+        src_ia = ALPHA_8 (~s);
+
+        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -840,8 +993,20 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t src_a = ALPHA_8 (s);
+        uint32_t dest_ia = ALPHA_8 (~d);
+
+        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKS (dest, src);
 
@@ -880,8 +1045,25 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t m = ALPHA_8 (*mask++);
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t src_a;
+        uint32_t dest_ia = ALPHA_8 (~d);
+
+        UN8x4_MUL_UN8 (s, m);
+
+        src_a = ALPHA_8 (s);
+
+        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -939,8 +1121,20 @@ vmx_combine_xor_u_no_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t src_ia = ALPHA_8 (~s);
+        uint32_t dest_ia = ALPHA_8 (~d);
+
+        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKS (dest, src);
 
@@ -979,8 +1173,25 @@ vmx_combine_xor_u_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t m = ALPHA_8 (*mask++);
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t src_ia;
+        uint32_t dest_ia = ALPHA_8 (~d);
+
+        UN8x4_MUL_UN8 (s, m);
+
+        src_ia = ALPHA_8 (~s);
+
+        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1038,8 +1249,18 @@ vmx_combine_add_u_no_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+
+        UN8x4_ADD_UN8x4 (d, s);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKS (dest, src);
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1074,8 +1295,20 @@ vmx_combine_add_u_mask (uint32_t * dest,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, src_mask, mask_mask, store_mask;
+    vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t m = ALPHA_8 (*mask++);
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+
+        UN8x4_MUL_UN8 (s, m);
+        UN8x4_ADD_UN8x4 (d, s);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1130,8 +1363,18 @@ vmx_combine_src_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t s = *src++;
+
+        UN8x4_MUL_UN8x4 (s, a);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1170,8 +1413,22 @@ vmx_combine_over_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t sa = ALPHA_8 (s);
+
+        UN8x4_MUL_UN8x4 (s, a);
+        UN8x4_MUL_UN8 (a, sa);
+        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1214,8 +1471,21 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t ida = ALPHA_8 (~d);
+
+        UN8x4_MUL_UN8x4 (s, a);
+        UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1257,8 +1527,20 @@ vmx_combine_in_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t s = *src++;
+        uint32_t da = ALPHA_8 (*dest);
+
+        UN8x4_MUL_UN8x4 (s, a);
+        UN8x4_MUL_UN8 (s, da);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1299,8 +1581,20 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t d = *dest;
+        uint32_t sa = ALPHA_8 (*src++);
+
+        UN8x4_MUL_UN8 (a, sa);
+        UN8x4_MUL_UN8x4 (d, a);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1342,8 +1636,21 @@ vmx_combine_out_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t da = ALPHA_8 (~d);
+
+        UN8x4_MUL_UN8x4 (s, a);
+        UN8x4_MUL_UN8 (s, da);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1386,8 +1693,21 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t sa = ALPHA_8 (s);
+
+        UN8x4_MUL_UN8 (a, sa);
+        UN8x4_MUL_UN8x4 (d, ~a);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1430,8 +1750,23 @@ vmx_combine_atop_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask, vsrca;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t sa = ALPHA_8 (s);
+        uint32_t da = ALPHA_8 (d);
+
+        UN8x4_MUL_UN8x4 (s, a);
+        UN8x4_MUL_UN8 (a, sa);
+        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1481,8 +1816,23 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t sa = ALPHA_8 (s);
+        uint32_t da = ALPHA_8 (~d);
+
+        UN8x4_MUL_UN8x4 (s, a);
+        UN8x4_MUL_UN8 (a, sa);
+        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1529,8 +1879,23 @@ vmx_combine_xor_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+        uint32_t sa = ALPHA_8 (s);
+        uint32_t da = ALPHA_8 (~d);
+
+        UN8x4_MUL_UN8x4 (s, a);
+        UN8x4_MUL_UN8 (a, sa);
+        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+
+        *dest++ = d;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
 
@@ -1577,8 +1942,20 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
 {
     int i;
     vector unsigned int vdest, vsrc, vmask;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-        dest_mask, mask_mask, src_mask, store_mask;
+    vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+    while (width && ((uintptr_t)dest & 15))
+    {
+        uint32_t a = *mask++;
+        uint32_t s = *src++;
+        uint32_t d = *dest;
+
+        UN8x4_MUL_UN8x4 (s, a);
+        UN8x4_ADD_UN8x4 (s, d);
+
+        *dest++ = s;
+        width--;
+    }
 
     COMPUTE_SHIFT_MASKC (dest, src, mask);
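
Every hunk above applies the same restructuring: a scalar loop first processes pixels one at a time until the destination pointer reaches a 16-byte boundary, and only then does the vector loop run, so the destination can be read and written with plain aligned vec_ld / vec_st. AltiVec loads and stores ignore the low four bits of the address, which is why the old STORE_VECTOR had to reassemble and rewrite both 16-byte blocks straddled by an unaligned destination. The sketch below illustrates the pattern on an ADD-style combiner; it is a simplified standalone example under these assumptions, not code from this patch, and the names combine_add_sketch and un8x4_add are invented for illustration.

#include <altivec.h>
#include <stdint.h>

/* Per-channel saturating add of two packed 8:8:8:8 pixels (scalar path). */
static uint32_t
un8x4_add (uint32_t d, uint32_t s)
{
    uint32_t r = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t c = ((d >> shift) & 0xff) + ((s >> shift) & 0xff);

        if (c > 0xff)
            c = 0xff;
        r |= c << shift;
    }
    return r;
}

static void
combine_add_sketch (uint32_t *dest, const uint32_t *src, int width)
{
    vector unsigned char src_mask;
    int i;

    /* Scalar prologue: consume pixels until dest is 16-byte aligned. */
    while (width && ((uintptr_t)dest & 15))
    {
        *dest = un8x4_add (*dest, *src++);
        dest++;
        width--;
    }

    /* Permute mask for the (possibly still unaligned) source. */
    src_mask = vec_lvsl (0, src);

    /* Vector loop: 4 pixels per iteration; dest accesses are aligned. */
    for (i = width / 4; i > 0; i--)
    {
        vector unsigned int tmp1 = vec_ld (0, src);
        vector unsigned int tmp2 = vec_ld (15, src);
        vector unsigned int vsrc = vec_perm (tmp1, tmp2, src_mask);
        vector unsigned int vdest = vec_ld (0, dest);

        /* Per-byte saturating add of the four pixels. */
        vdest = (vector unsigned int)
            vec_adds ((vector unsigned char) vdest,
                      (vector unsigned char) vsrc);
        vec_st (vdest, 0, dest);

        src += 4;
        dest += 4;
    }

    /* Scalar epilogue for the remaining 0-3 pixels. */
    for (i = width % 4; i > 0; i--)
    {
        *dest = un8x4_add (*dest, *src++);
        dest++;
    }
}

The functions in the patch follow this same shape, but keep pixman's UN8x4_* macros for the scalar pixels and the LOAD_VECTORS / STORE_VECTOR macros (simplified above) for the vector loop.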