about summary refs log tree commit diff
path: root/pixman/pixman
diff options
context:
space:
mode:
author marha <marha@users.sourceforge.net> 2013-10-02 11:13:08 +0200
committer marha <marha@users.sourceforge.net> 2013-10-02 11:13:08 +0200
commit c3594c6c050a987fb891a1c7cf0c83102fbbec46 (patch)
tree 33b215c07359fff941f32fc89a8cf3e968d334a9 /pixman/pixman
parent 1881d1a5c46cc9efb394f84b69f58c000ee1118b (diff)
parent 8092f320c341a6b3a1b428fdd4473859d5db8b79 (diff)
download vcxsrv-c3594c6c050a987fb891a1c7cf0c83102fbbec46.tar.gz
vcxsrv-c3594c6c050a987fb891a1c7cf0c83102fbbec46.tar.bz2
vcxsrv-c3594c6c050a987fb891a1c7cf0c83102fbbec46.zip
Merge remote-tracking branch 'origin/released'
* origin/released:
  fontconfig mesa pixman xkeyboard-config git update 2 Okt 2013

Conflicts:
  mesalib/src/glsl/ast.h
  mesalib/src/glsl/glsl_parser_extras.h
Diffstat (limited to 'pixman/pixman')
-rw-r--r-- pixman/pixman/pixman-compiler.h 2
-rw-r--r-- pixman/pixman/pixman-vmx.c 539
2 files changed, 459 insertions, 82 deletions
diff --git a/pixman/pixman/pixman-compiler.h b/pixman/pixman/pixman-compiler.h
index 38e855519..938ebc21a 100644
--- a/pixman/pixman/pixman-compiler.h
+++ b/pixman/pixman/pixman-compiler.h
@@ -181,7 +181,7 @@
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
-#elif defined(HAVE_PTHREAD_SETSPECIFIC)
+#elif defined(HAVE_PTHREADS)
#include <pthread.h>
diff --git a/pixman/pixman/pixman-vmx.c b/pixman/pixman/pixman-vmx.c
index f629003ab..c33631c0e 100644
--- a/pixman/pixman/pixman-vmx.c
+++ b/pixman/pixman/pixman-vmx.c
@@ -134,15 +134,11 @@ over (vector unsigned int src,
source ## _mask = vec_lvsl (0, source);
#define COMPUTE_SHIFT_MASKS(dest, source) \
- dest ## _mask = vec_lvsl (0, dest); \
- source ## _mask = vec_lvsl (0, source); \
- store_mask = vec_lvsr (0, dest);
+ source ## _mask = vec_lvsl (0, source);
#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
mask ## _mask = vec_lvsl (0, mask); \
- dest ## _mask = vec_lvsl (0, dest); \
- source ## _mask = vec_lvsl (0, source); \
- store_mask = vec_lvsr (0, dest);
+ source ## _mask = vec_lvsl (0, source);
/* notice you have to declare temp vars...
* Note: tmp3 and tmp4 must remain untouched!
@@ -151,23 +147,17 @@ over (vector unsigned int src,
#define LOAD_VECTORS(dest, source) \
tmp1 = (typeof(tmp1))vec_ld (0, source); \
tmp2 = (typeof(tmp2))vec_ld (15, source); \
- tmp3 = (typeof(tmp3))vec_ld (0, dest); \
v ## source = (typeof(v ## source)) \
vec_perm (tmp1, tmp2, source ## _mask); \
- tmp4 = (typeof(tmp4))vec_ld (15, dest); \
- v ## dest = (typeof(v ## dest)) \
- vec_perm (tmp3, tmp4, dest ## _mask);
+ v ## dest = (typeof(v ## dest))vec_ld (0, dest);
#define LOAD_VECTORSC(dest, source, mask) \
tmp1 = (typeof(tmp1))vec_ld (0, source); \
tmp2 = (typeof(tmp2))vec_ld (15, source); \
- tmp3 = (typeof(tmp3))vec_ld (0, dest); \
v ## source = (typeof(v ## source)) \
vec_perm (tmp1, tmp2, source ## _mask); \
- tmp4 = (typeof(tmp4))vec_ld (15, dest); \
tmp1 = (typeof(tmp1))vec_ld (0, mask); \
- v ## dest = (typeof(v ## dest)) \
- vec_perm (tmp3, tmp4, dest ## _mask); \
+ v ## dest = (typeof(v ## dest))vec_ld (0, dest); \
tmp2 = (typeof(tmp2))vec_ld (15, mask); \
v ## mask = (typeof(v ## mask)) \
vec_perm (tmp1, tmp2, mask ## _mask);
@@ -178,11 +168,7 @@ over (vector unsigned int src,
splat_alpha (v ## mask));
#define STORE_VECTOR(dest) \
- edges = vec_perm (tmp4, tmp3, dest ## _mask); \
- tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
- tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
- vec_st ((vector unsigned int) tmp3, 15, dest); \
- vec_st ((vector unsigned int) tmp1, 0, dest);
+ vec_st ((vector unsigned int) v ## dest, 0, dest);
static void
vmx_combine_over_u_no_mask (uint32_t * dest,
@@ -191,8 +177,19 @@ vmx_combine_over_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -230,8 +227,23 @@ vmx_combine_over_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia;
+
+ UN8x4_MUL_UN8 (s, m);
+
+ ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -286,8 +298,18 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -324,8 +346,21 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8 (s, m);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -379,8 +414,17 @@ vmx_combine_in_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (*dest);
+
+ UN8x4_MUL_UN8 (s, a);
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -415,8 +459,20 @@ vmx_combine_in_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (*dest);
+
+ UN8x4_MUL_UN8 (s, m);
+ UN8x4_MUL_UN8 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -468,8 +524,18 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t d = *dest;
+ uint32_t a = ALPHA_8 (*src++);
+
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -505,8 +571,21 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t d = *dest;
+ uint32_t a = *src++;
+
+ UN8x4_MUL_UN8 (a, m);
+ a = ALPHA_8 (a);
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -559,8 +638,18 @@ vmx_combine_out_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (~(*dest));
+
+ UN8x4_MUL_UN8 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -596,8 +685,20 @@ vmx_combine_out_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (~(*dest));
+
+ UN8x4_MUL_UN8 (s, m);
+ UN8x4_MUL_UN8 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -649,8 +750,18 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t d = *dest;
+ uint32_t a = ALPHA_8 (~(*src++));
+
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -687,8 +798,21 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t d = *dest;
+ uint32_t a = *src++;
+
+ UN8x4_MUL_UN8 (a, m);
+ a = ALPHA_8 (~a);
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -741,8 +865,20 @@ vmx_combine_atop_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t dest_a = ALPHA_8 (d);
+ uint32_t src_ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -781,8 +917,25 @@ vmx_combine_atop_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t dest_a = ALPHA_8 (d);
+ uint32_t src_ia;
+
+ UN8x4_MUL_UN8 (s, m);
+
+ src_ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -840,8 +993,20 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_a = ALPHA_8 (s);
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -880,8 +1045,25 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_a;
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8 (s, m);
+
+ src_a = ALPHA_8 (s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -939,8 +1121,20 @@ vmx_combine_xor_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_ia = ALPHA_8 (~s);
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
@@ -979,8 +1173,25 @@ vmx_combine_xor_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_ia;
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8 (s, m);
+
+ src_ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1038,8 +1249,18 @@ vmx_combine_add_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+
+ UN8x4_ADD_UN8x4 (d, s);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1074,8 +1295,20 @@ vmx_combine_add_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+
+ UN8x4_MUL_UN8 (s, m);
+ UN8x4_ADD_UN8x4 (d, s);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1130,8 +1363,18 @@ vmx_combine_src_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+
+ UN8x4_MUL_UN8x4 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1170,8 +1413,22 @@ vmx_combine_over_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1214,8 +1471,21 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ida = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1257,8 +1527,20 @@ vmx_combine_in_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t da = ALPHA_8 (*dest);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (s, da);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1299,8 +1581,20 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (*src++);
+
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1342,8 +1636,21 @@ vmx_combine_out_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t da = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (s, da);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1386,8 +1693,21 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4 (d, ~a);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1430,8 +1750,23 @@ vmx_combine_atop_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask, vsrca;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+ uint32_t da = ALPHA_8 (d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1481,8 +1816,23 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+ uint32_t da = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1529,8 +1879,23 @@ vmx_combine_xor_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+ uint32_t da = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+
+ *dest++ = d;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);
@@ -1577,8 +1942,20 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
+
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_ADD_UN8x4 (s, d);
+
+ *dest++ = s;
+ width--;
+ }
COMPUTE_SHIFT_MASKC (dest, src, mask);