Diffstat (limited to 'pixman')
-rw-r--r--               pixman/configure.ac                    |  13
-rw-r--r--               pixman/pixman/pixman-arm-neon-asm.h    |   2
-rw-r--r--               pixman/pixman/pixman-fast-path.c       |  99
-rw-r--r--               pixman/pixman/pixman-filter.c          |   2
-rw-r--r--               pixman/pixman/pixman-general.c         |  92
-rw-r--r--               pixman/pixman/pixman-image.c           |  10
-rw-r--r--               pixman/pixman/pixman-implementation.c  |  89
-rw-r--r--               pixman/pixman/pixman-mips-dspr2-asm.S  | 723
-rw-r--r--               pixman/pixman/pixman-mips-dspr2-asm.h  |  51
-rw-r--r--               pixman/pixman/pixman-mips-dspr2.c      |  31
-rw-r--r--               pixman/pixman/pixman-mips-dspr2.h      |  42
-rw-r--r--               pixman/pixman/pixman-mmx.c             |  82
-rw-r--r--               pixman/pixman/pixman-noop.c            | 183
-rw-r--r--               pixman/pixman/pixman-private.h         |  70
-rw-r--r--               pixman/pixman/pixman-region.c          |   6
-rwxr-xr-x [-rw-r--r--]  pixman/pixman/pixman-sse2.c            |  93
-rw-r--r--               pixman/pixman/pixman-utils.c           |  11
-rw-r--r--               pixman/pixman/pixman-vmx.c             |   2
-rw-r--r--               pixman/pixman/pixman.c                 |   2
-rw-r--r--               pixman/pixman/refactor                 | 478
-rw-r--r--               pixman/test/blitters-test.c            |  13
-rw-r--r--               pixman/test/lowlevel-blt-bench.c       |  13
-rw-r--r--               pixman/test/prng-test.c                |   5
-rw-r--r--               pixman/test/utils-prng.c               |  58
-rw-r--r--               pixman/test/utils-prng.h               |   5
25 files changed, 1063 insertions, 1112 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 38f89b31e..221179ff1 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -53,8 +53,8 @@ AC_PREREQ([2.57])
#
m4_define([pixman_major], 0)
-m4_define([pixman_minor], 29)
-m4_define([pixman_micro], 3)
+m4_define([pixman_minor], 31)
+m4_define([pixman_micro], 1)
m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
@@ -279,7 +279,7 @@ AC_MSG_CHECKING(whether to use Loongson MMI assembler)
xserver_save_CFLAGS=$CFLAGS
CFLAGS=" $LS_CFLAGS $CFLAGS -I$srcdir"
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
#ifndef __mips_loongson_vector_rev
#error "Loongson Multimedia Instructions are only available on Loongson"
#endif
@@ -845,6 +845,13 @@ if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then
fi
dnl =====================================
+dnl Check for missing sqrtf(), as on, e.g., Solaris 9
+
+AC_SEARCH_LIBS([sqrtf], [m], [],
+ [AC_DEFINE([sqrtf], [sqrt],
+ [Define to sqrt if you do not have the `sqrtf' function.])])
+
+dnl =====================================
dnl Thread local storage
AC_MSG_CHECKING(for thread local storage (TLS) support)
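The sqrtf check added above falls back to AC_DEFINE([sqrtf], [sqrt]) when no sqrtf() is found even in libm, so callers silently get the double-precision function instead. A minimal sketch of the effect on client code (illustration only, not part of the patch; the config.h contents are the assumption here):

/* On platforms without sqrtf() (e.g. Solaris 9), config.h will contain
 * "#define sqrtf sqrt", so the call below compiles to the double-precision
 * sqrt() and the result is converted back to float. */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <math.h>

static float
vector_length (float x, float y)
{
    return sqrtf (x * x + y * y);
}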
diff --git a/pixman/pixman/pixman-arm-neon-asm.h b/pixman/pixman/pixman-arm-neon-asm.h
index 1673b080f..d0d92d74c 100644
--- a/pixman/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman/pixman-arm-neon-asm.h
@@ -385,7 +385,7 @@
* execute simultaneously with NEON and be completely shadowed by it. Thus
* we get no performance overhead at all (*). This looks like a very nice
* feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
- * but still can implement some rather advanced prefetch logic in sofware
+ * but still can implement some rather advanced prefetch logic in software
* for almost zero cost!
*
* (*) The overhead of the prefetcher is visible when running some trivial
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c
index 247aea645..3982dce8b 100644
--- a/pixman/pixman/pixman-fast-path.c
+++ b/pixman/pixman/pixman-fast-path.c
@@ -2261,89 +2261,27 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter)
}
}
-typedef struct
-{
- pixman_format_code_t format;
- pixman_iter_get_scanline_t get_scanline;
- pixman_iter_write_back_t write_back;
-} fetcher_info_t;
-
-static const fetcher_info_t fetchers[] =
-{
- { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
- { PIXMAN_null }
-};
-
-static pixman_bool_t
-fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
-{
- pixman_image_t *image = iter->image;
-
-#define FLAGS \
+#define IMAGE_FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
- if ((iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FLAGS) == FLAGS)
- {
- const fetcher_info_t *f;
-
- for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
- {
- if (image->common.extended_format_code == f->format)
- {
- uint8_t *b = (uint8_t *)image->bits.bits;
- int s = image->bits.rowstride * 4;
-
- iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
- iter->stride = s;
-
- iter->get_scanline = f->get_scanline;
- return TRUE;
- }
- }
- }
-
- return FALSE;
-}
-
-static pixman_bool_t
-fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static const pixman_iter_info_t fast_iters[] =
{
- pixman_image_t *image = iter->image;
-
- if ((iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
- {
- const fetcher_info_t *f;
-
- for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
- {
- if (image->common.extended_format_code == f->format)
- {
- uint8_t *b = (uint8_t *)image->bits.bits;
- int s = image->bits.rowstride * 4;
-
- iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
- iter->stride = s;
-
- if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
- (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
- {
- iter->get_scanline = fast_dest_fetch_noop;
- }
- else
- {
- iter->get_scanline = f->get_scanline;
- }
- iter->write_back = f->write_back;
- return TRUE;
- }
- }
- }
- return FALSE;
-}
-
+ { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC,
+ _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL },
+
+ { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
+ ITER_NARROW | ITER_DEST,
+ _pixman_iter_init_bits_stride,
+ fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
+
+ { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
+ ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA,
+ _pixman_iter_init_bits_stride,
+ fast_dest_fetch_noop, fast_write_back_r5g6b5 },
+
+ { PIXMAN_null },
+};
pixman_implementation_t *
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
@@ -2351,8 +2289,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
imp->fill = fast_path_fill;
- imp->src_iter_init = fast_src_iter_init;
- imp->dest_iter_init = fast_dest_iter_init;
+ imp->iter_info = fast_iters;
return imp;
}
diff --git a/pixman/pixman/pixman-filter.c b/pixman/pixman/pixman-filter.c
index 26b39d571..5ff7b6eaa 100644
--- a/pixman/pixman/pixman-filter.c
+++ b/pixman/pixman/pixman-filter.c
@@ -28,7 +28,9 @@
#include <stdio.h>
#include <math.h>
#include <assert.h>
+#ifdef HAVE_CONFIG_H
#include <config.h>
+#endif
#include "pixman-private.h"
typedef double (* kernel_func_t) (double x);
diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c
index 93a1b9acf..4da5da5e2 100644
--- a/pixman/pixman/pixman-general.c
+++ b/pixman/pixman/pixman-general.c
@@ -37,43 +37,47 @@
#include <string.h>
#include "pixman-private.h"
-static pixman_bool_t
-general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static void
+general_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *info)
{
pixman_image_t *image = iter->image;
- if (image->type == LINEAR)
- _pixman_linear_gradient_iter_init (image, iter);
- else if (image->type == RADIAL)
+ switch (image->type)
+ {
+ case BITS:
+ if ((iter->iter_flags & ITER_SRC) == ITER_SRC)
+ _pixman_bits_image_src_iter_init (image, iter);
+ else
+ _pixman_bits_image_dest_iter_init (image, iter);
+ break;
+
+ case LINEAR:
+ _pixman_linear_gradient_iter_init (image, iter);
+ break;
+
+ case RADIAL:
_pixman_radial_gradient_iter_init (image, iter);
- else if (image->type == CONICAL)
+ break;
+
+ case CONICAL:
_pixman_conical_gradient_iter_init (image, iter);
- else if (image->type == BITS)
- _pixman_bits_image_src_iter_init (image, iter);
- else if (image->type == SOLID)
+ break;
+
+ case SOLID:
_pixman_log_error (FUNC, "Solid image not handled by noop");
- else
- _pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
+ break;
- return TRUE;
+ default:
+ _pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
+ break;
+ }
}
-static pixman_bool_t
-general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static const pixman_iter_info_t general_iters[] =
{
- if (iter->image->type == BITS)
- {
- _pixman_bits_image_dest_iter_init (iter->image, iter);
-
- return TRUE;
- }
- else
- {
- _pixman_log_error (FUNC, "Trying to write to a non-writable image");
-
- return FALSE;
- }
-}
+ { PIXMAN_any, 0, 0, general_iter_init, NULL, NULL },
+ { PIXMAN_null },
+};
typedef struct op_info_t op_info_t;
struct op_info_t
@@ -116,7 +120,7 @@ general_composite_rect (pixman_implementation_t *imp,
pixman_iter_t src_iter, mask_iter, dest_iter;
pixman_combine_32_func_t compose;
pixman_bool_t component_alpha;
- iter_flags_t narrow, src_iter_flags;
+ iter_flags_t width_flag, src_iter_flags;
int Bpp;
int i;
@@ -124,12 +128,12 @@ general_composite_rect (pixman_implementation_t *imp,
(!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
(dest_image->common.flags & FAST_PATH_NARROW_FORMAT))
{
- narrow = ITER_NARROW;
+ width_flag = ITER_NARROW;
Bpp = 4;
}
else
{
- narrow = 0;
+ width_flag = ITER_WIDE;
Bpp = 16;
}
@@ -145,7 +149,7 @@ general_composite_rect (pixman_implementation_t *imp,
mask_buffer = src_buffer + width * Bpp;
dest_buffer = mask_buffer + width * Bpp;
- if (!narrow)
+ if (width_flag == ITER_WIDE)
{
/* To make sure there aren't any NANs in the buffers */
memset (src_buffer, 0, width * Bpp);
@@ -154,11 +158,12 @@ general_composite_rect (pixman_implementation_t *imp,
}
/* src iter */
- src_iter_flags = narrow | op_flags[op].src;
+ src_iter_flags = width_flag | op_flags[op].src | ITER_SRC;
- _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image,
- src_x, src_y, width, height,
- src_buffer, src_iter_flags, info->src_flags);
+ _pixman_implementation_iter_init (imp->toplevel, &src_iter, src_image,
+ src_x, src_y, width, height,
+ src_buffer, src_iter_flags,
+ info->src_flags);
/* mask iter */
if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
@@ -176,17 +181,19 @@ general_composite_rect (pixman_implementation_t *imp,
mask_image->common.component_alpha &&
PIXMAN_FORMAT_RGB (mask_image->bits.format);
- _pixman_implementation_src_iter_init (
- imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height,
- mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB), info->mask_flags);
+ _pixman_implementation_iter_init (
+ imp->toplevel, &mask_iter,
+ mask_image, mask_x, mask_y, width, height, mask_buffer,
+ ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB),
+ info->mask_flags);
/* dest iter */
- _pixman_implementation_dest_iter_init (
+ _pixman_implementation_iter_init (
imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height,
- dest_buffer, narrow | op_flags[op].dst, info->dest_flags);
+ dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info->dest_flags);
compose = _pixman_implementation_lookup_combiner (
- imp->toplevel, op, component_alpha, narrow);
+ imp->toplevel, op, component_alpha, width_flag != ITER_WIDE);
for (i = 0; i < height; ++i)
{
@@ -219,8 +226,7 @@ _pixman_implementation_create_general (void)
_pixman_setup_combiner_functions_32 (imp);
_pixman_setup_combiner_functions_float (imp);
- imp->src_iter_init = general_src_iter_init;
- imp->dest_iter_init = general_dest_iter_init;
+ imp->iter_info = general_iters;
return imp;
}
diff --git a/pixman/pixman/pixman-image.c b/pixman/pixman/pixman-image.c
index 65041b43b..4f9c2f966 100644
--- a/pixman/pixman/pixman-image.c
+++ b/pixman/pixman/pixman-image.c
@@ -502,8 +502,10 @@ compute_image_info (pixman_image_t *image)
break;
}
- /* Alpha map */
- if (!image->common.alpha_map)
+ /* Alpha maps are only supported for BITS images, so it's always
+ * safe to ignore their presence for non-BITS images
+ */
+ if (!image->common.alpha_map || image->type != BITS)
{
flags |= FAST_PATH_NO_ALPHA_MAP;
}
@@ -918,10 +920,10 @@ _pixman_image_get_solid (pixman_implementation_t *imp,
pixman_iter_t iter;
otherwise:
- _pixman_implementation_src_iter_init (
+ _pixman_implementation_iter_init (
imp, &iter, image, 0, 0, 1, 1,
(uint8_t *)&result,
- ITER_NARROW, image->common.flags);
+ ITER_NARROW | ITER_SRC, image->common.flags);
result = *iter.get_scanline (&iter, NULL);
}
diff --git a/pixman/pixman/pixman-implementation.c b/pixman/pixman/pixman-implementation.c
index cfb82bb1f..160847ad0 100644
--- a/pixman/pixman/pixman-implementation.c
+++ b/pixman/pixman/pixman-implementation.c
@@ -285,18 +285,26 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
return FALSE;
}
-pixman_bool_t
-_pixman_implementation_src_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x,
- int y,
- int width,
- int height,
- uint8_t *buffer,
- iter_flags_t iter_flags,
- uint32_t image_flags)
+static uint32_t *
+get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
{
+ return NULL;
+}
+
+void
+_pixman_implementation_iter_init (pixman_implementation_t *imp,
+ pixman_iter_t *iter,
+ pixman_image_t *image,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint8_t *buffer,
+ iter_flags_t iter_flags,
+ uint32_t image_flags)
+{
+ pixman_format_code_t format;
+
iter->image = image;
iter->buffer = (uint32_t *)buffer;
iter->x = x;
@@ -306,47 +314,38 @@ _pixman_implementation_src_iter_init (pixman_implementation_t *imp,
iter->iter_flags = iter_flags;
iter->image_flags = image_flags;
- while (imp)
+ if (!iter->image)
{
- if (imp->src_iter_init && (*imp->src_iter_init) (imp, iter))
- return TRUE;
-
- imp = imp->fallback;
+ iter->get_scanline = get_scanline_null;
+ return;
}
- return FALSE;
-}
-
-pixman_bool_t
-_pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x,
- int y,
- int width,
- int height,
- uint8_t *buffer,
- iter_flags_t iter_flags,
- uint32_t image_flags)
-{
- iter->image = image;
- iter->buffer = (uint32_t *)buffer;
- iter->x = x;
- iter->y = y;
- iter->width = width;
- iter->height = height;
- iter->iter_flags = iter_flags;
- iter->image_flags = image_flags;
+ format = iter->image->common.extended_format_code;
while (imp)
{
- if (imp->dest_iter_init && (*imp->dest_iter_init) (imp, iter))
- return TRUE;
-
- imp = imp->fallback;
+ if (imp->iter_info)
+ {
+ const pixman_iter_info_t *info;
+
+ for (info = imp->iter_info; info->format != PIXMAN_null; ++info)
+ {
+ if ((info->format == PIXMAN_any || info->format == format) &&
+ (info->image_flags & image_flags) == info->image_flags &&
+ (info->iter_flags & iter_flags) == info->iter_flags)
+ {
+ iter->get_scanline = info->get_scanline;
+ iter->write_back = info->write_back;
+
+ if (info->initializer)
+ info->initializer (iter, info);
+ return;
+ }
+ }
+ }
+
+ imp = imp->fallback;
}
-
- return FALSE;
}
pixman_bool_t
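The loop above replaces the per-implementation src/dest iterator hooks with a table lookup: walking down the fallback chain, the first pixman_iter_info_t entry whose format matches and whose required image and iter flags are all present in the request wins. A stand-alone sketch of that subset test (the flag values are made up for illustration; the real bit assignments live in pixman-private.h):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical flag bits, for illustration only. */
#define ITER_NARROW (1u << 0)
#define ITER_SRC    (1u << 5)
#define ITER_DEST   (1u << 6)

/* An iter_info entry matches when every flag it requires is present in the
 * request; extra requested flags do not disqualify it. */
static int
entry_matches (uint32_t required, uint32_t requested)
{
    return (required & requested) == required;
}

int
main (void)
{
    uint32_t request = ITER_NARROW | ITER_DEST;

    printf ("%d\n", entry_matches (ITER_NARROW | ITER_DEST, request)); /* 1 */
    printf ("%d\n", entry_matches (ITER_NARROW | ITER_SRC,  request)); /* 0 */
    return 0;
}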
diff --git a/pixman/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman/pixman-mips-dspr2-asm.S
index 3adbb2afe..866e93e58 100644
--- a/pixman/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman/pixman-mips-dspr2-asm.S
@@ -699,6 +699,127 @@ LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
END(pixman_composite_src_0888_0565_rev_asm_mips)
#endif
+LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
+/*
+ * a0 - dst (a8b8g8r8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ li v0, 0x00ff00ff
+
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ addiu a1, a1, 8
+ addiu a2, a2, -2
+ srl t2, t0, 24
+ srl t3, t1, 24
+
+ MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
+
+ sll t0, t0, 8
+ sll t1, t1, 8
+ andi t2, t2, 0xff
+ andi t3, t3, 0xff
+ or t0, t0, t2
+ or t1, t1, t3
+ wsbh t0, t0
+ wsbh t1, t1
+ rotr t0, t0, 16
+ rotr t1, t1, 16
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+
+ addiu t2, a2, -1
+ bgtz t2, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ lw t0, 0(a1)
+ srl t1, t0, 24
+
+ MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
+
+ sll t0, t0, 8
+ andi t1, t1, 0xff
+ or t0, t0, t1
+ wsbh t0, t0
+ rotr t0, t0, 16
+ sw t0, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_composite_src_pixbuf_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ li v0, 0x00ff00ff
+
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ addiu a1, a1, 8
+ addiu a2, a2, -2
+ srl t2, t0, 24
+ srl t3, t1, 24
+
+ MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
+
+ sll t0, t0, 8
+ sll t1, t1, 8
+ andi t2, t2, 0xff
+ andi t3, t3, 0xff
+ or t0, t0, t2
+ or t1, t1, t3
+ rotr t0, t0, 8
+ rotr t1, t1, 8
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+
+ addiu t2, a2, -1
+ bgtz t2, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ lw t0, 0(a1)
+ srl t1, t0, 24
+
+ MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
+
+ sll t0, t0, 8
+ andi t1, t1, 0xff
+ or t0, t0, t1
+ rotr t0, t0, 8
+ sw t0, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_composite_src_rpixbuf_8888_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
/*
* a0 - dst (a8r8g8b8)
@@ -840,34 +961,35 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
* a3 - w
*/
- SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
- beqz a3, 4f
+ beqz a3, 8f
nop
+ SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
+
li t6, 0xff
addiu t7, zero, -1 /* t7 = 0xffffffff */
srl t8, a1, 24 /* t8 = srca */
li t9, 0x00ff00ff
+
addiu t1, a3, -1
- beqz t1, 3f /* last pixel */
+ beqz t1, 4f /* last pixel */
nop
- beq t8, t6, 2f /* if (srca == 0xff) */
- nop
-1:
- /* a1 = src */
+
+0:
lw t0, 0(a2) /* t0 = mask */
lw t1, 4(a2) /* t1 = mask */
+ addiu a3, a3, -2 /* w = w - 2 */
or t2, t0, t1
- beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */
+ beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */
addiu a2, a2, 8
- and t3, t0, t1
- move t4, a1 /* t4 = src */
- move t5, a1 /* t5 = src */
+ and t2, t0, t1
+ beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
+ nop
+
+//if(ma)
lw t2, 0(a0) /* t2 = dst */
- beq t3, t7, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
- lw t3, 4(a0) /* t3 = dst */
+ lw t3, 4(a0) /* t3 = dst */
MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
-11:
not t0, t0
not t1, t1
MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
@@ -875,62 +997,79 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
addu_s.qb t3, t5, t3
sw t2, 0(a0)
sw t3, 4(a0)
-12:
- addiu a3, a3, -2
addiu t1, a3, -1
- bgtz t1, 1b
+ bgtz t1, 0b
addiu a0, a0, 8
- b 3f
+ b 4f
+ nop
+1:
+//if (t0 == 0xffffffff) && (t1 == 0xffffffff):
+ beq t8, t6, 2f /* if (srca == 0xff) */
nop
-2:
- /* a1 = src */
- lw t0, 0(a2) /* t0 = mask */
- lw t1, 4(a2) /* t1 = mask */
- or t2, t0, t1
- beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */
- addiu a2, a2, 8
- and t2, t0, t1
- move t4, a1
- beq t2, t7, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
- move t5, a1
lw t2, 0(a0) /* t2 = dst */
lw t3, 4(a0) /* t3 = dst */
- MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
- not t0, t0
- not t1, t1
- MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
- addu_s.qb t4, t4, t2
- addu_s.qb t5, t5, t3
-21:
- sw t4, 0(a0)
- sw t5, 4(a0)
-22:
- addiu a3, a3, -2
+ not t0, a1
+ not t1, a1
+ srl t0, t0, 24
+ srl t1, t1, 24
+ MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+ addu_s.qb t2, a1, t2
+ addu_s.qb t3, a1, t3
+ sw t2, 0(a0)
+ sw t3, 4(a0)
addiu t1, a3, -1
- bgtz t1, 2b
+ bgtz t1, 0b
addiu a0, a0, 8
+ b 4f
+ nop
+2:
+ sw a1, 0(a0)
+ sw a1, 4(a0)
3:
- blez a3, 4f
+ addiu t1, a3, -1
+ bgtz t1, 0b
+ addiu a0, a0, 8
+
+4:
+ beqz a3, 7f
nop
/* a1 = src */
- lw t1, 0(a2) /* t1 = mask */
- beqz t1, 4f
+ lw t0, 0(a2) /* t0 = mask */
+ beqz t0, 7f /* if (t0 == 0) */
nop
- move t2, a1 /* t2 = src */
- beq t1, t7, 31f
- lw t0, 0(a0) /* t0 = dst */
-
- MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6
- MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5
-31:
- not t1, t1
- MIPS_UN8x4_MUL_UN8x4 t0, t1, t0, t9, t3, t4, t5, t6
- addu_s.qb t0, t2, t0
- sw t0, 0(a0)
-4:
+ beq t0, t7, 5f /* if (t0 == 0xffffffff) */
+ nop
+//if(ma)
+ lw t1, 0(a0) /* t1 = dst */
+ MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0
+ MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5
+ not t0, t0
+ MIPS_UN8x4_MUL_UN8x4 t1, t0, t1, t9, t3, t4, t5, s0
+ addu_s.qb t1, t2, t1
+ sw t1, 0(a0)
RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
j ra
nop
+5:
+//if (t0 == 0xffffffff)
+ beq t8, t6, 6f /* if (srca == 0xff) */
+ nop
+ lw t1, 0(a0) /* t1 = dst */
+ not t0, a1
+ srl t0, t0, 24
+ MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4
+ addu_s.qb t1, a1, t1
+ sw t1, 0(a0)
+ RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+ j ra
+ nop
+6:
+ sw a1, 0(a0)
+7:
+ RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+8:
+ j ra
+ nop
END(pixman_composite_over_n_8888_8888_ca_asm_mips)
@@ -942,106 +1081,126 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
* a3 - w
*/
- SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
- beqz a3, 4f
+ beqz a3, 8f
nop
- li t5, 0xf800f800
- li t6, 0x07e007e0
- li t7, 0x001F001F
- li t9, 0x00ff00ff
+ SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ li t6, 0xff
+ addiu t7, zero, -1 /* t7 = 0xffffffff */
srl t8, a1, 24 /* t8 = srca */
+ li t9, 0x00ff00ff
+ li s6, 0xf800f800
+ li s7, 0x07e007e0
+ li s8, 0x001F001F
+
addiu t1, a3, -1
- beqz t1, 3f /* last pixel */
+ beqz t1, 4f /* last pixel */
nop
- li s0, 0xff /* s0 = 0xff */
- addiu s1, zero, -1 /* s1 = 0xffffffff */
- beq t8, s0, 2f /* if (srca == 0xff) */
- nop
-1:
- /* a1 = src */
+0:
lw t0, 0(a2) /* t0 = mask */
lw t1, 4(a2) /* t1 = mask */
+ addiu a3, a3, -2 /* w = w - 2 */
or t2, t0, t1
- beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */
+ beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */
addiu a2, a2, 8
- and t3, t0, t1
- move s2, a1 /* s2 = src */
- move s3, a1 /* s3 = src */
+ and t2, t0, t1
+ beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
+ nop
+
+//if(ma)
lhu t2, 0(a0) /* t2 = dst */
- beq t3, s1, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
- lhu t3, 2(a0) /* t3 = dst */
- MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
- MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8
-11:
+ lhu t3, 2(a0) /* t3 = dst */
+ MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
+ MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
not t0, t0
not t1, t1
- CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
- MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1
- addu_s.qb s2, s2, s4
- addu_s.qb s3, s3, s5
- CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5
+ CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
+ MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+ addu_s.qb t2, t4, t2
+ addu_s.qb t3, t5, t3
+ CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
sh t2, 0(a0)
sh t3, 2(a0)
-12:
- addiu a3, a3, -2
addiu t1, a3, -1
- bgtz t1, 1b
+ bgtz t1, 0b
addiu a0, a0, 4
- b 3f
+ b 4f
+ nop
+1:
+//if (t0 == 0xffffffff) && (t1 == 0xffffffff):
+ beq t8, t6, 2f /* if (srca == 0xff) */
nop
-2:
- /* a1 = src */
- lw t0, 0(a2) /* t0 = mask */
- lw t1, 4(a2) /* t1 = mask */
- or t2, t0, t1
- beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */
- addiu a2, a2, 8
- and t3, t0, t1
- move t2, a1
- beq t3, s1, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
- move t3, a1
lhu t2, 0(a0) /* t2 = dst */
lhu t3, 2(a0) /* t3 = dst */
- MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
- not t0, t0
- not t1, t1
- CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
- MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t2, t3
- addu_s.qb t2, s2, s4
- addu_s.qb t3, s3, s5
-21:
- CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3
- sh t0, 0(a0)
- sh t1, 2(a0)
-22:
- addiu a3, a3, -2
+ not t0, a1
+ not t1, a1
+ srl t0, t0, 24
+ srl t1, t1, 24
+ CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
+ MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+ addu_s.qb t2, a1, t2
+ addu_s.qb t3, a1, t3
+ CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
+ sh t2, 0(a0)
+ sh t3, 2(a0)
addiu t1, a3, -1
- bgtz t1, 2b
+ bgtz t1, 0b
addiu a0, a0, 4
+ b 4f
+ nop
+2:
+ CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1
+ sh t2, 0(a0)
+ sh t2, 2(a0)
3:
- blez a3, 4f
+ addiu t1, a3, -1
+ bgtz t1, 0b
+ addiu a0, a0, 4
+
+4:
+ beqz a3, 7f
nop
/* a1 = src */
- lw t1, 0(a2) /* t1 = mask */
- beqz t1, 4f
+ lw t0, 0(a2) /* t0 = mask */
+ beqz t0, 7f /* if (t0 == 0) */
nop
- move t2, a1 /* t2 = src */
- beq t1, t7, 31f
- lhu t0, 0(a0) /* t0 = dst */
-
- MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6
- MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5
-31:
- not t1, t1
- CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3
- MIPS_UN8x4_MUL_UN8x4 s1, t1, t3, t9, t4, t5, t6, t7
- addu_s.qb t0, t2, t3
- CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3
- sh s1, 0(a0)
-4:
- RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ beq t0, t7, 5f /* if (t0 == 0xffffffff) */
+ nop
+//if(ma)
+ lhu t1, 0(a0) /* t1 = dst */
+ MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0
+ MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5
+ not t0, t0
+ CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
+ MIPS_UN8x4_MUL_UN8x4 s1, t0, s1, t9, t3, t4, t5, s0
+ addu_s.qb s1, t2, s1
+ CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
+ sh t1, 0(a0)
+ RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ j ra
+ nop
+5:
+//if (t0 == 0xffffffff)
+ beq t8, t6, 6f /* if (srca == 0xff) */
+ nop
+ lhu t1, 0(a0) /* t1 = dst */
+ not t0, a1
+ srl t0, t0, 24
+ CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
+ MIPS_UN8x4_MUL_UN8 s1, t0, s1, t9, t2, t3, t4
+ addu_s.qb s1, a1, s1
+ CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
+ sh t1, 0(a0)
+ RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+ j ra
+ nop
+6:
+ CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2
+ sh t1, 0(a0)
+7:
+ RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+8:
j ra
nop
@@ -2936,101 +3095,265 @@ END(pixman_composite_over_reverse_n_8888_asm_mips)
LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
/*
* a0 - dst (a8)
- * a1 - src (a8r8g8b8)
+ * a1 - src (32bit constant)
* a2 - w
*/
- beqz a2, 5f
+ li t9, 0x00ff00ff
+ beqz a2, 3f
nop
-
- SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
- move t7, a1
- srl t5, t7, 24
- replv.ph t5, t5
- srl t9, a2, 2 /* t1 = how many multiples of 4 src pixels */
- beqz t9, 2f /* branch if less than 4 src pixels */
+ srl t7, a2, 2 /* t7 = how many multiples of 4 dst pixels */
+ beqz t7, 1f /* branch if less than 4 src pixels */
nop
-1:
- addiu t9, t9, -1
- addiu a2, a2, -4
+ srl t8, a1, 24
+ replv.ph t8, t8
+
+0:
+ beqz t7, 1f
+ addiu t7, t7, -1
lbu t0, 0(a0)
lbu t1, 1(a0)
lbu t2, 2(a0)
lbu t3, 3(a0)
- muleu_s.ph.qbl s0, t0, t5
- muleu_s.ph.qbr s1, t0, t5
- muleu_s.ph.qbl s2, t1, t5
- muleu_s.ph.qbr s3, t1, t5
- muleu_s.ph.qbl s4, t2, t5
- muleu_s.ph.qbr s5, t2, t5
- muleu_s.ph.qbl s6, t3, t5
- muleu_s.ph.qbr s7, t3, t5
-
- shrl.ph t4, s0, 8
- shrl.ph t6, s1, 8
- shrl.ph t7, s2, 8
- shrl.ph t8, s3, 8
- addq.ph t0, s0, t4
- addq.ph t1, s1, t6
- addq.ph t2, s2, t7
- addq.ph t3, s3, t8
- shra_r.ph t0, t0, 8
- shra_r.ph t1, t1, 8
+ precr_sra.ph.w t1, t0, 0
+ precr_sra.ph.w t3, t2, 0
+ precr.qb.ph t0, t3, t1
+
+ muleu_s.ph.qbl t2, t0, t8
+ muleu_s.ph.qbr t3, t0, t8
+ shra_r.ph t4, t2, 8
+ shra_r.ph t5, t3, 8
+ and t4, t4, t9
+ and t5, t5, t9
+ addq.ph t2, t2, t4
+ addq.ph t3, t3, t5
shra_r.ph t2, t2, 8
shra_r.ph t3, t3, 8
- shrl.ph t4, s4, 8
- shrl.ph t6, s5, 8
- shrl.ph t7, s6, 8
- shrl.ph t8, s7, 8
- addq.ph s0, s4, t4
- addq.ph s1, s5, t6
- addq.ph s2, s6, t7
- addq.ph s3, s7, t8
- shra_r.ph t4, s0, 8
- shra_r.ph t6, s1, 8
- shra_r.ph t7, s2, 8
- shra_r.ph t8, s3, 8
-
- precr.qb.ph s0, t0, t1
- precr.qb.ph s1, t2, t3
- precr.qb.ph s2, t4, t6
- precr.qb.ph s3, t7, t8
+ precr.qb.ph t2, t2, t3
- sb s0, 0(a0)
- sb s1, 1(a0)
- sb s2, 2(a0)
- sb s3, 3(a0)
- bgtz t9, 1b
+ sb t2, 0(a0)
+ srl t2, t2, 8
+ sb t2, 1(a0)
+ srl t2, t2, 8
+ sb t2, 2(a0)
+ srl t2, t2, 8
+ sb t2, 3(a0)
+ addiu a2, a2, -4
+ b 0b
addiu a0, a0, 4
-2:
- beqz a2, 4f
+
+1:
+ beqz a2, 3f
nop
-3:
- lbu t1, 0(a0)
+ srl t8, a1, 24
+2:
+ lbu t0, 0(a0)
+
+ mul t2, t0, t8
+ shra_r.ph t3, t2, 8
+ andi t3, t3, 0x00ff
+ addq.ph t2, t2, t3
+ shra_r.ph t2, t2, 8
- muleu_s.ph.qbl t4, t1, t5
- muleu_s.ph.qbr t7, t1, t5
- shrl.ph t6, t4, 8
- shrl.ph t0, t7, 8
- addq.ph t8, t4, t6
- addq.ph t9, t7, t0
- shra_r.ph t8, t8, 8
- shra_r.ph t9, t9, 8
- precr.qb.ph t2, t8, t9
sb t2, 0(a0)
addiu a2, a2, -1
- bnez a2, 3b
+ bnez a2, 2b
addiu a0, a0, 1
-4:
- RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
-5:
+
+3:
j ra
nop
END(pixman_composite_in_n_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ * a3 - vx
+ * 16(sp) - unit_x
+ */
+
+ SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
+ lw t8, 16(sp) /* t8 = unit_x */
+ li t6, 0x00ff00ff
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */
+ addu t0, a1, t0
+ lw t0, 0(t0) /* t0 = source (a8r8g8b8) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+
+ sra t1, a3, 16 /* t0 = vx >> 16 */
+ sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */
+ addu t1, a1, t1
+ lw t1, 0(t1) /* t1 = source (a8r8g8b8) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+
+ lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+ lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+
+ OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3
+
+ sw t4, 0(a0)
+ sw t5, 4(a0)
+ addiu a2, a2, -2
+ addiu t1, a2, -1
+ bgtz t1, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */
+ addu t0, a1, t0
+ lw t0, 0(t0) /* t0 = source (a8r8g8b8) */
+ lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+
+ OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7
+
+ sw t2, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+ j ra
+ nop
+
+END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+/*
+ * a0 - dst (r5g6b5)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ * a3 - vx
+ * 16(sp) - unit_x
+ */
+
+ SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1
+ lw t8, 40(sp) /* t8 = unit_x */
+ li t4, 0x00ff00ff
+ li t5, 0xf800f800
+ li t6, 0x07e007e0
+ li t7, 0x001F001F
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */
+ addu t0, a1, t0
+ lw t0, 0(t0) /* t0 = source (a8r8g8b8) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+ sra t1, a3, 16 /* t0 = vx >> 16 */
+ sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */
+ addu t1, a1, t1
+ lw t1, 0(t1) /* t1 = source (a8r8g8b8) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+ lhu t2, 0(a0) /* t2 = destination (r5g6b5) */
+ lhu t3, 2(a0) /* t3 = destination (r5g6b5) */
+
+ CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3
+ OVER_2x8888_2x8888 t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4
+ CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2
+
+ sh v0, 0(a0)
+ sh v1, 2(a0)
+ addiu a2, a2, -2
+ addiu t1, a2, -1
+ bgtz t1, 1b
+ addiu a0, a0, 4
+2:
+ beqz a2, 3f
+ nop
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */
+ addu t0, a1, t0
+ lw t0, 0(t0) /* t0 = source (a8r8g8b8) */
+ lhu t1, 0(a0) /* t1 = destination (r5g6b5) */
+ addu a3, a3, t8 /* a3 = vx + unit_x */
+
+ CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6
+ OVER_8888_8888 t0, t2, t1, t4, t3, t5, t6, t7
+ CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6
+
+ sh t2, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1
+ j ra
+ nop
+
+END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (r5g6b5)
+ * a2 - w
+ * a3 - vx
+ * 16(sp) - unit_x
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ beqz a2, 3f
+ nop
+
+ lw v0, 16(sp) /* v0 = unit_x */
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+
+ li t4, 0x07e007e0
+ li t5, 0x001F001F
+1:
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */
+ addu t0, a1, t0
+ lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */
+ addu a3, a3, v0 /* a3 = vx + unit_x */
+ sra t1, a3, 16 /* t1 = vx >> 16 */
+ sll t1, t1, 1 /* t1 = t1 * 2 ((r5g6b5)) */
+ addu t1, a1, t1
+ lhu t1, 0(t1) /* t1 = source ((r5g6b5)) */
+ addu a3, a3, v0 /* a3 = vx + unit_x */
+ addiu a2, a2, -2
+
+ CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
+
+ sw t2, 0(a0)
+ sw t3, 4(a0)
+
+ addiu t2, a2, -1
+ bgtz t2, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */
+ addu t0, a1, t0
+ lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */
+
+ CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3
+
+ sw t1, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
/*
* a0 - dst (r5g6b5)
diff --git a/pixman/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman/pixman-mips-dspr2-asm.h
index b330c0f0d..cab122d80 100644
--- a/pixman/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman/pixman-mips-dspr2-asm.h
@@ -354,17 +354,16 @@ LEAF_MIPS32R2(symbol) \
out1_565, out2_565, \
maskR, maskG, maskB, \
scratch1, scratch2
- precrq.ph.w \scratch1, \in2_8888, \in1_8888
- precr_sra.ph.w \in2_8888, \in1_8888, 0
- shll.ph \scratch1, \scratch1, 8
- srl \in2_8888, \in2_8888, 3
- and \scratch2, \in2_8888, \maskB
- and \scratch1, \scratch1, \maskR
- srl \in2_8888, \in2_8888, 2
- and \out2_565, \in2_8888, \maskG
- or \out2_565, \out2_565, \scratch2
- or \out1_565, \out2_565, \scratch1
- srl \out2_565, \out1_565, 16
+ precr.qb.ph \scratch1, \in2_8888, \in1_8888
+ precrq.qb.ph \in2_8888, \in2_8888, \in1_8888
+ and \out1_565, \scratch1, \maskR
+ shrl.ph \scratch1, \scratch1, 3
+ shll.ph \in2_8888, \in2_8888, 3
+ and \scratch1, \scratch1, \maskB
+ or \out1_565, \out1_565, \scratch1
+ and \in2_8888, \in2_8888, \maskG
+ or \out1_565, \out1_565, \in2_8888
+ srl \out2_565, \out1_565, 16
.endm
/*
@@ -587,6 +586,36 @@ LEAF_MIPS32R2(symbol) \
addu_s.qb \out_8888, \out_8888, \s_8888
.endm
+/*
+ * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
+ * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR,
+ * which is needed for the rounding process. maskLSR must have the following value:
+ * li maskLSR, 0x00ff00ff
+ */
+.macro OVER_2x8888_2x8888 s1_8888, \
+ s2_8888, \
+ d1_8888, \
+ d2_8888, \
+ out1_8888, \
+ out2_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3, \
+ scratch4, scratch5, scratch6
+ not \scratch1, \s1_8888
+ srl \scratch1, \scratch1, 24
+ not \scratch2, \s2_8888
+ srl \scratch2, \scratch2, 24
+ MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \
+ \scratch1, \scratch2, \
+ \out1_8888, \out2_8888, \
+ \maskLSR, \
+ \scratch3, \scratch4, \scratch5, \
+ \scratch6, \d1_8888, \d2_8888
+
+ addu_s.qb \out1_8888, \out1_8888, \s1_8888
+ addu_s.qb \out2_8888, \out2_8888, \s2_8888
+.endm
+
.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \
m_8, \
d_8888, \
diff --git a/pixman/pixman/pixman-mips-dspr2.c b/pixman/pixman/pixman-mips-dspr2.c
index 1ea244576..e10c9df0a 100644
--- a/pixman/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman/pixman-mips-dspr2.c
@@ -54,6 +54,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev,
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev,
uint8_t, 3, uint16_t, 1)
#endif
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565,
@@ -121,6 +125,13 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1,
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1,
uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER,
+ uint32_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER,
+ uint32_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC,
+ uint16_t, uint32_t)
+
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
uint32_t, uint32_t)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
@@ -292,6 +303,10 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev),
PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev),
#endif
+ PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, mips_composite_src_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, mips_composite_src_rpixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mips_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mips_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mips_composite_src_n_8_8888),
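The pixbuf/rpixbuf entries above bind the new assembly added earlier in this patch. As I read that assembly, each source pixel has its colour channels multiplied by its alpha (with the usual rounded division by 255) while the alpha byte is kept, and the pixbuf variant additionally swaps R and B on output. A scalar sketch of that per-pixel step (my paraphrase, not part of the patch):

#include <stdint.h>

/* Rounded multiply of two 8-bit values treated as fractions of 255. */
static uint8_t
mul_un8 (uint8_t a, uint8_t b)
{
    uint32_t t = (uint32_t)a * b + 128;
    return (uint8_t)((t + (t >> 8)) >> 8);
}

/* One pixel of the (r)pixbuf SRC paths: premultiply the colour channels by
 * alpha; the pixbuf variant also swaps R and B (a8r8g8b8 -> a8b8g8r8). */
static uint32_t
src_pixbuf_pixel (uint32_t s, int swap_rb)
{
    uint8_t a = s >> 24;
    uint8_t r = mul_un8 ((s >> 16) & 0xff, a);
    uint8_t g = mul_un8 ((s >> 8) & 0xff, a);
    uint8_t b = mul_un8 (s & 0xff, a);

    if (swap_rb)
    {
        uint8_t tmp = r;
        r = b;
        b = tmp;
    }
    return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;
}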
@@ -357,6 +372,22 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (IN, solid, null, a8, mips_composite_in_n_8),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888),
+
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565),
+
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888),
+ /* Note: NONE repeat is not supported yet */
+ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
+
PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
diff --git a/pixman/pixman/pixman-mips-dspr2.h b/pixman/pixman/pixman-mips-dspr2.h
index 4ac9ff95d..955ed70b8 100644
--- a/pixman/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman/pixman-mips-dspr2.h
@@ -246,6 +246,48 @@ mips_composite_##name (pixman_implementation_t *imp, \
} \
}
+/****************************************************************************/
+
+#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \
+ src_type, dst_type) \
+void \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \
+ dst_type * dst, \
+ const src_type * src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x); \
+ \
+static force_inline void \
+scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \
+ const src_type * ps, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w, \
+ vx, unit_x); \
+} \
+ \
+FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, dst_type, COVER) \
+FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, dst_type, NONE) \
+FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, dst_type, PAD)
+
+/* Provide entries for the fast path table */
+#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
+
+
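The scanlines bound by PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST walk the source in 16.16 fixed point, exactly as the assembly added above does with its sra/addu pairs: the integer part of vx selects the source pixel and unit_x is added once per destination pixel. A stand-alone C sketch of that loop for the SRC case (names are illustrative, not part of the patch):

#include <stdint.h>

typedef int32_t pixman_fixed_t;   /* 16.16 fixed point, as in pixman.h */

/* Nearest-neighbour SRC scanline: the access pattern used by the
 * pixman_scaled_nearest_scanline_*_asm_mips routines. */
static void
nearest_src_scanline (uint32_t *dst, const uint32_t *src,
                      int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x)
{
    while (w--)
    {
        *dst++ = src[vx >> 16];   /* integer part picks the source pixel */
        vx += unit_x;             /* fixed-point step per destination pixel */
    }
}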
/*****************************************************************************/
#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \
diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c
index 39e85c586..ed5107321 100644
--- a/pixman/pixman/pixman-mmx.c
+++ b/pixman/pixman/pixman-mmx.c
@@ -309,6 +309,29 @@ negate (__m64 mask)
return _mm_xor_si64 (mask, MC (4x00ff));
}
+/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1
+ * and maps its result to the same range.
+ *
+ * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner:
+ * Notation, Notation, Notation", the first of which is
+ *
+ * prod(a, b) = (a * b + 128) / 255.
+ *
+ * By approximating the division by 255 as 257/65536 it can be replaced by a
+ * multiply and a right shift. This is the implementation that we use in
+ * pix_multiply(), but we use _mm_mulhi_pu16() (part of SSE1 or Extended
+ * 3DNow!, and unavailable at the time of the book's publication) to perform
+ * the multiplication by 257 and the right shift in a single operation.
+ *
+ * prod(a, b) = ((a * b + 128) * 257) >> 16.
+ *
+ * A third way (how pix_multiply() was implemented prior to 14208344) also
+ * exists; it performs the multiplication by 257 with adds and shifts.
+ *
+ * Where temp = a * b + 128
+ *
+ * prod(a, b) = (temp + (temp >> 8)) >> 8.
+ */
static force_inline __m64
pix_multiply (__m64 a, __m64 b)
{
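The equivalences described in the comment above can be checked exhaustively for 8-bit inputs; a stand-alone C check (plain C, not part of the patch):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
    uint32_t a, b;

    for (a = 0; a < 256; a++)
    {
        for (b = 0; b < 256; b++)
        {
            uint32_t t      = a * b + 128;
            uint32_t mulhi  = (t * 257) >> 16;       /* what _mm_mulhi_pu16 (t, 257) yields */
            uint32_t shifts = (t + (t >> 8)) >> 8;   /* the pre-14208344 add/shift variant  */
            uint32_t ref    = (uint32_t)(a * b / 255.0 + 0.5);

            assert (mulhi == shifts && mulhi == ref);
        }
    }
    return 0;
}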
@@ -3926,52 +3949,23 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
-typedef struct
-{
- pixman_format_code_t format;
- pixman_iter_get_scanline_t get_scanline;
-} fetcher_info_t;
-
-static const fetcher_info_t fetchers[] =
-{
- { PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 },
- { PIXMAN_r5g6b5, mmx_fetch_r5g6b5 },
- { PIXMAN_a8, mmx_fetch_a8 },
- { PIXMAN_null }
-};
-
-static pixman_bool_t
-mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
-{
- pixman_image_t *image = iter->image;
-
-#define FLAGS \
+#define IMAGE_FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
- if ((iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FLAGS) == FLAGS)
- {
- const fetcher_info_t *f;
-
- for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
- {
- if (image->common.extended_format_code == f->format)
- {
- uint8_t *b = (uint8_t *)image->bits.bits;
- int s = image->bits.rowstride * 4;
-
- iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
- iter->stride = s;
-
- iter->get_scanline = f->get_scanline;
- return TRUE;
- }
- }
- }
-
- return FALSE;
-}
+static const pixman_iter_info_t mmx_iters[] =
+{
+ { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL
+ },
+ { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL
+ },
+ { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL
+ },
+ { PIXMAN_null },
+};
static const pixman_fast_path_t mmx_fast_paths[] =
{
@@ -4101,7 +4095,7 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback)
imp->blt = mmx_blt;
imp->fill = mmx_fill;
- imp->src_iter_init = mmx_src_iter_init;
+ imp->iter_info = mmx_iters;
return imp;
}
diff --git a/pixman/pixman/pixman-noop.c b/pixman/pixman/pixman-noop.c
index e39996d9d..e59890492 100644
--- a/pixman/pixman/pixman-noop.c
+++ b/pixman/pixman/pixman-noop.c
@@ -37,12 +37,6 @@ noop_composite (pixman_implementation_t *imp,
return;
}
-static void
-dest_write_back_direct (pixman_iter_t *iter)
-{
- iter->buffer += iter->image->bits.rowstride;
-}
-
static uint32_t *
noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask)
{
@@ -53,110 +47,102 @@ noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask)
return result;
}
-static uint32_t *
-get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
-{
- return NULL;
+static void
+noop_init_solid_narrow (pixman_iter_t *iter,
+ const pixman_iter_info_t *info)
+{
+ pixman_image_t *image = iter->image;
+ uint32_t *buffer = iter->buffer;
+ uint32_t *end = buffer + iter->width;
+ uint32_t color;
+
+ if (iter->image->type == SOLID)
+ color = image->solid.color_32;
+ else
+ color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
+
+ while (buffer < end)
+ *(buffer++) = color;
}
-static pixman_bool_t
-noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static void
+noop_init_solid_wide (pixman_iter_t *iter,
+ const pixman_iter_info_t *info)
{
pixman_image_t *image = iter->image;
+ argb_t *buffer = (argb_t *)iter->buffer;
+ argb_t *end = buffer + iter->width;
+ argb_t color;
-#define FLAGS \
- (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
-
- if (!image)
- {
- iter->get_scanline = get_scanline_null;
- }
- else if ((iter->iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
- (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
- {
- iter->get_scanline = _pixman_iter_get_scanline_noop;
- }
- else if (image->common.extended_format_code == PIXMAN_solid &&
- (iter->image->type == SOLID ||
- (iter->image_flags & FAST_PATH_NO_ALPHA_MAP)))
- {
- if (iter->iter_flags & ITER_NARROW)
- {
- uint32_t *buffer = iter->buffer;
- uint32_t *end = buffer + iter->width;
- uint32_t color;
-
- if (image->type == SOLID)
- color = image->solid.color_32;
- else
- color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
-
- while (buffer < end)
- *(buffer++) = color;
- }
- else
- {
- argb_t *buffer = (argb_t *)iter->buffer;
- argb_t *end = buffer + iter->width;
- argb_t color;
-
- if (image->type == SOLID)
- color = image->solid.color_float;
- else
- color = image->bits.fetch_pixel_float (&image->bits, 0, 0);
-
- while (buffer < end)
- *(buffer++) = color;
- }
-
- iter->get_scanline = _pixman_iter_get_scanline_noop;
- }
- else if (image->common.extended_format_code == PIXMAN_a8r8g8b8 &&
- (iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FLAGS) == FLAGS &&
- iter->x >= 0 && iter->y >= 0 &&
- iter->x + iter->width <= image->bits.width &&
- iter->y + iter->height <= image->bits.height)
- {
- iter->buffer =
- image->bits.bits + iter->y * image->bits.rowstride + iter->x;
-
- iter->get_scanline = noop_get_scanline;
- }
+ if (iter->image->type == SOLID)
+ color = image->solid.color_float;
else
- {
- return FALSE;
- }
+ color = image->bits.fetch_pixel_float (&image->bits, 0, 0);
- return TRUE;
+ while (buffer < end)
+ *(buffer++) = color;
}
-static pixman_bool_t
-noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+static void
+noop_init_direct_buffer (pixman_iter_t *iter, const pixman_iter_info_t *info)
{
pixman_image_t *image = iter->image;
- uint32_t image_flags = iter->image_flags;
- uint32_t iter_flags = iter->iter_flags;
-
- if ((image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS &&
- (iter_flags & ITER_NARROW) == ITER_NARROW &&
- ((image->common.extended_format_code == PIXMAN_a8r8g8b8) ||
- (image->common.extended_format_code == PIXMAN_x8r8g8b8 &&
- (iter_flags & (ITER_LOCALIZED_ALPHA)))))
- {
- iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x;
-
- iter->get_scanline = _pixman_iter_get_scanline_noop;
- iter->write_back = dest_write_back_direct;
-
- return TRUE;
- }
- else
- {
- return FALSE;
- }
+
+ iter->buffer =
+ image->bits.bits + iter->y * image->bits.rowstride + iter->x;
}
+static void
+dest_write_back_direct (pixman_iter_t *iter)
+{
+ iter->buffer += iter->image->bits.rowstride;
+}
+
+static const pixman_iter_info_t noop_iters[] =
+{
+ /* Source iters */
+ { PIXMAN_any,
+ 0, ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_SRC,
+ NULL,
+ _pixman_iter_get_scanline_noop,
+ NULL
+ },
+ { PIXMAN_solid,
+ FAST_PATH_NO_ALPHA_MAP, ITER_NARROW | ITER_SRC,
+ noop_init_solid_narrow,
+ _pixman_iter_get_scanline_noop,
+ NULL,
+ },
+ { PIXMAN_solid,
+ FAST_PATH_NO_ALPHA_MAP, ITER_WIDE | ITER_SRC,
+ noop_init_solid_wide,
+ _pixman_iter_get_scanline_noop,
+ NULL
+ },
+ { PIXMAN_a8r8g8b8,
+ FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |
+ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,
+ ITER_NARROW | ITER_SRC,
+ noop_init_direct_buffer,
+ noop_get_scanline,
+ NULL
+ },
+ /* Dest iters */
+ { PIXMAN_a8r8g8b8,
+ FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST,
+ noop_init_direct_buffer,
+ _pixman_iter_get_scanline_noop,
+ dest_write_back_direct
+ },
+ { PIXMAN_x8r8g8b8,
+ FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_LOCALIZED_ALPHA,
+ noop_init_direct_buffer,
+ _pixman_iter_get_scanline_noop,
+ dest_write_back_direct
+ },
+ { PIXMAN_null },
+};
+
static const pixman_fast_path_t noop_fast_paths[] =
{
{ PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite },
@@ -169,8 +155,7 @@ _pixman_implementation_create_noop (pixman_implementation_t *fallback)
pixman_implementation_t *imp =
_pixman_implementation_create (fallback, noop_fast_paths);
- imp->src_iter_init = noop_src_iter_init;
- imp->dest_iter_init = noop_dest_iter_init;
+ imp->iter_info = noop_iters;
return imp;
}
diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h
index 6d9c05321..af4a0b6e0 100644
--- a/pixman/pixman/pixman-private.h
+++ b/pixman/pixman/pixman-private.h
@@ -212,7 +212,8 @@ typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter);
typedef enum
{
- ITER_NARROW = (1 << 0),
+ ITER_NARROW = (1 << 0),
+ ITER_WIDE = (1 << 1),
/* "Localized alpha" is when the alpha channel is used only to compute
* the alpha value of the destination. This means that the computation
@@ -229,9 +230,15 @@ typedef enum
* we can treat it as if it were ARGB, which means in some cases we can
* avoid copying it to a temporary buffer.
*/
- ITER_LOCALIZED_ALPHA = (1 << 1),
- ITER_IGNORE_ALPHA = (1 << 2),
- ITER_IGNORE_RGB = (1 << 3)
+ ITER_LOCALIZED_ALPHA = (1 << 2),
+ ITER_IGNORE_ALPHA = (1 << 3),
+ ITER_IGNORE_RGB = (1 << 4),
+
+ /* These indicate whether the iterator is for a source
+ * or a destination image
+ */
+ ITER_SRC = (1 << 5),
+ ITER_DEST = (1 << 6)
} iter_flags_t;
struct pixman_iter_t
@@ -255,6 +262,19 @@ struct pixman_iter_t
int stride;
};
+typedef struct pixman_iter_info_t pixman_iter_info_t;
+typedef void (* pixman_iter_initializer_t) (pixman_iter_t *iter,
+ const pixman_iter_info_t *info);
+struct pixman_iter_info_t
+{
+ pixman_format_code_t format;
+ uint32_t image_flags;
+ iter_flags_t iter_flags;
+ pixman_iter_initializer_t initializer;
+ pixman_iter_get_scanline_t get_scanline;
+ pixman_iter_write_back_t write_back;
+};
+
void
_pixman_bits_image_setup_accessors (bits_image_t *image);
@@ -454,8 +474,6 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
int width,
int height,
uint32_t filler);
-typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
- pixman_iter_t *iter);
void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp);
@@ -477,11 +495,10 @@ struct pixman_implementation_t
pixman_implementation_t * toplevel;
pixman_implementation_t * fallback;
const pixman_fast_path_t * fast_paths;
+ const pixman_iter_info_t * iter_info;
pixman_blt_func_t blt;
pixman_fill_func_t fill;
- pixman_iter_init_func_t src_iter_init;
- pixman_iter_init_func_t dest_iter_init;
pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS];
pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS];
@@ -542,29 +559,17 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
int height,
uint32_t filler);
-pixman_bool_t
-_pixman_implementation_src_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x,
- int y,
- int width,
- int height,
- uint8_t *buffer,
- iter_flags_t flags,
- uint32_t image_flags);
-
-pixman_bool_t
-_pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x,
- int y,
- int width,
- int height,
- uint8_t *buffer,
- iter_flags_t flags,
- uint32_t image_flags);
+void
+_pixman_implementation_iter_init (pixman_implementation_t *imp,
+ pixman_iter_t *iter,
+ pixman_image_t *image,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint8_t *buffer,
+ iter_flags_t flags,
+ uint32_t image_flags);
/* Specific implementations */
pixman_implementation_t *
@@ -647,6 +652,9 @@ _pixman_compute_composite_region32 (pixman_region32_t * region,
uint32_t *
_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
+void
+_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info);
+
/* These "formats" all have depth 0, so they
* will never clash with any real ones
*/
diff --git a/pixman/pixman/pixman-region.c b/pixman/pixman/pixman-region.c
index 2d6f1571c..59bc9c797 100644
--- a/pixman/pixman/pixman-region.c
+++ b/pixman/pixman/pixman-region.c
@@ -1858,7 +1858,7 @@ pixman_region_subtract_o (region_type_t * region,
else if (r2->x1 <= x1)
{
/*
- * Subtrahend preceeds minuend: nuke left edge of minuend.
+ * Subtrahend precedes minuend: nuke left edge of minuend.
*/
x1 = r2->x2;
if (x1 >= r1->x2)
@@ -1982,7 +1982,7 @@ PREFIX (_subtract) (region_type_t *reg_d,
}
/* Add those rectangles in region 1 that aren't in region 2,
- do yucky substraction for overlaps, and
+ do yucky subtraction for overlaps, and
just throw away rectangles in region 2 that aren't in region 1 */
if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE))
return FALSE;
@@ -2042,7 +2042,7 @@ PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */
}
/* Add those rectangles in region 1 that aren't in region 2,
- * do yucky substraction for overlaps, and
+ * do yucky subtraction for overlaps, and
* just throw away rectangles in region 2 that aren't in region 1
*/
inv_reg.extents = *inv_rect;
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index 1d525794e..932bfd8ee 100644..100755
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -5554,19 +5554,27 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
-#define BMSK ((1 << BILINEAR_INTERPOLATION_BITS) - 1)
-
-#define BILINEAR_DECLARE_VARIABLES \
+#if BILINEAR_INTERPOLATION_BITS < 8
+# define BILINEAR_DECLARE_VARIABLES \
const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
- const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);\
- const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \
- const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);\
- const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
+ const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
+ const __m128i xmm_ux = _mm_set_epi16 (unit_x&0xffff, (-unit_x)&0xffff, unit_x&0xffff, (-unit_x)&0xffff, \
+ unit_x&0xffff, (-unit_x)&0xffff, unit_x&0xffff, (-unit_x)&0xffff); \
+ const __m128i xmm_zero = _mm_setzero_si128 (); \
+ __m128i xmm_x = _mm_set_epi16 (vx&0xffff, (-(vx + 1))&0xffff, vx&0xffff, (-(vx + 1))&0xffff, \
+ vx&0xffff, (-(vx + 1))&0xffff, vx&0xffff, (-(vx + 1))&0xffff)
+#else
+# define BILINEAR_DECLARE_VARIABLES \
+ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
+ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
+ const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \
const __m128i xmm_ux = _mm_set_epi16 (unit_x&0xffff, unit_x&0xffff, unit_x&0xffff, unit_x&0xffff, \
- unit_x&0xffff, unit_x&0xffff, unit_x&0xffff, unit_x&0xffff); \
+ (-unit_x)&0xffff, (-unit_x)&0xffff, (-unit_x)&0xffff, (-unit_x)&0xffff); \
const __m128i xmm_zero = _mm_setzero_si128 (); \
- __m128i xmm_x = _mm_set_epi16 (vx&0xffff, vx&0xffff, vx&0xffff, vx&0xffff, vx&0xffff, vx&0xffff, vx&0xffff, vx&0xffff)
+ __m128i xmm_x = _mm_set_epi16 (vx&0xffff, vx&0xffff, vx&0xffff, vx&0xffff, \
+ (-(vx + 1))&0xffff, (-(vx + 1))&0xffff, (-(vx + 1))&0xffff, (-(vx + 1))&0xffff)
+#endif
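The rewritten BILINEAR_DECLARE_VARIABLES drops the xmm_xorc constants by storing -(vx + 1) next to vx in xmm_x: in two's complement -(vx + 1) equals ~vx, so shifting it right by (16 - BILINEAR_INTERPOLATION_BITS) already yields the complement of the fractional weight, and only the +1 from xmm_addc is still needed. A stand-alone check of that identity (my reading of the change, plain C, not part of the patch):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
    const int      bits = 7;                 /* BILINEAR_INTERPOLATION_BITS */
    const int      s    = 16 - bits;
    const uint16_t bmsk = (1u << bits) - 1;
    uint32_t       x;

    for (x = 0; x <= 0xffff; x++)
    {
        uint16_t frac = (uint16_t)x >> s;
        uint16_t comp = (uint16_t)(-(x + 1)) >> s;          /* == (uint16_t)~x >> s */

        assert (comp == bmsk - frac);                       /* old: xor with bmsk   */
        assert ((uint16_t)(comp + 1) + frac == 1u << bits); /* weights sum to 2^bits */
    }
    return 0;
}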
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
do { \
@@ -5585,8 +5593,8 @@ do { \
if (BILINEAR_INTERPOLATION_BITS < 8) \
{ \
/* calculate horizontal weights */ \
- xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, \
- _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \
+ xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
+ 16 - BILINEAR_INTERPOLATION_BITS)); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \
@@ -5595,8 +5603,8 @@ do { \
else \
{ \
/* calculate horizontal weights */ \
- xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, \
- _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \
+ xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
+ 16 - BILINEAR_INTERPOLATION_BITS)); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
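
The rewritten BILINEAR_DECLARE_VARIABLES / BILINEAR_INTERPOLATE_ONE_PIXEL macros above drop the xmm_xorc7/xmm_xorc8 constants: by packing the negated coordinate -(vx + 1) next to vx, a plain right shift followed by the xmm_addc increment yields both horizontal weights, because -(vx + 1) equals ~vx in 16-bit arithmetic. A minimal scalar sketch of that identity (not part of the patch; BITS stands in for BILINEAR_INTERPOLATION_BITS and is assumed to be 7 here):

    /* Scalar model of the weight computation in the macros above.  For a
     * 16.16 fixed-point coordinate, the right weight is the top BITS bits
     * of the 16-bit fraction and the left weight is (1 << BITS) minus it.
     * Since -(vx + 1) equals ~vx in 16-bit arithmetic, shifting the negated
     * lane and adding 1 reproduces the left weight without any XOR mask.
     */
    #include <assert.h>
    #include <stdint.h>

    #define BITS 7   /* assumed value of BILINEAR_INTERPOLATION_BITS */

    int
    main (void)
    {
        uint32_t vx;

        for (vx = 0; vx < 0x10000; vx++)
        {
            uint16_t wr     = (uint16_t) vx >> (16 - BITS);
            uint16_t wl_ref = (1 << BITS) - wr;
            uint16_t neg    = (uint16_t) (-(vx + 1));             /* == ~vx */
            uint16_t wl     = ((uint16_t) (neg >> (16 - BITS))) + 1;

            assert (wl == wl_ref && wl + wr == (1 << BITS));
        }
        return 0;
    }

The same identity holds in the BILINEAR_INTERPOLATION_BITS >= 8 branch; only the lane layout of xmm_x and xmm_addc differs.
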
@@ -6332,52 +6340,23 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
-typedef struct
-{
- pixman_format_code_t format;
- pixman_iter_get_scanline_t get_scanline;
-} fetcher_info_t;
-
-static const fetcher_info_t fetchers[] =
-{
- { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
- { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 },
- { PIXMAN_a8, sse2_fetch_a8 },
- { PIXMAN_null }
-};
-
-static pixman_bool_t
-sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
-{
- pixman_image_t *image = iter->image;
-
-#define FLAGS \
+#define IMAGE_FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
- if ((iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FLAGS) == FLAGS)
- {
- const fetcher_info_t *f;
-
- for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
- {
- if (image->common.extended_format_code == f->format)
- {
- uint8_t *b = (uint8_t *)image->bits.bits;
- int s = image->bits.rowstride * 4;
-
- iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
- iter->stride = s;
-
- iter->get_scanline = f->get_scanline;
- return TRUE;
- }
- }
- }
-
- return FALSE;
-}
+static const pixman_iter_info_t sse2_iters[] =
+{
+ { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL
+ },
+ { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL
+ },
+ { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL
+ },
+ { PIXMAN_null },
+};
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
@@ -6435,7 +6414,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
imp->blt = sse2_blt;
imp->fill = sse2_fill;
- imp->src_iter_init = sse2_src_iter_init;
+ imp->iter_info = sse2_iters;
return imp;
}
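
With iter_info replacing src_iter_init, the SSE2 backend no longer walks its own fetcher table; it just publishes sse2_iters and lets the shared implementation code pick a matching entry. A hypothetical, self-contained model of that table lookup (simplified stand-in types, flag values and names, not the actual pixman-implementation.c code):

    /* Illustrative model only: how a backend-published table like sse2_iters
     * can replace a hand-written src_iter_init.  The string stands in for
     * the real get_scanline function pointer.
     */
    #include <stdio.h>
    #include <stdint.h>

    enum { FMT_NULL = 0, FMT_X8R8G8B8, FMT_R5G6B5, FMT_A8 };

    typedef struct
    {
        uint32_t    format;       /* required image format                   */
        uint32_t    image_flags;  /* every bit here must be set on the image */
        uint32_t    iter_flags;   /* every bit here must be set on the iter  */
        const char *get_scanline; /* stands in for the real function pointer */
    } iter_info_t;

    static const iter_info_t iters[] =
    {
        { FMT_X8R8G8B8, 0x3, 0x1, "fetch_x8r8g8b8" },
        { FMT_R5G6B5,   0x3, 0x1, "fetch_r5g6b5"   },
        { FMT_A8,       0x3, 0x1, "fetch_a8"       },
        { FMT_NULL,     0,   0,   NULL             }
    };

    /* First entry whose format matches and whose flag requirements are a
     * subset of what the caller offers wins; FMT_NULL terminates the table.
     */
    static const iter_info_t *
    lookup_iter (uint32_t format, uint32_t image_flags, uint32_t iter_flags)
    {
        const iter_info_t *info;

        for (info = iters; info->format != FMT_NULL; info++)
        {
            if (info->format == format                                  &&
                (image_flags & info->image_flags) == info->image_flags  &&
                (iter_flags & info->iter_flags) == info->iter_flags)
            {
                return info;
            }
        }
        return NULL;
    }

    int
    main (void)
    {
        const iter_info_t *info = lookup_iter (FMT_R5G6B5, 0x7, 0x1);

        printf ("%s\n", info ? info->get_scanline : "no match, fall back");
        return 0;
    }

The real table additionally carries an initializer (here _pixman_iter_init_bits_stride) and an optional write_back hook, as the sse2_iters entries above show.
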
diff --git a/pixman/pixman/pixman-utils.c b/pixman/pixman/pixman-utils.c
index f31171f6d..98723a800 100644
--- a/pixman/pixman/pixman-utils.c
+++ b/pixman/pixman/pixman-utils.c
@@ -214,6 +214,17 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
+void
+_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info)
+{
+ pixman_image_t *image = iter->image;
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4;
+
+ iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (info->format) / 8;
+ iter->stride = s;
+}
+
#define N_TMP_BOXES (16)
pixman_bool_t
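
_pixman_iter_init_bits_stride centralizes the start-of-scanline arithmetic that sse2_src_iter_init used to duplicate: rowstride is counted in uint32_t units, so the byte stride is rowstride * 4, and the x offset is scaled by the format's bytes per pixel. A standalone restatement of that arithmetic with example numbers (not additional pixman code):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
        /* Example: a 100-pixel-wide r5g6b5 image, iterating from (x, y) = (3, 2). */
        int      rowstride_words = 50;        /* rowstride counts uint32_t units   */
        int      bpp             = 16;        /* PIXMAN_FORMAT_BPP (PIXMAN_r5g6b5) */
        int      x = 3, y = 2;

        uint8_t  base[50 * 4 * 10];           /* pretend storage for 10 rows       */
        int      stride_bytes = rowstride_words * 4;
        uint8_t *bits = base + stride_bytes * y + x * bpp / 8;

        assert (stride_bytes == 200);
        assert (bits - base == 2 * 200 + 3 * 2);  /* two full rows plus 3 pixels */
        return 0;
    }
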
diff --git a/pixman/pixman/pixman-vmx.c b/pixman/pixman/pixman-vmx.c
index 6868704a8..f629003ab 100644
--- a/pixman/pixman/pixman-vmx.c
+++ b/pixman/pixman/pixman-vmx.c
@@ -25,7 +25,9 @@
* Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
*/
+#ifdef HAVE_CONFIG_H
#include <config.h>
+#endif
#include "pixman-private.h"
#include "pixman-combine32.h"
#include <altivec.h>
diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c
index 184f0c4e6..9555ceaaf 100644
--- a/pixman/pixman/pixman.c
+++ b/pixman/pixman/pixman.c
@@ -605,7 +605,7 @@ pixman_image_composite32 (pixman_op_t op,
else
{
mask_format = PIXMAN_null;
- info.mask_flags = FAST_PATH_IS_OPAQUE;
+ info.mask_flags = FAST_PATH_IS_OPAQUE | FAST_PATH_NO_ALPHA_MAP;
}
dest_format = dest->common.extended_format_code;
diff --git a/pixman/pixman/refactor b/pixman/pixman/refactor
deleted file mode 100644
index 52fceab17..000000000
--- a/pixman/pixman/refactor
+++ /dev/null
@@ -1,478 +0,0 @@
-Roadmap
-
-- Move all the fetchers etc. into pixman-image to make pixman-compose.c
- less intimidating.
-
- DONE
-
-- Make combiners for unified alpha take a mask argument. That way
- we won't need two separate paths for unified vs component in the
- general compositing code.
-
- DONE, except that the Altivec code needs to be updated. Luca is
- looking into that.
-
-- Delete separate 'unified alpha' path
-
- DONE
-
-- Split images into their own files
-
- DONE
-
-- Split the gradient walker code out into its own file
-
- DONE
-
-- Add scanline getters per image
-
- DONE
-
-- Generic 64 bit fetcher
-
- DONE
-
-- Split fast path tables into their respective architecture dependent
- files.
-
-See "Render Algorithm" below for rationale
-
-Images will eventually have these virtual functions:
-
- get_scanline()
- get_scanline_wide()
- get_pixel()
- get_pixel_wide()
- get_untransformed_pixel()
- get_untransformed_pixel_wide()
- get_unfiltered_pixel()
- get_unfiltered_pixel_wide()
-
- store_scanline()
- store_scanline_wide()
-
-1.
-
-Initially we will just have get_scanline() and get_scanline_wide();
-these will be based on the ones in pixman-compose. Hopefully this will
-reduce the complexity in pixman_composite_rect_general().
-
-Note that there are access considerations - the compose function is
-being compiled twice.
-
-
-2.
-
-Split image types into their own source files. Export noop virtual
-reinit() call. Call this whenever a property of the image changes.
-
-
-3.
-
-Split the get_scanline() call into smaller functions that are
-initialized by the reinit() call.
-
-The Render Algorithm:
- (first repeat, then filter, then transform, then clip)
-
-Starting from a destination pixel (x, y), do
-
- 1 x = x - xDst + xSrc
- y = y - yDst + ySrc
-
- 2 reject pixel that is outside the clip
-
- This treats clipping as something that happens after
- transformation, which I think is correct for client clips. For
- hierarchy clips it is wrong, but who really cares? Without
- GraphicsExposes hierarchy clips are basically irrelevant. Yes,
- you could imagine cases where the pixels of a subwindow of a
- redirected, transformed window should be treated as
- transparent. I don't really care
-
- Basically, I think the render spec should say that pixels that
-    are unavailable due to the hierarchy have undefined content,
- and that GraphicsExposes are not generated. Ie., basically
- that using non-redirected windows as sources is fail. This is
- at least consistent with the current implementation and we can
- update the spec later if someone makes it work.
-
- The implication for render is that it should stop passing the
-       hierarchy clip to pixman. In pixman, if a source image has a
- clip it should be used in computing the composite region and
- nowhere else, regardless of what "has_client_clip" says. The
- default should be for there to not be any clip.
-
- I would really like to get rid of the client clip as well for
- source images, but unfortunately there is at least one
- application in the wild that uses them.
-
- 3 Transform pixel: (x, y) = T(x, y)
-
- 4 Call p = GetUntransformedPixel (x, y)
-
- 5 If the image has an alpha map, then
-
- Call GetUntransformedPixel (x, y) on the alpha map
-
- add resulting alpha channel to p
-
- return p
-
- Where GetUnTransformedPixel is:
-
- 6 switch (filter)
- {
- case NEAREST:
- return GetUnfilteredPixel (x, y);
- break;
-
- case BILINEAR:
- return GetUnfilteredPixel (...) // 4 times
- break;
-
- case CONVOLUTION:
- return GetUnfilteredPixel (...) // as many times as necessary.
- break;
- }
-
- Where GetUnfilteredPixel (x, y) is
-
- 7 switch (repeat)
- {
- case REPEAT_NORMAL:
- case REPEAT_PAD:
- case REPEAT_REFLECT:
- // adjust x, y as appropriate
- break;
-
- case REPEAT_NONE:
- if (x, y) is outside image bounds
- return 0;
- break;
- }
-
- return GetRawPixel(x, y)
-
- Where GetRawPixel (x, y) is
-
- 8 Compute the pixel in question, depending on image type.
-
-For gradients, repeat has a totally different meaning, so
-UnfilteredPixel() and RawPixel() must be the same function so that
-gradients can do their own repeat algorithm.
-
-So, the GetRawPixel
-
- for bits must deal with repeats
- for gradients must deal with repeats (differently)
- for solids, should ignore repeats.
-
- for polygons, when we add them, either ignore repeats or do
- something similar to bits (in which case, we may want an extra
- layer of indirection to modify the coordinates).
-
-It is then possible to build things like "get scanline" or "get tile" on
-top of this. In the simplest case, just repeatedly calling GetPixel()
-would work, but specialized get_scanline()s or get_tile()s could be
-plugged in for common cases.
-
-By not plugging anything in for images with access functions, we only
-have to compile the pixel functions twice, not the scanline functions.
-
-And we can get rid of fetchers for the bizarre formats that no one
-uses. Such as b2g3r3 etc. r1g2b1? Seriously? It is also worth
-considering a generic format based pixel fetcher for these edge cases.
-
-Since the actual routines depend on the image attributes, the images
-must be notified when those change and update their function pointers
-appropriately. So there should probably be a virtual function called
-(* reinit) or something like that.
-
-There will also be wide fetchers for both pixels and lines. The line
-fetcher will just call the wide pixel fetcher. The wide pixel fetcher
-will just call expand, except for 10 bit formats.
-
-Rendering pipeline:
-
-Drawable:
- 0. if (picture has alpha map)
- 0.1. Position alpha map according to the alpha_x/alpha_y
-        0.2. Where the two drawables intersect, replace the alpha
-             channel of the source with the one from the alpha
-             map. Replacement only takes place in the intersection
-             of the two drawables' geometries.
- 1. Repeat the drawable according to the repeat attribute
- 2. Reconstruct a continuous image according to the filter
- 3. Transform according to the transform attribute
- 4. Position image such that src_x, src_y is over dst_x, dst_y
- 5. Sample once per destination pixel
- 6. Clip. If a pixel is not within the source clip, then no
- compositing takes place at that pixel. (Ie., it's *not*
- treated as 0).
-
- Sampling a drawable:
-
- - If the channel does not have an alpha channel, the pixels in it
- are treated as opaque.
-
- Note on reconstruction:
-
- - The top left pixel has coordinates (0.5, 0.5) and pixels are
- spaced 1 apart.
-
-Gradient:
- 1. Unless gradient type is conical, repeat the underlying (0, 1)
- gradient according to the repeat attribute
- 2. Integrate the gradient across the plane according to type.
- 3. Transform according to transform attribute
- 4. Position gradient
- 5. Sample once per destination pixel.
- 6. Clip
-
-Solid Fill:
- 1. Repeat has no effect
- 2. Image is already continuous and defined for the entire plane
- 3. Transform has no effect
- 4. Positioning has no effect
- 5. Sample once per destination pixel.
- 6. Clip
-
-Polygon:
- 1. Repeat has no effect
- 2. Image is already continuous and defined on the whole plane
- 3. Transform according to transform attribute
- 4. Position image
- 5. Supersample 15x17 per destination pixel.
- 6. Clip
-
-Possibly interesting additions:
- - More general transformations, such as warping, or general
- shading.
-
- - Shader image where a function is called to generate the
- pixel (ie., uploading assembly code).
-
- - Resampling kernels
-
- In principle the polygon image uses a 15x17 box filter for
- resampling. If we allow general resampling filters, then we
- get all the various antialiasing types for free.
-
- Bilinear downsampling looks terrible and could be much
- improved by a resampling filter. NEAREST reconstruction
- combined with a box resampling filter is what GdkPixbuf
- does, I believe.
-
- Useful for high frequency gradients as well.
-
- (Note that the difference between a reconstruction and a
- resampling filter is mainly where in the pipeline they
- occur. High quality resampling should use a correctly
- oriented kernel so it should happen after transformation.
-
- An implementation can transform the resampling kernel and
- convolve it with the reconstruction if it so desires, but it
- will need to deal with the fact that the resampling kernel
-      will not necessarily be pixel aligned.)
-
- "Output kernels"
-
- One could imagine doing the resampling after compositing,
- ie., for each destination pixel sample each source image 16
- times, then composite those subpixels individually, then
- finally apply a kernel.
-
- However, this is effectively the same as full screen
- antialiasing, which is a simpler way to think about it. So
- resampling kernels may make sense for individual images, but
- not as a post-compositing step.
-
- Fullscreen AA is inefficient without chained compositing
- though. Consider an (image scaled up to oversample size IN
- some polygon) scaled down to screen size. With the current
- implementation, there will be a huge temporary. With chained
- compositing, the whole thing ends up being equivalent to the
- output kernel from above.
-
- - Color space conversion
-
- The complete model here is that each surface has a color
- space associated with it and that the compositing operation
- also has one associated with it. Note also that gradients
-       should have associated colorspaces.
-
- - Dithering
-
- If people dither something that is already dithered, it will
- look terrible, but don't do that, then. (Dithering happens
- after resampling if at all - what is the relationship
- with color spaces? Presumably dithering should happen in linear
- intensity space).
-
- - Floating point surfaces, 16, 32 and possibly 64 bit per
- channel.
-
- Maybe crack:
-
- - Glyph polygons
-
- If glyphs could be given as polygons, they could be
- positioned and rasterized more accurately. The glyph
- structure would need subpixel positioning though.
-
- - Luminance vs. coverage for the alpha channel
-
- Whether the alpha channel should be interpreted as luminance
- modulation or as coverage (intensity modulation). This is a
- bit of a departure from the rendering model though. It could
- also be considered whether it should be possible to have
- both channels in the same drawable.
-
- - Alternative for component alpha
-
- - Set component-alpha on the output image.
-
- - This means each of the components are sampled
- independently and composited in the corresponding
- channel only.
-
- - Have 3 x oversampled mask
-
- - Scale it down by 3 horizontally, with [ 1/3, 1/3, 1/3 ]
- resampling filter.
-
- Is this equivalent to just using a component alpha mask?
-
- Incompatible changes:
-
- - Gradients could be specified with premultiplied colors. (You
- can use a mask to get things like gradients from solid red to
-     transparent red.)
-
-Refactoring pixman
-
-The pixman code is not particularly nice to put it mildly. Among the
-issues are
-
-- inconsistent naming style (fb vs Fb, camelCase vs
- underscore_naming). Sometimes there is even inconsistency *within*
- one name.
-
- fetchProc32 ACCESS(pixman_fetchProcForPicture32)
-
-  may be one of the ugliest names ever created.
-
- coding style:
- use the one from cairo except that pixman uses this brace style:
-
- while (blah)
- {
- }
-
- Format do while like this:
-
- do
- {
-
- }
- while (...);
-
-- PIXMAN_COMPOSITE_RECT_GENERAL() is horribly complex
-
-- switch case logic in pixman-access.c
-
- Instead it would be better to just store function pointers in the
- image objects themselves,
-
- get_pixel()
- get_scanline()
-
-- Much of the scanline fetching code is for formats that no one
- ever uses. a2r2g2b2 anyone?
-
- It would probably be worthwhile having a generic fetcher for any
- pixman format whatsoever.
-
-- Code related to particular image types should be split into individual
- files.
-
- pixman-bits-image.c
- pixman-linear-gradient-image.c
- pixman-radial-gradient-image.c
- pixman-solid-image.c
-
-- Fast path code should be split into files based on architecture:
-
- pixman-mmx-fastpath.c
- pixman-sse2-fastpath.c
- pixman-c-fastpath.c
-
- etc.
-
- Each of these files should then export a fastpath table, which would
- be declared in pixman-private.h. This should allow us to get rid
- of the pixman-mmx.h files.
-
-  The fast path table should describe each fast path. Ie., there should
- be bitfields indicating what things the fast path can handle, rather than
- like now where it is only allowed to take one format per src/mask/dest. Ie.,
-
- {
- FAST_a8r8g8b8 | FAST_x8r8g8b8,
- FAST_null,
- FAST_x8r8g8b8,
- FAST_repeat_normal | FAST_repeat_none,
- the_fast_path
- }
-
-There should then be *one* file that implements pixman_image_composite().
-This should do this:
-
- optimize_operator();
-
- convert 1x1 repeat to solid (actually this should be done at
- image creation time).
-
- is there a useful fastpath?
-
-There should be a file called pixman-cpu.c that contains all the
-architecture specific stuff to detect what CPU features we have.
-
-Issues that must be kept in mind:
-
- - we need accessor code to be preserved
-
- - maybe there should be a "store_scanline" too?
-
- Is this sufficient?
-
- We should preserve the optimization where the
- compositing happens directly in the destination
- whenever possible.
-
- - It should be possible to create GPU samplers from the
- images.
-
-The "horizontal" classification should be a bit in the image, the
-"vertical" classification should just happen inside the gradient
-file. Note though that
-
-    (a) these will change if the transformation/repeat changes.
-
- (b) at the moment the optimization for linear gradients
- takes the source rectangle into account. Presumably
- this is to also optimize the case where the gradient
- is close enough to horizontal?
-
-Who is responsible for repeats? In principle it should be the scanline
-fetch. Right now NORMAL repeats are handled by walk_composite_region()
-while other repeats are handled by the scanline code.
-
-
-(Random note on filtering: do you filter before or after
-transformation? Hardware is going to filter after transformation;
-this is also what pixman does currently). It's not completely clear
-what filtering *after* transformation means. One thing that might look
-good would be to do *supersampling*, ie., compute multiple subpixels
-per destination pixel, then average them together.
diff --git a/pixman/test/blitters-test.c b/pixman/test/blitters-test.c
index 8766fa800..a2c6ff4d8 100644
--- a/pixman/test/blitters-test.c
+++ b/pixman/test/blitters-test.c
@@ -46,7 +46,16 @@ create_random_image (pixman_format_code_t *allowed_formats,
/* do the allocation */
buf = aligned_malloc (64, stride * height);
- prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF);
+ if (prng_rand_n (4) == 0)
+ {
+ /* uniform distribution */
+ prng_randmemset (buf, stride * height, 0);
+ }
+ else
+ {
+ /* significantly increased probability for 0x00 and 0xFF */
+ prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF);
+ }
img = pixman_image_create_bits (fmt, width, height, buf, stride);
@@ -393,6 +402,6 @@ main (int argc, const char *argv[])
}
return fuzzer_test_main("blitters", 2000000,
- 0xD8265D5E,
+ 0x0CF3283B,
test_composite, argc, argv);
}
diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c
index 4e16f7ba1..1049e21e7 100644
--- a/pixman/test/lowlevel-blt-bench.c
+++ b/pixman/test/lowlevel-blt-bench.c
@@ -385,6 +385,7 @@ bench_composite (char * testname,
double t1, t2, t3, pix_cnt;
int64_t n, l1test_width, nlines;
double bytes_per_pix = 0;
+ pixman_bool_t bench_pixbuf = FALSE;
pixman_composite_func_t func = pixman_image_composite_wrapper;
@@ -422,16 +423,20 @@ bench_composite (char * testname,
mask_img = NULL;
xmask_img = NULL;
+ if (strcmp (testname, "pixbuf") == 0 || strcmp (testname, "rpixbuf") == 0)
+ {
+ bench_pixbuf = TRUE;
+ }
if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null)
{
bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0);
mask_img = pixman_image_create_bits (mask_fmt,
WIDTH, HEIGHT,
- mask,
+ bench_pixbuf ? src : mask,
WIDTH * 4);
xmask_img = pixman_image_create_bits (mask_fmt,
XWIDTH, XHEIGHT,
- mask,
+ bench_pixbuf ? src : mask,
XWIDTH * 4);
}
else if (mask_fmt != PIXMAN_null)
@@ -643,6 +648,8 @@ tests_tbl[] =
{ "src_0888_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
{ "src_0888_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
{ "src_0888_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
+ { "src_0888_8888_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
+ { "src_0888_0565_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
{ "src_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
{ "src_x888_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
{ "src_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
@@ -707,6 +714,8 @@ tests_tbl[] =
{ "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
{ "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
{ "over_reverse_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
+ { "pixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 },
+ { "rpixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 },
};
int
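
The new pixbuf and rpixbuf benchmarks pass the source buffer in place of the mask buffer (the bench_pixbuf branch above) because the pixbuf-style paths are only applicable when source and mask are two views of the very same pixel words, e.g. an x8b8g8r8 colour view and an a8b8g8r8 alpha view of one buffer. A toy illustration of that aliasing requirement (simplified stand-in type, not pixman's image structures):

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Simplified stand-in for a bits image: only the pixel pointer matters here. */
    typedef struct
    {
        uint32_t *bits;
    } image_t;

    int
    main (void)
    {
        uint32_t *buf = calloc (64 * 64, sizeof (uint32_t));
        image_t   src, mask;

        if (!buf)
            return 1;

        src.bits  = buf;            /* x8b8g8r8 colour view of the pixels */
        mask.bits = buf;            /* a8b8g8r8 alpha view of same pixels */

        buf[0] = 0x80332211;        /* a8b8g8r8: a=0x80, b=0x33, g=0x22, r=0x11 */

        /* The pixbuf-style paths rely on this aliasing: the mask supplies the
         * top byte as alpha, the source supplies the colour channels, and both
         * read the very same word.
         */
        assert (src.bits == mask.bits);
        assert (mask.bits[0] >> 24 == 0x80);
        assert ((src.bits[0] & 0x00FFFFFF) == 0x00332211);

        free (buf);
        return 0;
    }
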
diff --git a/pixman/test/prng-test.c b/pixman/test/prng-test.c
index 0a3ad5e8f..c1d9320cc 100644
--- a/pixman/test/prng-test.c
+++ b/pixman/test/prng-test.c
@@ -106,7 +106,10 @@ int main (int argc, char *argv[])
{
const uint32_t ref_crc[RANDMEMSET_MORE_00_AND_FF + 1] =
{
- 0xBA06763D, 0x103FC550, 0x8B59ABA5, 0xD82A0F39
+ 0xBA06763D, 0x103FC550, 0x8B59ABA5, 0xD82A0F39,
+ 0xD2321099, 0xFD8C5420, 0xD3B7C42A, 0xFC098093,
+ 0x85E01DE0, 0x6680F8F7, 0x4D32DD3C, 0xAE52382B,
+ 0x149E6CB5, 0x8B336987, 0x15DCB2B3, 0x8A71B781
};
uint32_t crc1, crc2;
uint32_t ref, seed, seed0, seed1, seed2, seed3;
diff --git a/pixman/test/utils-prng.c b/pixman/test/utils-prng.c
index 967b8989a..7b32e3531 100644
--- a/pixman/test/utils-prng.c
+++ b/pixman/test/utils-prng.c
@@ -107,6 +107,7 @@ randmemset_internal (prng_t *prng,
{
prng_t local_prng = *prng;
prng_rand_128_data_t randdata;
+ size_t i;
while (size >= 16)
{
@@ -138,6 +139,22 @@ randmemset_internal (prng_t *prng,
};
randdata.vb &= (t.vb >= const_40);
}
+ if (flags & RANDMEMSET_MORE_FFFFFFFF)
+ {
+ const uint32x4 const_C0000000 =
+ {
+ 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000
+ };
+ randdata.vw |= ((t.vw << 30) >= const_C0000000);
+ }
+ if (flags & RANDMEMSET_MORE_00000000)
+ {
+ const uint32x4 const_40000000 =
+ {
+ 0x40000000, 0x40000000, 0x40000000, 0x40000000
+ };
+ randdata.vw &= ((t.vw << 30) >= const_40000000);
+ }
#else
#define PROCESS_ONE_LANE(i) \
if (flags & RANDMEMSET_MORE_FF) \
@@ -155,6 +172,18 @@ randmemset_internal (prng_t *prng,
mask_00 |= mask_00 >> 2; \
mask_00 |= mask_00 >> 4; \
randdata.w[i] &= mask_00; \
+ } \
+ if (flags & RANDMEMSET_MORE_FFFFFFFF) \
+ { \
+ int32_t mask_ff = ((t.w[i] << 30) & (t.w[i] << 31)) & \
+ 0x80000000; \
+ randdata.w[i] |= mask_ff >> 31; \
+ } \
+ if (flags & RANDMEMSET_MORE_00000000) \
+ { \
+ int32_t mask_00 = ((t.w[i] << 30) | (t.w[i] << 31)) & \
+ 0x80000000; \
+ randdata.w[i] &= mask_00 >> 31; \
}
PROCESS_ONE_LANE (0)
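
In the scalar PROCESS_ONE_LANE fallback above, the new cluster flags turn the two lowest bits of the random word into a full-width mask without branching: both bits set (1 chance in 4) yields an all-ones OR mask for RANDMEMSET_MORE_FFFFFFFF, and both bits clear (1 chance in 4) yields an all-zeros AND mask for RANDMEMSET_MORE_00000000. A standalone restatement of the bit trick (like the original, it assumes two's complement int32_t with arithmetic right shift):

    #include <assert.h>
    #include <stdint.h>

    /* Bit 1 of the random word lands in the sign position after "<< 30" and
     * bit 0 after "<< 31"; combining them and arithmetic-shifting the sign
     * bit back down gives 0 or 0xFFFFFFFF.
     */
    int
    main (void)
    {
        uint32_t t;

        for (t = 0; t < 4; t++)    /* only the two lowest bits matter */
        {
            int32_t mask_ff = (int32_t) (((t << 30) & (t << 31)) & 0x80000000u);
            int32_t mask_00 = (int32_t) (((t << 30) | (t << 31)) & 0x80000000u);

            /* 1 case in 4 (both bits set) gives the all-ones OR mask...        */
            assert ((uint32_t) (mask_ff >> 31) == (t == 3 ? 0xFFFFFFFFu : 0u));
            /* ...and 1 case in 4 (both bits clear) gives the zeroing AND mask. */
            assert ((uint32_t) (mask_00 >> 31) == (t == 0 ? 0u : 0xFFFFFFFFu));
        }
        return 0;
    }
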
@@ -198,7 +227,8 @@ randmemset_internal (prng_t *prng,
}
size -= 16;
}
- while (size > 0)
+ i = 0;
+ while (i < size)
{
uint8_t randbyte = prng_rand_r (&local_prng) & 0xFF;
if (flags != 0)
@@ -208,9 +238,25 @@ randmemset_internal (prng_t *prng,
randbyte = 0xFF;
if ((flags & RANDMEMSET_MORE_00) && (t < 0x40))
randbyte = 0x00;
+ if (i % 4 == 0 && i + 4 <= size)
+ {
+ t = prng_rand_r (&local_prng) & 0xFF;
+ if ((flags & RANDMEMSET_MORE_FFFFFFFF) && (t >= 0xC0))
+ {
+ memset(&buf[i], 0xFF, 4);
+ i += 4;
+ continue;
+ }
+ if ((flags & RANDMEMSET_MORE_00000000) && (t < 0x40))
+ {
+ memset(&buf[i], 0x00, 4);
+ i += 4;
+ continue;
+ }
+ }
}
- *buf++ = randbyte;
- size--;
+ buf[i] = randbyte;
+ i++;
}
*prng = local_prng;
}
@@ -218,8 +264,10 @@ randmemset_internal (prng_t *prng,
/*
* Fill memory buffer with random data. Flags argument may be used
* to tweak some statistics properties:
- * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00
- * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF
+ * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00
+ * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF
+ * RANDMEMSET_MORE_00000000 - ~25% chance for 00000000 4-byte clusters
+ * RANDMEMSET_MORE_FFFFFFFF - ~25% chance for FFFFFFFF 4-byte clusters
*/
void prng_randmemset_r (prng_t *prng,
void *voidbuf,
diff --git a/pixman/test/utils-prng.h b/pixman/test/utils-prng.h
index 285107f08..564ffcef1 100644
--- a/pixman/test/utils-prng.h
+++ b/pixman/test/utils-prng.h
@@ -153,7 +153,10 @@ typedef enum
{
RANDMEMSET_MORE_00 = 1, /* ~25% chance for 0x00 bytes */
RANDMEMSET_MORE_FF = 2, /* ~25% chance for 0xFF bytes */
- RANDMEMSET_MORE_00_AND_FF = (RANDMEMSET_MORE_00 | RANDMEMSET_MORE_FF)
+ RANDMEMSET_MORE_00000000 = 4, /* ~25% chance for 0x00000000 clusters */
+ RANDMEMSET_MORE_FFFFFFFF = 8, /* ~25% chance for 0xFFFFFFFF clusters */
+ RANDMEMSET_MORE_00_AND_FF = (RANDMEMSET_MORE_00 | RANDMEMSET_MORE_00000000 |
+ RANDMEMSET_MORE_FF | RANDMEMSET_MORE_FFFFFFFF)
} prng_randmemset_flags_t;
/* Set the 32-bit seed for PRNG */
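
Because the two new flag bits combine with the existing ones, RANDMEMSET_MORE_00_AND_FF now evaluates to 15, which is why the ref_crc table in prng-test.c above grew to 16 entries: one reference CRC per flag combination from 0 through RANDMEMSET_MORE_00_AND_FF. A quick check of that arithmetic:

    #include <assert.h>

    /* Mirrors the prng_randmemset_flags_t values above. */
    enum
    {
        MORE_00        = 1,
        MORE_FF        = 2,
        MORE_00000000  = 4,
        MORE_FFFFFFFF  = 8,
        MORE_00_AND_FF = MORE_00 | MORE_00000000 | MORE_FF | MORE_FFFFFFFF
    };

    int
    main (void)
    {
        /* ref_crc[] in prng-test.c is sized RANDMEMSET_MORE_00_AND_FF + 1. */
        assert (MORE_00_AND_FF + 1 == 16);
        return 0;
    }
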