aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--X11/Xlibint.h2
-rw-r--r--pixman/configure.ac31
-rw-r--r--pixman/pixman/Makefile.am1
-rw-r--r--pixman/pixman/pixman-bits-image.c253
-rw-r--r--pixman/pixman/pixman-compiler.h7
-rw-r--r--pixman/pixman/pixman-fast-path.c268
-rw-r--r--pixman/pixman/pixman-fast-path.h443
-rw-r--r--pixman/pixman/pixman-general.c54
-rw-r--r--pixman/pixman/pixman-image.c44
-rw-r--r--pixman/pixman/pixman-private.h9
-rw-r--r--pixman/pixman/pixman-sse2.c115
-rw-r--r--pixman/pixman/pixman.c107
-rw-r--r--pixman/test/Makefile.am17
-rw-r--r--pixman/test/affine-test.c261
-rw-r--r--pixman/test/alphamap.c289
-rw-r--r--pixman/test/blitters-test.c15
-rw-r--r--pixman/test/composite-test.c383
-rw-r--r--pixman/test/lowlevel-blt-bench.c712
-rw-r--r--pixman/test/utils.c148
-rw-r--r--pixman/test/utils.h19
20 files changed, 2564 insertions, 614 deletions
diff --git a/X11/Xlibint.h b/X11/Xlibint.h
index ef2ccd109..8a06f6d6a 100644
--- a/X11/Xlibint.h
+++ b/X11/Xlibint.h
@@ -626,7 +626,7 @@ extern void _XFlushGCCache(Display *dpy, GC gc);
if (dpy->bufptr + (n) > dpy->bufmax) \
_XFlush (dpy); \
ptr = (type) dpy->bufptr; \
- (void)ptr; \
+ memset(ptr, '\0', n); \
dpy->bufptr += (n);
#ifdef WORD64
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 851a16a66..a4e5e9316 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
m4_define([pixman_major], 0)
m4_define([pixman_minor], 19)
-m4_define([pixman_micro], 3)
+m4_define([pixman_micro], 5)
m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
@@ -248,6 +248,9 @@ dnl -fvisibility stuff
PIXMAN_CHECK_CFLAG([-fvisibility=hidden], [dnl
#if defined(__GNUC__) && (__GNUC__ >= 4)
+#ifdef _WIN32
+#error Have -fvisibility but it is ignored and generates a warning
+#endif
#else
error Need GCC 4.0 for visibility
#endif
@@ -606,7 +609,7 @@ AC_SUBST(DEP_CFLAGS)
AC_SUBST(DEP_LIBS)
dnl =====================================
-dnl posix_memalign, sigaction, alarm
+dnl posix_memalign, sigaction, alarm, mprotect, getpagesize, gettimeofday
AC_CHECK_FUNC(posix_memalign, have_posix_memalign=yes, have_posix_memalign=no)
if test x$have_posix_memalign = xyes; then
@@ -623,6 +626,25 @@ if test x$have_alarm = xyes; then
AC_DEFINE(HAVE_ALARM, 1, [Whether we have alarm()])
fi
+AC_CHECK_HEADER([sys/mman.h],
+ [AC_DEFINE(HAVE_SYS_MMAN_H, [1], [Define to 1 if we have <sys/mman.h>])])
+
+AC_CHECK_FUNC(mprotect, have_mprotect=yes, have_mprotect=no)
+if test x$have_mprotect = xyes; then
+ AC_DEFINE(HAVE_MPROTECT, 1, [Whether we have mprotect()])
+fi
+
+AC_CHECK_FUNC(getpagesize, have_getpagesize=yes, have_getpagesize=no)
+if test x$have_getpagesize = xyes; then
+ AC_DEFINE(HAVE_GETPAGESIZE, 1, [Whether we have getpagesize()])
+fi
+
+AC_CHECK_FUNC(gettimeofday, have_gettimeofday=yes, have_gettimeofday=no)
+AC_CHECK_HEADER(sys/time.h, have_sys_time_h=yes, have_sys_time_h=no)
+if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then
+ AC_DEFINE(HAVE_GETTIMEOFDAY, 1, [Whether we have gettimeofday()])
+fi
+
dnl =====================================
dnl Thread local storage
@@ -630,8 +652,9 @@ support_for__thread=no
AC_MSG_CHECKING(for __thread)
+dnl MinGW GCC older than 4.5 has broken __thread support, so fail the probe there
AC_LINK_IFELSE([
-#ifdef __MINGW32__
-#error MinGW has broken __thread support
+#if defined(__MINGW32__) && !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
+#error This MinGW version has broken __thread support
#endif
#ifdef __OpenBSD__
#error OpenBSD has broken __thread support
diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am
index 750556e79..2658e40dc 100644
--- a/pixman/pixman/Makefile.am
+++ b/pixman/pixman/Makefile.am
@@ -21,6 +21,7 @@ libpixman_1_la_SOURCES = \
pixman-general.c \
pixman.c \
pixman-fast-path.c \
+ pixman-fast-path.h \
pixman-solid-fill.c \
pixman-conical-gradient.c \
pixman-linear-gradient.c \
diff --git a/pixman/pixman/pixman-bits-image.c b/pixman/pixman/pixman-bits-image.c
index 806c65e7a..1f023b826 100644
--- a/pixman/pixman/pixman-bits-image.c
+++ b/pixman/pixman/pixman-bits-image.c
@@ -637,7 +637,7 @@ bits_image_fetch_affine_no_alpha (pixman_image_t * image,
buffer[i] = bits_image_fetch_pixel_filtered (
&image->bits, x, y, fetch_pixel_no_alpha);
}
-
+
x += ux;
y += uy;
}
@@ -749,6 +749,220 @@ bits_image_fetch_general (pixman_image_t * image,
}
}
+static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
+
+static force_inline void
+bits_image_fetch_bilinear_affine (pixman_image_t * image,
+ int offset,
+ int line,
+ int width,
+ uint32_t * buffer,
+ const uint32_t * mask,
+
+ convert_pixel_t convert_pixel,
+ pixman_format_code_t format,
+ pixman_repeat_t repeat_mode)
+{
+ pixman_fixed_t x, y;
+ pixman_fixed_t ux, uy;
+ pixman_vector_t v;
+ bits_image_t *bits = &image->bits;
+ int i;
+
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (image->common.transform, &v))
+ return;
+
+ ux = image->common.transform->matrix[0][0];
+ uy = image->common.transform->matrix[1][0];
+
+ x = v.vector[0];
+ y = v.vector[1];
+
+ for (i = 0; i < width; ++i)
+ {
+ int x1, y1, x2, y2;
+ uint32_t tl, tr, bl, br;
+ int32_t distx, disty;
+ int width = image->bits.width;
+ int height = image->bits.height;
+ const uint8_t *row1;
+ const uint8_t *row2;
+
+ if (mask && !mask[i])
+ goto next;
+
+ x1 = x - pixman_fixed_1 / 2;
+ y1 = y - pixman_fixed_1 / 2;
+
+ distx = (x1 >> 8) & 0xff;
+ disty = (y1 >> 8) & 0xff;
+
+ y1 = pixman_fixed_to_int (y1);
+ y2 = y1 + 1;
+ x1 = pixman_fixed_to_int (x1);
+ x2 = x1 + 1;
+
+ if (repeat_mode != PIXMAN_REPEAT_NONE)
+ {
+ uint32_t mask;
+
+ mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+
+ repeat (repeat_mode, width, &x1);
+ repeat (repeat_mode, height, &y1);
+ repeat (repeat_mode, width, &x2);
+ repeat (repeat_mode, height, &y2);
+
+ row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
+ row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
+
+ tl = convert_pixel (row1, x1) | mask;
+ tr = convert_pixel (row1, x2) | mask;
+ bl = convert_pixel (row2, x1) | mask;
+ br = convert_pixel (row2, x2) | mask;
+ }
+ else
+ {
+ uint32_t mask1, mask2;
+ int bpp;
+
+ /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
+ * which means if you use it in expressions, those
+ * expressions become unsigned themselves. Since
+ * the variables below can be negative in some cases,
+ * that will lead to crashes on 64 bit architectures.
+ *
+ * So this line makes sure bpp is signed
+ */
+ bpp = PIXMAN_FORMAT_BPP (format);
+
+ if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
+ {
+ buffer[i] = 0;
+ goto next;
+ }
+
+ if (y2 == 0)
+ {
+ row1 = zero;
+ mask1 = 0;
+ }
+ else
+ {
+ row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
+ row1 += bpp / 8 * x1;
+
+ mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+ }
+
+ if (y1 == height - 1)
+ {
+ row2 = zero;
+ mask2 = 0;
+ }
+ else
+ {
+ row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
+ row2 += bpp / 8 * x1;
+
+ mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+ }
+
+ if (x2 == 0)
+ {
+ tl = 0;
+ bl = 0;
+ }
+ else
+ {
+ tl = convert_pixel (row1, 0) | mask1;
+ bl = convert_pixel (row2, 0) | mask2;
+ }
+
+ if (x1 == width - 1)
+ {
+ tr = 0;
+ br = 0;
+ }
+ else
+ {
+ tr = convert_pixel (row1, 1) | mask1;
+ br = convert_pixel (row2, 1) | mask2;
+ }
+ }
+
+ buffer[i] = bilinear_interpolation (
+ tl, tr, bl, br, distx, disty);
+
+ next:
+ x += ux;
+ y += uy;
+ }
+}
+
+static force_inline uint32_t
+convert_a8r8g8b8 (const uint8_t *row, int x)
+{
+ return *(((uint32_t *)row) + x);
+}
+
+static force_inline uint32_t
+convert_x8r8g8b8 (const uint8_t *row, int x)
+{
+ return *(((uint32_t *)row) + x);
+}
+
+static force_inline uint32_t
+convert_a8 (const uint8_t *row, int x)
+{
+ return *(row + x) << 24;
+}
+
+static force_inline uint32_t
+convert_r5g6b5 (const uint8_t *row, int x)
+{
+ return CONVERT_0565_TO_0888 (*((uint16_t *)row + x));
+}
+
+#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \
+ static void \
+ bits_image_fetch_bilinear_affine_ ## name (pixman_image_t *image, \
+ int offset, \
+ int line, \
+ int width, \
+ uint32_t * buffer, \
+ const uint32_t * mask) \
+ { \
+ bits_image_fetch_bilinear_affine (image, offset, line, width, buffer, mask, \
+ convert_ ## format, \
+ PIXMAN_ ## format, \
+ repeat_mode); \
+ }
+
+MAKE_BILINEAR_FETCHER (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD);
+MAKE_BILINEAR_FETCHER (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE);
+MAKE_BILINEAR_FETCHER (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT);
+MAKE_BILINEAR_FETCHER (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL);
+MAKE_BILINEAR_FETCHER (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD);
+MAKE_BILINEAR_FETCHER (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE);
+MAKE_BILINEAR_FETCHER (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT);
+MAKE_BILINEAR_FETCHER (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL);
+MAKE_BILINEAR_FETCHER (pad_a8, a8, PIXMAN_REPEAT_PAD);
+MAKE_BILINEAR_FETCHER (none_a8, a8, PIXMAN_REPEAT_NONE);
+MAKE_BILINEAR_FETCHER (reflect_a8, a8, PIXMAN_REPEAT_REFLECT);
+MAKE_BILINEAR_FETCHER (normal_a8, a8, PIXMAN_REPEAT_NORMAL);
+MAKE_BILINEAR_FETCHER (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD);
+MAKE_BILINEAR_FETCHER (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE);
+MAKE_BILINEAR_FETCHER (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT);
+MAKE_BILINEAR_FETCHER (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL);
+
static void
bits_image_fetch_solid_32 (pixman_image_t * image,
int x,
@@ -954,14 +1168,45 @@ static const fetcher_info_t fetcher_info[] =
_pixman_image_get_scanline_generic_64
},
+#define GENERAL_BILINEAR_FLAGS \
+ (FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_HAS_TRANSFORM | \
+ FAST_PATH_AFFINE_TRANSFORM | \
+ FAST_PATH_BILINEAR_FILTER)
+
+#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
+ { PIXMAN_ ## format, \
+ GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
+ bits_image_fetch_bilinear_affine_ ## name, \
+ _pixman_image_get_scanline_generic_64 \
+ },
+
+ BILINEAR_AFFINE_FAST_PATH (pad_a8r8g8b8, a8r8g8b8, PAD)
+ BILINEAR_AFFINE_FAST_PATH (none_a8r8g8b8, a8r8g8b8, NONE)
+ BILINEAR_AFFINE_FAST_PATH (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
+ BILINEAR_AFFINE_FAST_PATH (normal_a8r8g8b8, a8r8g8b8, NORMAL)
+ BILINEAR_AFFINE_FAST_PATH (pad_x8r8g8b8, x8r8g8b8, PAD)
+ BILINEAR_AFFINE_FAST_PATH (none_x8r8g8b8, x8r8g8b8, NONE)
+ BILINEAR_AFFINE_FAST_PATH (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
+ BILINEAR_AFFINE_FAST_PATH (normal_x8r8g8b8, x8r8g8b8, NORMAL)
+ BILINEAR_AFFINE_FAST_PATH (pad_a8, a8, PAD)
+ BILINEAR_AFFINE_FAST_PATH (none_a8, a8, NONE)
+ BILINEAR_AFFINE_FAST_PATH (reflect_a8, a8, REFLECT)
+ BILINEAR_AFFINE_FAST_PATH (normal_a8, a8, NORMAL)
+ BILINEAR_AFFINE_FAST_PATH (pad_r5g6b5, r5g6b5, PAD)
+ BILINEAR_AFFINE_FAST_PATH (none_r5g6b5, r5g6b5, NONE)
+ BILINEAR_AFFINE_FAST_PATH (reflect_r5g6b5, r5g6b5, REFLECT)
+ BILINEAR_AFFINE_FAST_PATH (normal_r5g6b5, r5g6b5, NORMAL)
+
+ /* Affine, no alpha */
{ PIXMAN_any,
- (FAST_PATH_NO_ALPHA_MAP |
- FAST_PATH_HAS_TRANSFORM |
- FAST_PATH_AFFINE_TRANSFORM),
+ (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
bits_image_fetch_affine_no_alpha,
_pixman_image_get_scanline_generic_64
},
+ /* General */
{ PIXMAN_any, 0, bits_image_fetch_general, _pixman_image_get_scanline_generic_64 },
{ PIXMAN_null },
diff --git a/pixman/pixman/pixman-compiler.h b/pixman/pixman/pixman-compiler.h
index 484ef4477..0fe30a79c 100644
--- a/pixman/pixman/pixman-compiler.h
+++ b/pixman/pixman/pixman-compiler.h
@@ -50,17 +50,22 @@
/* 'inline' is available only in C++ in MSVC */
# define inline __inline
# define force_inline __forceinline
+# define noinline __declspec(noinline)
#elif defined __GNUC__ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define inline __inline__
# define force_inline __inline__ __attribute__ ((__always_inline__))
+# define noinline __attribute__((noinline))
#else
# ifndef force_inline
# define force_inline inline
# endif
+# ifndef noinline
+# define noinline
+# endif
#endif
/* GCC visibility */
-#if defined(__GNUC__) && __GNUC__ >= 4
+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(_WIN32)
# define PIXMAN_EXPORT __attribute__ ((visibility("default")))
/* Sun Studio 8 visibility */
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c
index 6c214fede..0b8a2526e 100644
--- a/pixman/pixman/pixman-fast-path.c
+++ b/pixman/pixman/pixman-fast-path.c
@@ -30,6 +30,7 @@
#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
+#include "pixman-fast-path.h"
static force_inline uint32_t
fetch_24 (uint8_t *a)
@@ -1386,248 +1387,25 @@ fast_composite_src_memcpy (pixman_implementation_t *imp,
}
}
-static force_inline pixman_bool_t
-repeat (pixman_repeat_t repeat, int *c, int size)
-{
- if (repeat == PIXMAN_REPEAT_NONE)
- {
- if (*c < 0 || *c >= size)
- return FALSE;
- }
- else if (repeat == PIXMAN_REPEAT_NORMAL)
- {
- while (*c >= size)
- *c -= size;
- while (*c < 0)
- *c += size;
- }
- else if (repeat == PIXMAN_REPEAT_PAD)
- {
- *c = CLIP (*c, 0, size - 1);
- }
- else /* REFLECT */
- {
- *c = MOD (*c, size * 2);
- if (*c >= size)
- *c = size * 2 - *c - 1;
- }
- return TRUE;
-}
-
-/* A macroified version of specialized nearest scalers for some
- * common 8888 and 565 formats. It supports SRC and OVER ops.
- *
- * There are two repeat versions, one that handles repeat normal,
- * and one without repeat handling that only works if the src region
- * used is completely covered by the pre-repeated source samples.
- *
- * The loops are unrolled to process two pixels per iteration for better
- * performance on most CPU architectures (superscalar processors
- * can issue several operations simultaneously, other processors can hide
- * instructions latencies by pipelining operations). Unrolling more
- * does not make much sense because the compiler will start running out
- * of spare registers soon.
- */
-
-#define GET_8888_ALPHA(s) ((s) >> 24)
- /* This is not actually used since we don't have an OVER with
- 565 source, but it is needed to build. */
-#define GET_0565_ALPHA(s) 0xff
-
-#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
- src_type_t, dst_type_t, OP, repeat_mode) \
-static void \
-fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dst_x, \
- int32_t dst_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type_t *dst_line; \
- src_type_t *src_first_line; \
- uint32_t d; \
- src_type_t s1, s2; \
- uint8_t a1, a2; \
- int w; \
- int x1, x2, y; \
- pixman_fixed_t orig_vx; \
- pixman_fixed_t max_vx, max_vy; \
- pixman_vector_t v; \
- pixman_fixed_t vx, vy; \
- pixman_fixed_t unit_x, unit_y; \
- \
- src_type_t *src; \
- dst_type_t *dst; \
- int src_stride, dst_stride; \
- \
- if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
- abort(); \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NORMAL && \
- PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NONE) \
- { \
- abort(); \
- } \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
- * transformed from destination space to source space */ \
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
- \
- /* reference point is the center of the pixel */ \
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
- v.vector[2] = pixman_fixed_1; \
- \
- if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
- return; \
- \
- unit_x = src_image->common.transform->matrix[0][0]; \
- unit_y = src_image->common.transform->matrix[1][1]; \
- \
- /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
- v.vector[0] -= pixman_fixed_e; \
- v.vector[1] -= pixman_fixed_e; \
- \
- vx = v.vector[0]; \
- vy = v.vector[1]; \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* Clamp repeating positions inside the actual samples */ \
- max_vx = src_image->bits.width << 16; \
- max_vy = src_image->bits.height << 16; \
- \
- repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- } \
- \
- orig_vx = vx; \
- \
- while (--height >= 0) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- \
- y = vy >> 16; \
- vy += unit_y; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- \
- src = src_first_line + src_stride * y; \
- \
- w = width; \
- vx = orig_vx; \
- while ((w -= 2) >= 0) \
- { \
- x1 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s1 = src[x1]; \
- \
- x2 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s2 = src[x2]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- \
- if (a2 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- else if (s2) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
- a2 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- } \
- \
- if (w & 1) \
- { \
- x1 = vx >> 16; \
- s1 = src[x1]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- } \
- } \
-}
-
+FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER);
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE);
+FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD);
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL);
+FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER);
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE);
+FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD);
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL);
+FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
+FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
+FAST_NEAREST (565_565_cover, 0565, 0565, uint16_t, uint16_t, SRC, COVER);
FAST_NEAREST (565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, NONE);
+FAST_NEAREST (565_565_pad, 0565, 0565, uint16_t, uint16_t, SRC, PAD);
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
+FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
+FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
static force_inline uint32_t
@@ -1859,30 +1637,6 @@ static const pixman_fast_path_t c_fast_paths[] =
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
-#define SCALED_NEAREST_FLAGS \
- (FAST_PATH_SCALE_TRANSFORM | \
- FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NEAREST_FILTER | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NO_WIDE_FORMAT)
-
-#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NORMAL_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
- }, \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
- }
SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
diff --git a/pixman/pixman/pixman-fast-path.h b/pixman/pixman/pixman-fast-path.h
new file mode 100644
index 000000000..5de0e1ee0
--- /dev/null
+++ b/pixman/pixman/pixman-fast-path.h
@@ -0,0 +1,443 @@
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. SuSE makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Author: Keith Packard, SuSE, Inc.
+ */
+
+#ifndef PIXMAN_FAST_PATH_H__
+#define PIXMAN_FAST_PATH_H__
+
+#include "pixman-private.h"
+
+#define PIXMAN_REPEAT_COVER -1
+
+static force_inline pixman_bool_t
+repeat (pixman_repeat_t repeat, int *c, int size)
+{
+ if (repeat == PIXMAN_REPEAT_NONE)
+ {
+ if (*c < 0 || *c >= size)
+ return FALSE;
+ }
+ else if (repeat == PIXMAN_REPEAT_NORMAL)
+ {
+ while (*c >= size)
+ *c -= size;
+ while (*c < 0)
+ *c += size;
+ }
+ else if (repeat == PIXMAN_REPEAT_PAD)
+ {
+ *c = CLIP (*c, 0, size - 1);
+ }
+ else /* REFLECT */
+ {
+ *c = MOD (*c, size * 2);
+ if (*c >= size)
+ *c = size * 2 - *c - 1;
+ }
+ return TRUE;
+}
+
+/*
+ * For each scanline fetched from source image with PAD or NONE repeat:
+ * - calculate how many pixels need to be padded on the left side
+ * - calculate how many pixels need to be padded on the right side
+ * - update width to only count pixels which are fetched from the image
+ * All this information is returned via 'width', 'left_pad', 'right_pad'
+ * arguments. The code is assuming that 'unit_x' is positive.
+ *
+ * Note: 64-bit math is used in order to avoid potential overflows, which
+ * is probably excessive in many cases. This particular function
+ * may need its own correctness test and performance tuning.
+ */
+static force_inline void
+pad_repeat_get_scanline_bounds (int32_t source_image_width,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ int32_t * width,
+ int32_t * left_pad,
+ int32_t * right_pad)
+{
+ int64_t max_vx = (int64_t) source_image_width << 16;
+ int64_t tmp;
+ if (vx < 0)
+ {
+ tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
+ if (tmp > *width)
+ {
+ *left_pad = *width;
+ *width = 0;
+ }
+ else
+ {
+ *left_pad = (int32_t) tmp;
+ *width -= (int32_t) tmp;
+ }
+ }
+ else
+ {
+ *left_pad = 0;
+ }
+ tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
+ if (tmp < 0)
+ {
+ *right_pad = *width;
+ *width = 0;
+ }
+ else if (tmp >= *width)
+ {
+ *right_pad = 0;
+ }
+ else
+ {
+ *right_pad = *width - (int32_t) tmp;
+ *width = (int32_t) tmp;
+ }
+}
+
+/* A macroified version of specialized nearest scalers for some
+ * common 8888 and 565 formats. It supports SRC and OVER ops.
+ *
+ * There are four repeat versions: NORMAL, PAD and NONE handle the matching
+ * repeat mode, and COVER does no repeat handling and only works if the src
+ * region used is completely covered by the pre-repeated source samples.
+ *
+ * The loops are unrolled to process two pixels per iteration for better
+ * performance on most CPU architectures (superscalar processors
+ * can issue several operations simultaneously, other processors can hide
+ * instructions latencies by pipelining operations). Unrolling more
+ * does not make much sense because the compiler will start running out
+ * of spare registers soon.
+ */
+
+#define GET_8888_ALPHA(s) ((s) >> 24)
+ /* This is not actually used since we don't have an OVER with
+ 565 source, but it is needed to build. */
+#define GET_0565_ALPHA(s) 0xff
+
+#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
+ src_type_t, dst_type_t, OP, repeat_mode) \
+static force_inline void \
+scanline_func_name (dst_type_t *dst, \
+ src_type_t *src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx) \
+{ \
+ uint32_t d; \
+ src_type_t s1, s2; \
+ uint8_t a1, a2; \
+ int x1, x2; \
+ \
+ if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
+ abort(); \
+ \
+ while ((w -= 2) >= 0) \
+ { \
+ x1 = vx >> 16; \
+ vx += unit_x; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s1 = src[x1]; \
+ \
+ x2 = vx >> 16; \
+ vx += unit_x; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s2 = src[x2]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
+ s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ \
+ if (a2 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ else if (s2) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
+ a2 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ } \
+ \
+ if (w & 1) \
+ { \
+ x1 = vx >> 16; \
+ s1 = src[x1]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ } \
+}
+
+#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
+ repeat_mode) \
+static void \
+fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dst_x, \
+ int32_t dst_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type_t *dst_line; \
+ src_type_t *src_first_line; \
+ int y; \
+ pixman_fixed_t max_vx = 0; /* suppress uninitialized variable warning */ \
+ pixman_fixed_t max_vy; \
+ pixman_vector_t v; \
+ pixman_fixed_t vx, vy; \
+ pixman_fixed_t unit_x, unit_y; \
+ int32_t left_pad, right_pad; \
+ \
+ src_type_t *src; \
+ dst_type_t *dst; \
+ int src_stride, dst_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
+ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
+ * transformed from destination space to source space */ \
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
+ \
+ /* reference point is the center of the pixel */ \
+ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
+ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
+ v.vector[2] = pixman_fixed_1; \
+ \
+ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
+ return; \
+ \
+ unit_x = src_image->common.transform->matrix[0][0]; \
+ unit_y = src_image->common.transform->matrix[1][1]; \
+ \
+ /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
+ v.vector[0] -= pixman_fixed_e; \
+ v.vector[1] -= pixman_fixed_e; \
+ \
+ vx = v.vector[0]; \
+ vy = v.vector[1]; \
+ \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* Clamp repeating positions inside the actual samples */ \
+ max_vx = src_image->bits.width << 16; \
+ max_vy = src_image->bits.height << 16; \
+ \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ } \
+ \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
+ PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+ { \
+ pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
+ &width, &left_pad, &right_pad); \
+ vx += left_pad * unit_x; \
+ } \
+ \
+ while (--height >= 0) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ \
+ y = vy >> 16; \
+ vy += unit_y; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
+ { \
+ repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
+ src = src_first_line + src_stride * y; \
+ if (left_pad > 0) \
+ { \
+ scanline_func (dst, src, left_pad, 0, 0, 0); \
+ } \
+ if (width > 0) \
+ { \
+ scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
+ } \
+ if (right_pad > 0) \
+ { \
+ scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \
+ right_pad, 0, 0, 0); \
+ } \
+ } \
+ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+ { \
+ static src_type_t zero = 0; \
+ if (y < 0 || y >= src_image->bits.height) \
+ { \
+ scanline_func (dst, &zero, left_pad + width + right_pad, 0, 0, 0); \
+ continue; \
+ } \
+ src = src_first_line + src_stride * y; \
+ if (left_pad > 0) \
+ { \
+ scanline_func (dst, &zero, left_pad, 0, 0, 0); \
+ } \
+ if (width > 0) \
+ { \
+ scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
+ } \
+ if (right_pad > 0) \
+ { \
+ scanline_func (dst + left_pad + width, &zero, right_pad, 0, 0, 0); \
+ } \
+ } \
+ else \
+ { \
+ src = src_first_line + src_stride * y; \
+ scanline_func (dst, src, width, vx, unit_x, max_vx); \
+ } \
+ } \
+}
+
+#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
+ src_type_t, dst_type_t, OP, repeat_mode) \
+ FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
+ SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
+ OP, repeat_mode) \
+ FAST_NEAREST_MAINLOOP(scale_func_name##_##OP, \
+ scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
+ src_type_t, dst_type_t, repeat_mode)
+
+
+#define SCALED_NEAREST_FLAGS \
+ (FAST_PATH_SCALE_TRANSFORM | \
+ FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NEAREST_FILTER | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_NARROW_FORMAT)
+
+#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+ }
+
+/* Prefer the use of 'cover' variant, because it is faster */
+#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
+
+#endif
diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c
index fa22049df..105125359 100644
--- a/pixman/pixman/pixman-general.c
+++ b/pixman/pixman/pixman-general.c
@@ -57,17 +57,6 @@ general_composite_rect (pixman_implementation_t *imp,
int32_t height)
{
uint8_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH * 3];
- const pixman_format_code_t src_format =
- src->type == BITS ? src->bits.format : 0;
- const pixman_format_code_t mask_format =
- mask && mask->type == BITS ? mask->bits.format : 0;
- const pixman_format_code_t dest_format =
- dest->type == BITS ? dest->bits.format : 0;
- const int src_wide = PIXMAN_FORMAT_IS_WIDE (src_format);
- const int mask_wide = mask && PIXMAN_FORMAT_IS_WIDE (mask_format);
- const int dest_wide = PIXMAN_FORMAT_IS_WIDE (dest_format);
- const int wide = src_wide || mask_wide || dest_wide;
- const int Bpp = wide ? 8 : 4;
uint8_t *scanline_buffer = stack_scanline_buffer;
uint8_t *src_buffer, *mask_buffer, *dest_buffer;
fetch_scanline_t fetch_src = NULL, fetch_mask = NULL, fetch_dest = NULL;
@@ -77,8 +66,15 @@ general_composite_rect (pixman_implementation_t *imp,
pixman_bool_t component_alpha;
uint32_t *bits;
int32_t stride;
+ int narrow, Bpp;
int i;
+ narrow =
+ (src->common.flags & FAST_PATH_NARROW_FORMAT) &&
+ (!mask || mask->common.flags & FAST_PATH_NARROW_FORMAT) &&
+ (dest->common.flags & FAST_PATH_NARROW_FORMAT);
+ Bpp = narrow ? 4 : 8;
+
if (width * Bpp > SCANLINE_BUFFER_LENGTH)
{
scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
@@ -106,29 +102,29 @@ general_composite_rect (pixman_implementation_t *imp,
if (op == PIXMAN_OP_CLEAR)
fetch_src = NULL;
- else if (wide)
- fetch_src = _pixman_image_get_scanline_64;
- else
+ else if (narrow)
fetch_src = _pixman_image_get_scanline_32;
+ else
+ fetch_src = _pixman_image_get_scanline_64;
if (!mask || op == PIXMAN_OP_CLEAR)
fetch_mask = NULL;
- else if (wide)
- fetch_mask = _pixman_image_get_scanline_64;
- else
+ else if (narrow)
fetch_mask = _pixman_image_get_scanline_32;
+ else
+ fetch_mask = _pixman_image_get_scanline_64;
if (op == PIXMAN_OP_CLEAR || op == PIXMAN_OP_SRC)
fetch_dest = NULL;
- else if (wide)
- fetch_dest = _pixman_image_get_scanline_64;
- else
+ else if (narrow)
fetch_dest = _pixman_image_get_scanline_32;
-
- if (wide)
- store = _pixman_image_store_scanline_64;
else
+ fetch_dest = _pixman_image_get_scanline_64;
+
+ if (narrow)
store = _pixman_image_store_scanline_32;
+ else
+ store = _pixman_image_store_scanline_64;
/* Skip the store step and composite directly into the
* destination if the output format of the compose func matches
@@ -148,7 +144,7 @@ general_composite_rect (pixman_implementation_t *imp,
op == PIXMAN_OP_OUT_REVERSE ||
op == PIXMAN_OP_DST)))
{
- if (!wide &&
+ if (narrow &&
!dest->common.alpha_map &&
!dest->bits.write_func)
{
@@ -175,19 +171,19 @@ general_composite_rect (pixman_implementation_t *imp,
mask->common.component_alpha &&
PIXMAN_FORMAT_RGB (mask->bits.format);
- if (wide)
+ if (narrow)
{
if (component_alpha)
- compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca;
+ compose = _pixman_implementation_combine_32_ca;
else
- compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64;
+ compose = _pixman_implementation_combine_32;
}
else
{
if (component_alpha)
- compose = _pixman_implementation_combine_32_ca;
+ compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca;
else
- compose = _pixman_implementation_combine_32;
+ compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64;
}
if (!compose)
diff --git a/pixman/pixman/pixman-image.c b/pixman/pixman/pixman-image.c
index 3020f246c..1b243979a 100644
--- a/pixman/pixman/pixman-image.c
+++ b/pixman/pixman/pixman-image.c
@@ -327,10 +327,6 @@ compute_image_info (pixman_image_t *image)
flags |= FAST_PATH_Y_UNIT_ZERO;
}
- /* Alpha map */
- if (!image->common.alpha_map)
- flags |= FAST_PATH_NO_ALPHA_MAP;
-
/* Filter */
switch (image->common.filter)
{
@@ -357,19 +353,34 @@ compute_image_info (pixman_image_t *image)
switch (image->common.repeat)
{
case PIXMAN_REPEAT_NONE:
- flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_NORMAL_REPEAT;
+ flags |=
+ FAST_PATH_NO_REFLECT_REPEAT |
+ FAST_PATH_NO_PAD_REPEAT |
+ FAST_PATH_NO_NORMAL_REPEAT;
break;
case PIXMAN_REPEAT_REFLECT:
- flags |= FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_NONE_REPEAT | FAST_PATH_NO_NORMAL_REPEAT;
+ flags |=
+ FAST_PATH_NO_PAD_REPEAT |
+ FAST_PATH_NO_NONE_REPEAT |
+ FAST_PATH_NO_NORMAL_REPEAT |
+ FAST_PATH_COVERS_CLIP;
break;
case PIXMAN_REPEAT_PAD:
- flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_NONE_REPEAT | FAST_PATH_NO_NORMAL_REPEAT;
+ flags |=
+ FAST_PATH_NO_REFLECT_REPEAT |
+ FAST_PATH_NO_NONE_REPEAT |
+ FAST_PATH_NO_NORMAL_REPEAT |
+ FAST_PATH_COVERS_CLIP;
break;
default:
- flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_NONE_REPEAT;
+ flags |=
+ FAST_PATH_NO_REFLECT_REPEAT |
+ FAST_PATH_NO_PAD_REPEAT |
+ FAST_PATH_NO_NONE_REPEAT |
+ FAST_PATH_COVERS_CLIP;
break;
}
@@ -379,7 +390,7 @@ compute_image_info (pixman_image_t *image)
else
flags |= FAST_PATH_UNIFIED_ALPHA;
- flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NO_WIDE_FORMAT);
+ flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NARROW_FORMAT);
/* Type specific checks */
switch (image->type)
@@ -389,6 +400,8 @@ compute_image_info (pixman_image_t *image)
if (image->solid.color.alpha == 0xffff)
flags |= FAST_PATH_IS_OPAQUE;
+
+ flags |= FAST_PATH_COVERS_CLIP;
break;
case BITS:
@@ -426,7 +439,7 @@ compute_image_info (pixman_image_t *image)
flags &= ~FAST_PATH_NO_ACCESSORS;
if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
- flags &= ~FAST_PATH_NO_WIDE_FORMAT;
+ flags &= ~FAST_PATH_NARROW_FORMAT;
break;
case LINEAR:
@@ -454,6 +467,17 @@ compute_image_info (pixman_image_t *image)
break;
}
+ /* Alpha map */
+ if (!image->common.alpha_map)
+ {
+ flags |= FAST_PATH_NO_ALPHA_MAP;
+ }
+ else
+ {
+ if (PIXMAN_FORMAT_IS_WIDE (image->common.alpha_map->format))
+ flags &= ~FAST_PATH_NARROW_FORMAT;
+ }
+
/* Both alpha maps and convolution filters can introduce
* non-opaqueness in otherwise opaque images. Also
* an image with component alpha turned on is only opaque
diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h
index 65f40a1b3..8c68304dc 100644
--- a/pixman/pixman/pixman-private.h
+++ b/pixman/pixman/pixman-private.h
@@ -554,7 +554,7 @@ _pixman_choose_implementation (void);
#define FAST_PATH_NO_PAD_REPEAT (1 << 3)
#define FAST_PATH_NO_REFLECT_REPEAT (1 << 4)
#define FAST_PATH_NO_ACCESSORS (1 << 5)
-#define FAST_PATH_NO_WIDE_FORMAT (1 << 6)
+#define FAST_PATH_NARROW_FORMAT (1 << 6)
#define FAST_PATH_COVERS_CLIP (1 << 7)
#define FAST_PATH_COMPONENT_ALPHA (1 << 8)
#define FAST_PATH_UNIFIED_ALPHA (1 << 9)
@@ -600,7 +600,7 @@ _pixman_choose_implementation (void);
FAST_PATH_NO_PAD_REPEAT | \
FAST_PATH_NO_REFLECT_REPEAT | \
FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NO_WIDE_FORMAT | \
+ FAST_PATH_NARROW_FORMAT | \
FAST_PATH_COVERS_CLIP)
#define FAST_PATH_STD_SRC_FLAGS \
@@ -614,7 +614,7 @@ _pixman_choose_implementation (void);
#define FAST_PATH_STD_DEST_FLAGS \
(FAST_PATH_NO_ACCESSORS | \
FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NO_WIDE_FORMAT)
+ FAST_PATH_NARROW_FORMAT)
#define FAST_PATH(op, src, src_flags, mask, mask_flags, dest, dest_flags, func) \
PIXMAN_OP_ ## op, \
@@ -744,6 +744,9 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
#undef DEBUG
+#define COMPILE_TIME_ASSERT(x) /* statement-form static assertion; use it inside a function body */ \
+ do { typedef int compile_time_assertion [(x)?1:-1]; } while (0) /* a false x yields array size -1, which the compiler rejects */
+
/* Turn on debugging depending on what type of release this is
*/
#if (((PIXMAN_VERSION_MICRO % 2) == 0) && ((PIXMAN_VERSION_MINOR % 2) == 1))
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index cfef466c8..8e175b78d 100644
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -35,6 +35,7 @@
#include <emmintrin.h> /* for SSE2 intrinsics */
#include "pixman-private.h"
#include "pixman-combine32.h"
+#include "pixman-fast-path.h"
#if defined(_MSC_VER) && defined(_M_AMD64)
/* Windows 64 doesn't allow MMX to be used, so
@@ -6346,6 +6347,107 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
_mm_empty ();
}
+/* A variant of 'core_combine_over_u_sse2' with minor tweaks: OVER-combines w pixels into pd, reading source pixel ps[vx >> 16] and advancing vx by unit_x for each destination pixel */
+static force_inline void
+scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
+ const uint32_t* ps,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx) /* NOTE(review): max_vx is never referenced in this body */
+{
+ uint32_t s, d;
+ const uint32_t* pm = NULL; /* never reassigned, so every 'if (pm)' branch below is dead in this variant */
+
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+ /* Align dst on a 16-byte boundary */
+ while (w && ((unsigned long)pd & 15))
+ {
+ d = *pd;
+ s = combine1 (ps + (vx >> 16), pm); /* vx >> 16 is the source pixel index (presumably the integer part of a 16.16 fixed-point x) */
+ vx += unit_x;
+
+ *pd++ = core_combine_over_u_pixel_sse2 (s, d);
+ if (pm)
+ pm++;
+ w--;
+ }
+
+ while (w >= 4) /* main loop: four destination pixels per iteration, pd is 16-byte aligned here */
+ {
+ __m128i tmp;
+ uint32_t tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = ps[vx >> 16]; /* gather four source pixels one by one; they are not contiguous in general */
+ vx += unit_x;
+ tmp2 = ps[vx >> 16];
+ vx += unit_x;
+ tmp3 = ps[vx >> 16];
+ vx += unit_x;
+ tmp4 = ps[vx >> 16];
+ vx += unit_x;
+
+ tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); /* tmp1 lands in the lowest 32-bit lane, matching pd[0] */
+
+ xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
+
+ if (is_opaque (xmm_src_hi)) /* presumably: all four source pixels fully opaque, so OVER reduces to a plain store */
+ {
+ save_128_aligned ((__m128i*)pd, xmm_src_hi);
+ }
+ else if (!is_zero (xmm_src_hi)) /* when is_zero() holds nothing is stored and dst is left untouched */
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (
+ xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+
+ over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ /* rebuild the 4 pixel data and save */
+ save_128_aligned ((__m128i*)pd,
+ pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ w -= 4;
+ pd += 4;
+ if (pm)
+ pm += 4;
+ }
+
+ while (w) /* tail: fewer than four pixels left, handled one at a time */
+ {
+ d = *pd;
+ s = combine1 (ps + (vx >> 16), pm);
+ vx += unit_x;
+
+ *pd++ = core_combine_over_u_pixel_sse2 (s, d);
+ if (pm)
+ pm++;
+
+ w--;
+ }
+ _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, COVER);
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, NONE);
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, PAD);
+
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
@@ -6429,6 +6531,19 @@ static const pixman_fast_path_t sse2_fast_paths[] =
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+
{ PIXMAN_OP_NONE },
};
diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c
index 4dd45f471..f2565ca01 100644
--- a/pixman/pixman/pixman.c
+++ b/pixman/pixman/pixman.c
@@ -139,14 +139,18 @@ optimize_operator (pixman_op_t op,
uint32_t dst_flags)
{
pixman_bool_t is_source_opaque, is_dest_opaque;
- int opaqueness;
- is_source_opaque = ((src_flags & mask_flags) & FAST_PATH_IS_OPAQUE) != 0;
- is_dest_opaque = (dst_flags & FAST_PATH_IS_OPAQUE) != 0;
+#define OPAQUE_SHIFT 13
+
+ COMPILE_TIME_ASSERT (FAST_PATH_IS_OPAQUE == (1 << OPAQUE_SHIFT));
+
+ is_dest_opaque = (dst_flags & FAST_PATH_IS_OPAQUE);
+ is_source_opaque = ((src_flags & mask_flags) & FAST_PATH_IS_OPAQUE);
- opaqueness = ((is_dest_opaque << 1) | is_source_opaque);
+ is_dest_opaque >>= OPAQUE_SHIFT - 1;
+ is_source_opaque >>= OPAQUE_SHIFT;
- return operator_table[op].opaque_info[opaqueness];
+ return operator_table[op].opaque_info[is_dest_opaque | is_source_opaque];
}
static void
@@ -302,6 +306,10 @@ pixman_compute_composite_region32 (pixman_region32_t * region,
if (region->extents.x1 >= region->extents.x2 ||
region->extents.y1 >= region->extents.y2)
{
+ region->extents.x1 = 0;
+ region->extents.x2 = 0;
+ region->extents.y1 = 0;
+ region->extents.y2 = 0;
return FALSE;
}
@@ -311,14 +319,27 @@ pixman_compute_composite_region32 (pixman_region32_t * region,
return FALSE;
}
- if (dst_image->common.alpha_map && dst_image->common.alpha_map->common.have_clip_region)
+ if (dst_image->common.alpha_map)
{
- if (!clip_general_image (region, &dst_image->common.alpha_map->common.clip_region,
- -dst_image->common.alpha_origin_x,
- -dst_image->common.alpha_origin_y))
+ if (!pixman_region32_intersect_rect (region, region,
+ dst_image->common.alpha_origin_x,
+ dst_image->common.alpha_origin_y,
+ dst_image->common.alpha_map->width,
+ dst_image->common.alpha_map->height))
{
return FALSE;
}
+ if (!pixman_region32_not_empty (region))
+ return FALSE;
+ if (dst_image->common.alpha_map->common.have_clip_region)
+ {
+ if (!clip_general_image (region, &dst_image->common.alpha_map->common.clip_region,
+ -dst_image->common.alpha_origin_x,
+ -dst_image->common.alpha_origin_y))
+ {
+ return FALSE;
+ }
+ }
}
/* clip against src */
@@ -691,29 +712,9 @@ analyze_extent (pixman_image_t *image, int x, int y,
pixman_fixed_t width, height;
pixman_box32_t ex;
- *flags |= FAST_PATH_COVERS_CLIP;
if (!image)
return TRUE;
- transform = image->common.transform;
- if (image->common.type == BITS)
- {
- /* During repeat mode calculations we might convert the
- * width/height of an image to fixed 16.16, so we need
- * them to be smaller than 16 bits.
- */
- if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff)
- return FALSE;
-
- if (image->common.repeat == PIXMAN_REPEAT_NONE &&
- (x > extents->x1 || y > extents->y1 ||
- x + image->bits.width < extents->x2 ||
- y + image->bits.height < extents->y2))
- {
- (*flags) &= ~FAST_PATH_COVERS_CLIP;
- }
- }
-
/* Some compositing functions walk one step
* outside the destination rectangle, so we
* check here that the expanded-by-one source
@@ -727,8 +728,28 @@ analyze_extent (pixman_image_t *image, int x, int y,
return FALSE;
}
+ transform = image->common.transform;
if (image->common.type == BITS)
{
+ /* During repeat mode calculations we might convert the
+ * width/height of an image to fixed 16.16, so we need
+ * them to be smaller than 16 bits.
+ */
+ if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff)
+ return FALSE;
+
+#define ID_AND_NEAREST (FAST_PATH_ID_TRANSFORM | FAST_PATH_NEAREST_FILTER)
+
+ if ((image->common.flags & ID_AND_NEAREST) == ID_AND_NEAREST &&
+ extents->x1 - x >= 0 &&
+ extents->y1 - y >= 0 &&
+ extents->x2 - x <= image->bits.width &&
+ extents->y2 - y <= image->bits.height)
+ {
+ *flags |= (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_COVERS_CLIP);
+ return TRUE;
+ }
+
switch (image->common.filter)
{
case PIXMAN_FILTER_CONVOLUTION:
@@ -759,6 +780,17 @@ analyze_extent (pixman_image_t *image, int x, int y,
default:
return FALSE;
}
+
+ /* Check whether the non-expanded, transformed extent is entirely within
+ * the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is.
+ */
+ ex = *extents;
+ if (compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, height) &&
+ ex.x1 >= 0 && ex.y1 >= 0 &&
+ ex.x2 <= image->bits.width && ex.y2 <= image->bits.height)
+ {
+ *flags |= (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_COVERS_CLIP);
+ }
}
else
{
@@ -769,8 +801,8 @@ analyze_extent (pixman_image_t *image, int x, int y,
}
/* Check that the extents expanded by one don't overflow. This ensures that
- * compositing functions can simply walk the source space using 16.16 variables
- * without worrying about overflow.
+ * compositing functions can simply walk the source space using 16.16
+ * variables without worrying about overflow.
*/
ex.x1 = extents->x1 - 1;
ex.y1 = extents->y1 - 1;
@@ -780,19 +812,6 @@ analyze_extent (pixman_image_t *image, int x, int y,
if (!compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, height))
return FALSE;
- if (image->type == BITS)
- {
- /* Check whether the non-expanded, transformed extent is entirely within
- * the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is.
- */
- ex = *extents;
- if (compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, height))
- {
- if (ex.x1 >= 0 && ex.y1 >= 0 && ex.x2 <= image->bits.width && ex.y2 <= image->bits.height)
- *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
- }
- }
-
return TRUE;
}
diff --git a/pixman/test/Makefile.am b/pixman/test/Makefile.am
index 108ae96ee..e3ab89038 100644
--- a/pixman/test/Makefile.am
+++ b/pixman/test/Makefile.am
@@ -13,11 +13,12 @@ TESTPROGRAMS = \
window-test \
gradient-crash-test \
trap-crasher \
- alphamap \
alpha-loop \
scaling-crash-test \
+ alphamap \
blitters-test \
scaling-test \
+ affine-test \
composite
a1_trap_test_LDADD = $(TEST_LDADD)
@@ -39,6 +40,9 @@ blitters_test_SOURCES = blitters-test.c utils.c utils.h
scaling_test_LDADD = $(TEST_LDADD)
scaling_test_SOURCES = scaling-test.c utils.c utils.h
+affine_test_LDADD = $(TEST_LDADD)
+affine_test_SOURCES = affine-test.c utils.c utils.h
+
alphamap_LDADD = $(TEST_LDADD)
alphamap_SOURCES = alphamap.c utils.c utils.h
@@ -90,7 +94,14 @@ convolution_test_SOURCES = convolution-test.c $(GTK_UTILS)
endif
-noinst_PROGRAMS = $(TESTPROGRAMS) $(TESTPROGRAMS_GTK)
+# Benchmarks
-TESTS = $(TESTPROGRAMS)
+BENCHMARKS = \
+ lowlevel-blt-bench
+lowlevel_blt_bench_SOURCES = lowlevel-blt-bench.c utils.c utils.h
+lowlevel_blt_bench_LDADD = $(TEST_LDADD)
+
+noinst_PROGRAMS = $(TESTPROGRAMS) $(TESTPROGRAMS_GTK) $(BENCHMARKS)
+
+TESTS = $(TESTPROGRAMS)
diff --git a/pixman/test/affine-test.c b/pixman/test/affine-test.c
new file mode 100644
index 000000000..3e32b5a23
--- /dev/null
+++ b/pixman/test/affine-test.c
@@ -0,0 +1,261 @@
+/*
+ * Test program, which can detect some problems with affine transformations
+ * in pixman. Testing is done by running lots of random SRC and OVER
+ * compositing operations on a8r8g8b8, x8r8g8b8 and r5g6b5 color formats
+ * with random scaled, rotated and translated transforms.
+ *
+ * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in
+ * the case of test failure.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define MAX_SRC_WIDTH 16
+#define MAX_SRC_HEIGHT 16
+#define MAX_DST_WIDTH 16
+#define MAX_DST_HEIGHT 16
+#define MAX_STRIDE 4
+
+/*
+ * Composite operation with pseudorandom images: everything is derived from the LCG seeded with testnum; returns the CRC32 of the destination buffer
+ */
+uint32_t
+test_composite (int testnum,
+ int verbose)
+{
+ int i;
+ pixman_image_t * src_img;
+ pixman_image_t * dst_img;
+ pixman_transform_t transform;
+ pixman_region16_t clip;
+ int src_width, src_height;
+ int dst_width, dst_height;
+ int src_stride, dst_stride;
+ int src_x, src_y;
+ int dst_x, dst_y;
+ int src_bpp;
+ int dst_bpp;
+ int w, h;
+ pixman_fixed_t scale_x = 65536, scale_y = 65536;
+ pixman_fixed_t translate_x = 0, translate_y = 0;
+ int op;
+ int repeat = 0;
+ int src_fmt, dst_fmt;
+ uint32_t * srcbuf;
+ uint32_t * dstbuf;
+ uint32_t crc32;
+ FLOAT_REGS_CORRUPTION_DETECTOR_START ();
+
+ lcg_srand (testnum); /* the test number is the seed, so a given testnum always replays the same case */
+
+ src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; /* bytes per pixel, despite the name */
+ dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
+ op = (lcg_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER;
+
+ src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
+ src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+ dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
+ dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
+ src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; /* row size in bytes plus random padding pixels */
+ dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
+
+ if (src_stride & 3) /* stride is a multiple of 2 here, so adding 2 rounds it up to a multiple of 4 bytes */
+ src_stride += 2;
+
+ if (dst_stride & 3)
+ dst_stride += 2;
+
+ src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); /* origins may fall partly outside the images on purpose */
+ src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
+ dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
+ dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
+ w = lcg_rand_n (dst_width * 3 / 2 - dst_x);
+ h = lcg_rand_n (dst_height * 3 / 2 - dst_y);
+
+ srcbuf = (uint32_t *)malloc (src_stride * src_height); /* NOTE(review): malloc results are used without a NULL check */
+ dstbuf = (uint32_t *)malloc (dst_stride * dst_height);
+
+ for (i = 0; i < src_stride * src_height; i++) /* fill both buffers, padding included, with pseudorandom bytes */
+ *((uint8_t *)srcbuf + i) = lcg_rand_n (256);
+
+ for (i = 0; i < dst_stride * dst_height; i++)
+ *((uint8_t *)dstbuf + i) = lcg_rand_n (256);
+
+ src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ?
+ PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; /* 4-byte pixels: a8r8g8b8 or x8r8g8b8; 2-byte pixels: r5g6b5 */
+
+ dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ?
+ PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
+
+ src_img = pixman_image_create_bits (
+ src_fmt, src_width, src_height, srcbuf, src_stride);
+
+ dst_img = pixman_image_create_bits (
+ dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
+
+ image_endian_swap (src_img, src_bpp * 8); /* second argument is bits per pixel */
+ image_endian_swap (dst_img, dst_bpp * 8);
+
+ pixman_transform_init_identity (&transform);
+
+ if (lcg_rand_n (8) > 0) /* 7 out of 8 cases: random scale and translation */
+ {
+ scale_x = -32768 * 3 + lcg_rand_N (65536 * 5); /* assuming lcg_rand_N (n) yields [0, n): a 16.16 factor in [-1.5, 3.5), so negative scales occur */
+ scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
+ translate_x = lcg_rand_N (65536);
+ translate_y = lcg_rand_N (65536);
+ pixman_transform_init_scale (&transform, scale_x, scale_y);
+ pixman_transform_translate (&transform, NULL, translate_x, translate_y);
+ }
+
+ if (lcg_rand_n (4) > 0) /* 3 out of 4 cases: add a rotation */
+ {
+ int c = lcg_rand_N (2 * 65536) - 65536; /* c and s are drawn independently; presumably a cos/sin pair that need not be normalized - confirm */
+ int s = lcg_rand_N (2 * 65536) - 65536;
+
+ pixman_transform_rotate (&transform, NULL, c, s);
+ }
+
+ pixman_image_set_transform (src_img, &transform);
+
+ switch (lcg_rand_n (4)) /* pick one of the four repeat modes */
+ {
+ case 0:
+ repeat = PIXMAN_REPEAT_NONE;
+ break;
+
+ case 1:
+ repeat = PIXMAN_REPEAT_NORMAL;
+ break;
+
+ case 2:
+ repeat = PIXMAN_REPEAT_PAD;
+ break;
+
+ case 3:
+ repeat = PIXMAN_REPEAT_REFLECT;
+ break;
+
+ default:
+ break;
+ }
+ pixman_image_set_repeat (src_img, repeat);
+
+ if (lcg_rand_n (2)) /* nearest or bilinear filtering, chosen at random */
+ pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0);
+ else
+ pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
+ if (verbose)
+ {
+ printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
+ printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n",
+ op, scale_x, scale_y, repeat);
+ printf ("translate_x=%d, translate_y=%d\n",
+ translate_x, translate_y);
+ printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n",
+ src_width, src_height, dst_width, dst_height);
+ printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n",
+ src_x, src_y, dst_x, dst_y);
+ printf ("w=%d, h=%d\n", w, h);
+ }
+
+ if (lcg_rand_n (8) == 0) /* 1 out of 8 cases: clip the source with one or two random boxes */
+ {
+ pixman_box16_t clip_boxes[2];
+ int n = lcg_rand_n (2) + 1;
+
+ for (i = 0; i < n; i++)
+ {
+ clip_boxes[i].x1 = lcg_rand_n (src_width);
+ clip_boxes[i].y1 = lcg_rand_n (src_height);
+ clip_boxes[i].x2 =
+ clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
+ clip_boxes[i].y2 =
+ clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
+
+ if (verbose)
+ {
+ printf ("source clip box: [%d,%d-%d,%d]\n",
+ clip_boxes[i].x1, clip_boxes[i].y1,
+ clip_boxes[i].x2, clip_boxes[i].y2);
+ }
+ }
+
+ pixman_region_init_rects (&clip, clip_boxes, n);
+ pixman_image_set_clip_region (src_img, &clip);
+ pixman_image_set_source_clipping (src_img, 1);
+ pixman_region_fini (&clip); /* the local region is released right after being handed to the image */
+ }
+
+ if (lcg_rand_n (8) == 0) /* 1 out of 8 cases: clip the destination the same way */
+ {
+ pixman_box16_t clip_boxes[2];
+ int n = lcg_rand_n (2) + 1;
+ for (i = 0; i < n; i++)
+ {
+ clip_boxes[i].x1 = lcg_rand_n (dst_width);
+ clip_boxes[i].y1 = lcg_rand_n (dst_height);
+ clip_boxes[i].x2 =
+ clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
+ clip_boxes[i].y2 =
+ clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
+
+ if (verbose)
+ {
+ printf ("destination clip box: [%d,%d-%d,%d]\n",
+ clip_boxes[i].x1, clip_boxes[i].y1,
+ clip_boxes[i].x2, clip_boxes[i].y2);
+ }
+ }
+ pixman_region_init_rects (&clip, clip_boxes, n);
+ pixman_image_set_clip_region (dst_img, &clip);
+ pixman_region_fini (&clip);
+ }
+
+ pixman_image_composite (op, src_img, NULL, dst_img,
+ src_x, src_y, 0, 0, dst_x, dst_y, w, h); /* the single operation under test; no mask image */
+
+ if (dst_fmt == PIXMAN_x8r8g8b8)
+ {
+ /* ignore unused part: clear the top byte of every 32-bit word so the x bits cannot affect the CRC */
+ for (i = 0; i < dst_stride * dst_height / 4; i++)
+ dstbuf[i] &= 0xFFFFFF;
+ }
+
+ image_endian_swap (dst_img, dst_bpp * 8);
+
+ if (verbose)
+ {
+ int j;
+
+ for (i = 0; i < dst_height; i++) /* hex dump of the destination, one row per line, stride padding included */
+ {
+ for (j = 0; j < dst_stride; j++)
+ printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j));
+
+ printf ("\n");
+ }
+ }
+
+ pixman_image_unref (src_img);
+ pixman_image_unref (dst_img);
+
+ crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height); /* checksum covers the whole buffer, padding bytes included */
+ free (srcbuf);
+ free (dstbuf);
+
+ FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
+ return crc32;
+}
+
+int
+main (int argc, const char *argv[]) /* entry point of the affine fuzzer test */
+{
+ pixman_disable_out_of_bounds_workaround ();
+
+ return fuzzer_test_main ("affine", 8000000, 0x46EC3C6A, /* presumably the default number of tests and the expected combined checksum - confirm against utils.c */
+ test_composite, argc, argv);
+}
diff --git a/pixman/test/alphamap.c b/pixman/test/alphamap.c
index e6a25efcb..eb7a330f9 100644
--- a/pixman/test/alphamap.c
+++ b/pixman/test/alphamap.c
@@ -1,49 +1,240 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include "utils.h"
-
-#define WIDTH 400
-#define HEIGHT 200
-
-int
-main (int argc, char **argv)
-{
- uint8_t *alpha = make_random_bytes (WIDTH * HEIGHT);
- uint32_t *src = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4);
- uint32_t *dest = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4);
- int i;
-
- pixman_image_t *a = pixman_image_create_bits (PIXMAN_a8, WIDTH, HEIGHT, (uint32_t *)alpha, WIDTH);
- pixman_image_t *d = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, dest, WIDTH * 4);
-
- for (i = 0; i < 2; ++i)
- {
- pixman_format_code_t sformat = (i == 0)? PIXMAN_a8r8g8b8 : PIXMAN_a2r10g10b10;
- pixman_image_t *s = pixman_image_create_bits (sformat, WIDTH, HEIGHT, src, WIDTH * 4);
- int j, k;
-
- pixman_image_set_alpha_map (s, a, 0, 0);
-
- pixman_image_composite (PIXMAN_OP_SRC, s, NULL, d, 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT);
-
- for (j = 0; j < HEIGHT; ++j)
- {
- for (k = 0; k < WIDTH; ++k)
- {
- uint8_t ap = ((uint8_t *)alpha)[j * WIDTH + k];
- uint32_t dap = (dest[j * WIDTH + k] >> 24);
- uint32_t sap = (src[j * WIDTH + k] >> 24);
-
- if (ap != dap)
- {
- printf ("Wrong alpha value at (%d, %d). Should be %d; got %d (src was %d)\n", k, j, ap, dap, sap);
- return 1;
- }
- }
- }
-
- pixman_image_unref (s);
- }
-
- return 0;
-}
+#include <stdio.h>
+#include <stdlib.h>
+#include "utils.h"
+
+#define WIDTH 100
+#define HEIGHT 100
+
+static const pixman_format_code_t formats[] = /* pixel formats tried for the source and destination images; indexed by run_test's s and d */
+{
+ PIXMAN_a8r8g8b8,
+ PIXMAN_a2r10g10b10,
+ PIXMAN_a4r4g4b4,
+ PIXMAN_a8
+};
+
+static const pixman_format_code_t alpha_formats[] = /* formats tried for the attached alpha map; indexed by run_test's sa and da */
+{
+ PIXMAN_null, /* create_image attaches no alpha map for this value */
+ PIXMAN_a8,
+ PIXMAN_a2r10g10b10,
+ PIXMAN_a4r4g4b4
+};
+
+static const int origins[] = /* alpha-map origin offsets; run_test uses the same value for x and y */
+{
+ 0, 10, -100
+};
+
+static const char * /* Returns a printable name for the formats used in this test; PIXMAN_null maps to "none" */
+format_name (pixman_format_code_t format)
+{
+ if (format == PIXMAN_a8)
+ return "a8";
+ else if (format == PIXMAN_a2r10g10b10)
+ return "a2r10g10b10";
+ else if (format == PIXMAN_a8r8g8b8)
+ return "a8r8g8b8";
+ else if (format == PIXMAN_a4r4g4b4)
+ return "a4r4g4b4";
+ else if (format == PIXMAN_null)
+ return "none";
+ else
+ assert (0); /* any other format means the tables above and this function are out of sync */
+
+ return "<unknown - bug in alphamap.c>"; /* only reached when assert is compiled out */
+}
+
+static pixman_image_t * /* Creates a WIDTH x HEIGHT image of the given format backed by random bytes */
+make_image (pixman_format_code_t format)
+{
+ uint32_t *bits;
+ uint8_t bpp = PIXMAN_FORMAT_BPP (format) / 8; /* bytes per pixel, despite the name */
+
+ bits = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * bpp); /* NOTE(review): nothing in this file frees this buffer - confirm who owns it */
+
+ return pixman_image_create_bits (format, WIDTH, HEIGHT, bits, WIDTH * bpp); /* last argument is the row stride in bytes, with no padding */
+}
+
+static pixman_image_t * /* Creates a random image and, unless alpha_format is PIXMAN_null, attaches a random alpha map at the given origin */
+create_image (pixman_format_code_t format, pixman_format_code_t alpha_format,
+ int alpha_origin_x, int alpha_origin_y)
+{
+ pixman_image_t *image = make_image (format);
+
+ if (alpha_format != PIXMAN_null)
+ {
+ pixman_image_t *alpha = make_image (alpha_format);
+
+ pixman_image_set_alpha_map (image, alpha,
+ alpha_origin_x, alpha_origin_y); /* NOTE(review): the local reference to alpha is never dropped here; if set_alpha_map takes its own reference this leaks - confirm */
+ }
+
+ return image;
+}
+
+static uint8_t /* Returns the alpha at (x, y) widened to 8 bits, read from the alpha map when the image has one */
+get_alpha (pixman_image_t *image, int x, int y, int orig_x, int orig_y)
+{
+ uint8_t *bits;
+ uint8_t r;
+
+ if (image->common.alpha_map)
+ {
+ if (x - orig_x >= 0 && x - orig_x < WIDTH &&
+ y - orig_y >= 0 && y - orig_y < HEIGHT)
+ {
+ image = (pixman_image_t *)image->common.alpha_map; /* from here on read the alpha map instead of the image itself */
+
+ x -= orig_x; /* translate into alpha-map coordinates */
+ y -= orig_y;
+ }
+ else
+ {
+ return 0; /* outside the alpha map the alpha is treated as 0 */
+ }
+ }
+
+ bits = (uint8_t *)image->bits.bits;
+
+ if (image->bits.format == PIXMAN_a8)
+ {
+ r = bits[y * WIDTH + x]; /* indexing relies on the stride being exactly WIDTH pixels, as make_image sets it */
+ }
+ else if (image->bits.format == PIXMAN_a2r10g10b10)
+ {
+ r = ((uint32_t *)bits)[y * WIDTH + x] >> 30; /* top 2 bits are the alpha */
+ r |= r << 2; /* replicate the 2 bits to fill all 8 */
+ r |= r << 4;
+ }
+ else if (image->bits.format == PIXMAN_a8r8g8b8)
+ {
+ r = ((uint32_t *)bits)[y * WIDTH + x] >> 24; /* top 8 bits are the alpha */
+ }
+ else if (image->bits.format == PIXMAN_a4r4g4b4)
+ {
+ r = ((uint16_t *)bits)[y * WIDTH + x] >> 12; /* top 4 bits are the alpha */
+ r |= r << 4; /* replicate the 4 bits to fill all 8 */
+ }
+ else
+ {
+ assert (0); /* NOTE(review): if assert is compiled out, r is returned uninitialized on this path */
+ }
+
+ return r;
+}
+
+#define ARRAY_LENGTH(A) ((int) (sizeof (A) / sizeof ((A) [0])))
+
+static int /* Runs one format/alpha-map/origin combination; returns 0 on success and 1 after printing a diagnostic on the first wrong alpha value */
+run_test (int s, int d, int sa, int da, int soff, int doff)
+{
+ pixman_format_code_t sf = formats[s]; /* all six parameters are indices into the tables above */
+ pixman_format_code_t df = formats[d];
+ pixman_format_code_t saf = alpha_formats[sa];
+ pixman_format_code_t daf = alpha_formats[da];
+ pixman_image_t *src, *dst, *orig_dst;
+ pixman_transform_t t1;
+ int j, k;
+ int n_alpha_bits;
+
+ soff = origins[soff]; /* from here on soff and doff hold the origin values, not the indices */
+ doff = origins[doff];
+
+ n_alpha_bits = PIXMAN_FORMAT_A (df); /* alpha precision of the destination: the alpha map's when there is one, else the image's own */
+ if (daf != PIXMAN_null)
+ n_alpha_bits = PIXMAN_FORMAT_A (daf);
+
+
+ src = create_image (sf, saf, soff, soff);
+ orig_dst = create_image (df, daf, doff, doff); /* orig_dst and dst get independent random contents; dst is overwritten from orig_dst below */
+ dst = create_image (df, daf, doff, doff);
+
+ /* Transformations on destinations should be ignored, so just set some
+ * random one.
+ */
+ pixman_transform_init_identity (&t1);
+ pixman_transform_scale (&t1, NULL, pixman_int_to_fixed (100), pixman_int_to_fixed (11));
+ pixman_transform_rotate (&t1, NULL, pixman_double_to_fixed (0.5), pixman_double_to_fixed (0.11));
+ pixman_transform_translate (&t1, NULL, pixman_int_to_fixed (11), pixman_int_to_fixed (17));
+
+#if 0
+ /* Unfortunately, this is actually broken at the moment, so we can't
+ * actually turn it on
+ */
+ pixman_image_set_transform (dst, &t1);
+#endif
+
+ pixman_image_composite (PIXMAN_OP_SRC, orig_dst, NULL, dst,
+ 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); /* step 1: copy orig_dst into dst */
+
+ pixman_image_composite (PIXMAN_OP_ADD, src, NULL, dst,
+ 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); /* step 2: ADD src onto dst; this is the result being checked */
+
+ for (j = MAX (doff, 0); j < MIN (HEIGHT, HEIGHT + doff); ++j) /* only rows/columns inside [doff, doff + size) clipped to the image are checked */
+ {
+ for (k = MAX (doff, 0); k < MIN (WIDTH, WIDTH + doff); ++k)
+ {
+ uint8_t sa, da, oda, ref; /* NOTE(review): sa and da shadow the function parameters of the same name */
+
+ sa = get_alpha (src, k, j, soff, soff);
+ da = get_alpha (dst, k, j, doff, doff);
+ oda = get_alpha (orig_dst, k, j, doff, doff);
+
+ if (sa + oda > 255) /* expected alpha is the saturating sum of source and original destination alpha */
+ ref = 255;
+ else
+ ref = sa + oda;
+
+ if (da >> (8 - n_alpha_bits) != ref >> (8 - n_alpha_bits)) /* compare only the top n_alpha_bits bits */
+ {
+ printf ("\nWrong alpha value at (%d, %d). Should be 0x%x; got 0x%x. Source was 0x%x, original dest was 0x%x\n",
+ k, j, ref, da, sa, oda);
+
+ printf ("src: %s, alpha: %s, origin %d %d\ndst: %s, alpha: %s, origin: %d %d\n\n",
+ format_name (sf),
+ format_name (saf),
+ soff, soff,
+ format_name (df),
+ format_name (daf),
+ doff, doff);
+ return 1; /* NOTE(review): the three images are not unreferenced on this failure path */
+ }
+ }
+ }
+
+ pixman_image_unref (src);
+ pixman_image_unref (dst);
+ pixman_image_unref (orig_dst);
+
+ return 0;
+}
+
+int
+main (int argc, char **argv) /* Runs run_test for every combination of table indices; exits with 1 at the first failure, 0 otherwise */
+{
+ int i, j, a, b, x, y;
+
+ for (i = 0; i < ARRAY_LENGTH (formats); ++i) /* i: source format */
+ {
+ for (j = 0; j < ARRAY_LENGTH (formats); ++j) /* j: destination format */
+ {
+ for (a = 0; a < ARRAY_LENGTH (alpha_formats); ++a) /* a: source alpha-map format */
+ {
+ for (b = 0; b < ARRAY_LENGTH (alpha_formats); ++b) /* b: destination alpha-map format */
+ {
+ for (x = 0; x < ARRAY_LENGTH (origins); ++x) /* x: source alpha origin (despite the name, not an x coordinate) */
+ {
+ for (y = 0; y < ARRAY_LENGTH (origins); ++y) /* y: destination alpha origin */
+ {
+ if (run_test (i, j, a, b, x, y) != 0)
+ return 1;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/pixman/test/blitters-test.c b/pixman/test/blitters-test.c
index a61817d96..a9e8cc2ce 100644
--- a/pixman/test/blitters-test.c
+++ b/pixman/test/blitters-test.c
@@ -14,21 +14,6 @@
static pixman_indexed_t rgb_palette[9];
static pixman_indexed_t y_palette[9];
-static void *
-aligned_malloc (size_t align, size_t size)
-{
- void *result;
-
-#ifdef HAVE_POSIX_MEMALIGN
- if (posix_memalign (&result, align, size) != 0)
- result = NULL;
-#else
- result = malloc (size);
-#endif
-
- return result;
-}
-
/* Create random image for testing purposes */
static pixman_image_t *
create_random_image (pixman_format_code_t *allowed_formats,
diff --git a/pixman/test/composite-test.c b/pixman/test/composite-test.c
index 5401abfdf..79d5d5eac 100644
--- a/pixman/test/composite-test.c
+++ b/pixman/test/composite-test.c
@@ -1,192 +1,191 @@
-#include <gtk/gtk.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "pixman.h"
-#include "gtk-utils.h"
-
-#define WIDTH 60
-#define HEIGHT 60
-
-typedef struct {
- const char *name;
- pixman_op_t op;
-} operator_t;
-
-static const operator_t operators[] = {
- { "CLEAR", PIXMAN_OP_CLEAR },
- { "SRC", PIXMAN_OP_SRC },
- { "DST", PIXMAN_OP_DST },
- { "OVER", PIXMAN_OP_OVER },
- { "OVER_REVERSE", PIXMAN_OP_OVER_REVERSE },
- { "IN", PIXMAN_OP_IN },
- { "IN_REVERSE", PIXMAN_OP_IN_REVERSE },
- { "OUT", PIXMAN_OP_OUT },
- { "OUT_REVERSE", PIXMAN_OP_OUT_REVERSE },
- { "ATOP", PIXMAN_OP_ATOP },
- { "ATOP_REVERSE", PIXMAN_OP_ATOP_REVERSE },
- { "XOR", PIXMAN_OP_XOR },
- { "ADD", PIXMAN_OP_ADD },
- { "SATURATE", PIXMAN_OP_SATURATE },
-
- { "MULTIPLY", PIXMAN_OP_MULTIPLY },
- { "SCREEN", PIXMAN_OP_SCREEN },
- { "OVERLAY", PIXMAN_OP_OVERLAY },
- { "DARKEN", PIXMAN_OP_DARKEN },
- { "LIGHTEN", PIXMAN_OP_LIGHTEN },
- { "COLOR_DODGE", PIXMAN_OP_COLOR_DODGE },
- { "COLOR_BURN", PIXMAN_OP_COLOR_BURN },
- { "HARD_LIGHT", PIXMAN_OP_HARD_LIGHT },
- { "SOFT_LIGHT", PIXMAN_OP_SOFT_LIGHT },
- { "DIFFERENCE", PIXMAN_OP_DIFFERENCE },
- { "EXCLUSION", PIXMAN_OP_EXCLUSION },
- { "HSL_HUE", PIXMAN_OP_HSL_HUE },
- { "HSL_SATURATION", PIXMAN_OP_HSL_SATURATION },
- { "HSL_COLOR", PIXMAN_OP_HSL_COLOR },
- { "HSL_LUMINOSITY", PIXMAN_OP_HSL_LUMINOSITY },
-};
-
-static uint32_t
-reader (const void *src, int size)
-{
- switch (size)
- {
- case 1:
- return *(uint8_t *)src;
- case 2:
- return *(uint16_t *)src;
- case 4:
- return *(uint32_t *)src;
- default:
- g_assert_not_reached();
- }
-}
-
-static void
-writer (void *src, uint32_t value, int size)
-{
- switch (size)
- {
- case 1:
- *(uint8_t *)src = value;
- break;
-
- case 2:
- *(uint16_t *)src = value;
- break;
-
- case 4:
- *(uint32_t *)src = value;
- break;
-
- default:
- break;
- }
-}
-
-int
-main (int argc, char **argv)
-{
-#define d2f pixman_double_to_fixed
-
- GtkWidget *window, *swindow;
- GtkWidget *table;
- uint32_t *dest = malloc (WIDTH * HEIGHT * 4);
- uint32_t *src = malloc (WIDTH * HEIGHT * 4);
- pixman_image_t *src_img;
- pixman_image_t *dest_img;
- pixman_point_fixed_t p1 = { -10 << 0, 0 };
- pixman_point_fixed_t p2 = { WIDTH << 16, (HEIGHT - 10) << 16 };
- uint16_t full = 0xcfff;
- uint16_t low = 0x5000;
- uint16_t alpha = 0xffff;
- pixman_gradient_stop_t stops[6] =
- {
- { d2f (0.0), { full, low, low, alpha } },
- { d2f (0.25), { full, full, low, alpha } },
- { d2f (0.4), { low, full, low, alpha } },
- { d2f (0.5), { low, full, full, alpha } },
- { d2f (0.8), { low, low, full, alpha } },
- { d2f (1.0), { full, low, full, alpha } },
- };
-
-
- int i;
-
- gtk_init (&argc, &argv);
-
- window = gtk_window_new (GTK_WINDOW_TOPLEVEL);
-
- gtk_window_set_default_size (GTK_WINDOW (window), 800, 600);
-
- g_signal_connect (window, "delete-event",
- G_CALLBACK (gtk_main_quit),
- NULL);
- table = gtk_table_new (G_N_ELEMENTS (operators) / 6, 6, TRUE);
-
- src_img = pixman_image_create_linear_gradient (&p1, &p2, stops,
- sizeof (stops) / sizeof (stops[0]));
-
- pixman_image_set_repeat (src_img, PIXMAN_REPEAT_PAD);
-
- dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
- WIDTH, HEIGHT,
- dest,
- WIDTH * 4);
- pixman_image_set_accessors (dest_img, reader, writer);
-
- for (i = 0; i < G_N_ELEMENTS (operators); ++i)
- {
- GtkWidget *image;
- GdkPixbuf *pixbuf;
- GtkWidget *vbox;
- GtkWidget *label;
- int j, k;
-
- vbox = gtk_vbox_new (FALSE, 0);
-
- label = gtk_label_new (operators[i].name);
- gtk_box_pack_start (GTK_BOX (vbox), label, FALSE, FALSE, 6);
- gtk_widget_show (label);
-
- for (j = 0; j < HEIGHT; ++j)
- {
- for (k = 0; k < WIDTH; ++k)
- dest[j * WIDTH + k] = 0x7f6f6f00;
- }
- pixman_image_composite (operators[i].op, src_img, NULL, dest_img,
- 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT);
- pixbuf = pixbuf_from_argb32 (pixman_image_get_data (dest_img), TRUE,
- WIDTH, HEIGHT, WIDTH * 4);
- image = gtk_image_new_from_pixbuf (pixbuf);
- gtk_box_pack_start (GTK_BOX (vbox), image, FALSE, FALSE, 0);
- gtk_widget_show (image);
-
- gtk_table_attach_defaults (GTK_TABLE (table), vbox,
- i % 6, (i % 6) + 1, i / 6, (i / 6) + 1);
- gtk_widget_show (vbox);
-
- g_object_unref (pixbuf);
- }
-
- pixman_image_unref (src_img);
- free (src);
- pixman_image_unref (dest_img);
- free (dest);
-
- swindow = gtk_scrolled_window_new (NULL, NULL);
- gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow),
- GTK_POLICY_AUTOMATIC,
- GTK_POLICY_AUTOMATIC);
-
- gtk_scrolled_window_add_with_viewport (GTK_SCROLLED_WINDOW (swindow), table);
- gtk_widget_show (table);
-
- gtk_container_add (GTK_CONTAINER (window), swindow);
- gtk_widget_show (swindow);
-
- gtk_widget_show (window);
-
- gtk_main ();
-
- return 0;
-}
+#include <gtk/gtk.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "pixman.h"
+#include "gtk-utils.h"
+
+#define WIDTH 60
+#define HEIGHT 60
+
+/* Pairs a printable operator name with the pixman_op_t value it stands for. */
+typedef struct {
+ const char *name;
+ pixman_op_t op;
+} operator_t;
+
+/*
+ * Operators exercised by main(), in display order: main() walks this table,
+ * uses .name as the label text and .op as the composite operator, and
+ * places entry i in table cell (column i % 6, row i / 6).
+ */
+static const operator_t operators[] = {
+ { "CLEAR", PIXMAN_OP_CLEAR },
+ { "SRC", PIXMAN_OP_SRC },
+ { "DST", PIXMAN_OP_DST },
+ { "OVER", PIXMAN_OP_OVER },
+ { "OVER_REVERSE", PIXMAN_OP_OVER_REVERSE },
+ { "IN", PIXMAN_OP_IN },
+ { "IN_REVERSE", PIXMAN_OP_IN_REVERSE },
+ { "OUT", PIXMAN_OP_OUT },
+ { "OUT_REVERSE", PIXMAN_OP_OUT_REVERSE },
+ { "ATOP", PIXMAN_OP_ATOP },
+ { "ATOP_REVERSE", PIXMAN_OP_ATOP_REVERSE },
+ { "XOR", PIXMAN_OP_XOR },
+ { "ADD", PIXMAN_OP_ADD },
+ { "SATURATE", PIXMAN_OP_SATURATE },
+
+ { "MULTIPLY", PIXMAN_OP_MULTIPLY },
+ { "SCREEN", PIXMAN_OP_SCREEN },
+ { "OVERLAY", PIXMAN_OP_OVERLAY },
+ { "DARKEN", PIXMAN_OP_DARKEN },
+ { "LIGHTEN", PIXMAN_OP_LIGHTEN },
+ { "COLOR_DODGE", PIXMAN_OP_COLOR_DODGE },
+ { "COLOR_BURN", PIXMAN_OP_COLOR_BURN },
+ { "HARD_LIGHT", PIXMAN_OP_HARD_LIGHT },
+ { "SOFT_LIGHT", PIXMAN_OP_SOFT_LIGHT },
+ { "DIFFERENCE", PIXMAN_OP_DIFFERENCE },
+ { "EXCLUSION", PIXMAN_OP_EXCLUSION },
+ { "HSL_HUE", PIXMAN_OP_HSL_HUE },
+ { "HSL_SATURATION", PIXMAN_OP_HSL_SATURATION },
+ { "HSL_COLOR", PIXMAN_OP_HSL_COLOR },
+ { "HSL_LUMINOSITY", PIXMAN_OP_HSL_LUMINOSITY },
+};
+
+/*
+ * Read accessor handed to pixman_image_set_accessors(): loads a 1-, 2- or
+ * 4-byte value from src and returns it widened to 32 bits.
+ *
+ * Any other size is treated as a programming error and trips
+ * g_assert_not_reached(); if that assertion is compiled out, 0 is returned.
+ */
+static uint32_t
+reader (const void *src, int size)
+{
+    switch (size)
+    {
+    case 1:
+        return *(const uint8_t *)src;
+    case 2:
+        return *(const uint16_t *)src;
+    case 4:
+        return *(const uint32_t *)src;
+    default:
+        g_assert_not_reached ();
+        /* The assertion can be disabled at build time (G_DISABLE_ASSERT);
+         * return a defined value rather than falling off the end of a
+         * non-void function. */
+        return 0;
+    }
+}
+
+/*
+ * Write accessor handed to pixman_image_set_accessors(): stores value,
+ * narrowed to 1, 2 or 4 bytes according to size, at the address src.
+ * Any other size is ignored and nothing is written.
+ */
+static void
+writer (void *src, uint32_t value, int size)
+{
+    if (size == 1)
+        *(uint8_t *)src = value;
+    else if (size == 2)
+        *(uint16_t *)src = value;
+    else if (size == 4)
+        *(uint32_t *)src = value;
+    /* unsupported size: deliberately a no-op */
+}
+
+/*
+ * Visual test: composites a padded linear gradient onto a constant-colour
+ * WIDTH x HEIGHT a8r8g8b8 buffer once per entry of operators[], and shows
+ * each labelled result in a six-column table inside a scrolled window.
+ *
+ * Returns 0 after gtk_main() returns, or 1 if the destination buffer or
+ * one of the pixman images could not be created.
+ */
+int
+main (int argc, char **argv)
+{
+#define d2f pixman_double_to_fixed
+
+    GtkWidget *window, *swindow;
+    GtkWidget *table;
+    uint32_t *dest = malloc (WIDTH * HEIGHT * 4);
+    pixman_image_t *src_img;
+    pixman_image_t *dest_img;
+    /* Plain -10: avoids left-shifting a negative value, which is
+     * undefined behaviour in C, while keeping the same coordinate. */
+    pixman_point_fixed_t p1 = { -10, 0 };
+    pixman_point_fixed_t p2 = { WIDTH << 16, (HEIGHT - 10) << 16 };
+    uint16_t full = 0xcfff;
+    uint16_t low = 0x5000;
+    uint16_t alpha = 0xffff;
+    pixman_gradient_stop_t stops[6] =
+    {
+        { d2f (0.0), { full, low, low, alpha } },
+        { d2f (0.25), { full, full, low, alpha } },
+        { d2f (0.4), { low, full, low, alpha } },
+        { d2f (0.6), { low, full, full, alpha } },
+        { d2f (0.8), { low, low, full, alpha } },
+        { d2f (1.0), { full, low, full, alpha } },
+    };
+
+    int i;
+
+    if (!dest)
+    {
+        fprintf (stderr, "composite-test: out of memory\n");
+        return 1;
+    }
+
+    gtk_init (&argc, &argv);
+
+    window = gtk_window_new (GTK_WINDOW_TOPLEVEL);
+
+    gtk_window_set_default_size (GTK_WINDOW (window), 800, 600);
+
+    g_signal_connect (window, "delete-event",
+                      G_CALLBACK (gtk_main_quit),
+                      NULL);
+    table = gtk_table_new (G_N_ELEMENTS (operators) / 6, 6, TRUE);
+
+    src_img = pixman_image_create_linear_gradient (&p1, &p2, stops,
+                                                   sizeof (stops) / sizeof (stops[0]));
+    if (!src_img)
+    {
+        fprintf (stderr, "composite-test: could not create gradient image\n");
+        free (dest);
+        return 1;
+    }
+
+    pixman_image_set_repeat (src_img, PIXMAN_REPEAT_PAD);
+
+    dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
+                                         WIDTH, HEIGHT,
+                                         dest,
+                                         WIDTH * 4);
+    if (!dest_img)
+    {
+        fprintf (stderr, "composite-test: could not create destination image\n");
+        pixman_image_unref (src_img);
+        free (dest);
+        return 1;
+    }
+    pixman_image_set_accessors (dest_img, reader, writer);
+
+    for (i = 0; i < G_N_ELEMENTS (operators); ++i)
+    {
+        GtkWidget *image;
+        GdkPixbuf *pixbuf;
+        GtkWidget *vbox;
+        GtkWidget *label;
+        int j, k;
+
+        vbox = gtk_vbox_new (FALSE, 0);
+
+        label = gtk_label_new (operators[i].name);
+        gtk_box_pack_start (GTK_BOX (vbox), label, FALSE, FALSE, 6);
+        gtk_widget_show (label);
+
+        /* Reset the destination to the same background before every operator. */
+        for (j = 0; j < HEIGHT; ++j)
+        {
+            for (k = 0; k < WIDTH; ++k)
+                dest[j * WIDTH + k] = 0x7f6f6f00;
+        }
+        pixman_image_composite (operators[i].op, src_img, NULL, dest_img,
+                                0, 0, 0, 0, 0, 0, WIDTH, HEIGHT);
+        pixbuf = pixbuf_from_argb32 (pixman_image_get_data (dest_img), TRUE,
+                                     WIDTH, HEIGHT, WIDTH * 4);
+        image = gtk_image_new_from_pixbuf (pixbuf);
+        gtk_box_pack_start (GTK_BOX (vbox), image, FALSE, FALSE, 0);
+        gtk_widget_show (image);
+
+        gtk_table_attach_defaults (GTK_TABLE (table), vbox,
+                                   i % 6, (i % 6) + 1, i / 6, (i / 6) + 1);
+        gtk_widget_show (vbox);
+
+        g_object_unref (pixbuf);
+    }
+
+    pixman_image_unref (src_img);
+    pixman_image_unref (dest_img);
+    free (dest);
+
+    swindow = gtk_scrolled_window_new (NULL, NULL);
+    gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow),
+                                    GTK_POLICY_AUTOMATIC,
+                                    GTK_POLICY_AUTOMATIC);
+
+    gtk_scrolled_window_add_with_viewport (GTK_SCROLLED_WINDOW (swindow), table);
+    gtk_widget_show (table);
+
+    gtk_container_add (GTK_CONTAINER (window), swindow);
+    gtk_widget_show (swindow);
+
+    gtk_widget_show (window);
+
+    gtk_main ();
+
+    return 0;
+}
diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c
new file mode 100644
index 000000000..d4df81507
--- /dev/null
+++ b/pixman/test/lowlevel-blt-bench.c
@@ -0,0 +1,712 @@
+/*
+ * Copyright © 2009 Nokia Corporation
+ * Copyright © 2010 Movial Creative Technologies Oy
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+#include "utils.h"
+
+/* Bits for the src_flags / mask_flags arguments of bench_composite():
+ * SOLID_FLAG makes it create a 1x1 image with PIXMAN_REPEAT_NORMAL,
+ * CA_FLAG makes it enable component alpha on the mask image. */
+#define SOLID_FLAG 1
+#define CA_FLAG 2
+
+/* Sizes bench_composite() uses to derive the L1 test width and the number
+ * of lines of the L2 test. */
+#define L1CACHE_SIZE (8 * 1024)
+#define L2CACHE_SIZE (128 * 1024)
+
+/* Dimensions of the main test images; BUFSIZE is the byte size of one
+ * WIDTH x HEIGHT buffer at 4 bytes per pixel. */
+#define WIDTH 1920
+#define HEIGHT 1080
+#define BUFSIZE (WIDTH * HEIGHT * 4)
+/* Dimensions of the secondary x*_img images created in bench_composite(). */
+#define XWIDTH 256
+#define XHEIGHT 256
+/* Random rectangles are 1 .. 2*TILEWIDTH pixels per side in bench_HT,
+ * bench_VT and bench_R, and 1 .. 2*TINYWIDTH in bench_RT. */
+#define TILEWIDTH 32
+#define TINYWIDTH 8
+
+/* When non-zero, bench_composite() first times each benchmark with
+ * pixman_image_composite_empty and subtracts that time from the result. */
+#define EXCLUDE_OVERHEAD 1
+
+/* Pixel buffers; main() points them at three consecutive BUFSIZE-byte
+ * regions of a single allocation. */
+uint32_t *dst;
+uint32_t *src;
+uint32_t *mask;
+
+/* Result of bench_memcpy(), set in main(); used as the 100% reference for
+ * the percentage printed by the M test. */
+double bandwidth = 0;
+
+/*
+ * Reference memcpy speed between the global src and dst buffers.
+ *
+ * A calibration phase copies back and forth until gettime() has advanced
+ * by more than 0.5, counting 4 * (BUFSIZE - 64) per round. The measured
+ * phase then works through five times that count using byte-shifted
+ * (unaligned) addresses. Returns the measured count divided by the
+ * gettime() difference of the measured phase.
+ */
+double
+bench_memcpy ()
+{
+    const int64_t per_round = 4 * (BUFSIZE - 64);
+    int64_t remaining = 0, total;
+    double start, stop;
+    int shift = 0;
+
+    /* Calibration: find out how much work fits into the time window. */
+    start = gettime ();
+    do
+    {
+        memcpy (dst, src, BUFSIZE - 64);
+        memcpy (src, dst, BUFSIZE - 64);
+        remaining += per_round;
+        stop = gettime ();
+    }
+    while (!(stop - start > 0.5));
+
+    total = remaining * 5;
+    remaining = total;
+
+    /* Measurement: source offset cycles 1, 2, ..., 63, 0, 1, ... */
+    start = gettime ();
+    for (; remaining > 0; remaining -= per_round)
+    {
+        shift = (shift + 1) % 64;
+        memcpy ((char *)dst + 1, (char *)src + shift, BUFSIZE - 64);
+        memcpy ((char *)src + 1, (char *)dst + shift, BUFSIZE - 64);
+    }
+    stop = gettime ();
+
+    return (double)total / (stop - start);
+}
+
+/*
+ * Adapter that lets pixman_image_composite() be used where a
+ * pixman_composite_func_t is expected. The impl argument is ignored;
+ * every other argument is forwarded unchanged.
+ */
+static void
+pixman_image_composite_wrapper (pixman_implementation_t *impl,
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    (void) impl;
+
+    pixman_image_composite (op,
+                            src_image, mask_image, dst_image,
+                            src_x, src_y,
+                            mask_x, mask_y,
+                            dest_x, dest_y,
+                            width, height);
+}
+
+/*
+ * Overhead probe with the same signature as
+ * pixman_image_composite_wrapper(): the coordinates and size passed in
+ * are ignored and a single 1x1 composite at the origin is issued instead.
+ * bench_composite() times this to estimate per-call overhead.
+ */
+static void
+pixman_image_composite_empty (pixman_implementation_t *impl,
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
+{
+    (void) impl;
+    (void) src_x;
+    (void) src_y;
+    (void) mask_x;
+    (void) mask_y;
+    (void) dest_x;
+    (void) dest_y;
+    (void) width;
+    (void) height;
+
+    pixman_image_composite (op,
+                            src_image, mask_image, dst_image,
+                            0, 0,
+                            0, 0,
+                            0, 0,
+                            1, 1);
+}
+
+/*
+ * Cache-resident benchmark loop: calls func n times on a
+ * width x lines_count rectangle at y = 0. The source/mask x offset cycles
+ * through 1, 2, ..., 63, 0, ... and the destination x offset is 63 minus
+ * that value. Before each call every 16th word of the first width + 64
+ * words of the global dst and src buffers is read.
+ */
+void
+noinline
+bench_L (pixman_op_t              op,
+         pixman_image_t *         src_img,
+         pixman_image_t *         mask_img,
+         pixman_image_t *         dst_img,
+         int64_t                  n,
+         pixman_composite_func_t  func,
+         int                      width,
+         int                      lines_count)
+{
+    int64_t iter, word;
+    int drift = 0;
+    int sum = 0;
+    volatile int sink;
+
+    for (iter = 0; iter < n; iter++)
+    {
+        /* touch the buffers so they are fetched into cache */
+        for (word = 0; word < width + 64; word += 16)
+        {
+            sum += dst[word];
+            sum += src[word];
+        }
+
+        drift = (drift + 1 >= 64) ? 0 : drift + 1;
+        func (0, op, src_img, mask_img, dst_img,
+              drift, 0, drift, 0, 63 - drift, 0,
+              width, lines_count);
+    }
+
+    /* store through a volatile so the reads above are not optimised away */
+    sink = sum;
+}
+
+/*
+ * Large-rectangle benchmark loop: calls func n times on a
+ * (WIDTH - 64) x HEIGHT rectangle. The source/mask x offset cycles through
+ * 1, 2, ..., 63, 0, ...; the destination is always at (1, 0).
+ */
+void
+noinline
+bench_M (pixman_op_t              op,
+         pixman_image_t *         src_img,
+         pixman_image_t *         mask_img,
+         pixman_image_t *         dst_img,
+         int64_t                  n,
+         pixman_composite_func_t  func)
+{
+    int64_t remaining;
+    int drift = 0;
+
+    for (remaining = n; remaining > 0; remaining--)
+    {
+        drift = (drift + 1) % 64;
+        func (0, op, src_img, mask_img, dst_img,
+              drift, 0, drift, 0, 1, 0,
+              WIDTH - 64, HEIGHT);
+    }
+}
+
+/*
+ * Horizontal traversal benchmark: calls func n times on rectangles whose
+ * sides are rand()-chosen in 1 .. 2 * TILEWIDTH (rand is reseeded with 0).
+ * Rectangles are laid out left to right; when one would cross WIDTH the
+ * position wraps to x = 0 and moves down by 2 * TILEWIDTH, and when one
+ * would cross HEIGHT it wraps to y = 0. Source, mask and destination use
+ * the same coordinates. Returns the total number of pixels requested.
+ */
+double
+noinline
+bench_HT (pixman_op_t              op,
+          pixman_image_t *         src_img,
+          pixman_image_t *         mask_img,
+          pixman_image_t *         dst_img,
+          int64_t                  n,
+          pixman_composite_func_t  func)
+{
+    const int max_side = TILEWIDTH * 2;
+    double pixels = 0;
+    int col = 0, row = 0;
+    int64_t k;
+
+    srand (0);
+    for (k = 0; k < n; k++)
+    {
+        int w = 1 + rand () % max_side;
+        int h = 1 + rand () % max_side;
+
+        if (col + w > WIDTH)
+        {
+            /* band is full: start the next one */
+            col = 0;
+            row += max_side;
+        }
+        if (row + h > HEIGHT)
+            row = 0;
+
+        func (0, op, src_img, mask_img, dst_img, col, row, col, row, col, row, w, h);
+        col += w;
+        pixels += w * h;
+    }
+
+    return pixels;
+}
+
+/*
+ * Vertical traversal benchmark: calls func n times on rectangles whose
+ * sides are rand()-chosen in 1 .. 2 * TILEWIDTH (rand is reseeded with 0).
+ * Rectangles are laid out top to bottom; when one would cross HEIGHT the
+ * position wraps to y = 0 and moves right by 2 * TILEWIDTH, and when one
+ * would cross WIDTH it wraps to x = 0. Source, mask and destination use
+ * the same coordinates. Returns the total number of pixels requested.
+ */
+double
+noinline
+bench_VT (pixman_op_t              op,
+          pixman_image_t *         src_img,
+          pixman_image_t *         mask_img,
+          pixman_image_t *         dst_img,
+          int64_t                  n,
+          pixman_composite_func_t  func)
+{
+    const int max_side = TILEWIDTH * 2;
+    double pixels = 0;
+    int col = 0, row = 0;
+    int64_t k;
+
+    srand (0);
+    for (k = 0; k < n; k++)
+    {
+        int w = 1 + rand () % max_side;
+        int h = 1 + rand () % max_side;
+
+        if (row + h > HEIGHT)
+        {
+            /* column is full: start the next one */
+            row = 0;
+            col += max_side;
+        }
+        if (col + w > WIDTH)
+            col = 0;
+
+        func (0, op, src_img, mask_img, dst_img, col, row, col, row, col, row, w, h);
+        row += h;
+        pixels += w * h;
+    }
+
+    return pixels;
+}
+
+/*
+ * Random-position benchmark: calls func n times on rectangles whose sides
+ * are rand()-chosen in 1 .. 2 * TILEWIDTH, with independent random source
+ * and destination origins below (maxw - 2 * TILEWIDTH,
+ * maxh - 2 * TILEWIDTH); the mask uses the source origin. rand is
+ * reseeded with 0. Returns the total number of pixels requested, or 0
+ * (after printing an error) when maxw or maxh does not exceed
+ * 2 * TILEWIDTH.
+ */
+double
+noinline
+bench_R (pixman_op_t              op,
+         pixman_image_t *         src_img,
+         pixman_image_t *         mask_img,
+         pixman_image_t *         dst_img,
+         int64_t                  n,
+         pixman_composite_func_t  func,
+         int                      maxw,
+         int                      maxh)
+{
+    const int max_side = TILEWIDTH * 2;
+    double pixels = 0;
+    int64_t k;
+
+    if (!(maxw > max_side && maxh > max_side))
+    {
+        printf("error: maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2\n");
+        return 0;
+    }
+
+    srand (0);
+    for (k = 0; k < n; k++)
+    {
+        /* the order of the rand () calls determines the sequence: keep it */
+        int w = 1 + rand () % max_side;
+        int h = 1 + rand () % max_side;
+        int src_x = rand () % (maxw - max_side);
+        int src_y = rand () % (maxh - max_side);
+        int dst_x = rand () % (maxw - max_side);
+        int dst_y = rand () % (maxh - max_side);
+
+        func (0, op, src_img, mask_img, dst_img,
+              src_x, src_y, src_x, src_y, dst_x, dst_y, w, h);
+        pixels += w * h;
+    }
+
+    return pixels;
+}
+
+/*
+ * Random-position benchmark with tiny rectangles: same scheme as bench_R
+ * but sides are rand()-chosen in 1 .. 2 * TINYWIDTH and origins lie below
+ * (maxw - 2 * TINYWIDTH, maxh - 2 * TINYWIDTH). rand is reseeded with 0.
+ * Returns the total number of pixels requested, or 0 (after printing an
+ * error) when maxw or maxh does not exceed 2 * TINYWIDTH.
+ */
+double
+noinline
+bench_RT (pixman_op_t              op,
+          pixman_image_t *         src_img,
+          pixman_image_t *         mask_img,
+          pixman_image_t *         dst_img,
+          int64_t                  n,
+          pixman_composite_func_t  func,
+          int                      maxw,
+          int                      maxh)
+{
+    const int max_side = TINYWIDTH * 2;
+    double pixels = 0;
+    int64_t k;
+
+    if (!(maxw > max_side && maxh > max_side))
+    {
+        printf("error: maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2\n");
+        return 0;
+    }
+
+    srand (0);
+    for (k = 0; k < n; k++)
+    {
+        /* the order of the rand () calls determines the sequence: keep it */
+        int w = 1 + rand () % max_side;
+        int h = 1 + rand () % max_side;
+        int src_x = rand () % (maxw - max_side);
+        int src_y = rand () % (maxh - max_side);
+        int dst_x = rand () % (maxw - max_side);
+        int dst_y = rand () % (maxh - max_side);
+
+        func (0, op, src_img, mask_img, dst_img,
+              src_x, src_y, src_x, src_y, dst_x, dst_y, w, h);
+        pixels += w * h;
+    }
+
+    return pixels;
+}
+
+/*
+ * Run the whole benchmark suite for one operation and print one result line.
+ *
+ * testname    label printed at the start of the line
+ * src_fmt     pixman format code of the source image
+ * src_flags   SOLID_FLAG => source is a 1x1 image with PIXMAN_REPEAT_NORMAL
+ * op          composite operator
+ * mask_fmt    pixman format code of the mask, or PIXMAN_null for no mask
+ * mask_flags  SOLID_FLAG => 1x1 repeating mask; CA_FLAG => component alpha
+ * dst_fmt     pixman format code of the destination image
+ * npix        pixel budget from which each test's iteration count is derived
+ *
+ * The images wrap the global src, mask and dst buffers. Each test is timed
+ * as (t3 - t2) - (t2 - t1), where t1..t2 covers the run with
+ * pixman_image_composite_empty (only when EXCLUDE_OVERHEAD is non-zero)
+ * and t2..t3 the run with the real composite function.
+ */
+void
+bench_composite (char * testname,
+ int src_fmt,
+ int src_flags,
+ int op,
+ int mask_fmt,
+ int mask_flags,
+ int dst_fmt,
+ double npix)
+{
+ pixman_image_t * src_img;
+ pixman_image_t * dst_img;
+ pixman_image_t * mask_img;
+ pixman_image_t * xsrc_img;
+ pixman_image_t * xdst_img;
+ pixman_image_t * xmask_img;
+ double t1, t2, t3, pix_cnt;
+ int64_t n, l1test_width, nlines;
+ double bytes_per_pix = 0;
+
+ pixman_composite_func_t func = pixman_image_composite_wrapper;
+
+ /* NOTE(review): the xsrc_img / xdst_img / xmask_img images are created and
+ * released in this function but never passed to any bench_* call here. */
+ if (!(src_flags & SOLID_FLAG))
+ {
+ /* (fmt >> 24) / 8.0 is added to bytes_per_pix for each full-size image */
+ bytes_per_pix += (src_fmt >> 24) / 8.0;
+ src_img = pixman_image_create_bits (src_fmt,
+ WIDTH, HEIGHT,
+ src,
+ WIDTH * 4);
+ xsrc_img = pixman_image_create_bits (src_fmt,
+ XWIDTH, XHEIGHT,
+ src,
+ XWIDTH * 4);
+ }
+ else
+ {
+ /* solid source: a single repeated pixel, not counted in bytes_per_pix */
+ src_img = pixman_image_create_bits (src_fmt,
+ 1, 1,
+ src,
+ 4);
+ xsrc_img = pixman_image_create_bits (src_fmt,
+ 1, 1,
+ src,
+ 4);
+ pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL);
+ pixman_image_set_repeat (xsrc_img, PIXMAN_REPEAT_NORMAL);
+ }
+
+ bytes_per_pix += (dst_fmt >> 24) / 8.0;
+ dst_img = pixman_image_create_bits (dst_fmt,
+ WIDTH, HEIGHT,
+ dst,
+ WIDTH * 4);
+
+ mask_img = NULL;
+ xmask_img = NULL;
+ if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null)
+ {
+ /* the mask contribution uses a divisor of 8.0 for PIXMAN_OP_SRC and
+ * 4.0 for every other operator */
+ bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0);
+ mask_img = pixman_image_create_bits (mask_fmt,
+ WIDTH, HEIGHT,
+ mask,
+ WIDTH * 4);
+ xmask_img = pixman_image_create_bits (mask_fmt,
+ XWIDTH, XHEIGHT,
+ mask,
+ XWIDTH * 4);
+ }
+ else if (mask_fmt != PIXMAN_null)
+ {
+ /* solid mask: 1x1 repeating images
+ * NOTE(review): xmask_img gets a stride of 4 * 4 while mask_img gets 4 */
+ mask_img = pixman_image_create_bits (mask_fmt,
+ 1, 1,
+ mask,
+ 4);
+ xmask_img = pixman_image_create_bits (mask_fmt,
+ 1, 1,
+ mask,
+ 4 * 4);
+ pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL);
+ pixman_image_set_repeat (xmask_img, PIXMAN_REPEAT_NORMAL);
+ }
+ if ((mask_flags & CA_FLAG) && mask_fmt != PIXMAN_null)
+ {
+ /* component alpha is enabled on mask_img only, not on xmask_img */
+ pixman_image_set_component_alpha (mask_img, 1);
+ }
+ xdst_img = pixman_image_create_bits (dst_fmt,
+ XWIDTH, XHEIGHT,
+ dst,
+ XWIDTH * 4);
+
+
+ /* func is always pixman_image_composite_wrapper here, so '=' is printed */
+ printf ("%24s %c", testname, func != pixman_image_composite_wrapper ?
+ '-' : '=');
+
+ /* The src <-> dst copy pair below is repeated before every test;
+ * presumably it resets buffer contents and cache state -- TODO confirm */
+ memcpy (src, dst, BUFSIZE);
+ memcpy (dst, src, BUFSIZE);
+
+ /* L1 test: a single line of l1test_width pixels, with the width clamped
+ * to the range 1 .. WIDTH - 64 */
+ l1test_width = L1CACHE_SIZE / 8 - 64;
+ if (l1test_width < 1)
+ l1test_width = 1;
+ if (l1test_width > WIDTH - 64)
+ l1test_width = WIDTH - 64;
+ n = 1 + npix / (l1test_width * 8);
+ t1 = gettime ();
+#if EXCLUDE_OVERHEAD
+ bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, 1);
+#endif
+ t2 = gettime ();
+ bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, 1);
+ t3 = gettime ();
+ printf (" L1:%7.2f", (double)n * l1test_width * 1 /
+ ((t3 - t2) - (t2 - t1)) / 1000000.);
+ fflush (stdout);
+
+ memcpy (src, dst, BUFSIZE);
+ memcpy (dst, src, BUFSIZE);
+
+ /* L2 test: same width, nlines lines; nlines is derived from L2CACHE_SIZE
+ * and the combined bytes per pixel of source and destination formats */
+ nlines = (L2CACHE_SIZE / l1test_width) /
+ ((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8);
+ if (nlines < 1)
+ nlines = 1;
+ n = 1 + npix / (l1test_width * nlines);
+ t1 = gettime ();
+#if EXCLUDE_OVERHEAD
+ bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, nlines);
+#endif
+ t2 = gettime ();
+ bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, nlines);
+ t3 = gettime ();
+ printf (" L2:%7.2f", (double)n * l1test_width * nlines /
+ ((t3 - t2) - (t2 - t1)) / 1000000.);
+ fflush (stdout);
+
+ memcpy (src, dst, BUFSIZE);
+ memcpy (dst, src, BUFSIZE);
+
+ /* M test: (WIDTH - 64) x HEIGHT rectangles; the second figure is the
+ * pixel rate times bytes_per_pix, as a percentage of the global bandwidth */
+ n = 1 + npix / (WIDTH * HEIGHT);
+ t1 = gettime ();
+#if EXCLUDE_OVERHEAD
+ bench_M (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
+#endif
+ t2 = gettime ();
+ bench_M (op, src_img, mask_img, dst_img, n, func);
+ t3 = gettime ();
+ printf (" M:%6.2f (%6.2f%%)",
+ ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1))) / 1000000.,
+ ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) );
+ fflush (stdout);
+
+ memcpy (src, dst, BUFSIZE);
+ memcpy (dst, src, BUFSIZE);
+
+ /* HT test: pix_cnt from the overhead run is overwritten by the real run */
+ n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
+ t1 = gettime ();
+#if EXCLUDE_OVERHEAD
+ pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
+#endif
+ t2 = gettime ();
+ pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, func);
+ t3 = gettime ();
+ printf (" HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
+ fflush (stdout);
+
+ memcpy (src, dst, BUFSIZE);
+ memcpy (dst, src, BUFSIZE);
+
+ /* VT test */
+ n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
+ t1 = gettime ();
+#if EXCLUDE_OVERHEAD
+ pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
+#endif
+ t2 = gettime ();
+ pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, func);
+ t3 = gettime ();
+ printf (" VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
+ fflush (stdout);
+
+ memcpy (src, dst, BUFSIZE);
+ memcpy (dst, src, BUFSIZE);
+
+ /* R test */
+ n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
+ t1 = gettime ();
+#if EXCLUDE_OVERHEAD
+ pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
+#endif
+ t2 = gettime ();
+ pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
+ t3 = gettime ();
+ printf (" R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
+ fflush (stdout);
+
+ memcpy (src, dst, BUFSIZE);
+ memcpy (dst, src, BUFSIZE);
+
+ /* RT test; the Kops/s figure divides by (t3 - t2) only, i.e. without
+ * subtracting the overhead run */
+ n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH);
+ t1 = gettime ();
+#if EXCLUDE_OVERHEAD
+ pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
+#endif
+ t2 = gettime ();
+ pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
+ t3 = gettime ();
+ printf (" RT:%6.2f (%4.0fKops/s)\n", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000., (double) n / ((t3 - t2) * 1000));
+
+ /* mask_img and xmask_img are always created together, so one check
+ * covers both */
+ if (mask_img) {
+ pixman_image_unref (mask_img);
+ pixman_image_unref (xmask_img);
+ }
+ pixman_image_unref (src_img);
+ pixman_image_unref (dst_img);
+ pixman_image_unref (xsrc_img);
+ pixman_image_unref (xdst_img);
+}
+
+/* Shorter spelling of PIXMAN_OP_OUT_REVERSE for use in the table below. */
+#define PIXMAN_OP_OUT_REV (PIXMAN_OP_OUT_REVERSE)
+
+/*
+ * Benchmark descriptions. main() passes the fields of each selected row,
+ * in declaration order, to bench_composite().
+ *
+ * The src_flags / mask_flags columns hold the SOLID_FLAG (1) and
+ * CA_FLAG (2) bits written as literals; a mask_fmt of PIXMAN_null means
+ * the operation runs without a mask.
+ */
+struct
+{
+ char *testname;
+ int src_fmt;
+ int src_flags;
+ int op;
+ int mask_fmt;
+ int mask_flags;
+ int dst_fmt;
+}
+tests_tbl[] =
+{
+ { "add_8_8_8", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 },
+ { "add_n_8_8000", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 },
+ { "add_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
+ { "add_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
+ { "add_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
+ { "add_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
+ { "add_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 },
+ { "add_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 },
+ { "add_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 },
+ { "add_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 },
+ { "add_n_8000", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 },
+ { "add_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
+ { "add_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
+ { "add_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 },
+ { "add_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
+ { "add_n_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a4r4g4b4 },
+ { "add_n_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r2g2b2 },
+ { "add_n_2x10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x2r10g10b10 },
+ { "add_n_2a10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r10g10b10 },
+ { "add_8000_8000", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 },
+ { "add_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
+ { "add_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
+ { "add_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 },
+ { "add_8888_1555", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
+ { "add_8888_4444", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a4r4g4b4 },
+ { "add_8888_2222", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r2g2b2 },
+ { "add_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 },
+ { "add_1555_1555", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
+ { "add_0565_2x10", PIXMAN_r5g6b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x2r10g10b10 },
+ { "add_2a10_2a10", PIXMAN_a2r10g10b10, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r10g10b10 },
+ { "src_n_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r2g2b2 },
+ { "src_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
+ { "src_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
+ { "src_n_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a4r4g4b4 },
+ { "src_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
+ { "src_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
+ { "src_n_2x10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x2r10g10b10 },
+ { "src_n_2a10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r10g10b10 },
+ { "src_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
+ { "src_8888_4444", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a4r4g4b4 },
+ { "src_8888_2222", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r2g2b2 },
+ { "src_8888_2x10", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x2r10g10b10 },
+ { "src_8888_2a10", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r10g10b10 },
+ { "src_0888_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
+ { "src_0888_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
+ { "src_0888_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
+ { "src_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
+ { "src_x888_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
+ { "src_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
+ { "src_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
+ { "src_1555_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
+ { "src_0565_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
+ { "src_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
+ { "src_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
+ { "src_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 },
+ { "src_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 },
+ { "src_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
+ { "src_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
+ { "src_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 },
+ { "src_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 },
+ { "src_8888_8_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
+ { "src_0888_8_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
+ { "src_0888_8_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
+ { "src_0888_8_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
+ { "src_x888_8_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
+ { "src_x888_8_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
+ { "src_0565_8_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
+ { "src_1555_8_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
+ { "src_0565_8_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
+ { "over_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
+ { "over_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
+ { "over_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_r5g6b5 },
+ { "over_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
+ { "over_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_r5g6b5 },
+ { "over_8888_x888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
+ { "over_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
+ { "over_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
+ { "over_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 },
+ { "over_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 },
+ { "over_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
+ { "over_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
+ { "over_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 },
+ { "over_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 },
+ { "over_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
+ { "over_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
+ { "over_n_8888_0565_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
+ { "over_n_8888_1555_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
+ { "over_n_8888_4444_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a4r4g4b4 },
+ { "over_n_8888_2222_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a2r2g2b2 },
+ { "over_n_8888_2x10_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_x2r10g10b10 },
+ { "over_n_8888_2a10_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a2r10g10b10 },
+ { "over_8888_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a8r8g8b8 },
+ { "over_8888_n_x888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_x8r8g8b8 },
+ { "over_8888_n_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_r5g6b5 },
+ { "over_8888_n_1555", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a1r5g5b5 },
+ { "outrev_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
+ { "outrev_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
+ { "outrev_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
+ { "outrev_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
+ { "outrev_n_8888_0565_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
+ { "outrev_n_8888_1555_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
+ { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
+ { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
+};
+
+/*
+ * Benchmark entry point.
+ *
+ * Allocates one block of 3 * BUFSIZE bytes and splits it into the global
+ * src, dst and mask buffers, prints a legend, measures the reference
+ * memcpy speed, then runs bench_composite() for every tests_tbl row whose
+ * name contains argv[1] (or for all rows when no argument is given or the
+ * argument is "all").
+ *
+ * Returns 0 on success, 1 if the buffers could not be allocated.
+ */
+int
+main (int argc, char *argv[])
+{
+    double x;
+    int i;
+    char *pattern = argc > 1 ? argv[1] : "all";
+
+    src = aligned_malloc (4096, BUFSIZE * 3);
+    if (!src)
+    {
+        /* without this check the memset below would write through NULL */
+        fprintf (stderr, "error: failed to allocate %d bytes for the test buffers\n",
+                 BUFSIZE * 3);
+        return 1;
+    }
+    memset (src, 0xCC, BUFSIZE * 3);
+    dst = src + (BUFSIZE / 4);
+    mask = dst + (BUFSIZE / 4);
+
+    printf ("Benchmark for a set of most commonly used functions\n");
+    printf ("---\n");
+    printf ("All results are presented in millions of pixels per second\n");
+    printf ("L1 - small Xx1 rectangle (fitting L1 cache), always blitted at the same\n");
+    printf (" memory location with small drift in horizontal direction\n");
+    printf ("L2 - small XxY rectangle (fitting L2 cache), always blitted at the same\n");
+    printf (" memory location with small drift in horizontal direction\n");
+    printf ("M - large %dx%d rectangle, always blitted at the same\n",
+            WIDTH - 64, HEIGHT);
+    printf (" memory location with small drift in horizontal direction\n");
+    printf ("HT - random rectangles with %dx%d average size are copied from\n",
+            TILEWIDTH, TILEWIDTH);
+    printf (" one %dx%d buffer to another, traversing from left to right\n",
+            WIDTH, HEIGHT);
+    printf (" and from top to bottom\n");
+    printf ("VT - random rectangles with %dx%d average size are copied from\n",
+            TILEWIDTH, TILEWIDTH);
+    printf (" one %dx%d buffer to another, traversing from top to bottom\n",
+            WIDTH, HEIGHT);
+    printf (" and from left to right\n");
+    printf ("R - random rectangles with %dx%d average size are copied from\n",
+            TILEWIDTH, TILEWIDTH);
+    printf (" random locations of one %dx%d buffer to another\n",
+            WIDTH, HEIGHT);
+    printf ("RT - as R, but %dx%d average sized rectangles are copied\n",
+            TINYWIDTH, TINYWIDTH);
+    printf ("---\n");
+    bandwidth = x = bench_memcpy ();
+    printf ("reference memcpy speed = %.1fMB/s (%.1fMP/s for 32bpp fills)\n",
+            x / 1000000., x / 4000000);
+    printf ("---\n");
+
+    for (i = 0; i < (int) (sizeof(tests_tbl) / sizeof(tests_tbl[0])); i++)
+    {
+        if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern))
+        {
+            /* the pixel budget per test is one eighth of the memcpy figure */
+            bench_composite (tests_tbl[i].testname,
+                             tests_tbl[i].src_fmt,
+                             tests_tbl[i].src_flags,
+                             tests_tbl[i].op,
+                             tests_tbl[i].mask_fmt,
+                             tests_tbl[i].mask_flags,
+                             tests_tbl[i].dst_fmt,
+                             bandwidth/8);
+        }
+    }
+
+    free (src);
+    return 0;
+}
diff --git a/pixman/test/utils.c b/pixman/test/utils.c
index f5199268b..22ab02fae 100644
--- a/pixman/test/utils.c
+++ b/pixman/test/utils.c
@@ -1,10 +1,20 @@
#include "utils.h"
#include <signal.h>
+#ifdef HAVE_GETTIMEOFDAY
+#include <sys/time.h>
+#else
+#include <time.h>
+#endif
+
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
/* Random number seed
*/
@@ -197,10 +207,117 @@ image_endian_swap (pixman_image_t *img, int bpp)
}
}
+/* Number of pages that fence_malloc() makes inaccessible (PROT_NONE)
+ * immediately before and immediately after the payload.
+ */
+#define N_LEADING_PROTECTED 10
+#define N_TRAILING_PROTECTED 10
+
+/* Bookkeeping record that fence_malloc() writes into the page
+ * preceding the leading protected pages, and that fence_free()
+ * reads back in order to unmap the region.
+ */
+typedef struct
+{
+ void *addr; /* address returned by mmap() */
+ uint32_t len; /* length requested by the caller */
+ uint8_t *trailing; /* start of the trailing protected pages */
+ int n_bytes; /* total size of the mapping, as passed to munmap() */
+} info_t;
+
+#if defined(HAVE_MPROTECT) && defined(HAVE_GETPAGESIZE)
+
+/* This is apparently necessary on at least OS X */
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+/* Allocate len bytes surrounded by inaccessible guard pages.
+ *
+ * The mapping is laid out as follows:
+ *
+ *   - one bookkeeping page holding an info_t,
+ *   - N_LEADING_PROTECTED pages set to PROT_NONE,
+ *   - the payload, rounded up to a whole number of pages,
+ *   - N_TRAILING_PROTECTED pages set to PROT_NONE.
+ *
+ * Because the payload is rounded up to the page size, an overrun is
+ * only trapped once it runs past the end of the last payload page.
+ *
+ * Returns a pointer to the payload, or NULL if mmap() or mprotect()
+ * fails. The result must be released with fence_free().
+ */
+void *
+fence_malloc (uint32_t len)
+{
+    unsigned long page_size = getpagesize();
+    unsigned long page_mask = page_size - 1;
+    uint32_t n_payload_bytes = (len + page_mask) & ~page_mask;
+    uint32_t n_bytes =
+        (page_size * (N_LEADING_PROTECTED + N_TRAILING_PROTECTED + 2) +
+         n_payload_bytes) & ~page_mask;
+    uint8_t *initial_page;
+    uint8_t *leading_protected;
+    uint8_t *trailing_protected;
+    uint8_t *payload;
+    uint8_t *addr;
+
+    addr = mmap (NULL, n_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+                 -1, 0);
+
+    if (addr == (void *)MAP_FAILED)
+    {
+        printf ("mmap failed on %u %u\n", len, n_bytes);
+        return NULL;
+    }
+
+    initial_page = (uint8_t *)(((unsigned long)addr + page_mask) & ~page_mask);
+    leading_protected = initial_page + page_size;
+    payload = leading_protected + N_LEADING_PROTECTED * page_size;
+    trailing_protected = payload + n_payload_bytes;
+
+    /* Record what fence_free() needs in order to undo the mapping */
+    ((info_t *)initial_page)->addr = addr;
+    ((info_t *)initial_page)->len = len;
+    ((info_t *)initial_page)->trailing = trailing_protected;
+    ((info_t *)initial_page)->n_bytes = n_bytes;
+
+    /* The region came from mmap(), so on failure it has to be released
+     * with munmap(); handing it to free() would be undefined behavior.
+     * munmap() does not care about the current page protection, so no
+     * mprotect() call is needed to undo a partially applied fence.
+     */
+    if (mprotect (leading_protected, N_LEADING_PROTECTED * page_size,
+                  PROT_NONE) == -1)
+    {
+        munmap (addr, n_bytes);
+        return NULL;
+    }
+
+    if (mprotect (trailing_protected, N_TRAILING_PROTECTED * page_size,
+                  PROT_NONE) == -1)
+    {
+        munmap (addr, n_bytes);
+        return NULL;
+    }
+
+    return payload;
+}
+
+/* Release a block obtained from fence_malloc().
+ *
+ * data must be a payload pointer returned by fence_malloc(). NULL is
+ * accepted and ignored, in the same way free() ignores it.
+ */
+void
+fence_free (void *data)
+{
+    uint32_t page_size = getpagesize();
+    uint8_t *payload = data;
+    uint8_t *leading_protected;
+    uint8_t *initial_page;
+    info_t *info;
+
+    if (!payload)
+        return;
+
+    /* Walk back from the payload to the bookkeeping page */
+    leading_protected = payload - N_LEADING_PROTECTED * page_size;
+    initial_page = leading_protected - page_size;
+    info = (info_t *)initial_page;
+
+    mprotect (leading_protected, N_LEADING_PROTECTED * page_size,
+              PROT_READ | PROT_WRITE);
+
+    /* The trailing fence is N_TRAILING_PROTECTED pages long */
+    mprotect (info->trailing, N_TRAILING_PROTECTED * page_size,
+              PROT_READ | PROT_WRITE);
+
+    munmap (info->addr, info->n_bytes);
+}
+
+#else
+
+/* Fallback used when guard pages cannot be set up: a plain malloc()
+ * of len bytes with no out-of-bounds protection.
+ */
+void *
+fence_malloc (uint32_t len)
+{
+    void *block = malloc (len);
+
+    return block;
+}
+
+/* Fallback counterpart of fence_malloc(): hands the block back to
+ * free().
+ */
+void
+fence_free (void *data)
+{
+    void *block = data;
+
+    free (block);
+}
+
+#endif
+
uint8_t *
make_random_bytes (int n_bytes)
{
- uint8_t *bytes = malloc (n_bytes);
+ uint8_t *bytes = fence_malloc (n_bytes);
int i;
if (!bytes)
@@ -325,6 +442,20 @@ fuzzer_test_main (const char *test_name,
return 0;
}
+/* Return a timestamp in seconds.
+ *
+ * With gettimeofday() available the value is derived from its
+ * seconds/microseconds pair; otherwise it is clock() scaled by
+ * CLOCKS_PER_SEC.
+ */
+double
+gettime (void)
+{
+#ifdef HAVE_GETTIMEOFDAY
+    struct timeval now;
+    int64_t total_usec;
+
+    gettimeofday (&now, NULL);
+    total_usec = (int64_t)now.tv_sec * 1000000 + now.tv_usec;
+
+    return (double)total_usec / 1000000.;
+#else
+    double ticks = (double)clock();
+
+    return ticks / (double)CLOCKS_PER_SEC;
+#endif
+}
+
static const char *global_msg;
static void
@@ -352,3 +483,18 @@ fail_after (int seconds, const char *msg)
#endif
#endif
}
+
+/* Allocate size bytes, aligned to align when posix_memalign() is
+ * available. Without it the request is forwarded to malloc() and
+ * align is not used. Returns NULL if the allocation fails.
+ */
+void *
+aligned_malloc (size_t align, size_t size)
+{
+#ifdef HAVE_POSIX_MEMALIGN
+    void *chunk;
+
+    if (posix_memalign (&chunk, align, size) == 0)
+        return chunk;
+
+    return NULL;
+#else
+    return malloc (size);
+#endif
+}
diff --git a/pixman/test/utils.h b/pixman/test/utils.h
index ab71452a4..e6122119a 100644
--- a/pixman/test/utils.h
+++ b/pixman/test/utils.h
@@ -51,10 +51,23 @@ compute_crc32 (uint32_t in_crc32,
void
image_endian_swap (pixman_image_t *img, int bpp);
-/* Generate n_bytes random bytes in malloced memory */
+/* Allocate memory that is bounded by protected pages,
+ * so that out-of-bounds access will cause segfaults.
+ *
+ * The protection is only in effect when both mprotect() and
+ * getpagesize() are available; otherwise this is a plain malloc().
+ * Returns NULL on failure. Release the memory with fence_free().
+ */
+void *
+fence_malloc (uint32_t len);
+
+/* Release memory that was returned by fence_malloc() */
+void
+fence_free (void *data);
+
+/* Generate n_bytes random bytes in fence_malloced memory */
uint8_t *
make_random_bytes (int n_bytes);
+/* Return current time in seconds.
+ *
+ * Uses gettimeofday() when it is available and clock() otherwise.
+ * clock() reports processor time rather than wall-clock time, so
+ * the result is only meaningful as a difference between two calls.
+ */
+double
+gettime (void);
+
/* main body of the fuzzer test */
int
fuzzer_test_main (const char *test_name,
@@ -101,3 +114,7 @@ fail_after (int seconds, const char *msg);
assert (frcd_canary_variable6 == frcd_volatile_constant6); \
assert (frcd_canary_variable7 == frcd_volatile_constant7); \
assert (frcd_canary_variable8 == frcd_volatile_constant8);
+
+/* Try to get an aligned memory chunk.
+ *
+ * The alignment is only honored when posix_memalign() is available;
+ * otherwise the chunk comes from malloc() and align is ignored.
+ * Returns NULL on failure.
+ */
+void *
+aligned_malloc (size_t align, size_t size);