Diffstat (limited to 'pixman/pixman')
-rw-r--r--  pixman/pixman/Makefile.am          |   1
-rw-r--r--  pixman/pixman/pixman-bits-image.c  | 253
-rw-r--r--  pixman/pixman/pixman-compiler.h    |   7
-rw-r--r--  pixman/pixman/pixman-fast-path.c   | 268
-rw-r--r--  pixman/pixman/pixman-fast-path.h   | 443
-rw-r--r--  pixman/pixman/pixman-general.c     |  54
-rw-r--r--  pixman/pixman/pixman-image.c       |  44
-rw-r--r--  pixman/pixman/pixman-private.h     |   9
-rw-r--r--  pixman/pixman/pixman-sse2.c        | 115
-rw-r--r--  pixman/pixman/pixman.c             | 107
10 files changed, 953 insertions, 348 deletions
diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am
index 750556e79..2658e40dc 100644
--- a/pixman/pixman/Makefile.am
+++ b/pixman/pixman/Makefile.am
@@ -21,6 +21,7 @@ libpixman_1_la_SOURCES = \
pixman-general.c \
pixman.c \
pixman-fast-path.c \
+ pixman-fast-path.h \
pixman-solid-fill.c \
pixman-conical-gradient.c \
pixman-linear-gradient.c \
diff --git a/pixman/pixman/pixman-bits-image.c b/pixman/pixman/pixman-bits-image.c
index 806c65e7a..1f023b826 100644
--- a/pixman/pixman/pixman-bits-image.c
+++ b/pixman/pixman/pixman-bits-image.c
@@ -637,7 +637,7 @@ bits_image_fetch_affine_no_alpha (pixman_image_t * image,
buffer[i] = bits_image_fetch_pixel_filtered (
&image->bits, x, y, fetch_pixel_no_alpha);
}
-
+
x += ux;
y += uy;
}
@@ -749,6 +749,220 @@ bits_image_fetch_general (pixman_image_t * image,
}
}
+static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
+
+static force_inline void
+bits_image_fetch_bilinear_affine (pixman_image_t * image,
+ int offset,
+ int line,
+ int width,
+ uint32_t * buffer,
+ const uint32_t * mask,
+
+ convert_pixel_t convert_pixel,
+ pixman_format_code_t format,
+ pixman_repeat_t repeat_mode)
+{
+ pixman_fixed_t x, y;
+ pixman_fixed_t ux, uy;
+ pixman_vector_t v;
+ bits_image_t *bits = &image->bits;
+ int i;
+
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (image->common.transform, &v))
+ return;
+
+ ux = image->common.transform->matrix[0][0];
+ uy = image->common.transform->matrix[1][0];
+
+ x = v.vector[0];
+ y = v.vector[1];
+
+ for (i = 0; i < width; ++i)
+ {
+ int x1, y1, x2, y2;
+ uint32_t tl, tr, bl, br;
+ int32_t distx, disty;
+ int width = image->bits.width;
+ int height = image->bits.height;
+ const uint8_t *row1;
+ const uint8_t *row2;
+
+ if (mask && !mask[i])
+ goto next;
+
+ x1 = x - pixman_fixed_1 / 2;
+ y1 = y - pixman_fixed_1 / 2;
+
+ distx = (x1 >> 8) & 0xff;
+ disty = (y1 >> 8) & 0xff;
+
+ y1 = pixman_fixed_to_int (y1);
+ y2 = y1 + 1;
+ x1 = pixman_fixed_to_int (x1);
+ x2 = x1 + 1;
+
+ if (repeat_mode != PIXMAN_REPEAT_NONE)
+ {
+ uint32_t mask;
+
+ mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+
+ repeat (repeat_mode, width, &x1);
+ repeat (repeat_mode, height, &y1);
+ repeat (repeat_mode, width, &x2);
+ repeat (repeat_mode, height, &y2);
+
+ row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
+ row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
+
+ tl = convert_pixel (row1, x1) | mask;
+ tr = convert_pixel (row1, x2) | mask;
+ bl = convert_pixel (row2, x1) | mask;
+ br = convert_pixel (row2, x2) | mask;
+ }
+ else
+ {
+ uint32_t mask1, mask2;
+ int bpp;
+
+ /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
+ * which means if you use it in expressions, those
+ * expressions become unsigned themselves. Since
+ * the variables below can be negative in some cases,
+ * that will lead to crashes on 64 bit architectures.
+ *
+ * So this line makes sure bpp is signed.
+ */
+ bpp = PIXMAN_FORMAT_BPP (format);
+
+ if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
+ {
+ buffer[i] = 0;
+ goto next;
+ }
+
+ if (y2 == 0)
+ {
+ row1 = zero;
+ mask1 = 0;
+ }
+ else
+ {
+ row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
+ row1 += bpp / 8 * x1;
+
+ mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+ }
+
+ if (y1 == height - 1)
+ {
+ row2 = zero;
+ mask2 = 0;
+ }
+ else
+ {
+ row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
+ row2 += bpp / 8 * x1;
+
+ mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+ }
+
+ if (x2 == 0)
+ {
+ tl = 0;
+ bl = 0;
+ }
+ else
+ {
+ tl = convert_pixel (row1, 0) | mask1;
+ bl = convert_pixel (row2, 0) | mask2;
+ }
+
+ if (x1 == width - 1)
+ {
+ tr = 0;
+ br = 0;
+ }
+ else
+ {
+ tr = convert_pixel (row1, 1) | mask1;
+ br = convert_pixel (row2, 1) | mask2;
+ }
+ }
+
+ buffer[i] = bilinear_interpolation (
+ tl, tr, bl, br, distx, disty);
+
+ next:
+ x += ux;
+ y += uy;
+ }
+}
+
+static force_inline uint32_t
+convert_a8r8g8b8 (const uint8_t *row, int x)
+{
+ return *(((uint32_t *)row) + x);
+}
+
+static force_inline uint32_t
+convert_x8r8g8b8 (const uint8_t *row, int x)
+{
+ return *(((uint32_t *)row) + x);
+}
+
+static force_inline uint32_t
+convert_a8 (const uint8_t *row, int x)
+{
+ return *(row + x) << 24;
+}
+
+static force_inline uint32_t
+convert_r5g6b5 (const uint8_t *row, int x)
+{
+ return CONVERT_0565_TO_0888 (*((uint16_t *)row + x));
+}
+
+#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \
+ static void \
+ bits_image_fetch_bilinear_affine_ ## name (pixman_image_t *image, \
+ int offset, \
+ int line, \
+ int width, \
+ uint32_t * buffer, \
+ const uint32_t * mask) \
+ { \
+ bits_image_fetch_bilinear_affine (image, offset, line, width, buffer, mask, \
+ convert_ ## format, \
+ PIXMAN_ ## format, \
+ repeat_mode); \
+ }
+
+MAKE_BILINEAR_FETCHER (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD);
+MAKE_BILINEAR_FETCHER (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE);
+MAKE_BILINEAR_FETCHER (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT);
+MAKE_BILINEAR_FETCHER (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL);
+MAKE_BILINEAR_FETCHER (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD);
+MAKE_BILINEAR_FETCHER (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE);
+MAKE_BILINEAR_FETCHER (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT);
+MAKE_BILINEAR_FETCHER (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL);
+MAKE_BILINEAR_FETCHER (pad_a8, a8, PIXMAN_REPEAT_PAD);
+MAKE_BILINEAR_FETCHER (none_a8, a8, PIXMAN_REPEAT_NONE);
+MAKE_BILINEAR_FETCHER (reflect_a8, a8, PIXMAN_REPEAT_REFLECT);
+MAKE_BILINEAR_FETCHER (normal_a8, a8, PIXMAN_REPEAT_NORMAL);
+MAKE_BILINEAR_FETCHER (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD);
+MAKE_BILINEAR_FETCHER (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE);
+MAKE_BILINEAR_FETCHER (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT);
+MAKE_BILINEAR_FETCHER (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL);
+
static void
bits_image_fetch_solid_32 (pixman_image_t * image,
int x,
@@ -954,14 +1168,45 @@ static const fetcher_info_t fetcher_info[] =
_pixman_image_get_scanline_generic_64
},
+#define GENERAL_BILINEAR_FLAGS \
+ (FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_HAS_TRANSFORM | \
+ FAST_PATH_AFFINE_TRANSFORM | \
+ FAST_PATH_BILINEAR_FILTER)
+
+#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
+ { PIXMAN_ ## format, \
+ GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
+ bits_image_fetch_bilinear_affine_ ## name, \
+ _pixman_image_get_scanline_generic_64 \
+ },
+
+ BILINEAR_AFFINE_FAST_PATH (pad_a8r8g8b8, a8r8g8b8, PAD)
+ BILINEAR_AFFINE_FAST_PATH (none_a8r8g8b8, a8r8g8b8, NONE)
+ BILINEAR_AFFINE_FAST_PATH (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
+ BILINEAR_AFFINE_FAST_PATH (normal_a8r8g8b8, a8r8g8b8, NORMAL)
+ BILINEAR_AFFINE_FAST_PATH (pad_x8r8g8b8, x8r8g8b8, PAD)
+ BILINEAR_AFFINE_FAST_PATH (none_x8r8g8b8, x8r8g8b8, NONE)
+ BILINEAR_AFFINE_FAST_PATH (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
+ BILINEAR_AFFINE_FAST_PATH (normal_x8r8g8b8, x8r8g8b8, NORMAL)
+ BILINEAR_AFFINE_FAST_PATH (pad_a8, a8, PAD)
+ BILINEAR_AFFINE_FAST_PATH (none_a8, a8, NONE)
+ BILINEAR_AFFINE_FAST_PATH (reflect_a8, a8, REFLECT)
+ BILINEAR_AFFINE_FAST_PATH (normal_a8, a8, NORMAL)
+ BILINEAR_AFFINE_FAST_PATH (pad_r5g6b5, r5g6b5, PAD)
+ BILINEAR_AFFINE_FAST_PATH (none_r5g6b5, r5g6b5, NONE)
+ BILINEAR_AFFINE_FAST_PATH (reflect_r5g6b5, r5g6b5, REFLECT)
+ BILINEAR_AFFINE_FAST_PATH (normal_r5g6b5, r5g6b5, NORMAL)
+
+ /* Affine, no alpha */
{ PIXMAN_any,
- (FAST_PATH_NO_ALPHA_MAP |
- FAST_PATH_HAS_TRANSFORM |
- FAST_PATH_AFFINE_TRANSFORM),
+ (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
bits_image_fetch_affine_no_alpha,
_pixman_image_get_scanline_generic_64
},
+ /* General */
{ PIXMAN_any, 0, bits_image_fetch_general, _pixman_image_get_scanline_generic_64 },
{ PIXMAN_null },
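
The bilinear fetcher above derives everything from the 16.16 fixed-point
coordinates: the integer part selects the top-left texel, and bits 8-15 of
the fraction become the 8-bit interpolation weights distx/disty. A minimal
standalone sketch of that extraction, using a hypothetical single-channel
helper bilinear_1ch rather than pixman's bilinear_interpolation:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical single-channel bilinear blend using the same 8-bit
 * weights the fetcher extracts with "(x1 >> 8) & 0xff".  Weights stay
 * in [0, 255], so the product of two weights fits easily in 32 bits
 * before the final shift by 16. */
static uint32_t
bilinear_1ch (uint32_t tl, uint32_t tr, uint32_t bl, uint32_t br,
              int32_t distx, int32_t disty)
{
    uint32_t top    = tl * (256 - distx) + tr * distx;
    uint32_t bottom = bl * (256 - distx) + br * distx;

    return (top * (256 - disty) + bottom * disty) >> 16;
}

int
main (void)
{
    /* A 16.16 sample position of (2.75, 5.25), already offset by
     * -pixman_fixed_1/2 as the fetcher does. */
    int32_t x1 = (2 << 16) + 0xC000;
    int32_t y1 = (5 << 16) + 0x4000;

    int distx = (x1 >> 8) & 0xff;   /* 0xC0 = 192, i.e. 0.75 */
    int disty = (y1 >> 8) & 0xff;   /* 0x40 =  64, i.e. 0.25 */

    printf ("texels (%d,%d)..(%d,%d), weights %d/%d\n",
            x1 >> 16, y1 >> 16, (x1 >> 16) + 1, (y1 >> 16) + 1,
            distx, disty);
    printf ("blend (0, 255, 0, 255) = %u\n",
            bilinear_1ch (0, 255, 0, 255, distx, disty));
    return 0;
}
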
diff --git a/pixman/pixman/pixman-compiler.h b/pixman/pixman/pixman-compiler.h
index 7e435b631..36b71fd1d 100644
--- a/pixman/pixman/pixman-compiler.h
+++ b/pixman/pixman/pixman-compiler.h
@@ -50,17 +50,22 @@
/* 'inline' is available only in C++ in MSVC */
# define inline __inline
# define force_inline __forceinline
+# define noinline __declspec(noinline)
#elif defined __GNUC__ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define inline __inline__
# define force_inline __inline__ __attribute__ ((__always_inline__))
+# define noinline __attribute__((noinline))
#else
# ifndef force_inline
# define force_inline inline
# endif
+# ifndef noinline
+# define noinline
+# endif
#endif
/* GCC visibility */
-#if defined(__GNUC__) && __GNUC__ >= 4
+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(_WIN32)
# define PIXMAN_EXPORT __attribute__ ((visibility("default")))
/* Sun Studio 8 visibility */
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
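
The noinline shim added here mirrors the existing force_inline one. A
minimal sketch of the typical use — keeping a cold diagnostic path out of
hot callers — where log_bad_size is a hypothetical helper, not a pixman
function:

#include <stdio.h>
#include <stdint.h>

#if defined (_MSC_VER)
# define noinline __declspec(noinline)
#elif defined (__GNUC__)
# define noinline __attribute__((noinline))
#else
# define noinline
#endif

/* Cold path: marking it noinline keeps its body out of every caller,
 * which matters when the callers themselves are force_inline'd. */
static noinline void
log_bad_size (int32_t w, int32_t h)
{
    fprintf (stderr, "unsupported image size %dx%d\n", w, h);
}

int
main (void)
{
    int32_t w = 0x8000, h = 1;

    /* Same 16-bit size limit the patch checks in analyze_extent(). */
    if (w >= 0x7fff || h >= 0x7fff)
        log_bad_size (w, h);
    return 0;
}
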
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c
index 6c214fede..0b8a2526e 100644
--- a/pixman/pixman/pixman-fast-path.c
+++ b/pixman/pixman/pixman-fast-path.c
@@ -30,6 +30,7 @@
#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
+#include "pixman-fast-path.h"
static force_inline uint32_t
fetch_24 (uint8_t *a)
@@ -1386,248 +1387,25 @@ fast_composite_src_memcpy (pixman_implementation_t *imp,
}
}
-static force_inline pixman_bool_t
-repeat (pixman_repeat_t repeat, int *c, int size)
-{
- if (repeat == PIXMAN_REPEAT_NONE)
- {
- if (*c < 0 || *c >= size)
- return FALSE;
- }
- else if (repeat == PIXMAN_REPEAT_NORMAL)
- {
- while (*c >= size)
- *c -= size;
- while (*c < 0)
- *c += size;
- }
- else if (repeat == PIXMAN_REPEAT_PAD)
- {
- *c = CLIP (*c, 0, size - 1);
- }
- else /* REFLECT */
- {
- *c = MOD (*c, size * 2);
- if (*c >= size)
- *c = size * 2 - *c - 1;
- }
- return TRUE;
-}
-
-/* A macroified version of specialized nearest scalers for some
- * common 8888 and 565 formats. It supports SRC and OVER ops.
- *
- * There are two repeat versions, one that handles repeat normal,
- * and one without repeat handling that only works if the src region
- * used is completely covered by the pre-repeated source samples.
- *
- * The loops are unrolled to process two pixels per iteration for better
- * performance on most CPU architectures (superscalar processors
- * can issue several operations simultaneously, other processors can hide
- * instructions latencies by pipelining operations). Unrolling more
- * does not make much sense because the compiler will start running out
- * of spare registers soon.
- */
-
-#define GET_8888_ALPHA(s) ((s) >> 24)
- /* This is not actually used since we don't have an OVER with
- 565 source, but it is needed to build. */
-#define GET_0565_ALPHA(s) 0xff
-
-#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
- src_type_t, dst_type_t, OP, repeat_mode) \
-static void \
-fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dst_x, \
- int32_t dst_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type_t *dst_line; \
- src_type_t *src_first_line; \
- uint32_t d; \
- src_type_t s1, s2; \
- uint8_t a1, a2; \
- int w; \
- int x1, x2, y; \
- pixman_fixed_t orig_vx; \
- pixman_fixed_t max_vx, max_vy; \
- pixman_vector_t v; \
- pixman_fixed_t vx, vy; \
- pixman_fixed_t unit_x, unit_y; \
- \
- src_type_t *src; \
- dst_type_t *dst; \
- int src_stride, dst_stride; \
- \
- if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
- abort(); \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NORMAL && \
- PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NONE) \
- { \
- abort(); \
- } \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
- * transformed from destination space to source space */ \
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
- \
- /* reference point is the center of the pixel */ \
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
- v.vector[2] = pixman_fixed_1; \
- \
- if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
- return; \
- \
- unit_x = src_image->common.transform->matrix[0][0]; \
- unit_y = src_image->common.transform->matrix[1][1]; \
- \
- /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
- v.vector[0] -= pixman_fixed_e; \
- v.vector[1] -= pixman_fixed_e; \
- \
- vx = v.vector[0]; \
- vy = v.vector[1]; \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* Clamp repeating positions inside the actual samples */ \
- max_vx = src_image->bits.width << 16; \
- max_vy = src_image->bits.height << 16; \
- \
- repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- } \
- \
- orig_vx = vx; \
- \
- while (--height >= 0) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- \
- y = vy >> 16; \
- vy += unit_y; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- \
- src = src_first_line + src_stride * y; \
- \
- w = width; \
- vx = orig_vx; \
- while ((w -= 2) >= 0) \
- { \
- x1 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s1 = src[x1]; \
- \
- x2 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s2 = src[x2]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- \
- if (a2 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- else if (s2) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
- a2 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- } \
- \
- if (w & 1) \
- { \
- x1 = vx >> 16; \
- s1 = src[x1]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- } \
- } \
-}
-
+FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER);
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE);
+FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD);
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL);
+FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER);
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE);
+FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD);
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL);
+FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
+FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
+FAST_NEAREST (565_565_cover, 0565, 0565, uint16_t, uint16_t, SRC, COVER);
FAST_NEAREST (565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, NONE);
+FAST_NEAREST (565_565_pad, 0565, 0565, uint16_t, uint16_t, SRC, PAD);
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
+FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
+FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
static force_inline uint32_t
@@ -1859,30 +1637,6 @@ static const pixman_fast_path_t c_fast_paths[] =
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
-#define SCALED_NEAREST_FLAGS \
- (FAST_PATH_SCALE_TRANSFORM | \
- FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NEAREST_FILTER | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NO_WIDE_FORMAT)
-
-#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NORMAL_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
- }, \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
- }
SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
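
All the scanline functions instantiated above share one core idea: step
through source space in 16.16 fixed point and let the integer part of vx
pick the nearest source pixel. A self-contained sketch of that walk for
the SRC operation in the COVER case (no repeat handling), with
hypothetical names:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the walk the FAST_NEAREST-generated scanline functions
 * perform: 'vx' advances by 'unit_x' per destination pixel and its
 * integer part indexes the source.  The caller must guarantee the
 * samples stay inside 'src' (the COVER case). */
static void
scale_scanline_src (uint32_t *dst, const uint32_t *src, int w,
                    int32_t vx, int32_t unit_x)
{
    while (w--)
    {
        *dst++ = src[vx >> 16];
        vx += unit_x;
    }
}

int
main (void)
{
    uint32_t src[4] = { 10, 20, 30, 40 };
    uint32_t dst[8];
    int i;

    /* Upscale 4 -> 8: unit_x = 0.5 in 16.16, starting at 0.25 so the
     * samples sit at pixel centers, as the main loop arranges. */
    scale_scanline_src (dst, src, 8, 0x4000, 0x8000);

    for (i = 0; i < 8; i++)
        printf ("%u ", dst[i]);    /* prints: 10 10 20 20 30 30 40 40 */
    printf ("\n");
    return 0;
}
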
diff --git a/pixman/pixman/pixman-fast-path.h b/pixman/pixman/pixman-fast-path.h
new file mode 100644
index 000000000..0f47ff77c
--- /dev/null
+++ b/pixman/pixman/pixman-fast-path.h
@@ -0,0 +1,443 @@
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. SuSE makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Author: Keith Packard, SuSE, Inc.
+ */
+
+#ifndef PIXMAN_FAST_PATH_H__
+#define PIXMAN_FAST_PATH_H__
+
+#include "pixman-private.h"
+
+#define PIXMAN_REPEAT_COVER -1
+
+static force_inline pixman_bool_t
+repeat (pixman_repeat_t repeat, int *c, int size)
+{
+ if (repeat == PIXMAN_REPEAT_NONE)
+ {
+ if (*c < 0 || *c >= size)
+ return FALSE;
+ }
+ else if (repeat == PIXMAN_REPEAT_NORMAL)
+ {
+ while (*c >= size)
+ *c -= size;
+ while (*c < 0)
+ *c += size;
+ }
+ else if (repeat == PIXMAN_REPEAT_PAD)
+ {
+ *c = CLIP (*c, 0, size - 1);
+ }
+ else /* REFLECT */
+ {
+ *c = MOD (*c, size * 2);
+ if (*c >= size)
+ *c = size * 2 - *c - 1;
+ }
+ return TRUE;
+}
+
+/*
+ * For each scanline fetched from the source image with PAD repeat:
+ * - calculate how many pixels need to be padded on the left side
+ * - calculate how many pixels need to be padded on the right side
+ * - update width to only count pixels which are fetched from the image
+ * All this information is returned via the 'width', 'left_pad' and
+ * 'right_pad' arguments. The code assumes that 'unit_x' is positive.
+ *
+ * Note: 64-bit math is used in order to avoid potential overflows, which
+ * is probably excessive in many cases. This particular function
+ * may need its own correctness test and performance tuning.
+ */
+static force_inline void
+pad_repeat_get_scanline_bounds (int32_t source_image_width,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ int32_t * width,
+ int32_t * left_pad,
+ int32_t * right_pad)
+{
+ int64_t max_vx = (int64_t) source_image_width << 16;
+ int64_t tmp;
+ if (vx < 0)
+ {
+ tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
+ if (tmp > *width)
+ {
+ *left_pad = *width;
+ *width = 0;
+ }
+ else
+ {
+ *left_pad = (int32_t) tmp;
+ *width -= (int32_t) tmp;
+ }
+ }
+ else
+ {
+ *left_pad = 0;
+ }
+ tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
+ if (tmp < 0)
+ {
+ *right_pad = *width;
+ *width = 0;
+ }
+ else if (tmp >= *width)
+ {
+ *right_pad = 0;
+ }
+ else
+ {
+ *right_pad = *width - (int32_t) tmp;
+ *width = (int32_t) tmp;
+ }
+}
+
+/* A macroified version of specialized nearest scalers for some
+ * common 8888 and 565 formats. It supports SRC and OVER ops.
+ *
+ * There are two repeat versions, one that handles repeat normal,
+ * and one without repeat handling that only works if the src region
+ * used is completely covered by the pre-repeated source samples.
+ *
+ * The loops are unrolled to process two pixels per iteration for better
+ * performance on most CPU architectures (superscalar processors
+ * can issue several operations simultaneously, other processors can hide
+ * instructions latencies by pipelining operations). Unrolling more
+ * does not make much sense because the compiler will start running out
+ * of spare registers soon.
+ */
+
+#define GET_8888_ALPHA(s) ((s) >> 24)
+ /* This is not actually used since we don't have an OVER with
+ 565 source, but it is needed to build. */
+#define GET_0565_ALPHA(s) 0xff
+
+#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
+ src_type_t, dst_type_t, OP, repeat_mode) \
+static force_inline void \
+scanline_func_name (dst_type_t *dst, \
+ src_type_t *src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx) \
+{ \
+ uint32_t d; \
+ src_type_t s1, s2; \
+ uint8_t a1, a2; \
+ int x1, x2; \
+ \
+ if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
+ abort(); \
+ \
+ while ((w -= 2) >= 0) \
+ { \
+ x1 = vx >> 16; \
+ vx += unit_x; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s1 = src[x1]; \
+ \
+ x2 = vx >> 16; \
+ vx += unit_x; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s2 = src[x2]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
+ s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ \
+ if (a2 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ else if (s2) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
+ a2 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ } \
+ \
+ if (w & 1) \
+ { \
+ x1 = vx >> 16; \
+ s1 = src[x1]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ } \
+}
+
+#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
+ repeat_mode) \
+static void \
+fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dst_x, \
+ int32_t dst_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type_t *dst_line; \
+ src_type_t *src_first_line; \
+ int y; \
+ pixman_fixed_t max_vx = max_vx; /* suppress uninitialized variable warning */ \
+ pixman_fixed_t max_vy; \
+ pixman_vector_t v; \
+ pixman_fixed_t vx, vy; \
+ pixman_fixed_t unit_x, unit_y; \
+ int32_t left_pad, right_pad; \
+ \
+ src_type_t *src; \
+ dst_type_t *dst; \
+ int src_stride, dst_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
+ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
+ * transformed from destination space to source space */ \
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
+ \
+ /* reference point is the center of the pixel */ \
+ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
+ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
+ v.vector[2] = pixman_fixed_1; \
+ \
+ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
+ return; \
+ \
+ unit_x = src_image->common.transform->matrix[0][0]; \
+ unit_y = src_image->common.transform->matrix[1][1]; \
+ \
+ /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
+ v.vector[0] -= pixman_fixed_e; \
+ v.vector[1] -= pixman_fixed_e; \
+ \
+ vx = v.vector[0]; \
+ vy = v.vector[1]; \
+ \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* Clamp repeating positions inside the actual samples */ \
+ max_vx = src_image->bits.width << 16; \
+ max_vy = src_image->bits.height << 16; \
+ \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ } \
+ \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
+ PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+ { \
+ pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
+ &width, &left_pad, &right_pad); \
+ vx += left_pad * unit_x; \
+ } \
+ \
+ while (--height >= 0) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ \
+ y = vy >> 16; \
+ vy += unit_y; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
+ { \
+ repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
+ src = src_first_line + src_stride * y; \
+ if (left_pad > 0) \
+ { \
+ scanline_func (dst, src, left_pad, 0, 0, 0); \
+ } \
+ if (width > 0) \
+ { \
+ scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
+ } \
+ if (right_pad > 0) \
+ { \
+ scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \
+ right_pad, 0, 0, 0); \
+ } \
+ } \
+ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+ { \
+ static src_type_t zero = 0; \
+ if (y < 0 || y >= src_image->bits.height) \
+ { \
+ scanline_func (dst, &zero, left_pad + width + right_pad, 0, 0, 0); \
+ continue; \
+ } \
+ src = src_first_line + src_stride * y; \
+ if (left_pad > 0) \
+ { \
+ scanline_func (dst, &zero, left_pad, 0, 0, 0); \
+ } \
+ if (width > 0) \
+ { \
+ scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
+ } \
+ if (right_pad > 0) \
+ { \
+ scanline_func (dst + left_pad + width, &zero, right_pad, 0, 0, 0); \
+ } \
+ } \
+ else \
+ { \
+ src = src_first_line + src_stride * y; \
+ scanline_func (dst, src, width, vx, unit_x, max_vx); \
+ } \
+ } \
+}
+
+#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
+ src_type_t, dst_type_t, OP, repeat_mode) \
+ FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
+ SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
+ OP, repeat_mode) \
+ FAST_NEAREST_MAINLOOP(scale_func_name##_##OP, \
+ scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
+ src_type_t, dst_type_t, repeat_mode)
+
+
+#define SCALED_NEAREST_FLAGS \
+ (FAST_PATH_SCALE_TRANSFORM | \
+ FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NEAREST_FILTER | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_NARROW_FORMAT)
+
+#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+ }
+
+/* Prefer the 'cover' variant, because it is faster */
+#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
+
+#endif
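
The repeat() helper at the top of this header encodes the four
coordinate-wrapping rules. A standalone sketch restating the same rules
with the CLIP and MOD macros expanded inline (so it compiles on its own),
showing where the out-of-bounds coordinate -3 lands in a width-8 image
under each mode:

#include <stdio.h>

/* mode: 0 = NONE, 1 = NORMAL, 2 = PAD, 3 = REFLECT.
 * Returns 0 when a NONE-repeat coordinate falls outside the image. */
static int
wrap (int mode, int *c, int size)
{
    if (mode == 0)
    {
        if (*c < 0 || *c >= size)
            return 0;
    }
    else if (mode == 1)
    {
        while (*c >= size) *c -= size;
        while (*c < 0)     *c += size;
    }
    else if (mode == 2)
    {
        if (*c < 0)        *c = 0;
        if (*c > size - 1) *c = size - 1;
    }
    else
    {
        /* MOD() expanded: a modulo that is never negative */
        *c = ((*c % (size * 2)) + size * 2) % (size * 2);
        if (*c >= size)
            *c = size * 2 - *c - 1;
    }
    return 1;
}

int
main (void)
{
    const char *names[4] = { "NONE", "NORMAL", "PAD", "REFLECT" };
    int mode;

    for (mode = 0; mode < 4; mode++)
    {
        int c = -3;   /* 3 pixels left of a width-8 image */
        int ok = wrap (mode, &c, 8);

        printf ("%-7s -> %s, c = %d\n", names[mode],
                ok ? "in bounds" : "out", c);
    }
    return 0;   /* NONE: out; NORMAL: 5; PAD: 0; REFLECT: 2 */
}
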
diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c
index fa22049df..105125359 100644
--- a/pixman/pixman/pixman-general.c
+++ b/pixman/pixman/pixman-general.c
@@ -57,17 +57,6 @@ general_composite_rect (pixman_implementation_t *imp,
int32_t height)
{
uint8_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH * 3];
- const pixman_format_code_t src_format =
- src->type == BITS ? src->bits.format : 0;
- const pixman_format_code_t mask_format =
- mask && mask->type == BITS ? mask->bits.format : 0;
- const pixman_format_code_t dest_format =
- dest->type == BITS ? dest->bits.format : 0;
- const int src_wide = PIXMAN_FORMAT_IS_WIDE (src_format);
- const int mask_wide = mask && PIXMAN_FORMAT_IS_WIDE (mask_format);
- const int dest_wide = PIXMAN_FORMAT_IS_WIDE (dest_format);
- const int wide = src_wide || mask_wide || dest_wide;
- const int Bpp = wide ? 8 : 4;
uint8_t *scanline_buffer = stack_scanline_buffer;
uint8_t *src_buffer, *mask_buffer, *dest_buffer;
fetch_scanline_t fetch_src = NULL, fetch_mask = NULL, fetch_dest = NULL;
@@ -77,8 +66,15 @@ general_composite_rect (pixman_implementation_t *imp,
pixman_bool_t component_alpha;
uint32_t *bits;
int32_t stride;
+ int narrow, Bpp;
int i;
+ narrow =
+ (src->common.flags & FAST_PATH_NARROW_FORMAT) &&
+ (!mask || mask->common.flags & FAST_PATH_NARROW_FORMAT) &&
+ (dest->common.flags & FAST_PATH_NARROW_FORMAT);
+ Bpp = narrow ? 4 : 8;
+
if (width * Bpp > SCANLINE_BUFFER_LENGTH)
{
scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
@@ -106,29 +102,29 @@ general_composite_rect (pixman_implementation_t *imp,
if (op == PIXMAN_OP_CLEAR)
fetch_src = NULL;
- else if (wide)
- fetch_src = _pixman_image_get_scanline_64;
- else
+ else if (narrow)
fetch_src = _pixman_image_get_scanline_32;
+ else
+ fetch_src = _pixman_image_get_scanline_64;
if (!mask || op == PIXMAN_OP_CLEAR)
fetch_mask = NULL;
- else if (wide)
- fetch_mask = _pixman_image_get_scanline_64;
- else
+ else if (narrow)
fetch_mask = _pixman_image_get_scanline_32;
+ else
+ fetch_mask = _pixman_image_get_scanline_64;
if (op == PIXMAN_OP_CLEAR || op == PIXMAN_OP_SRC)
fetch_dest = NULL;
- else if (wide)
- fetch_dest = _pixman_image_get_scanline_64;
- else
+ else if (narrow)
fetch_dest = _pixman_image_get_scanline_32;
-
- if (wide)
- store = _pixman_image_store_scanline_64;
else
+ fetch_dest = _pixman_image_get_scanline_64;
+
+ if (narrow)
store = _pixman_image_store_scanline_32;
+ else
+ store = _pixman_image_store_scanline_64;
/* Skip the store step and composite directly into the
* destination if the output format of the compose func matches
@@ -148,7 +144,7 @@ general_composite_rect (pixman_implementation_t *imp,
op == PIXMAN_OP_OUT_REVERSE ||
op == PIXMAN_OP_DST)))
{
- if (!wide &&
+ if (narrow &&
!dest->common.alpha_map &&
!dest->bits.write_func)
{
@@ -175,19 +171,19 @@ general_composite_rect (pixman_implementation_t *imp,
mask->common.component_alpha &&
PIXMAN_FORMAT_RGB (mask->bits.format);
- if (wide)
+ if (narrow)
{
if (component_alpha)
- compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca;
+ compose = _pixman_implementation_combine_32_ca;
else
- compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64;
+ compose = _pixman_implementation_combine_32;
}
else
{
if (component_alpha)
- compose = _pixman_implementation_combine_32_ca;
+ compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca;
else
- compose = _pixman_implementation_combine_32;
+ compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64;
}
if (!compose)
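
The rewritten general_composite_rect() now sizes its scanline buffers from
the narrow-format flags: 4 bytes per pixel when source, mask and
destination are all narrow, 8 bytes (the 16-bit-per-channel pipeline)
otherwise. A small sketch of that decision, reusing the flag value from
pixman-private.h but with a hypothetical function name:

#include <stdint.h>
#include <stdio.h>

#define FAST_PATH_NARROW_FORMAT (1 << 6)   /* value from pixman-private.h */

/* Mirrors the 'narrow' computation in general_composite_rect(): every
 * participating image must carry the narrow flag, and a missing mask
 * does not disqualify the narrow path. */
static int
scanline_bpp (uint32_t src_flags, int have_mask, uint32_t mask_flags,
              uint32_t dest_flags)
{
    int narrow =
        (src_flags & FAST_PATH_NARROW_FORMAT) &&
        (!have_mask || (mask_flags & FAST_PATH_NARROW_FORMAT)) &&
        (dest_flags & FAST_PATH_NARROW_FORMAT);

    return narrow ? 4 : 8;
}

int
main (void)
{
    uint32_t n = FAST_PATH_NARROW_FORMAT;

    printf ("all narrow: %d bytes/pixel\n", scanline_bpp (n, 1, n, n));
    printf ("wide mask:  %d bytes/pixel\n", scanline_bpp (n, 1, 0, n));
    printf ("no mask:    %d bytes/pixel\n", scanline_bpp (n, 0, 0, n));
    return 0;
}
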
diff --git a/pixman/pixman/pixman-image.c b/pixman/pixman/pixman-image.c
index 3020f246c..1b243979a 100644
--- a/pixman/pixman/pixman-image.c
+++ b/pixman/pixman/pixman-image.c
@@ -327,10 +327,6 @@ compute_image_info (pixman_image_t *image)
flags |= FAST_PATH_Y_UNIT_ZERO;
}
- /* Alpha map */
- if (!image->common.alpha_map)
- flags |= FAST_PATH_NO_ALPHA_MAP;
-
/* Filter */
switch (image->common.filter)
{
@@ -357,19 +353,34 @@ compute_image_info (pixman_image_t *image)
switch (image->common.repeat)
{
case PIXMAN_REPEAT_NONE:
- flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_NORMAL_REPEAT;
+ flags |=
+ FAST_PATH_NO_REFLECT_REPEAT |
+ FAST_PATH_NO_PAD_REPEAT |
+ FAST_PATH_NO_NORMAL_REPEAT;
break;
case PIXMAN_REPEAT_REFLECT:
- flags |= FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_NONE_REPEAT | FAST_PATH_NO_NORMAL_REPEAT;
+ flags |=
+ FAST_PATH_NO_PAD_REPEAT |
+ FAST_PATH_NO_NONE_REPEAT |
+ FAST_PATH_NO_NORMAL_REPEAT |
+ FAST_PATH_COVERS_CLIP;
break;
case PIXMAN_REPEAT_PAD:
- flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_NONE_REPEAT | FAST_PATH_NO_NORMAL_REPEAT;
+ flags |=
+ FAST_PATH_NO_REFLECT_REPEAT |
+ FAST_PATH_NO_NONE_REPEAT |
+ FAST_PATH_NO_NORMAL_REPEAT |
+ FAST_PATH_COVERS_CLIP;
break;
default:
- flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_NONE_REPEAT;
+ flags |=
+ FAST_PATH_NO_REFLECT_REPEAT |
+ FAST_PATH_NO_PAD_REPEAT |
+ FAST_PATH_NO_NONE_REPEAT |
+ FAST_PATH_COVERS_CLIP;
break;
}
@@ -379,7 +390,7 @@ compute_image_info (pixman_image_t *image)
else
flags |= FAST_PATH_UNIFIED_ALPHA;
- flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NO_WIDE_FORMAT);
+ flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NARROW_FORMAT);
/* Type specific checks */
switch (image->type)
@@ -389,6 +400,8 @@ compute_image_info (pixman_image_t *image)
if (image->solid.color.alpha == 0xffff)
flags |= FAST_PATH_IS_OPAQUE;
+
+ flags |= FAST_PATH_COVERS_CLIP;
break;
case BITS:
@@ -426,7 +439,7 @@ compute_image_info (pixman_image_t *image)
flags &= ~FAST_PATH_NO_ACCESSORS;
if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
- flags &= ~FAST_PATH_NO_WIDE_FORMAT;
+ flags &= ~FAST_PATH_NARROW_FORMAT;
break;
case LINEAR:
@@ -454,6 +467,17 @@ compute_image_info (pixman_image_t *image)
break;
}
+ /* Alpha map */
+ if (!image->common.alpha_map)
+ {
+ flags |= FAST_PATH_NO_ALPHA_MAP;
+ }
+ else
+ {
+ if (PIXMAN_FORMAT_IS_WIDE (image->common.alpha_map->format))
+ flags &= ~FAST_PATH_NARROW_FORMAT;
+ }
+
/* Both alpha maps and convolution filters can introduce
* non-opaqueness in otherwise opaque images. Also
* an image with component alpha turned on is only opaque
diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h
index 65f40a1b3..8c68304dc 100644
--- a/pixman/pixman/pixman-private.h
+++ b/pixman/pixman/pixman-private.h
@@ -554,7 +554,7 @@ _pixman_choose_implementation (void);
#define FAST_PATH_NO_PAD_REPEAT (1 << 3)
#define FAST_PATH_NO_REFLECT_REPEAT (1 << 4)
#define FAST_PATH_NO_ACCESSORS (1 << 5)
-#define FAST_PATH_NO_WIDE_FORMAT (1 << 6)
+#define FAST_PATH_NARROW_FORMAT (1 << 6)
#define FAST_PATH_COVERS_CLIP (1 << 7)
#define FAST_PATH_COMPONENT_ALPHA (1 << 8)
#define FAST_PATH_UNIFIED_ALPHA (1 << 9)
@@ -600,7 +600,7 @@ _pixman_choose_implementation (void);
FAST_PATH_NO_PAD_REPEAT | \
FAST_PATH_NO_REFLECT_REPEAT | \
FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NO_WIDE_FORMAT | \
+ FAST_PATH_NARROW_FORMAT | \
FAST_PATH_COVERS_CLIP)
#define FAST_PATH_STD_SRC_FLAGS \
@@ -614,7 +614,7 @@ _pixman_choose_implementation (void);
#define FAST_PATH_STD_DEST_FLAGS \
(FAST_PATH_NO_ACCESSORS | \
FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NO_WIDE_FORMAT)
+ FAST_PATH_NARROW_FORMAT)
#define FAST_PATH(op, src, src_flags, mask, mask_flags, dest, dest_flags, func) \
PIXMAN_OP_ ## op, \
@@ -744,6 +744,9 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
#undef DEBUG
+#define COMPILE_TIME_ASSERT(x) \
+ do { typedef int compile_time_assertion [(x)?1:-1]; } while (0)
+
/* Turn on debugging depending on what type of release this is
*/
#if (((PIXMAN_VERSION_MICRO % 2) == 0) && ((PIXMAN_VERSION_MINOR % 2) == 1))
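
COMPILE_TIME_ASSERT works by declaring an array type whose size is -1
exactly when the condition is false, which any C compiler must reject. A
minimal usage sketch:

#include <stdint.h>

#define COMPILE_TIME_ASSERT(x) \
    do { typedef int compile_time_assertion [(x) ? 1 : -1]; } while (0)

int
main (void)
{
    /* Compiles: the array type has size 1. */
    COMPILE_TIME_ASSERT (sizeof (uint32_t) == 4);

    /* Uncommenting the next line makes the build fail with a
     * "negative array size" error:
     * COMPILE_TIME_ASSERT (sizeof (uint32_t) == 8);
     */
    return 0;
}
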
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index cfef466c8..8e175b78d 100644
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -35,6 +35,7 @@
#include <emmintrin.h> /* for SSE2 intrinsics */
#include "pixman-private.h"
#include "pixman-combine32.h"
+#include "pixman-fast-path.h"
#if defined(_MSC_VER) && defined(_M_AMD64)
/* Windows 64 doesn't allow MMX to be used, so
@@ -6346,6 +6347,107 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
_mm_empty ();
}
+/* A variant of 'core_combine_over_u_sse2' with minor tweaks */
+static force_inline void
+scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
+ const uint32_t* ps,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx)
+{
+ uint32_t s, d;
+ const uint32_t* pm = NULL;
+
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+ /* Align dst on a 16-byte boundary */
+ while (w && ((unsigned long)pd & 15))
+ {
+ d = *pd;
+ s = combine1 (ps + (vx >> 16), pm);
+ vx += unit_x;
+
+ *pd++ = core_combine_over_u_pixel_sse2 (s, d);
+ if (pm)
+ pm++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ __m128i tmp;
+ uint32_t tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = ps[vx >> 16];
+ vx += unit_x;
+ tmp2 = ps[vx >> 16];
+ vx += unit_x;
+ tmp3 = ps[vx >> 16];
+ vx += unit_x;
+ tmp4 = ps[vx >> 16];
+ vx += unit_x;
+
+ tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
+
+ xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
+
+ if (is_opaque (xmm_src_hi))
+ {
+ save_128_aligned ((__m128i*)pd, xmm_src_hi);
+ }
+ else if (!is_zero (xmm_src_hi))
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (
+ xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+
+ over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ /* rebuild the 4 pixel data and save */
+ save_128_aligned ((__m128i*)pd,
+ pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ w -= 4;
+ pd += 4;
+ if (pm)
+ pm += 4;
+ }
+
+ while (w)
+ {
+ d = *pd;
+ s = combine1 (ps + (vx >> 16), pm);
+ vx += unit_x;
+
+ *pd++ = core_combine_over_u_pixel_sse2 (s, d);
+ if (pm)
+ pm++;
+
+ w--;
+ }
+ _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, COVER);
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, NONE);
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, PAD);
+
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
@@ -6429,6 +6531,19 @@ static const pixman_fast_path_t sse2_fast_paths[] =
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+
{ PIXMAN_OP_NONE },
};
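
The new SSE2 scanline above follows the usual head/body/tail SIMD shape:
scalar pixels until pd reaches 16-byte alignment, four gathered source
pixels per SSE2 iteration, then a scalar remainder. A standalone sketch of
just that structure, doing a plain copy instead of the OVER blend, with
hypothetical names:

#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>

static void
copy_scanline_sse2 (uint32_t *pd, const uint32_t *ps, int w,
                    int32_t vx, int32_t unit_x)
{
    /* Head: one pixel at a time until pd is 16-byte aligned. */
    while (w && ((uintptr_t) pd & 15))
    {
        *pd++ = ps[vx >> 16];
        vx += unit_x;
        w--;
    }

    /* Body: gather four nearest-neighbour pixels, store aligned. */
    while (w >= 4)
    {
        uint32_t p1 = ps[vx >> 16]; vx += unit_x;
        uint32_t p2 = ps[vx >> 16]; vx += unit_x;
        uint32_t p3 = ps[vx >> 16]; vx += unit_x;
        uint32_t p4 = ps[vx >> 16]; vx += unit_x;

        _mm_store_si128 ((__m128i *) pd, _mm_set_epi32 (p4, p3, p2, p1));

        pd += 4;
        w -= 4;
    }

    /* Tail: scalar remainder. */
    while (w--)
    {
        *pd++ = ps[vx >> 16];
        vx += unit_x;
    }
}

int
main (void)
{
    static const uint32_t src[4] = { 1, 2, 3, 4 };
    uint32_t dst[16];
    int i;

    /* 4x upscale: unit_x = 0.25 in 16.16, starting at 0.125.  The head
     * loop makes correctness independent of dst's initial alignment. */
    copy_scanline_sse2 (dst, src, 16, 0x2000, 0x4000);

    for (i = 0; i < 16; i++)
        printf ("%u ", dst[i]);
    printf ("\n");
    return 0;
}
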
diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c
index 4dd45f471..f2565ca01 100644
--- a/pixman/pixman/pixman.c
+++ b/pixman/pixman/pixman.c
@@ -139,14 +139,18 @@ optimize_operator (pixman_op_t op,
uint32_t dst_flags)
{
pixman_bool_t is_source_opaque, is_dest_opaque;
- int opaqueness;
- is_source_opaque = ((src_flags & mask_flags) & FAST_PATH_IS_OPAQUE) != 0;
- is_dest_opaque = (dst_flags & FAST_PATH_IS_OPAQUE) != 0;
+#define OPAQUE_SHIFT 13
+
+ COMPILE_TIME_ASSERT (FAST_PATH_IS_OPAQUE == (1 << OPAQUE_SHIFT));
+
+ is_dest_opaque = (dst_flags & FAST_PATH_IS_OPAQUE);
+ is_source_opaque = ((src_flags & mask_flags) & FAST_PATH_IS_OPAQUE);
- opaqueness = ((is_dest_opaque << 1) | is_source_opaque);
+ is_dest_opaque >>= OPAQUE_SHIFT - 1;
+ is_source_opaque >>= OPAQUE_SHIFT;
- return operator_table[op].opaque_info[opaqueness];
+ return operator_table[op].opaque_info[is_dest_opaque | is_source_opaque];
}
static void
@@ -302,6 +306,10 @@ pixman_compute_composite_region32 (pixman_region32_t * region,
if (region->extents.x1 >= region->extents.x2 ||
region->extents.y1 >= region->extents.y2)
{
+ region->extents.x1 = 0;
+ region->extents.x2 = 0;
+ region->extents.y1 = 0;
+ region->extents.y2 = 0;
return FALSE;
}
@@ -311,14 +319,27 @@ pixman_compute_composite_region32 (pixman_region32_t * region,
return FALSE;
}
- if (dst_image->common.alpha_map && dst_image->common.alpha_map->common.have_clip_region)
+ if (dst_image->common.alpha_map)
{
- if (!clip_general_image (region, &dst_image->common.alpha_map->common.clip_region,
- -dst_image->common.alpha_origin_x,
- -dst_image->common.alpha_origin_y))
+ if (!pixman_region32_intersect_rect (region, region,
+ dst_image->common.alpha_origin_x,
+ dst_image->common.alpha_origin_y,
+ dst_image->common.alpha_map->width,
+ dst_image->common.alpha_map->height))
{
return FALSE;
}
+ if (!pixman_region32_not_empty (region))
+ return FALSE;
+ if (dst_image->common.alpha_map->common.have_clip_region)
+ {
+ if (!clip_general_image (region, &dst_image->common.alpha_map->common.clip_region,
+ -dst_image->common.alpha_origin_x,
+ -dst_image->common.alpha_origin_y))
+ {
+ return FALSE;
+ }
+ }
}
/* clip against src */
@@ -691,29 +712,9 @@ analyze_extent (pixman_image_t *image, int x, int y,
pixman_fixed_t width, height;
pixman_box32_t ex;
- *flags |= FAST_PATH_COVERS_CLIP;
if (!image)
return TRUE;
- transform = image->common.transform;
- if (image->common.type == BITS)
- {
- /* During repeat mode calculations we might convert the
- * width/height of an image to fixed 16.16, so we need
- * them to be smaller than 16 bits.
- */
- if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff)
- return FALSE;
-
- if (image->common.repeat == PIXMAN_REPEAT_NONE &&
- (x > extents->x1 || y > extents->y1 ||
- x + image->bits.width < extents->x2 ||
- y + image->bits.height < extents->y2))
- {
- (*flags) &= ~FAST_PATH_COVERS_CLIP;
- }
- }
-
/* Some compositing functions walk one step
* outside the destination rectangle, so we
* check here that the expanded-by-one source
@@ -727,8 +728,28 @@ analyze_extent (pixman_image_t *image, int x, int y,
return FALSE;
}
+ transform = image->common.transform;
if (image->common.type == BITS)
{
+ /* During repeat mode calculations we might convert the
+ * width/height of an image to fixed 16.16, so we need
+ * them to be smaller than 16 bits.
+ */
+ if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff)
+ return FALSE;
+
+#define ID_AND_NEAREST (FAST_PATH_ID_TRANSFORM | FAST_PATH_NEAREST_FILTER)
+
+ if ((image->common.flags & ID_AND_NEAREST) == ID_AND_NEAREST &&
+ extents->x1 - x >= 0 &&
+ extents->y1 - y >= 0 &&
+ extents->x2 - x <= image->bits.width &&
+ extents->y2 - y <= image->bits.height)
+ {
+ *flags |= (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_COVERS_CLIP);
+ return TRUE;
+ }
+
switch (image->common.filter)
{
case PIXMAN_FILTER_CONVOLUTION:
@@ -759,6 +780,17 @@ analyze_extent (pixman_image_t *image, int x, int y,
default:
return FALSE;
}
+
+ /* Check whether the non-expanded, transformed extent is entirely within
+ * the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is.
+ */
+ ex = *extents;
+ if (compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, height) &&
+ ex.x1 >= 0 && ex.y1 >= 0 &&
+ ex.x2 <= image->bits.width && ex.y2 <= image->bits.height)
+ {
+ *flags |= (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_COVERS_CLIP);
+ }
}
else
{
@@ -769,8 +801,8 @@ analyze_extent (pixman_image_t *image, int x, int y,
}
/* Check that the extents expanded by one don't overflow. This ensures that
- * compositing functions can simply walk the source space using 16.16 variables
- * without worrying about overflow.
+ * compositing functions can simply walk the source space using 16.16
+ * variables without worrying about overflow.
*/
ex.x1 = extents->x1 - 1;
ex.y1 = extents->y1 - 1;
@@ -780,19 +812,6 @@ analyze_extent (pixman_image_t *image, int x, int y,
if (!compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, height))
return FALSE;
- if (image->type == BITS)
- {
- /* Check whether the non-expanded, transformed extent is entirely within
- * the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is.
- */
- ex = *extents;
- if (compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, height))
- {
- if (ex.x1 >= 0 && ex.y1 >= 0 && ex.x2 <= image->bits.width && ex.y2 <= image->bits.height)
- *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
- }
- }
-
return TRUE;
}
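
The optimize_operator() change above replaces boolean tests with direct
bit shifts: since FAST_PATH_IS_OPAQUE is the single bit at position 13,
shifting the masked destination flags right by OPAQUE_SHIFT - 1 and the
masked source flags by OPAQUE_SHIFT yields the same 0..3 index into
opaque_info as the old "(is_dest_opaque << 1) | is_source_opaque". A small
sketch verifying the two formulations agree:

#include <stdint.h>
#include <stdio.h>

#define OPAQUE_SHIFT        13
#define FAST_PATH_IS_OPAQUE (1 << OPAQUE_SHIFT)   /* from pixman-private.h */

int
main (void)
{
    int s, d;

    for (d = 0; d < 2; d++)
    {
        for (s = 0; s < 2; s++)
        {
            uint32_t src_flags = s ? FAST_PATH_IS_OPAQUE : 0;
            uint32_t dst_flags = d ? FAST_PATH_IS_OPAQUE : 0;

            int old_index =
                (((dst_flags & FAST_PATH_IS_OPAQUE) != 0) << 1) |
                ((src_flags & FAST_PATH_IS_OPAQUE) != 0);
            int new_index =
                ((dst_flags & FAST_PATH_IS_OPAQUE) >> (OPAQUE_SHIFT - 1)) |
                ((src_flags & FAST_PATH_IS_OPAQUE) >> OPAQUE_SHIFT);

            printf ("src=%d dst=%d -> old=%d new=%d\n",
                    s, d, old_index, new_index);
        }
    }
    return 0;
}
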