aboutsummaryrefslogtreecommitdiff
path: root/pixman
diff options
context:
space:
mode:
Diffstat (limited to 'pixman')
-rw-r--r-- pixman/configure.ac 5
-rw-r--r-- pixman/pixman/pixman-bits-image.c 74
-rw-r--r-- pixman/pixman/pixman-fast-path.c 178
-rw-r--r-- pixman/pixman/pixman-image.c 111
-rw-r--r-- pixman/pixman/pixman-private.h 29
-rw-r--r-- pixman/pixman/pixman-sse2.c 428
-rw-r--r-- pixman/pixman/pixman-utils.c 112
-rw-r--r-- pixman/pixman/pixman.c 123
8 files changed, 788 insertions, 272 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 481d0bb00..6f5aef562 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -552,7 +552,10 @@ AC_MSG_CHECKING(whether to use ARM IWMMXT intrinsics)
xserver_save_CFLAGS=$CFLAGS
CFLAGS="$IWMMXT_CFLAGS $CFLAGS"
AC_COMPILE_IFELSE([
-#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 3 && __GNUC_MINOR__ < 6))
+#ifndef __arm__
+#error "IWMMXT is only available on ARM"
+#endif
+#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6))
#error "Need GCC >= 4.6 for IWMMXT intrinsics"
#endif
#include <mmintrin.h>
diff --git a/pixman/pixman/pixman-bits-image.c b/pixman/pixman/pixman-bits-image.c
index f382c65ad..99c0dfe6d 100644
--- a/pixman/pixman/pixman-bits-image.c
+++ b/pixman/pixman/pixman-bits-image.c
@@ -1437,40 +1437,30 @@ create_bits (pixman_format_code_t format,
return calloc (buf_size, 1);
}
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_bits (pixman_format_code_t format,
- int width,
- int height,
- uint32_t * bits,
- int rowstride_bytes)
+pixman_bool_t
+_pixman_bits_image_init (pixman_image_t * image,
+ pixman_format_code_t format,
+ int width,
+ int height,
+ uint32_t * bits,
+ int rowstride)
{
- pixman_image_t *image;
uint32_t *free_me = NULL;
- /* must be a whole number of uint32_t's
- */
- return_val_if_fail (
- bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
-
- return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL);
-
if (!bits && width && height)
{
- free_me = bits = create_bits (format, width, height, &rowstride_bytes);
- if (!bits)
- return NULL;
- }
+ int rowstride_bytes;
- image = _pixman_image_allocate ();
+ free_me = bits = create_bits (format, width, height, &rowstride_bytes);
- if (!image)
- {
- if (free_me)
- free (free_me);
+ if (!bits)
+ return FALSE;
- return NULL;
+ rowstride = rowstride_bytes / (int) sizeof (uint32_t);
}
+ _pixman_image_init (image);
+
image->type = BITS;
image->bits.format = format;
image->bits.width = width;
@@ -1479,15 +1469,43 @@ pixman_image_create_bits (pixman_format_code_t format,
image->bits.free_me = free_me;
image->bits.read_func = NULL;
image->bits.write_func = NULL;
-
- /* The rowstride is stored in number of uint32_t */
- image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t);
-
+ image->bits.rowstride = rowstride;
image->bits.indexed = NULL;
image->common.property_changed = bits_image_property_changed;
_pixman_image_reset_clip_region (image);
+ return TRUE;
+}
+/* Public constructor for a BITS image: checks the arguments, allocates the image and initializes it. Returns NULL when allocation or initialization fails. */
+PIXMAN_EXPORT pixman_image_t *
+pixman_image_create_bits (pixman_format_code_t format,
+ int width,
+ int height,
+ uint32_t * bits,
+ int rowstride_bytes)
+{
+ pixman_image_t *image;
+
+ /* must be a whole number of uint32_t's
+ */
+ return_val_if_fail (
+ bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
+
+ return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL);
+
+ image = _pixman_image_allocate ();
+
+ if (!image)
+ return NULL;
+ /* The initializer is handed the stride converted from bytes to uint32_t units. */
+ if (!_pixman_bits_image_init (image, format, width, height, bits,
+ rowstride_bytes / (int) sizeof (uint32_t)))
+ {
+ free (image); /* initialization failed: release the struct allocated above */
+ return NULL;
+ }
+
return image;
}
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c
index 033efd7b1..038dcf722 100644
--- a/pixman/pixman/pixman-fast-path.c
+++ b/pixman/pixman/pixman-fast-path.c
@@ -1191,6 +1191,174 @@ FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
+#define REPEAT_MIN_WIDTH 32 /* sources narrower than this are pre-tiled into a wider scratch row */
+/* Composite a NORMAL-repeat bits source by tiling: each destination row is split into runs that fit inside the source, and every run is passed to a composite function looked up without the repeat flag. */
+static void
+fast_composite_tiled_repeat (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ pixman_composite_func_t func;
+ pixman_format_code_t mask_format;
+ uint32_t src_flags, mask_flags;
+ /* Request the same source flags minus NORMAL repeat, plus the promise that samples cover the clip. */
+ src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
+ FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
+
+ if (mask_image)
+ {
+ mask_format = mask_image->common.extended_format_code;
+ mask_flags = info->mask_flags;
+ }
+ else
+ {
+ mask_format = PIXMAN_null;
+ mask_flags = FAST_PATH_IS_OPAQUE;
+ }
+
+ if (_pixman_lookup_composite_function (
+ imp->toplevel, info->op,
+ src_image->common.extended_format_code, src_flags,
+ mask_format, mask_flags,
+ dest_image->common.extended_format_code, info->dest_flags,
+ &imp, &func))
+ {
+ int32_t sx, sy;
+ int32_t width_remain;
+ int32_t num_pixels;
+ int32_t src_width;
+ int32_t i, j;
+ pixman_image_t extended_src_image;
+ uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
+ pixman_bool_t need_src_extension;
+ uint32_t *src_line;
+ int32_t src_stride;
+ int32_t src_bpp;
+ pixman_composite_info_t info2 = *info;
+
+ src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
+ /* Narrow sources are pre-tiled into extended_src so func works on longer runs. Indexed sources are excluded: _pixman_bits_image_init () sets bits.indexed to NULL, so the scratch image would carry no palette. */
+ if (src_image->bits.width < REPEAT_MIN_WIDTH &&
+ (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && !src_image->bits.indexed)
+ {
+ sx = src_x;
+ sx = MOD (sx, src_image->bits.width);
+ sx += width;
+ src_width = 0;
+ /* Add whole tiles until the row reaches REPEAT_MIN_WIDTH or extends past sx, the end offset of the requested span. */
+ while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
+ src_width += src_image->bits.width;
+
+ src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
+
+ /* Initialize/validate stack-allocated temporary image */
+ _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
+ src_width, 1, &extended_src[0], src_stride);
+ _pixman_image_validate (&extended_src_image);
+
+ info2.src_image = &extended_src_image;
+ need_src_extension = TRUE;
+ }
+ else
+ {
+ src_width = src_image->bits.width;
+ need_src_extension = FALSE;
+ }
+
+ sx = src_x;
+ sy = src_y;
+
+ while (--height >= 0)
+ {
+ sx = MOD (sx, src_width);
+ sy = MOD (sy, src_image->bits.height);
+ /* When pre-tiling, refill the one-row scratch image with repeated copies of source row sy. */
+ if (need_src_extension)
+ {
+ if (src_bpp == 32)
+ {
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
+
+ for (i = 0; i < src_width; )
+ {
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ extended_src[i] = src_line[j];
+ }
+ }
+ else if (src_bpp == 16)
+ {
+ uint16_t *src_line_16;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
+ src_line_16, 1);
+ src_line = (uint32_t*)src_line_16;
+
+ for (i = 0; i < src_width; )
+ {
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
+ }
+ }
+ else if (src_bpp == 8)
+ {
+ uint8_t *src_line_8;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
+ src_line_8, 1);
+ src_line = (uint32_t*)src_line_8;
+
+ for (i = 0; i < src_width; )
+ {
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
+ }
+ }
+
+ info2.src_y = 0; /* the scratch image has a single row */
+ }
+ else
+ {
+ info2.src_y = sy;
+ }
+
+ width_remain = width;
+ /* Emit the row as runs that stop at the source's right edge; every run after the first starts at source x = 0. */
+ while (width_remain > 0)
+ {
+ num_pixels = src_width - sx;
+
+ if (num_pixels > width_remain)
+ num_pixels = width_remain;
+
+ info2.src_x = sx;
+ info2.width = num_pixels;
+ info2.height = 1;
+
+ func (imp, &info2);
+
+ width_remain -= num_pixels;
+ info2.mask_x += num_pixels;
+ info2.dest_x += num_pixels;
+ sx = 0;
+ }
+ /* Next row: restore the starting columns and advance every y coordinate by one. */
+ sx = src_x;
+ sy++;
+ info2.mask_x = info->mask_x;
+ info2.mask_y++;
+ info2.dest_x = info->dest_x;
+ info2.dest_y++;
+ }
+
+ if (need_src_extension)
+ _pixman_image_fini (&extended_src_image);
+ }
+ else
+ {
+ _pixman_log_error (FUNC, "Didn't find a suitable function ");
+ }
+}
+
/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
static force_inline void
scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
@@ -1787,6 +1955,16 @@ static const pixman_fast_path_t c_fast_paths[] =
SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
+ /* Simple repeat fast path entry. */
+ { PIXMAN_OP_any,
+ PIXMAN_any,
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
+ FAST_PATH_NORMAL_REPEAT),
+ PIXMAN_any, 0,
+ PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
+ fast_composite_tiled_repeat
+ },
+
{ PIXMAN_OP_NONE },
};
diff --git a/pixman/pixman/pixman-image.c b/pixman/pixman/pixman-image.c
index a3bb9b63a..afe587f31 100644
--- a/pixman/pixman/pixman-image.c
+++ b/pixman/pixman/pixman-image.c
@@ -49,56 +49,33 @@ _pixman_init_gradient (gradient_t * gradient,
return TRUE;
}
-pixman_image_t *
-_pixman_image_allocate (void)
-{
- pixman_image_t *image = malloc (sizeof (pixman_image_t));
-
- if (image)
- {
- image_common_t *common = &image->common;
-
- pixman_region32_init (&common->clip_region);
-
- common->alpha_count = 0;
- common->have_clip_region = FALSE;
- common->clip_sources = FALSE;
- common->transform = NULL;
- common->repeat = PIXMAN_REPEAT_NONE;
- common->filter = PIXMAN_FILTER_NEAREST;
- common->filter_params = NULL;
- common->n_filter_params = 0;
- common->alpha_map = NULL;
- common->component_alpha = FALSE;
- common->ref_count = 1;
- common->property_changed = NULL;
- common->client_clip = FALSE;
- common->destroy_func = NULL;
- common->destroy_data = NULL;
- common->dirty = TRUE;
- }
-
- return image;
-}
-
-static void
-image_property_changed (pixman_image_t *image)
-{
- image->common.dirty = TRUE;
-}
-
-/* Ref Counting */
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_ref (pixman_image_t *image)
+void
+_pixman_image_init (pixman_image_t *image)
{
- image->common.ref_count++;
-
- return image;
+ image_common_t *common = &image->common;
+ /* Set the type-independent (common) fields of an existing image to their defaults; nothing is allocated here. */
+ pixman_region32_init (&common->clip_region);
+
+ common->alpha_count = 0;
+ common->have_clip_region = FALSE;
+ common->clip_sources = FALSE;
+ common->transform = NULL;
+ common->repeat = PIXMAN_REPEAT_NONE;
+ common->filter = PIXMAN_FILTER_NEAREST;
+ common->filter_params = NULL;
+ common->n_filter_params = 0;
+ common->alpha_map = NULL;
+ common->component_alpha = FALSE;
+ common->ref_count = 1; /* the initializing caller holds the first reference */
+ common->property_changed = NULL;
+ common->client_clip = FALSE;
+ common->destroy_func = NULL;
+ common->destroy_data = NULL;
+ common->dirty = TRUE;
}
-/* returns TRUE when the image is freed */
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_unref (pixman_image_t *image)
+pixman_bool_t
+_pixman_image_fini (pixman_image_t *image)
{
image_common_t *common = (image_common_t *)image;
@@ -131,8 +108,45 @@ pixman_image_unref (pixman_image_t *image)
if (image->type == BITS && image->bits.free_me)
free (image->bits.free_me);
- free (image);
+ return TRUE;
+ }
+ return FALSE;
+}
+/* Heap-allocate a pixman_image_t and give it default common state via _pixman_image_init (); returns NULL if malloc fails. */
+pixman_image_t *
+_pixman_image_allocate (void)
+{
+ pixman_image_t *image = malloc (sizeof (pixman_image_t));
+
+ if (image)
+ _pixman_image_init (image);
+
+ return image;
+}
+/* Mark the image dirty. Presumably installed as a property_changed callback; the assignment is not visible in this hunk. */
+static void
+image_property_changed (pixman_image_t *image)
+{
+ image->common.dirty = TRUE;
+}
+
+/* Ref Counting: take one more reference on the image and return the same pointer. */
+PIXMAN_EXPORT pixman_image_t *
+pixman_image_ref (pixman_image_t *image)
+{
+ image->common.ref_count++;
+
+ return image;
+}
+
+/* returns TRUE when the image is freed */
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_unref (pixman_image_t *image)
+{
+ if (_pixman_image_fini (image))
+ {
+ free (image);
return TRUE;
}
@@ -361,6 +375,7 @@ compute_image_info (pixman_image_t *image)
else
{
code = image->bits.format;
+ flags |= FAST_PATH_BITS_IMAGE;
}
if (!PIXMAN_FORMAT_A (image->bits.format) &&
diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h
index 4d645fe4b..cbd48f3d1 100644
--- a/pixman/pixman/pixman-private.h
+++ b/pixman/pixman/pixman-private.h
@@ -250,6 +250,19 @@ _pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
void
_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
+void
+_pixman_image_init (pixman_image_t *image);
+
+pixman_bool_t
+_pixman_bits_image_init (pixman_image_t * image,
+ pixman_format_code_t format,
+ int width,
+ int height,
+ uint32_t * bits,
+ int rowstride);
+pixman_bool_t
+_pixman_image_fini (pixman_image_t *image);
+
pixman_image_t *
_pixman_image_allocate (void);
@@ -360,6 +373,10 @@ typedef struct
int32_t dest_y;
int32_t width;
int32_t height;
+
+ uint32_t src_flags;
+ uint32_t mask_flags;
+ uint32_t dest_flags;
} pixman_composite_info_t;
#define PIXMAN_COMPOSITE_ARGS(info) \
@@ -618,6 +635,7 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
#define FAST_PATH_ROTATE_270_TRANSFORM (1 << 22)
#define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST (1 << 23)
#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24)
+#define FAST_PATH_BITS_IMAGE (1 << 25)
#define FAST_PATH_PAD_REPEAT \
(FAST_PATH_NO_NONE_REPEAT | \
@@ -712,6 +730,17 @@ pixman_contract (uint32_t * dst,
const uint64_t *src,
int width);
+pixman_bool_t
+_pixman_lookup_composite_function (pixman_implementation_t *toplevel,
+ pixman_op_t op,
+ pixman_format_code_t src_format,
+ uint32_t src_flags,
+ pixman_format_code_t mask_format,
+ uint32_t mask_flags,
+ pixman_format_code_t dest_format,
+ uint32_t dest_flags,
+ pixman_implementation_t **out_imp,
+ pixman_composite_func_t *out_func);
/* Region Helpers */
pixman_bool_t
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index da6bcdfa0..a183f937d 100644
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -5287,83 +5287,53 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
-static void
-bilinear_interpolate_line_sse2 (uint32_t * out,
- const uint32_t * top,
- const uint32_t * bottom,
- int wt,
- int wb,
- pixman_fixed_t x,
- pixman_fixed_t ux,
- int width)
-{
- const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);
- const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);
- const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);
- const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);
- const __m128i xmm_ux = _mm_set_epi16 (ux, ux, ux, ux, ux, ux, ux, ux);
- const __m128i xmm_zero = _mm_setzero_si128 ();
- __m128i xmm_x = _mm_set_epi16 (x, x, x, x, x, x, x, x);
- uint32_t pix1, pix2, pix3, pix4;
-
- #define INTERPOLATE_ONE_PIXEL(pix) \
- do { \
- __m128i xmm_wh, xmm_lo, xmm_hi, a; \
- /* fetch 2x2 pixel block into sse2 register */ \
- uint32_t tl = top [pixman_fixed_to_int (x)]; \
- uint32_t tr = top [pixman_fixed_to_int (x) + 1]; \
- uint32_t bl = bottom [pixman_fixed_to_int (x)]; \
- uint32_t br = bottom [pixman_fixed_to_int (x) + 1]; \
- a = _mm_set_epi32 (tr, tl, br, bl); \
- x += ux; \
- /* vertical interpolation */ \
- a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \
- xmm_wt), \
- _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \
- xmm_wb)); \
- /* calculate horizontal weights */ \
- xmm_wh = _mm_add_epi16 (xmm_addc, \
- _mm_xor_si128 (xmm_xorc, \
- _mm_srli_epi16 (xmm_x, 8))); \
- xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
- /* horizontal interpolation */ \
- xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
- xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
- a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
- _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
- /* shift and pack the result */ \
- a = _mm_srli_epi32 (a, 16); \
- a = _mm_packs_epi32 (a, a); \
- a = _mm_packus_epi16 (a, a); \
- pix = _mm_cvtsi128_si32 (a); \
- } while (0)
-
- while ((width -= 4) >= 0)
- {
- INTERPOLATE_ONE_PIXEL (pix1);
- INTERPOLATE_ONE_PIXEL (pix2);
- INTERPOLATE_ONE_PIXEL (pix3);
- INTERPOLATE_ONE_PIXEL (pix4);
- *out++ = pix1;
- *out++ = pix2;
- *out++ = pix3;
- *out++ = pix4;
- }
- if (width & 2)
- {
- INTERPOLATE_ONE_PIXEL (pix1);
- INTERPOLATE_ONE_PIXEL (pix2);
- *out++ = pix1;
- *out++ = pix2;
- }
- if (width & 1)
- {
- INTERPOLATE_ONE_PIXEL (pix1);
- *out = pix1;
- }
-
- #undef INTERPOLATE_ONE_PIXEL
-}
+#define BILINEAR_DECLARE_VARIABLES /* declares the SSE2 constants and running xmm_x; expects wt, wb, vx and unit_x in the enclosing scope */ \
+ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
+ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
+ const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);\
+ const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \
+ const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \
+ unit_x, unit_x, unit_x, unit_x); \
+ const __m128i xmm_zero = _mm_setzero_si128 (); \
+ __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx)
+/* Compute one interpolated pixel into pix from the 2x2 block of src_top/src_bottom at the integer part of vx, then advance vx by unit_x and xmm_x by xmm_ux. */
+#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
+do { \
+ __m128i xmm_wh, xmm_lo, xmm_hi, a; \
+ /* fetch 2x2 pixel block into sse2 register */ \
+ uint32_t tl = src_top [pixman_fixed_to_int (vx)]; \
+ uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; \
+ uint32_t bl = src_bottom [pixman_fixed_to_int (vx)]; \
+ uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; \
+ a = _mm_set_epi32 (tr, tl, br, bl); \
+ vx += unit_x; \
+ /* vertical interpolation */ \
+ a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \
+ xmm_wt), \
+ _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \
+ xmm_wb)); \
+ /* calculate horizontal weights */ \
+ xmm_wh = _mm_add_epi16 (xmm_addc, \
+ _mm_xor_si128 (xmm_xorc, \
+ _mm_srli_epi16 (xmm_x, 8))); \
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
+ /* horizontal interpolation */ \
+ xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
+ xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
+ a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
+ _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
+ /* shift and pack the result */ \
+ a = _mm_srli_epi32 (a, 16); \
+ a = _mm_packs_epi32 (a, a); \
+ a = _mm_packus_epi16 (a, a); \
+ pix = _mm_cvtsi128_si32 (a); \
+} while (0)
+/* Advance vx and xmm_x by one step without fetching or computing a pixel, keeping both in step with BILINEAR_INTERPOLATE_ONE_PIXEL. */
+#define BILINEAR_SKIP_ONE_PIXEL() \
+do { \
+ vx += unit_x; \
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
+} while(0)
static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
@@ -5378,8 +5348,35 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
- bilinear_interpolate_line_sse2 (dst, src_top, src_bottom,
- wt, wb, vx, unit_x, w);
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1, pix2, pix3, pix4;
+
+ while ((w -= 4) >= 0)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+ *dst++ = pix1;
+ *dst++ = pix2;
+ *dst++ = pix3;
+ *dst++ = pix4;
+ }
+
+ if (w & 2)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ *dst++ = pix1;
+ *dst++ = pix2;
+ }
+
+ if (w & 1)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ *dst = pix1;
+ }
+
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
@@ -5399,6 +5396,269 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
+static force_inline void
+scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
+ const uint32_t * mask, /* not referenced in this function */
+ const uint32_t * src_top,
+ const uint32_t * src_bottom,
+ int32_t w,
+ int wt,
+ int wb,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx, /* not referenced in this function */
+ pixman_bool_t zero_src) /* not referenced in this function */
+{
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1, pix2, pix3, pix4;
+ /* OVER-combine w interpolated source pixels onto dst. Head: single pixels until dst is 16-byte aligned. */
+ while (w && ((unsigned long)dst & 15))
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+
+ if (pix1)
+ {
+ pix2 = *dst;
+ *dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
+ }
+
+ w--;
+ dst++;
+ }
+ /* Aligned body, four pixels per iteration: is_zero () skips the store, is_opaque () selects a plain store, anything else is blended with the loaded destination. */
+ while (w >= 4)
+ {
+ __m128i xmm_src;
+ __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo;
+ __m128i xmm_alpha_hi, xmm_alpha_lo;
+
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+
+ xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
+
+ if (!is_zero (xmm_src))
+ {
+ if (is_opaque (xmm_src))
+ {
+ save_128_aligned ((__m128i *)dst, xmm_src);
+ }
+ else
+ {
+ __m128i xmm_dst = load_128_aligned ((__m128i *)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+ over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+ }
+
+ w -= 4;
+ dst += 4;
+ }
+ /* Tail: the remaining (fewer than four) pixels, one at a time. */
+ while (w)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+
+ if (pix1)
+ {
+ pix2 = *dst;
+ *dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
+ }
+
+ w--;
+ dst++;
+ }
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ COVER, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ PAD, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NONE, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NORMAL, FLAG_NONE)
+
+static force_inline void
+scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
+ const uint8_t * mask,
+ const uint32_t * src_top,
+ const uint32_t * src_bottom,
+ int32_t w,
+ int wt,
+ int wb,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx, /* not referenced in this function */
+ pixman_bool_t zero_src) /* not referenced in this function */
+{
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1, pix2, pix3, pix4;
+ uint32_t m;
+ /* OVER-combine w interpolated source pixels onto dst through an 8-bit mask. Head: single pixels until dst is 16-byte aligned. */
+ while (w && ((unsigned long)dst & 15))
+ {
+ uint32_t sa;
+
+ m = (uint32_t) *mask++;
+
+ if (m)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ sa = pix1 >> 24;
+
+ if (sa == 0xff && m == 0xff)
+ {
+ *dst = pix1;
+ }
+ else
+ {
+ __m128i ms, md, ma, msa;
+
+ pix2 = *dst;
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (pix1);
+ md = unpack_32_1x128 (pix2);
+
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
+
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
+ }
+ }
+ else
+ {
+ BILINEAR_SKIP_ONE_PIXEL (); /* zero mask: keep vx/xmm_x in step without sampling */
+ }
+
+ w--;
+ dst++;
+ }
+ /* Aligned body: four mask bytes are read as one word so that an all-zero group skips interpolation for all four pixels. */
+ while (w >= 4)
+ {
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+
+ m = *(uint32_t*)mask; /* NOTE(review): 4 mask bytes read through a uint32_t pointer; mask alignment is not checked here */
+
+ if (m)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+
+ xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
+
+ if (m == 0xffffffff && is_opaque (xmm_src))
+ {
+ save_128_aligned ((__m128i *)dst, xmm_src);
+ }
+ else
+ {
+ xmm_dst = load_128_aligned ((__m128i *)dst);
+
+ xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
+ &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+ }
+ else
+ {
+ BILINEAR_SKIP_ONE_PIXEL ();
+ BILINEAR_SKIP_ONE_PIXEL ();
+ BILINEAR_SKIP_ONE_PIXEL ();
+ BILINEAR_SKIP_ONE_PIXEL ();
+ }
+
+ w -= 4;
+ dst += 4;
+ mask += 4;
+ }
+ /* Tail: the remaining (fewer than four) pixels, one at a time, same per-pixel logic as the head loop. */
+ while (w)
+ {
+ uint32_t sa;
+
+ m = (uint32_t) *mask++;
+
+ if (m)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ sa = pix1 >> 24;
+
+ if (sa == 0xff && m == 0xff)
+ {
+ *dst = pix1;
+ }
+ else
+ {
+ __m128i ms, md, ma, msa;
+
+ pix2 = *dst;
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (pix1);
+ md = unpack_32_1x128 (pix2);
+
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
+
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
+ }
+ }
+ else
+ {
+ BILINEAR_SKIP_ONE_PIXEL ();
+ }
+
+ w--;
+ dst++;
+ }
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ COVER, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ PAD, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ NONE, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ NORMAL, FLAG_HAVE_NON_SOLID_MASK)
static const pixman_fast_path_t sse2_fast_paths[] =
{
@@ -5505,6 +5765,16 @@ static const pixman_fast_path_t sse2_fast_paths[] =
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888),
+
{ PIXMAN_OP_NONE },
};
diff --git a/pixman/pixman/pixman-utils.c b/pixman/pixman/pixman-utils.c
index 768ca1b89..d2af51a30 100644
--- a/pixman/pixman/pixman-utils.c
+++ b/pixman/pixman/pixman-utils.c
@@ -30,6 +30,118 @@
#include "pixman-private.h"
+#define N_CACHED_FAST_PATHS 8
+/* Fixed-size table of N_CACHED_FAST_PATHS (implementation, fast path) pairs remembered from recent composite lookups. */
+typedef struct
+{
+ struct
+ {
+ pixman_implementation_t * imp;
+ pixman_fast_path_t fast_path;
+ } cache [N_CACHED_FAST_PATHS];
+} cache_t;
+
+PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
+/* Find a composite routine for (op, formats, flags): first by exact match in the cache, then by scanning the fast path tables from toplevel along the delegate chain. On success sets *out_imp and *out_func and returns TRUE; returns FALSE if nothing matches. */
+pixman_bool_t
+_pixman_lookup_composite_function (pixman_implementation_t *toplevel,
+ pixman_op_t op,
+ pixman_format_code_t src_format,
+ uint32_t src_flags,
+ pixman_format_code_t mask_format,
+ uint32_t mask_flags,
+ pixman_format_code_t dest_format,
+ uint32_t dest_flags,
+ pixman_implementation_t **out_imp,
+ pixman_composite_func_t *out_func)
+{
+ pixman_implementation_t *imp;
+ cache_t *cache;
+ int i;
+
+ /* Check cache for fast paths */
+ cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
+
+ for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
+ {
+ const pixman_fast_path_t *info = &(cache->cache[i].fast_path);
+
+ /* Note that we check for equality here, not whether
+ * the cached fast path matches. This is to prevent
+ * us from selecting an overly general fast path
+ * when a more specific one would work.
+ */
+ if (info->op == op &&
+ info->src_format == src_format &&
+ info->mask_format == mask_format &&
+ info->dest_format == dest_format &&
+ info->src_flags == src_flags &&
+ info->mask_flags == mask_flags &&
+ info->dest_flags == dest_flags &&
+ info->func)
+ {
+ *out_imp = cache->cache[i].imp;
+ *out_func = cache->cache[i].fast_path.func;
+
+ goto update_cache;
+ }
+ }
+ /* Cache miss: walk the implementation chain. A table entry matches when its op and formats are equal to the request or are the *_any wildcards, and every flag it requires is set in the request. */
+ for (imp = toplevel; imp != NULL; imp = imp->delegate)
+ {
+ const pixman_fast_path_t *info = imp->fast_paths;
+
+ while (info->op != PIXMAN_OP_NONE)
+ {
+ if ((info->op == op || info->op == PIXMAN_OP_any) &&
+ /* Formats */
+ ((info->src_format == src_format) ||
+ (info->src_format == PIXMAN_any)) &&
+ ((info->mask_format == mask_format) ||
+ (info->mask_format == PIXMAN_any)) &&
+ ((info->dest_format == dest_format) ||
+ (info->dest_format == PIXMAN_any)) &&
+ /* Flags */
+ (info->src_flags & src_flags) == info->src_flags &&
+ (info->mask_flags & mask_flags) == info->mask_flags &&
+ (info->dest_flags & dest_flags) == info->dest_flags)
+ {
+ *out_imp = imp;
+ *out_func = info->func;
+
+ /* Set i to the last spot in the cache so that the
+ * move-to-front code below will work
+ */
+ i = N_CACHED_FAST_PATHS - 1;
+
+ goto update_cache;
+ }
+
+ ++info;
+ }
+ }
+ return FALSE;
+
+update_cache:
+ if (i) /* i == 0 means the hit is already in the front slot */
+ {
+ while (i--)
+ cache->cache[i + 1] = cache->cache[i];
+ /* Slots 0..i-1 have each moved up one index; slot 0 now records the request's own op/formats/flags rather than the matched table entry's. */
+ cache->cache[0].imp = *out_imp;
+ cache->cache[0].fast_path.op = op;
+ cache->cache[0].fast_path.src_format = src_format;
+ cache->cache[0].fast_path.src_flags = src_flags;
+ cache->cache[0].fast_path.mask_format = mask_format;
+ cache->cache[0].fast_path.mask_flags = mask_flags;
+ cache->cache[0].fast_path.dest_format = dest_format;
+ cache->cache[0].fast_path.dest_flags = dest_flags;
+ cache->cache[0].fast_path.func = *out_func;
+ }
+
+ return TRUE;
+}
+
pixman_bool_t
_pixman_multiply_overflows_size (size_t a, size_t b)
{
diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c
index 87f5a933a..8fb53568f 100644
--- a/pixman/pixman/pixman.c
+++ b/pixman/pixman/pixman.c
@@ -335,117 +335,6 @@ pixman_compute_composite_region32 (pixman_region32_t * region,
return TRUE;
}
-#define N_CACHED_FAST_PATHS 8
-
-typedef struct
-{
- struct
- {
- pixman_implementation_t * imp;
- pixman_fast_path_t fast_path;
- } cache [N_CACHED_FAST_PATHS];
-} cache_t;
-
-PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
-
-static force_inline pixman_bool_t
-lookup_composite_function (pixman_op_t op,
- pixman_format_code_t src_format,
- uint32_t src_flags,
- pixman_format_code_t mask_format,
- uint32_t mask_flags,
- pixman_format_code_t dest_format,
- uint32_t dest_flags,
- pixman_implementation_t **out_imp,
- pixman_composite_func_t *out_func)
-{
- pixman_implementation_t *imp;
- cache_t *cache;
- int i;
-
- /* Check cache for fast paths */
- cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
-
- for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
- {
- const pixman_fast_path_t *info = &(cache->cache[i].fast_path);
-
- /* Note that we check for equality here, not whether
- * the cached fast path matches. This is to prevent
- * us from selecting an overly general fast path
- * when a more specific one would work.
- */
- if (info->op == op &&
- info->src_format == src_format &&
- info->mask_format == mask_format &&
- info->dest_format == dest_format &&
- info->src_flags == src_flags &&
- info->mask_flags == mask_flags &&
- info->dest_flags == dest_flags &&
- info->func)
- {
- *out_imp = cache->cache[i].imp;
- *out_func = cache->cache[i].fast_path.func;
-
- goto update_cache;
- }
- }
-
- for (imp = get_implementation (); imp != NULL; imp = imp->delegate)
- {
- const pixman_fast_path_t *info = imp->fast_paths;
-
- while (info->op != PIXMAN_OP_NONE)
- {
- if ((info->op == op || info->op == PIXMAN_OP_any) &&
- /* Formats */
- ((info->src_format == src_format) ||
- (info->src_format == PIXMAN_any)) &&
- ((info->mask_format == mask_format) ||
- (info->mask_format == PIXMAN_any)) &&
- ((info->dest_format == dest_format) ||
- (info->dest_format == PIXMAN_any)) &&
- /* Flags */
- (info->src_flags & src_flags) == info->src_flags &&
- (info->mask_flags & mask_flags) == info->mask_flags &&
- (info->dest_flags & dest_flags) == info->dest_flags)
- {
- *out_imp = imp;
- *out_func = info->func;
-
- /* Set i to the last spot in the cache so that the
- * move-to-front code below will work
- */
- i = N_CACHED_FAST_PATHS - 1;
-
- goto update_cache;
- }
-
- ++info;
- }
- }
- return FALSE;
-
-update_cache:
- if (i)
- {
- while (i--)
- cache->cache[i + 1] = cache->cache[i];
-
- cache->cache[0].imp = *out_imp;
- cache->cache[0].fast_path.op = op;
- cache->cache[0].fast_path.src_format = src_format;
- cache->cache[0].fast_path.src_flags = src_flags;
- cache->cache[0].fast_path.mask_format = mask_format;
- cache->cache[0].fast_path.mask_flags = mask_flags;
- cache->cache[0].fast_path.dest_format = dest_format;
- cache->cache[0].fast_path.dest_flags = dest_flags;
- cache->cache[0].fast_path.func = *out_func;
- }
-
- return TRUE;
-}
-
typedef struct
{
pixman_fixed_48_16_t x1;
@@ -790,11 +679,10 @@ pixman_image_composite32 (pixman_op_t op,
*/
op = optimize_operator (op, src_flags, mask_flags, dest_flags);
- if (lookup_composite_function (op,
- src_format, src_flags,
- mask_format, mask_flags,
- dest_format, dest_flags,
- &imp, &func))
+ if (_pixman_lookup_composite_function (
+ get_implementation (), op,
+ src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags,
+ &imp, &func))
{
pixman_composite_info_t info;
const pixman_box32_t *pbox;
@@ -804,6 +692,9 @@ pixman_image_composite32 (pixman_op_t op,
info.src_image = src;
info.mask_image = mask;
info.dest_image = dest;
+ info.src_flags = src_flags;
+ info.mask_flags = mask_flags;
+ info.dest_flags = dest_flags;
pbox = pixman_region32_rectangles (&region, &n);