diff options
Diffstat (limited to 'pixman')
-rw-r--r-- | pixman/configure.ac | 5 | ||||
-rw-r--r-- | pixman/pixman/pixman-bits-image.c | 74 | ||||
-rw-r--r-- | pixman/pixman/pixman-fast-path.c | 178 | ||||
-rw-r--r-- | pixman/pixman/pixman-image.c | 111 | ||||
-rw-r--r-- | pixman/pixman/pixman-private.h | 29 | ||||
-rw-r--r-- | pixman/pixman/pixman-sse2.c | 428 | ||||
-rw-r--r-- | pixman/pixman/pixman-utils.c | 112 | ||||
-rw-r--r-- | pixman/pixman/pixman.c | 123 |
8 files changed, 788 insertions, 272 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac index 481d0bb00..6f5aef562 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -552,7 +552,10 @@ AC_MSG_CHECKING(whether to use ARM IWMMXT intrinsics) xserver_save_CFLAGS=$CFLAGS CFLAGS="$IWMMXT_CFLAGS $CFLAGS" AC_COMPILE_IFELSE([ -#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 3 && __GNUC_MINOR__ < 6)) +#ifndef __arm__ +#error "IWMMXT is only available on ARM" +#endif +#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)) #error "Need GCC >= 4.6 for IWMMXT intrinsics" #endif #include <mmintrin.h> diff --git a/pixman/pixman/pixman-bits-image.c b/pixman/pixman/pixman-bits-image.c index f382c65ad..99c0dfe6d 100644 --- a/pixman/pixman/pixman-bits-image.c +++ b/pixman/pixman/pixman-bits-image.c @@ -1437,40 +1437,30 @@ create_bits (pixman_format_code_t format, return calloc (buf_size, 1); } -PIXMAN_EXPORT pixman_image_t * -pixman_image_create_bits (pixman_format_code_t format, - int width, - int height, - uint32_t * bits, - int rowstride_bytes) +pixman_bool_t +_pixman_bits_image_init (pixman_image_t * image, + pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride) { - pixman_image_t *image; uint32_t *free_me = NULL; - /* must be a whole number of uint32_t's - */ - return_val_if_fail ( - bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); - - return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL); - if (!bits && width && height) { - free_me = bits = create_bits (format, width, height, &rowstride_bytes); - if (!bits) - return NULL; - } + int rowstride_bytes; - image = _pixman_image_allocate (); + free_me = bits = create_bits (format, width, height, &rowstride_bytes); - if (!image) - { - if (free_me) - free (free_me); + if (!bits) + return FALSE; - return NULL; + rowstride = rowstride_bytes / (int) sizeof (uint32_t); } + _pixman_image_init (image); + image->type = BITS; image->bits.format = format; image->bits.width = width; @@ -1479,15 +1469,43 @@ pixman_image_create_bits (pixman_format_code_t format, image->bits.free_me = free_me; image->bits.read_func = NULL; image->bits.write_func = NULL; - - /* The rowstride is stored in number of uint32_t */ - image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t); - + image->bits.rowstride = rowstride; image->bits.indexed = NULL; image->common.property_changed = bits_image_property_changed; _pixman_image_reset_clip_region (image); + return TRUE; +} + +PIXMAN_EXPORT pixman_image_t * +pixman_image_create_bits (pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride_bytes) +{ + pixman_image_t *image; + + /* must be a whole number of uint32_t's + */ + return_val_if_fail ( + bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); + + return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL); + + image = _pixman_image_allocate (); + + if (!image) + return NULL; + + if (!_pixman_bits_image_init (image, format, width, height, bits, + rowstride_bytes / (int) sizeof (uint32_t))) + { + free (image); + return NULL; + } + return image; } diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index 033efd7b1..038dcf722 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -1191,6 +1191,174 @@ FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) +#define REPEAT_MIN_WIDTH 32 + +static void +fast_composite_tiled_repeat (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + pixman_composite_func_t func; + pixman_format_code_t mask_format; + uint32_t src_flags, mask_flags; + + src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) | + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + + if (mask_image) + { + mask_format = mask_image->common.extended_format_code; + mask_flags = info->mask_flags; + } + else + { + mask_format = PIXMAN_null; + mask_flags = FAST_PATH_IS_OPAQUE; + } + + if (_pixman_lookup_composite_function ( + imp->toplevel, info->op, + src_image->common.extended_format_code, src_flags, + mask_format, mask_flags, + dest_image->common.extended_format_code, info->dest_flags, + &imp, &func)) + { + int32_t sx, sy; + int32_t width_remain; + int32_t num_pixels; + int32_t src_width; + int32_t i, j; + pixman_image_t extended_src_image; + uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; + pixman_bool_t need_src_extension; + uint32_t *src_line; + int32_t src_stride; + int32_t src_bpp; + pixman_composite_info_t info2 = *info; + + src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); + + if (src_image->bits.width < REPEAT_MIN_WIDTH && + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8)) + { + sx = src_x; + sx = MOD (sx, src_image->bits.width); + sx += width; + src_width = 0; + + while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) + src_width += src_image->bits.width; + + src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); + + /* Initialize/validate stack-allocated temporary image */ + _pixman_bits_image_init (&extended_src_image, src_image->bits.format, + src_width, 1, &extended_src[0], src_stride); + _pixman_image_validate (&extended_src_image); + + info2.src_image = &extended_src_image; + need_src_extension = TRUE; + } + else + { + src_width = src_image->bits.width; + need_src_extension = FALSE; + } + + sx = src_x; + sy = src_y; + + while (--height >= 0) + { + sx = MOD (sx, src_width); + sy = MOD (sy, src_image->bits.height); + + if (need_src_extension) + { + if (src_bpp == 32) + { + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + extended_src[i] = src_line[j]; + } + } + else if (src_bpp == 16) + { + uint16_t *src_line_16; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, + src_line_16, 1); + src_line = (uint32_t*)src_line_16; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; + } + } + else if (src_bpp == 8) + { + uint8_t *src_line_8; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, + src_line_8, 1); + src_line = (uint32_t*)src_line_8; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; + } + } + + info2.src_y = 0; + } + else + { + info2.src_y = sy; + } + + width_remain = width; + + while (width_remain > 0) + { + num_pixels = src_width - sx; + + if (num_pixels > width_remain) + num_pixels = width_remain; + + info2.src_x = sx; + info2.width = num_pixels; + info2.height = 1; + + func (imp, &info2); + + width_remain -= num_pixels; + info2.mask_x += num_pixels; + info2.dest_x += num_pixels; + sx = 0; + } + + sx = src_x; + sy++; + info2.mask_x = info->mask_x; + info2.mask_y++; + info2.dest_x = info->dest_x; + info2.dest_y++; + } + + if (need_src_extension) + _pixman_image_fini (&extended_src_image); + } + else + { + _pixman_log_error (FUNC, "Didn't find a suitable function "); + } +} + /* Use more unrolling for src_0565_0565 because it is typically CPU bound */ static force_inline void scaled_nearest_scanline_565_565_SRC (uint16_t * dst, @@ -1787,6 +1955,16 @@ static const pixman_fast_path_t c_fast_paths[] = SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565), SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8), + /* Simple repeat fast path entry. */ + { PIXMAN_OP_any, + PIXMAN_any, + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE | + FAST_PATH_NORMAL_REPEAT), + PIXMAN_any, 0, + PIXMAN_any, FAST_PATH_STD_DEST_FLAGS, + fast_composite_tiled_repeat + }, + { PIXMAN_OP_NONE }, }; diff --git a/pixman/pixman/pixman-image.c b/pixman/pixman/pixman-image.c index a3bb9b63a..afe587f31 100644 --- a/pixman/pixman/pixman-image.c +++ b/pixman/pixman/pixman-image.c @@ -49,56 +49,33 @@ _pixman_init_gradient (gradient_t * gradient, return TRUE; } -pixman_image_t * -_pixman_image_allocate (void) -{ - pixman_image_t *image = malloc (sizeof (pixman_image_t)); - - if (image) - { - image_common_t *common = &image->common; - - pixman_region32_init (&common->clip_region); - - common->alpha_count = 0; - common->have_clip_region = FALSE; - common->clip_sources = FALSE; - common->transform = NULL; - common->repeat = PIXMAN_REPEAT_NONE; - common->filter = PIXMAN_FILTER_NEAREST; - common->filter_params = NULL; - common->n_filter_params = 0; - common->alpha_map = NULL; - common->component_alpha = FALSE; - common->ref_count = 1; - common->property_changed = NULL; - common->client_clip = FALSE; - common->destroy_func = NULL; - common->destroy_data = NULL; - common->dirty = TRUE; - } - - return image; -} - -static void -image_property_changed (pixman_image_t *image) -{ - image->common.dirty = TRUE; -} - -/* Ref Counting */ -PIXMAN_EXPORT pixman_image_t * -pixman_image_ref (pixman_image_t *image) +void +_pixman_image_init (pixman_image_t *image) { - image->common.ref_count++; - - return image; + image_common_t *common = &image->common; + + pixman_region32_init (&common->clip_region); + + common->alpha_count = 0; + common->have_clip_region = FALSE; + common->clip_sources = FALSE; + common->transform = NULL; + common->repeat = PIXMAN_REPEAT_NONE; + common->filter = PIXMAN_FILTER_NEAREST; + common->filter_params = NULL; + common->n_filter_params = 0; + common->alpha_map = NULL; + common->component_alpha = FALSE; + common->ref_count = 1; + common->property_changed = NULL; + common->client_clip = FALSE; + common->destroy_func = NULL; + common->destroy_data = NULL; + common->dirty = TRUE; } -/* returns TRUE when the image is freed */ -PIXMAN_EXPORT pixman_bool_t -pixman_image_unref (pixman_image_t *image) +pixman_bool_t +_pixman_image_fini (pixman_image_t *image) { image_common_t *common = (image_common_t *)image; @@ -131,8 +108,45 @@ pixman_image_unref (pixman_image_t *image) if (image->type == BITS && image->bits.free_me) free (image->bits.free_me); - free (image); + return TRUE; + } + return FALSE; +} + +pixman_image_t * +_pixman_image_allocate (void) +{ + pixman_image_t *image = malloc (sizeof (pixman_image_t)); + + if (image) + _pixman_image_init (image); + + return image; +} + +static void +image_property_changed (pixman_image_t *image) +{ + image->common.dirty = TRUE; +} + +/* Ref Counting */ +PIXMAN_EXPORT pixman_image_t * +pixman_image_ref (pixman_image_t *image) +{ + image->common.ref_count++; + + return image; +} + +/* returns TRUE when the image is freed */ +PIXMAN_EXPORT pixman_bool_t +pixman_image_unref (pixman_image_t *image) +{ + if (_pixman_image_fini (image)) + { + free (image); return TRUE; } @@ -361,6 +375,7 @@ compute_image_info (pixman_image_t *image) else { code = image->bits.format; + flags |= FAST_PATH_BITS_IMAGE; } if (!PIXMAN_FORMAT_A (image->bits.format) && diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index 4d645fe4b..cbd48f3d1 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -250,6 +250,19 @@ _pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); void _pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); +void +_pixman_image_init (pixman_image_t *image); + +pixman_bool_t +_pixman_bits_image_init (pixman_image_t * image, + pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride); +pixman_bool_t +_pixman_image_fini (pixman_image_t *image); + pixman_image_t * _pixman_image_allocate (void); @@ -360,6 +373,10 @@ typedef struct int32_t dest_y; int32_t width; int32_t height; + + uint32_t src_flags; + uint32_t mask_flags; + uint32_t dest_flags; } pixman_composite_info_t; #define PIXMAN_COMPOSITE_ARGS(info) \ @@ -618,6 +635,7 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); #define FAST_PATH_ROTATE_270_TRANSFORM (1 << 22) #define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST (1 << 23) #define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24) +#define FAST_PATH_BITS_IMAGE (1 << 25) #define FAST_PATH_PAD_REPEAT \ (FAST_PATH_NO_NONE_REPEAT | \ @@ -712,6 +730,17 @@ pixman_contract (uint32_t * dst, const uint64_t *src, int width); +pixman_bool_t +_pixman_lookup_composite_function (pixman_implementation_t *toplevel, + pixman_op_t op, + pixman_format_code_t src_format, + uint32_t src_flags, + pixman_format_code_t mask_format, + uint32_t mask_flags, + pixman_format_code_t dest_format, + uint32_t dest_flags, + pixman_implementation_t **out_imp, + pixman_composite_func_t *out_func); /* Region Helpers */ pixman_bool_t diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index da6bcdfa0..a183f937d 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -5287,83 +5287,53 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, scaled_nearest_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) -static void -bilinear_interpolate_line_sse2 (uint32_t * out, - const uint32_t * top, - const uint32_t * bottom, - int wt, - int wb, - pixman_fixed_t x, - pixman_fixed_t ux, - int width) -{ - const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); - const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); - const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff); - const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); - const __m128i xmm_ux = _mm_set_epi16 (ux, ux, ux, ux, ux, ux, ux, ux); - const __m128i xmm_zero = _mm_setzero_si128 (); - __m128i xmm_x = _mm_set_epi16 (x, x, x, x, x, x, x, x); - uint32_t pix1, pix2, pix3, pix4; - - #define INTERPOLATE_ONE_PIXEL(pix) \ - do { \ - __m128i xmm_wh, xmm_lo, xmm_hi, a; \ - /* fetch 2x2 pixel block into sse2 register */ \ - uint32_t tl = top [pixman_fixed_to_int (x)]; \ - uint32_t tr = top [pixman_fixed_to_int (x) + 1]; \ - uint32_t bl = bottom [pixman_fixed_to_int (x)]; \ - uint32_t br = bottom [pixman_fixed_to_int (x) + 1]; \ - a = _mm_set_epi32 (tr, tl, br, bl); \ - x += ux; \ - /* vertical interpolation */ \ - a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \ - xmm_wt), \ - _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \ - xmm_wb)); \ - /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc, \ - _mm_xor_si128 (xmm_xorc, \ - _mm_srli_epi16 (xmm_x, 8))); \ - xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ - /* horizontal interpolation */ \ - xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ - xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \ - a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \ - _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \ - /* shift and pack the result */ \ - a = _mm_srli_epi32 (a, 16); \ - a = _mm_packs_epi32 (a, a); \ - a = _mm_packus_epi16 (a, a); \ - pix = _mm_cvtsi128_si32 (a); \ - } while (0) - - while ((width -= 4) >= 0) - { - INTERPOLATE_ONE_PIXEL (pix1); - INTERPOLATE_ONE_PIXEL (pix2); - INTERPOLATE_ONE_PIXEL (pix3); - INTERPOLATE_ONE_PIXEL (pix4); - *out++ = pix1; - *out++ = pix2; - *out++ = pix3; - *out++ = pix4; - } - if (width & 2) - { - INTERPOLATE_ONE_PIXEL (pix1); - INTERPOLATE_ONE_PIXEL (pix2); - *out++ = pix1; - *out++ = pix2; - } - if (width & 1) - { - INTERPOLATE_ONE_PIXEL (pix1); - *out = pix1; - } - - #undef INTERPOLATE_ONE_PIXEL -} +#define BILINEAR_DECLARE_VARIABLES \ + const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ + const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ + const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);\ + const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ + const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \ + unit_x, unit_x, unit_x, unit_x); \ + const __m128i xmm_zero = _mm_setzero_si128 (); \ + __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx) + +#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ +do { \ + __m128i xmm_wh, xmm_lo, xmm_hi, a; \ + /* fetch 2x2 pixel block into sse2 register */ \ + uint32_t tl = src_top [pixman_fixed_to_int (vx)]; \ + uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; \ + uint32_t bl = src_bottom [pixman_fixed_to_int (vx)]; \ + uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; \ + a = _mm_set_epi32 (tr, tl, br, bl); \ + vx += unit_x; \ + /* vertical interpolation */ \ + a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \ + xmm_wt), \ + _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \ + xmm_wb)); \ + /* calculate horizontal weights */ \ + xmm_wh = _mm_add_epi16 (xmm_addc, \ + _mm_xor_si128 (xmm_xorc, \ + _mm_srli_epi16 (xmm_x, 8))); \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ + /* horizontal interpolation */ \ + xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ + xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \ + a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \ + _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \ + /* shift and pack the result */ \ + a = _mm_srli_epi32 (a, 16); \ + a = _mm_packs_epi32 (a, a); \ + a = _mm_packus_epi16 (a, a); \ + pix = _mm_cvtsi128_si32 (a); \ +} while (0) + +#define BILINEAR_SKIP_ONE_PIXEL() \ +do { \ + vx += unit_x; \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ +} while(0) static force_inline void scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, @@ -5378,8 +5348,35 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, pixman_fixed_t max_vx, pixman_bool_t zero_src) { - bilinear_interpolate_line_sse2 (dst, src_top, src_bottom, - wt, wb, vx, unit_x, w); + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2, pix3, pix4; + + while ((w -= 4) >= 0) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + *dst++ = pix1; + *dst++ = pix2; + *dst++ = pix3; + *dst++ = pix4; + } + + if (w & 2) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + *dst++ = pix1; + *dst++ = pix2; + } + + if (w & 1) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + *dst = pix1; + } + } FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC, @@ -5399,6 +5396,269 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC, uint32_t, uint32_t, uint32_t, NORMAL, FLAG_NONE) +static force_inline void +scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2, pix3, pix4; + + while (w && ((unsigned long)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (pix1) + { + pix2 = *dst; + *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); + } + + w--; + dst++; + } + + while (w >= 4) + { + __m128i xmm_src; + __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo; + __m128i xmm_alpha_hi, xmm_alpha_lo; + + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + + xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + + if (!is_zero (xmm_src)) + { + if (is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + __m128i xmm_dst = load_128_aligned ((__m128i *)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); + over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + } + + w -= 4; + dst += 4; + } + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (pix1) + { + pix2 = *dst; + *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); + } + + w--; + dst++; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, + const uint8_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2, pix3, pix4; + uint32_t m; + + while (w && ((unsigned long)dst & 15)) + { + uint32_t sa; + + m = (uint32_t) *mask++; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + sa = pix1 >> 24; + + if (sa == 0xff && m == 0xff) + { + *dst = pix1; + } + else + { + __m128i ms, md, ma, msa; + + pix2 = *dst; + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (pix1); + md = unpack_32_1x128 (pix2); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + } + + w--; + dst++; + } + + while (w >= 4) + { + __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + m = *(uint32_t*)mask; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + + xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + + if (m == 0xffffffff && is_opaque (xmm_src)) + { + save_128_aligned ((__m128i *)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i *)dst); + + xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, + &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + BILINEAR_SKIP_ONE_PIXEL (); + BILINEAR_SKIP_ONE_PIXEL (); + BILINEAR_SKIP_ONE_PIXEL (); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint32_t sa; + + m = (uint32_t) *mask++; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + sa = pix1 >> 24; + + if (sa == 0xff && m == 0xff) + { + *dst = pix1; + } + else + { + __m128i ms, md, ma, msa; + + pix2 = *dst; + ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); + ms = unpack_32_1x128 (pix1); + md = unpack_32_1x128 (pix2); + + msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); + + *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + } + + w--; + dst++; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + COVER, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + PAD, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NONE, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NORMAL, FLAG_HAVE_NON_SOLID_MASK) static const pixman_fast_path_t sse2_fast_paths[] = { @@ -5505,6 +5765,16 @@ static const pixman_fast_path_t sse2_fast_paths[] = SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888), + { PIXMAN_OP_NONE }, }; diff --git a/pixman/pixman/pixman-utils.c b/pixman/pixman/pixman-utils.c index 768ca1b89..d2af51a30 100644 --- a/pixman/pixman/pixman-utils.c +++ b/pixman/pixman/pixman-utils.c @@ -30,6 +30,118 @@ #include "pixman-private.h" +#define N_CACHED_FAST_PATHS 8 + +typedef struct +{ + struct + { + pixman_implementation_t * imp; + pixman_fast_path_t fast_path; + } cache [N_CACHED_FAST_PATHS]; +} cache_t; + +PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); + +pixman_bool_t +_pixman_lookup_composite_function (pixman_implementation_t *toplevel, + pixman_op_t op, + pixman_format_code_t src_format, + uint32_t src_flags, + pixman_format_code_t mask_format, + uint32_t mask_flags, + pixman_format_code_t dest_format, + uint32_t dest_flags, + pixman_implementation_t **out_imp, + pixman_composite_func_t *out_func) +{ + pixman_implementation_t *imp; + cache_t *cache; + int i; + + /* Check cache for fast paths */ + cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache); + + for (i = 0; i < N_CACHED_FAST_PATHS; ++i) + { + const pixman_fast_path_t *info = &(cache->cache[i].fast_path); + + /* Note that we check for equality here, not whether + * the cached fast path matches. This is to prevent + * us from selecting an overly general fast path + * when a more specific one would work. + */ + if (info->op == op && + info->src_format == src_format && + info->mask_format == mask_format && + info->dest_format == dest_format && + info->src_flags == src_flags && + info->mask_flags == mask_flags && + info->dest_flags == dest_flags && + info->func) + { + *out_imp = cache->cache[i].imp; + *out_func = cache->cache[i].fast_path.func; + + goto update_cache; + } + } + + for (imp = toplevel; imp != NULL; imp = imp->delegate) + { + const pixman_fast_path_t *info = imp->fast_paths; + + while (info->op != PIXMAN_OP_NONE) + { + if ((info->op == op || info->op == PIXMAN_OP_any) && + /* Formats */ + ((info->src_format == src_format) || + (info->src_format == PIXMAN_any)) && + ((info->mask_format == mask_format) || + (info->mask_format == PIXMAN_any)) && + ((info->dest_format == dest_format) || + (info->dest_format == PIXMAN_any)) && + /* Flags */ + (info->src_flags & src_flags) == info->src_flags && + (info->mask_flags & mask_flags) == info->mask_flags && + (info->dest_flags & dest_flags) == info->dest_flags) + { + *out_imp = imp; + *out_func = info->func; + + /* Set i to the last spot in the cache so that the + * move-to-front code below will work + */ + i = N_CACHED_FAST_PATHS - 1; + + goto update_cache; + } + + ++info; + } + } + return FALSE; + +update_cache: + if (i) + { + while (i--) + cache->cache[i + 1] = cache->cache[i]; + + cache->cache[0].imp = *out_imp; + cache->cache[0].fast_path.op = op; + cache->cache[0].fast_path.src_format = src_format; + cache->cache[0].fast_path.src_flags = src_flags; + cache->cache[0].fast_path.mask_format = mask_format; + cache->cache[0].fast_path.mask_flags = mask_flags; + cache->cache[0].fast_path.dest_format = dest_format; + cache->cache[0].fast_path.dest_flags = dest_flags; + cache->cache[0].fast_path.func = *out_func; + } + + return TRUE; +} + pixman_bool_t _pixman_multiply_overflows_size (size_t a, size_t b) { diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c index 87f5a933a..8fb53568f 100644 --- a/pixman/pixman/pixman.c +++ b/pixman/pixman/pixman.c @@ -335,117 +335,6 @@ pixman_compute_composite_region32 (pixman_region32_t * region, return TRUE; } -#define N_CACHED_FAST_PATHS 8 - -typedef struct -{ - struct - { - pixman_implementation_t * imp; - pixman_fast_path_t fast_path; - } cache [N_CACHED_FAST_PATHS]; -} cache_t; - -PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); - -static force_inline pixman_bool_t -lookup_composite_function (pixman_op_t op, - pixman_format_code_t src_format, - uint32_t src_flags, - pixman_format_code_t mask_format, - uint32_t mask_flags, - pixman_format_code_t dest_format, - uint32_t dest_flags, - pixman_implementation_t **out_imp, - pixman_composite_func_t *out_func) -{ - pixman_implementation_t *imp; - cache_t *cache; - int i; - - /* Check cache for fast paths */ - cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache); - - for (i = 0; i < N_CACHED_FAST_PATHS; ++i) - { - const pixman_fast_path_t *info = &(cache->cache[i].fast_path); - - /* Note that we check for equality here, not whether - * the cached fast path matches. This is to prevent - * us from selecting an overly general fast path - * when a more specific one would work. - */ - if (info->op == op && - info->src_format == src_format && - info->mask_format == mask_format && - info->dest_format == dest_format && - info->src_flags == src_flags && - info->mask_flags == mask_flags && - info->dest_flags == dest_flags && - info->func) - { - *out_imp = cache->cache[i].imp; - *out_func = cache->cache[i].fast_path.func; - - goto update_cache; - } - } - - for (imp = get_implementation (); imp != NULL; imp = imp->delegate) - { - const pixman_fast_path_t *info = imp->fast_paths; - - while (info->op != PIXMAN_OP_NONE) - { - if ((info->op == op || info->op == PIXMAN_OP_any) && - /* Formats */ - ((info->src_format == src_format) || - (info->src_format == PIXMAN_any)) && - ((info->mask_format == mask_format) || - (info->mask_format == PIXMAN_any)) && - ((info->dest_format == dest_format) || - (info->dest_format == PIXMAN_any)) && - /* Flags */ - (info->src_flags & src_flags) == info->src_flags && - (info->mask_flags & mask_flags) == info->mask_flags && - (info->dest_flags & dest_flags) == info->dest_flags) - { - *out_imp = imp; - *out_func = info->func; - - /* Set i to the last spot in the cache so that the - * move-to-front code below will work - */ - i = N_CACHED_FAST_PATHS - 1; - - goto update_cache; - } - - ++info; - } - } - return FALSE; - -update_cache: - if (i) - { - while (i--) - cache->cache[i + 1] = cache->cache[i]; - - cache->cache[0].imp = *out_imp; - cache->cache[0].fast_path.op = op; - cache->cache[0].fast_path.src_format = src_format; - cache->cache[0].fast_path.src_flags = src_flags; - cache->cache[0].fast_path.mask_format = mask_format; - cache->cache[0].fast_path.mask_flags = mask_flags; - cache->cache[0].fast_path.dest_format = dest_format; - cache->cache[0].fast_path.dest_flags = dest_flags; - cache->cache[0].fast_path.func = *out_func; - } - - return TRUE; -} - typedef struct { pixman_fixed_48_16_t x1; @@ -790,11 +679,10 @@ pixman_image_composite32 (pixman_op_t op, */ op = optimize_operator (op, src_flags, mask_flags, dest_flags); - if (lookup_composite_function (op, - src_format, src_flags, - mask_format, mask_flags, - dest_format, dest_flags, - &imp, &func)) + if (_pixman_lookup_composite_function ( + get_implementation (), op, + src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags, + &imp, &func)) { pixman_composite_info_t info; const pixman_box32_t *pbox; @@ -804,6 +692,9 @@ pixman_image_composite32 (pixman_op_t op, info.src_image = src; info.mask_image = mask; info.dest_image = dest; + info.src_flags = src_flags; + info.mask_flags = mask_flags; + info.dest_flags = dest_flags; pbox = pixman_region32_rectangles (®ion, &n); |