diff options
Diffstat (limited to 'pixman/pixman/pixman-fast-path.c')
-rw-r--r-- | pixman/pixman/pixman-fast-path.c | 446 |
1 files changed, 297 insertions, 149 deletions
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index c625e0c4a..247aea645 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -739,36 +739,6 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp, } static void -fast_composite_src_x888_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - *dst = convert_8888_to_0565 (s); - dst++; - } - } -} - -static void fast_composite_add_8_8 (pixman_implementation_t *imp, pixman_composite_info_t *info) { @@ -1243,6 +1213,18 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, pixman_composite_func_t func; pixman_format_code_t mask_format; uint32_t src_flags, mask_flags; + int32_t sx, sy; + int32_t width_remain; + int32_t num_pixels; + int32_t src_width; + int32_t i, j; + pixman_image_t extended_src_image; + uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; + pixman_bool_t need_src_extension; + uint32_t *src_line; + int32_t src_stride; + int32_t src_bpp; + pixman_composite_info_t info2 = *info; src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; @@ -1258,149 +1240,131 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, mask_flags = FAST_PATH_IS_OPAQUE; } - if (_pixman_implementation_lookup_composite ( - imp->toplevel, info->op, - src_image->common.extended_format_code, src_flags, - mask_format, mask_flags, - dest_image->common.extended_format_code, info->dest_flags, - &imp, &func)) + _pixman_implementation_lookup_composite ( + imp->toplevel, info->op, + src_image->common.extended_format_code, src_flags, + mask_format, mask_flags, + dest_image->common.extended_format_code, info->dest_flags, + &imp, &func); + + src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); + + if (src_image->bits.width < REPEAT_MIN_WIDTH && + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && + !src_image->bits.indexed) { - int32_t sx, sy; - int32_t width_remain; - int32_t num_pixels; - int32_t src_width; - int32_t i, j; - pixman_image_t extended_src_image; - uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; - pixman_bool_t need_src_extension; - uint32_t *src_line; - int32_t src_stride; - int32_t src_bpp; - pixman_composite_info_t info2 = *info; - - src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); - - if (src_image->bits.width < REPEAT_MIN_WIDTH && - (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && - !src_image->bits.indexed) - { - sx = src_x; - sx = MOD (sx, src_image->bits.width); - sx += width; - src_width = 0; + sx = src_x; + sx = MOD (sx, src_image->bits.width); + sx += width; + src_width = 0; - while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) - src_width += src_image->bits.width; + while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) + src_width += src_image->bits.width; - src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); + src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); - /* Initialize/validate stack-allocated temporary image */ - _pixman_bits_image_init (&extended_src_image, src_image->bits.format, - src_width, 1, &extended_src[0], src_stride, - FALSE); - _pixman_image_validate (&extended_src_image); + /* Initialize/validate stack-allocated temporary image */ + _pixman_bits_image_init (&extended_src_image, src_image->bits.format, + src_width, 1, &extended_src[0], src_stride, + FALSE); + _pixman_image_validate (&extended_src_image); - info2.src_image = &extended_src_image; - need_src_extension = TRUE; - } - else - { - src_width = src_image->bits.width; - need_src_extension = FALSE; - } + info2.src_image = &extended_src_image; + need_src_extension = TRUE; + } + else + { + src_width = src_image->bits.width; + need_src_extension = FALSE; + } - sx = src_x; - sy = src_y; + sx = src_x; + sy = src_y; - while (--height >= 0) - { - sx = MOD (sx, src_width); - sy = MOD (sy, src_image->bits.height); + while (--height >= 0) + { + sx = MOD (sx, src_width); + sy = MOD (sy, src_image->bits.height); - if (need_src_extension) + if (need_src_extension) + { + if (src_bpp == 32) { - if (src_bpp == 32) - { - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - extended_src[i] = src_line[j]; - } - } - else if (src_bpp == 16) + for (i = 0; i < src_width; ) { - uint16_t *src_line_16; - - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, - src_line_16, 1); - src_line = (uint32_t*)src_line_16; - - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; - } + for (j = 0; j < src_image->bits.width; j++, i++) + extended_src[i] = src_line[j]; } - else if (src_bpp == 8) - { - uint8_t *src_line_8; + } + else if (src_bpp == 16) + { + uint16_t *src_line_16; - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, - src_line_8, 1); - src_line = (uint32_t*)src_line_8; + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, + src_line_16, 1); + src_line = (uint32_t*)src_line_16; - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; - } + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; } - - info2.src_y = 0; } - else + else if (src_bpp == 8) { - info2.src_y = sy; + uint8_t *src_line_8; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, + src_line_8, 1); + src_line = (uint32_t*)src_line_8; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; + } } - width_remain = width; + info2.src_y = 0; + } + else + { + info2.src_y = sy; + } - while (width_remain > 0) - { - num_pixels = src_width - sx; + width_remain = width; - if (num_pixels > width_remain) - num_pixels = width_remain; + while (width_remain > 0) + { + num_pixels = src_width - sx; - info2.src_x = sx; - info2.width = num_pixels; - info2.height = 1; + if (num_pixels > width_remain) + num_pixels = width_remain; - func (imp, &info2); + info2.src_x = sx; + info2.width = num_pixels; + info2.height = 1; - width_remain -= num_pixels; - info2.mask_x += num_pixels; - info2.dest_x += num_pixels; - sx = 0; - } + func (imp, &info2); - sx = src_x; - sy++; - info2.mask_x = info->mask_x; - info2.mask_y++; - info2.dest_x = info->dest_x; - info2.dest_y++; + width_remain -= num_pixels; + info2.mask_x += num_pixels; + info2.dest_x += num_pixels; + sx = 0; } - if (need_src_extension) - _pixman_image_fini (&extended_src_image); - } - else - { - _pixman_log_error (FUNC, "Didn't find a suitable function "); + sx = src_x; + sy++; + info2.mask_x = info->mask_x; + info2.mask_y++; + info2.dest_x = info->dest_x; + info2.dest_y++; } + + if (need_src_extension) + _pixman_image_fini (&extended_src_image); } /* Use more unrolling for src_0565_0565 because it is typically CPU bound */ @@ -1913,10 +1877,6 @@ static const pixman_fast_path_t c_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), @@ -2199,12 +2159,200 @@ fast_path_fill (pixman_implementation_t *imp, return TRUE; } +/*****************************************************************************/ + +static uint32_t * +fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ + int32_t w = iter->width; + uint32_t *dst = iter->buffer; + const uint16_t *src = (const uint16_t *)iter->bits; + + iter->bits += iter->stride; + + /* Align the source buffer at 4 bytes boundary */ + if (w > 0 && ((uintptr_t)src & 3)) + { + *dst++ = convert_0565_to_8888 (*src++); + w--; + } + /* Process two pixels per iteration */ + while ((w -= 2) >= 0) + { + uint32_t sr, sb, sg, t0, t1; + uint32_t s = *(const uint32_t *)src; + src += 2; + sr = (s >> 8) & 0x00F800F8; + sb = (s << 3) & 0x00F800F8; + sg = (s >> 3) & 0x00FC00FC; + sr |= sr >> 5; + sb |= sb >> 5; + sg |= sg >> 6; + t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) | + (sb & 0xFF) | 0xFF000000; + t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) | + (sb >> 16) | 0xFF000000; +#ifdef WORDS_BIGENDIAN + *dst++ = t1; + *dst++ = t0; +#else + *dst++ = t0; + *dst++ = t1; +#endif + } + if (w & 1) + { + *dst = convert_0565_to_8888 (*src); + } + + return iter->buffer; +} + +static uint32_t * +fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) +{ + iter->bits += iter->stride; + return iter->buffer; +} + +/* Helper function for a workaround, which tries to ensure that 0x1F001F + * constant is always allocated in a register on RISC architectures. + */ +static force_inline uint32_t +convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F) +{ + uint32_t a, b; + a = (s >> 3) & x1F001F; + b = s & 0xFC00; + a |= a >> 5; + a |= b >> 5; + return a; +} + +static void +fast_write_back_r5g6b5 (pixman_iter_t *iter) +{ + int32_t w = iter->width; + uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); + const uint32_t *src = iter->buffer; + /* Workaround to ensure that x1F001F variable is allocated in a register */ + static volatile uint32_t volatile_x1F001F = 0x1F001F; + uint32_t x1F001F = volatile_x1F001F; + + while ((w -= 4) >= 0) + { + uint32_t s1 = *src++; + uint32_t s2 = *src++; + uint32_t s3 = *src++; + uint32_t s4 = *src++; + *dst++ = convert_8888_to_0565_workaround (s1, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s2, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s3, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s4, x1F001F); + } + if (w & 2) + { + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + } + if (w & 1) + { + *dst = convert_8888_to_0565_workaround (*src, x1F001F); + } +} + +typedef struct +{ + pixman_format_code_t format; + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; +} fetcher_info_t; + +static const fetcher_info_t fetchers[] = +{ + { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + { PIXMAN_null } +}; + +static pixman_bool_t +fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + +#define FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FLAGS) == FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + iter->get_scanline = f->get_scanline; + return TRUE; + } + } + } + + return FALSE; +} + +static pixman_bool_t +fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) + { + iter->get_scanline = fast_dest_fetch_noop; + } + else + { + iter->get_scanline = f->get_scanline; + } + iter->write_back = f->write_back; + return TRUE; + } + } + } + return FALSE; +} + + pixman_implementation_t * _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) { pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); imp->fill = fast_path_fill; + imp->src_iter_init = fast_src_iter_init; + imp->dest_iter_init = fast_dest_iter_init; return imp; } |