From 150771e7aabf4c864b0b970c5b8d773634793abe Mon Sep 17 00:00:00 2001 From: marha Date: Tue, 4 Jun 2013 09:07:26 +0200 Subject: xwininfo fontconfig libX11 libXau libXdmcp libXext mesa libXinerama libxcb libxcb/xcb-proto libfontenc pixman xkbcomp mkfontscale xkeyboard-config git update 4 Jun 2013 xserver commit c21344add2fc589df83b29be5831c36a372201bd libxcb commit 9ae84ad187e2ba440c40f44b8eb21c82c2fdbf12 libxcb/xcb-proto commit bdfedfa57a13ff805580cfacafc70f9cc55df363 xkeyboard-config commit dad9ade4e83d1ef5a517fcc4cc9ad3a79b47acce libX11 commit 8496122eb00ce6cd5d2308ee54f64b68c378e455 libXdmcp commit 0b443c1b769b9c9a3b45b4252afe07e18b709ff4 libXext commit d8366afbb0d2e4fbb1e419b1187f490522270bea libfontenc commit 3acba630d8b57084f7e92c15732408711ed5137a libXinerama commit 6e1d1dc328ba8162bba2f4694e7f3c706a1491ff libXau commit 899790011304c4029e15abf410e49ce7cec17e0a xkbcomp commit ed582f4fccd4e23abcfba8b3b03649fea6414f44 pixman commit 2acfac5f8e097ee2ae225d986f981b55d65dd152 mkfontscale commit 19e2cb7c6a3ec2c5b1bc0d24866fa685eef0ee13 xwininfo commit ba0d1b0da21d2dbdd81098ed5778f3792b472e13 fontconfig commit cd9b1033a68816a7acfbba1718ba0aa5888f6ec7 mesa commit 7bafd88c153e395274b632e7eae4bc9fc3aec1d2 --- pixman/configure.ac | 13 +- pixman/pixman/pixman-arm-neon-asm.h | 2 +- pixman/pixman/pixman-fast-path.c | 99 +---- pixman/pixman/pixman-filter.c | 2 + pixman/pixman/pixman-general.c | 92 +++-- pixman/pixman/pixman-image.c | 10 +- pixman/pixman/pixman-implementation.c | 89 +++-- pixman/pixman/pixman-mips-dspr2-asm.S | 723 ++++++++++++++++++++++++---------- pixman/pixman/pixman-mips-dspr2-asm.h | 51 ++- pixman/pixman/pixman-mips-dspr2.c | 31 ++ pixman/pixman/pixman-mips-dspr2.h | 42 ++ pixman/pixman/pixman-mmx.c | 82 ++-- pixman/pixman/pixman-noop.c | 183 ++++----- pixman/pixman/pixman-private.h | 70 ++-- pixman/pixman/pixman-region.c | 6 +- pixman/pixman/pixman-sse2.c | 93 ++--- pixman/pixman/pixman-utils.c | 11 + pixman/pixman/pixman-vmx.c | 2 + 
pixman/pixman/pixman.c | 2 +- pixman/pixman/refactor | 478 ---------------------- pixman/test/blitters-test.c | 13 +- pixman/test/lowlevel-blt-bench.c | 13 +- pixman/test/prng-test.c | 5 +- pixman/test/utils-prng.c | 58 ++- pixman/test/utils-prng.h | 5 +- 25 files changed, 1063 insertions(+), 1112 deletions(-) delete mode 100644 pixman/pixman/refactor (limited to 'pixman') diff --git a/pixman/configure.ac b/pixman/configure.ac index 38f89b31e..221179ff1 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -53,8 +53,8 @@ AC_PREREQ([2.57]) # m4_define([pixman_major], 0) -m4_define([pixman_minor], 29) -m4_define([pixman_micro], 3) +m4_define([pixman_minor], 31) +m4_define([pixman_micro], 1) m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) @@ -279,7 +279,7 @@ AC_MSG_CHECKING(whether to use Loongson MMI assembler) xserver_save_CFLAGS=$CFLAGS CFLAGS=" $LS_CFLAGS $CFLAGS -I$srcdir" -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +AC_LINK_IFELSE([AC_LANG_SOURCE([[ #ifndef __mips_loongson_vector_rev #error "Loongson Multimedia Instructions are only available on Loongson" #endif @@ -844,6 +844,13 @@ if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then AC_DEFINE(HAVE_GETTIMEOFDAY, 1, [Whether we have gettimeofday()]) fi +dnl ===================================== +dnl Check for missing sqrtf() as, e.g., for Solaris 9 + +AC_SEARCH_LIBS([sqrtf], [m], [], + [AC_DEFINE([sqrtf], [sqrt], + [Define to sqrt if you do not have the `sqrtf' function.])]) + dnl ===================================== dnl Thread local storage diff --git a/pixman/pixman/pixman-arm-neon-asm.h b/pixman/pixman/pixman-arm-neon-asm.h index 1673b080f..d0d92d74c 100644 --- a/pixman/pixman/pixman-arm-neon-asm.h +++ b/pixman/pixman/pixman-arm-neon-asm.h @@ -385,7 +385,7 @@ * execute simultaneously with NEON and be completely shadowed by it. Thus * we get no performance overhead at all (*). This looks like a very nice * feature of Cortex-A8, if used wisely. 
We don't have a hardware prefetcher, - * but still can implement some rather advanced prefetch logic in sofware + * but still can implement some rather advanced prefetch logic in software * for almost zero cost! * * (*) The overhead of the prefetcher is visible when running some trivial diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index 247aea645..3982dce8b 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -2261,89 +2261,27 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter) } } -typedef struct -{ - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; - pixman_iter_write_back_t write_back; -} fetcher_info_t; - -static const fetcher_info_t fetchers[] = -{ - { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, - { PIXMAN_null } -}; - -static pixman_bool_t -fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) -{ - pixman_image_t *image = iter->image; - -#define FLAGS \ +#define IMAGE_FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - iter->get_scanline = f->get_scanline; - return TRUE; - } - } - } - - return FALSE; -} - -static pixman_bool_t -fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static const pixman_iter_info_t fast_iters[] = { - pixman_image_t *image = iter->image; - - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) - { - const fetcher_info_t *f; - - for (f = 
&fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == - (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) - { - iter->get_scanline = fast_dest_fetch_noop; - } - else - { - iter->get_scanline = f->get_scanline; - } - iter->write_back = f->write_back; - return TRUE; - } - } - } - return FALSE; -} - + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC, + _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL }, + + { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST, + _pixman_iter_init_bits_stride, + fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + + { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA, + _pixman_iter_init_bits_stride, + fast_dest_fetch_noop, fast_write_back_r5g6b5 }, + + { PIXMAN_null }, +}; pixman_implementation_t * _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) @@ -2351,8 +2289,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); imp->fill = fast_path_fill; - imp->src_iter_init = fast_src_iter_init; - imp->dest_iter_init = fast_dest_iter_init; + imp->iter_info = fast_iters; return imp; } diff --git a/pixman/pixman/pixman-filter.c b/pixman/pixman/pixman-filter.c index 26b39d571..5ff7b6eaa 100644 --- a/pixman/pixman/pixman-filter.c +++ b/pixman/pixman/pixman-filter.c @@ -28,7 +28,9 @@ #include #include #include +#ifdef HAVE_CONFIG_H #include +#endif #include "pixman-private.h" typedef double (* kernel_func_t) (double x); diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c index 93a1b9acf..4da5da5e2 100644 --- 
a/pixman/pixman/pixman-general.c +++ b/pixman/pixman/pixman-general.c @@ -37,43 +37,47 @@ #include #include "pixman-private.h" -static pixman_bool_t -general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +general_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; - if (image->type == LINEAR) - _pixman_linear_gradient_iter_init (image, iter); - else if (image->type == RADIAL) + switch (image->type) + { + case BITS: + if ((iter->iter_flags & ITER_SRC) == ITER_SRC) + _pixman_bits_image_src_iter_init (image, iter); + else + _pixman_bits_image_dest_iter_init (image, iter); + break; + + case LINEAR: + _pixman_linear_gradient_iter_init (image, iter); + break; + + case RADIAL: _pixman_radial_gradient_iter_init (image, iter); - else if (image->type == CONICAL) + break; + + case CONICAL: _pixman_conical_gradient_iter_init (image, iter); - else if (image->type == BITS) - _pixman_bits_image_src_iter_init (image, iter); - else if (image->type == SOLID) + break; + + case SOLID: _pixman_log_error (FUNC, "Solid image not handled by noop"); - else - _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); + break; - return TRUE; + default: + _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); + break; + } } -static pixman_bool_t -general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static const pixman_iter_info_t general_iters[] = { - if (iter->image->type == BITS) - { - _pixman_bits_image_dest_iter_init (iter->image, iter); - - return TRUE; - } - else - { - _pixman_log_error (FUNC, "Trying to write to a non-writable image"); - - return FALSE; - } -} + { PIXMAN_any, 0, 0, general_iter_init, NULL, NULL }, + { PIXMAN_null }, +}; typedef struct op_info_t op_info_t; struct op_info_t @@ -116,7 +120,7 @@ general_composite_rect (pixman_implementation_t *imp, pixman_iter_t src_iter, mask_iter, dest_iter; pixman_combine_32_func_t compose; pixman_bool_t component_alpha; 
- iter_flags_t narrow, src_iter_flags; + iter_flags_t width_flag, src_iter_flags; int Bpp; int i; @@ -124,12 +128,12 @@ general_composite_rect (pixman_implementation_t *imp, (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) && (dest_image->common.flags & FAST_PATH_NARROW_FORMAT)) { - narrow = ITER_NARROW; + width_flag = ITER_NARROW; Bpp = 4; } else { - narrow = 0; + width_flag = ITER_WIDE; Bpp = 16; } @@ -145,7 +149,7 @@ general_composite_rect (pixman_implementation_t *imp, mask_buffer = src_buffer + width * Bpp; dest_buffer = mask_buffer + width * Bpp; - if (!narrow) + if (width_flag == ITER_WIDE) { /* To make sure there aren't any NANs in the buffers */ memset (src_buffer, 0, width * Bpp); @@ -154,11 +158,12 @@ general_composite_rect (pixman_implementation_t *imp, } /* src iter */ - src_iter_flags = narrow | op_flags[op].src; + src_iter_flags = width_flag | op_flags[op].src | ITER_SRC; - _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image, - src_x, src_y, width, height, - src_buffer, src_iter_flags, info->src_flags); + _pixman_implementation_iter_init (imp->toplevel, &src_iter, src_image, + src_x, src_y, width, height, + src_buffer, src_iter_flags, + info->src_flags); /* mask iter */ if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == @@ -176,17 +181,19 @@ general_composite_rect (pixman_implementation_t *imp, mask_image->common.component_alpha && PIXMAN_FORMAT_RGB (mask_image->bits.format); - _pixman_implementation_src_iter_init ( - imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height, - mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB), info->mask_flags); + _pixman_implementation_iter_init ( + imp->toplevel, &mask_iter, + mask_image, mask_x, mask_y, width, height, mask_buffer, + ITER_SRC | width_flag | (component_alpha? 
0 : ITER_IGNORE_RGB), + info->mask_flags); /* dest iter */ - _pixman_implementation_dest_iter_init ( + _pixman_implementation_iter_init ( imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height, - dest_buffer, narrow | op_flags[op].dst, info->dest_flags); + dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info->dest_flags); compose = _pixman_implementation_lookup_combiner ( - imp->toplevel, op, component_alpha, narrow); + imp->toplevel, op, component_alpha, width_flag != ITER_WIDE); for (i = 0; i < height; ++i) { @@ -219,8 +226,7 @@ _pixman_implementation_create_general (void) _pixman_setup_combiner_functions_32 (imp); _pixman_setup_combiner_functions_float (imp); - imp->src_iter_init = general_src_iter_init; - imp->dest_iter_init = general_dest_iter_init; + imp->iter_info = general_iters; return imp; } diff --git a/pixman/pixman/pixman-image.c b/pixman/pixman/pixman-image.c index 65041b43b..4f9c2f966 100644 --- a/pixman/pixman/pixman-image.c +++ b/pixman/pixman/pixman-image.c @@ -502,8 +502,10 @@ compute_image_info (pixman_image_t *image) break; } - /* Alpha map */ - if (!image->common.alpha_map) + /* Alpha maps are only supported for BITS images, so it's always + * safe to ignore their presense for non-BITS images + */ + if (!image->common.alpha_map || image->type != BITS) { flags |= FAST_PATH_NO_ALPHA_MAP; } @@ -918,10 +920,10 @@ _pixman_image_get_solid (pixman_implementation_t *imp, pixman_iter_t iter; otherwise: - _pixman_implementation_src_iter_init ( + _pixman_implementation_iter_init ( imp, &iter, image, 0, 0, 1, 1, (uint8_t *)&result, - ITER_NARROW, image->common.flags); + ITER_NARROW | ITER_SRC, image->common.flags); result = *iter.get_scanline (&iter, NULL); } diff --git a/pixman/pixman/pixman-implementation.c b/pixman/pixman/pixman-implementation.c index cfb82bb1f..160847ad0 100644 --- a/pixman/pixman/pixman-implementation.c +++ b/pixman/pixman/pixman-implementation.c @@ -285,18 +285,26 @@ _pixman_implementation_fill 
(pixman_implementation_t *imp, return FALSE; } -pixman_bool_t -_pixman_implementation_src_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t iter_flags, - uint32_t image_flags) +static uint32_t * +get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) { + return NULL; +} + +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t iter_flags, + uint32_t image_flags) +{ + pixman_format_code_t format; + iter->image = image; iter->buffer = (uint32_t *)buffer; iter->x = x; @@ -306,47 +314,38 @@ _pixman_implementation_src_iter_init (pixman_implementation_t *imp, iter->iter_flags = iter_flags; iter->image_flags = image_flags; - while (imp) + if (!iter->image) { - if (imp->src_iter_init && (*imp->src_iter_init) (imp, iter)) - return TRUE; - - imp = imp->fallback; + iter->get_scanline = get_scanline_null; + return; } - return FALSE; -} - -pixman_bool_t -_pixman_implementation_dest_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t iter_flags, - uint32_t image_flags) -{ - iter->image = image; - iter->buffer = (uint32_t *)buffer; - iter->x = x; - iter->y = y; - iter->width = width; - iter->height = height; - iter->iter_flags = iter_flags; - iter->image_flags = image_flags; + format = iter->image->common.extended_format_code; while (imp) { - if (imp->dest_iter_init && (*imp->dest_iter_init) (imp, iter)) - return TRUE; - - imp = imp->fallback; + if (imp->iter_info) + { + const pixman_iter_info_t *info; + + for (info = imp->iter_info; info->format != PIXMAN_null; ++info) + { + if ((info->format == PIXMAN_any || info->format == format) && + (info->image_flags & image_flags) == info->image_flags && + 
(info->iter_flags & iter_flags) == info->iter_flags) + { + iter->get_scanline = info->get_scanline; + iter->write_back = info->write_back; + + if (info->initializer) + info->initializer (iter, info); + return; + } + } + } + + imp = imp->fallback; } - - return FALSE; } pixman_bool_t diff --git a/pixman/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman/pixman-mips-dspr2-asm.S index 3adbb2afe..866e93e58 100644 --- a/pixman/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman/pixman-mips-dspr2-asm.S @@ -699,6 +699,127 @@ LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips) END(pixman_composite_src_0888_0565_rev_asm_mips) #endif +LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips) +/* + * a0 - dst (a8b8g8r8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + srl t2, t0, 24 + srl t3, t1, 24 + + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 + + sll t0, t0, 8 + sll t1, t1, 8 + andi t2, t2, 0xff + andi t3, t3, 0xff + or t0, t0, t2 + or t1, t1, t3 + wsbh t0, t0 + wsbh t1, t1 + rotr t0, t0, 16 + rotr t1, t1, 16 + sw t0, 0(a0) + sw t1, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + srl t1, t0, 24 + + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 + + sll t0, t0, 8 + andi t1, t1, 0xff + or t0, t0, t1 + wsbh t0, t0 + rotr t0, t0, 16 + sw t0, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_pixbuf_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + srl t2, t0, 24 + srl t3, t1, 24 + + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, 
t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 + + sll t0, t0, 8 + sll t1, t1, 8 + andi t2, t2, 0xff + andi t3, t3, 0xff + or t0, t0, t2 + or t1, t1, t3 + rotr t0, t0, 8 + rotr t1, t1, 8 + sw t0, 0(a0) + sw t1, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + srl t1, t0, 24 + + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 + + sll t0, t0, 8 + andi t1, t1, 0xff + or t0, t0, t1 + rotr t0, t0, 8 + sw t0, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_rpixbuf_8888_asm_mips) + LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips) /* * a0 - dst (a8r8g8b8) @@ -840,34 +961,35 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) * a3 - w */ - SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 - beqz a3, 4f + beqz a3, 8f nop + SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 + li t6, 0xff addiu t7, zero, -1 /* t7 = 0xffffffff */ srl t8, a1, 24 /* t8 = srca */ li t9, 0x00ff00ff + addiu t1, a3, -1 - beqz t1, 3f /* last pixel */ + beqz t1, 4f /* last pixel */ nop - beq t8, t6, 2f /* if (srca == 0xff) */ - nop -1: - /* a1 = src */ + +0: lw t0, 0(a2) /* t0 = mask */ lw t1, 4(a2) /* t1 = mask */ + addiu a3, a3, -2 /* w = w - 2 */ or t2, t0, t1 - beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */ + beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ addiu a2, a2, 8 - and t3, t0, t1 - move t4, a1 /* t4 = src */ - move t5, a1 /* t5 = src */ + and t2, t0, t1 + beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ + nop + +//if(ma) lw t2, 0(a0) /* t2 = dst */ - beq t3, t7, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - lw t3, 4(a0) /* t3 = dst */ + lw t3, 4(a0) /* t3 = dst */ MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 -11: not t0, t0 not t1, t1 MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 @@ -875,62 +997,79 @@ 
LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) addu_s.qb t3, t5, t3 sw t2, 0(a0) sw t3, 4(a0) -12: - addiu a3, a3, -2 addiu t1, a3, -1 - bgtz t1, 1b + bgtz t1, 0b addiu a0, a0, 8 - b 3f + b 4f + nop +1: +//if (t0 == 0xffffffff) && (t1 == 0xffffffff): + beq t8, t6, 2f /* if (srca == 0xff) */ nop -2: - /* a1 = src */ - lw t0, 0(a2) /* t0 = mask */ - lw t1, 4(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */ - addiu a2, a2, 8 - and t2, t0, t1 - move t4, a1 - beq t2, t7, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - move t5, a1 lw t2, 0(a0) /* t2 = dst */ lw t3, 4(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 - not t0, t0 - not t1, t1 - MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 - addu_s.qb t4, t4, t2 - addu_s.qb t5, t5, t3 -21: - sw t4, 0(a0) - sw t5, 4(a0) -22: - addiu a3, a3, -2 + not t0, a1 + not t1, a1 + srl t0, t0, 24 + srl t1, t1, 24 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, a1, t2 + addu_s.qb t3, a1, t3 + sw t2, 0(a0) + sw t3, 4(a0) addiu t1, a3, -1 - bgtz t1, 2b + bgtz t1, 0b addiu a0, a0, 8 + b 4f + nop +2: + sw a1, 0(a0) + sw a1, 4(a0) 3: - blez a3, 4f + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 8 + +4: + beqz a3, 7f nop /* a1 = src */ - lw t1, 0(a2) /* t1 = mask */ - beqz t1, 4f + lw t0, 0(a2) /* t0 = mask */ + beqz t0, 7f /* if (t0 == 0) */ nop - move t2, a1 /* t2 = src */ - beq t1, t7, 31f - lw t0, 0(a0) /* t0 = dst */ - - MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6 - MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5 -31: - not t1, t1 - MIPS_UN8x4_MUL_UN8x4 t0, t1, t0, t9, t3, t4, t5, t6 - addu_s.qb t0, t2, t0 - sw t0, 0(a0) -4: + beq t0, t7, 5f /* if (t0 == 0xffffffff) */ + nop +//if(ma) + lw t1, 0(a0) /* t1 = dst */ + MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 + MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 + not t0, t0 + MIPS_UN8x4_MUL_UN8x4 t1, 
t0, t1, t9, t3, t4, t5, s0 + addu_s.qb t1, t2, t1 + sw t1, 0(a0) RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 j ra nop +5: +//if (t0 == 0xffffffff) + beq t8, t6, 6f /* if (srca == 0xff) */ + nop + lw t1, 0(a0) /* t1 = dst */ + not t0, a1 + srl t0, t0, 24 + MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4 + addu_s.qb t1, a1, t1 + sw t1, 0(a0) + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 + j ra + nop +6: + sw a1, 0(a0) +7: + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 +8: + j ra + nop END(pixman_composite_over_n_8888_8888_ca_asm_mips) @@ -942,106 +1081,126 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips) * a3 - w */ - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 - beqz a3, 4f + beqz a3, 8f nop - li t5, 0xf800f800 - li t6, 0x07e007e0 - li t7, 0x001F001F - li t9, 0x00ff00ff + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + li t6, 0xff + addiu t7, zero, -1 /* t7 = 0xffffffff */ srl t8, a1, 24 /* t8 = srca */ + li t9, 0x00ff00ff + li s6, 0xf800f800 + li s7, 0x07e007e0 + li s8, 0x001F001F + addiu t1, a3, -1 - beqz t1, 3f /* last pixel */ + beqz t1, 4f /* last pixel */ nop - li s0, 0xff /* s0 = 0xff */ - addiu s1, zero, -1 /* s1 = 0xffffffff */ - beq t8, s0, 2f /* if (srca == 0xff) */ - nop -1: - /* a1 = src */ +0: lw t0, 0(a2) /* t0 = mask */ lw t1, 4(a2) /* t1 = mask */ + addiu a3, a3, -2 /* w = w - 2 */ or t2, t0, t1 - beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */ + beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ addiu a2, a2, 8 - and t3, t0, t1 - move s2, a1 /* s2 = src */ - move s3, a1 /* s3 = src */ + and t2, t0, t1 + beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ + nop + +//if(ma) lhu t2, 0(a0) /* t2 = dst */ - beq t3, s1, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - lhu t3, 2(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8 -11: + lhu t3, 2(a0) /* t3 = dst */ + 
MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 not t0, t0 not t1, t1 - CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1 - addu_s.qb s2, s2, s4 - addu_s.qb s3, s3, s5 - CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, t4, t2 + addu_s.qb t3, t5, t3 + CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 sh t2, 0(a0) sh t3, 2(a0) -12: - addiu a3, a3, -2 addiu t1, a3, -1 - bgtz t1, 1b + bgtz t1, 0b addiu a0, a0, 4 - b 3f + b 4f + nop +1: +//if (t0 == 0xffffffff) && (t1 == 0xffffffff): + beq t8, t6, 2f /* if (srca == 0xff) */ nop -2: - /* a1 = src */ - lw t0, 0(a2) /* t0 = mask */ - lw t1, 4(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */ - addiu a2, a2, 8 - and t3, t0, t1 - move t2, a1 - beq t3, s1, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - move t3, a1 lhu t2, 0(a0) /* t2 = dst */ lhu t3, 2(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8 - not t0, t0 - not t1, t1 - CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t2, t3 - addu_s.qb t2, s2, s4 - addu_s.qb t3, s3, s5 -21: - CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3 - sh t0, 0(a0) - sh t1, 2(a0) -22: - addiu a3, a3, -2 + not t0, a1 + not t1, a1 + srl t0, t0, 24 + srl t1, t1, 24 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, a1, t2 + addu_s.qb t3, a1, t3 + CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 + sh t2, 0(a0) + sh t3, 2(a0) 
addiu t1, a3, -1 - bgtz t1, 2b + bgtz t1, 0b addiu a0, a0, 4 + b 4f + nop +2: + CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1 + sh t2, 0(a0) + sh t2, 2(a0) 3: - blez a3, 4f + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 4 + +4: + beqz a3, 7f nop /* a1 = src */ - lw t1, 0(a2) /* t1 = mask */ - beqz t1, 4f + lw t0, 0(a2) /* t0 = mask */ + beqz t0, 7f /* if (t0 == 0) */ nop - move t2, a1 /* t2 = src */ - beq t1, t7, 31f - lhu t0, 0(a0) /* t0 = dst */ - - MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6 - MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5 -31: - not t1, t1 - CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3 - MIPS_UN8x4_MUL_UN8x4 s1, t1, t3, t9, t4, t5, t6, t7 - addu_s.qb t0, t2, t3 - CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3 - sh s1, 0(a0) -4: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + beq t0, t7, 5f /* if (t0 == 0xffffffff) */ + nop +//if(ma) + lhu t1, 0(a0) /* t1 = dst */ + MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 + MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 + not t0, t0 + CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 + MIPS_UN8x4_MUL_UN8x4 s1, t0, s1, t9, t3, t4, t5, s0 + addu_s.qb s1, t2, s1 + CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 + sh t1, 0(a0) + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop +5: +//if (t0 == 0xffffffff) + beq t8, t6, 6f /* if (srca == 0xff) */ + nop + lhu t1, 0(a0) /* t1 = dst */ + not t0, a1 + srl t0, t0, 24 + CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 + MIPS_UN8x4_MUL_UN8 s1, t0, s1, t9, t2, t3, t4 + addu_s.qb s1, a1, s1 + CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 + sh t1, 0(a0) + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop +6: + CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2 + sh t1, 0(a0) +7: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 +8: j ra nop @@ -2936,101 +3095,265 @@ END(pixman_composite_over_reverse_n_8888_asm_mips) LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips) /* * a0 - dst (a8) - * a1 - src (a8r8g8b8) + * a1 - src (32bit 
constant) * a2 - w */ - beqz a2, 5f + li t9, 0x00ff00ff + beqz a2, 3f nop - - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 - move t7, a1 - srl t5, t7, 24 - replv.ph t5, t5 - srl t9, a2, 2 /* t1 = how many multiples of 4 src pixels */ - beqz t9, 2f /* branch if less than 4 src pixels */ + srl t7, a2, 2 /* t7 = how many multiples of 4 dst pixels */ + beqz t7, 1f /* branch if less than 4 src pixels */ nop -1: - addiu t9, t9, -1 - addiu a2, a2, -4 + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz t7, 1f + addiu t7, t7, -1 lbu t0, 0(a0) lbu t1, 1(a0) lbu t2, 2(a0) lbu t3, 3(a0) - muleu_s.ph.qbl s0, t0, t5 - muleu_s.ph.qbr s1, t0, t5 - muleu_s.ph.qbl s2, t1, t5 - muleu_s.ph.qbr s3, t1, t5 - muleu_s.ph.qbl s4, t2, t5 - muleu_s.ph.qbr s5, t2, t5 - muleu_s.ph.qbl s6, t3, t5 - muleu_s.ph.qbr s7, t3, t5 - - shrl.ph t4, s0, 8 - shrl.ph t6, s1, 8 - shrl.ph t7, s2, 8 - shrl.ph t8, s3, 8 - addq.ph t0, s0, t4 - addq.ph t1, s1, t6 - addq.ph t2, s2, t7 - addq.ph t3, s3, t8 - shra_r.ph t0, t0, 8 - shra_r.ph t1, t1, 8 + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr.qb.ph t0, t3, t1 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 shra_r.ph t2, t2, 8 shra_r.ph t3, t3, 8 - shrl.ph t4, s4, 8 - shrl.ph t6, s5, 8 - shrl.ph t7, s6, 8 - shrl.ph t8, s7, 8 - addq.ph s0, s4, t4 - addq.ph s1, s5, t6 - addq.ph s2, s6, t7 - addq.ph s3, s7, t8 - shra_r.ph t4, s0, 8 - shra_r.ph t6, s1, 8 - shra_r.ph t7, s2, 8 - shra_r.ph t8, s3, 8 - - precr.qb.ph s0, t0, t1 - precr.qb.ph s1, t2, t3 - precr.qb.ph s2, t4, t6 - precr.qb.ph s3, t7, t8 + precr.qb.ph t2, t2, t3 - sb s0, 0(a0) - sb s1, 1(a0) - sb s2, 2(a0) - sb s3, 3(a0) - bgtz t9, 1b + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a2, a2, -4 + b 0b addiu a0, a0, 4 -2: - beqz a2, 4f + +1: + beqz a2, 3f nop -3: - lbu t1, 0(a0) + srl t8, a1, 24 
+2: + lbu t0, 0(a0) + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0x00ff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 - muleu_s.ph.qbl t4, t1, t5 - muleu_s.ph.qbr t7, t1, t5 - shrl.ph t6, t4, 8 - shrl.ph t0, t7, 8 - addq.ph t8, t4, t6 - addq.ph t9, t7, t0 - shra_r.ph t8, t8, 8 - shra_r.ph t9, t9, 8 - precr.qb.ph t2, t8, t9 sb t2, 0(a0) addiu a2, a2, -1 - bnez a2, 3b + bnez a2, 2b addiu a0, a0, 1 -4: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 -5: + +3: j ra nop END(pixman_composite_in_n_8_asm_mips) +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 + lw t8, 16(sp) /* t8 = unit_x */ + li t6, 0x00ff00ff + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + sra t1, a3, 16 /* t0 = vx >> 16 */ + sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + + OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3 + + sw t4, 0(a0) + sw t5, 4(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7 + + sw t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) + 
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1 + lw t8, 40(sp) /* t8 = unit_x */ + li t4, 0x00ff00ff + li t5, 0xf800f800 + li t6, 0x07e007e0 + li t7, 0x001F001F + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t0 = vx >> 16 */ + sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ + + CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3 + OVER_2x8888_2x8888 t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4 + CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2 + + sh v0, 0(a0) + sh v1, 2(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6 + OVER_8888_8888 t0, t2, t1, t4, t3, t5, t6, t7 + CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6 + + sh t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1 + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (r5g6b5) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, v0 + beqz a2, 3f + nop + + lw v0, 16(sp) /* v0 = unit_x */ + addiu t1, a2, -1 + beqz 
t1, 2f + nop + + li t4, 0x07e007e0 + li t5, 0x001F001F +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t1 = vx >> 16 */ + sll t1, t1, 1 /* t1 = t1 * 2 ((r5g6b5)) */ + addu t1, a1, t1 + lhu t1, 0(t1) /* t1 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + addiu a2, a2, -2 + + CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 + + sw t2, 0(a0) + sw t3, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + + CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 + + sw t1, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) + LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) /* * a0 - dst (r5g6b5) diff --git a/pixman/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman/pixman-mips-dspr2-asm.h index b330c0f0d..cab122d80 100644 --- a/pixman/pixman/pixman-mips-dspr2-asm.h +++ b/pixman/pixman/pixman-mips-dspr2-asm.h @@ -354,17 +354,16 @@ LEAF_MIPS32R2(symbol) \ out1_565, out2_565, \ maskR, maskG, maskB, \ scratch1, scratch2 - precrq.ph.w \scratch1, \in2_8888, \in1_8888 - precr_sra.ph.w \in2_8888, \in1_8888, 0 - shll.ph \scratch1, \scratch1, 8 - srl \in2_8888, \in2_8888, 3 - and \scratch2, \in2_8888, \maskB - and \scratch1, \scratch1, \maskR - srl \in2_8888, \in2_8888, 2 - and \out2_565, \in2_8888, \maskG - or \out2_565, \out2_565, \scratch2 - or \out1_565, \out2_565, \scratch1 - srl \out2_565, \out1_565, 16 + precr.qb.ph \scratch1, \in2_8888, \in1_8888 + precrq.qb.ph \in2_8888, \in2_8888, \in1_8888 + and \out1_565, \scratch1, \maskR + shrl.ph \scratch1, \scratch1, 3 + shll.ph \in2_8888, \in2_8888, 3 + and \scratch1, \scratch1, \maskB + or 
\out1_565, \out1_565, \scratch1 + and \in2_8888, \in2_8888, \maskG + or \out1_565, \out1_565, \in2_8888 + srl \out2_565, \out1_565, 16 .endm /* @@ -587,6 +586,36 @@ LEAF_MIPS32R2(symbol) \ addu_s.qb \out_8888, \out_8888, \s_8888 .endm +/* + * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two + * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR + * needed for rounding process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_2x8888_2x8888 s1_8888, \ + s2_8888, \ + d1_8888, \ + d2_8888, \ + out1_8888, \ + out2_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, \ + scratch4, scratch5, scratch6 + not \scratch1, \s1_8888 + srl \scratch1, \scratch1, 24 + not \scratch2, \s2_8888 + srl \scratch2, \scratch2, 24 + MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \ + \scratch1, \scratch2, \ + \out1_8888, \out2_8888, \ + \maskLSR, \ + \scratch3, \scratch4, \scratch5, \ + \scratch6, \d1_8888, \d2_8888 + + addu_s.qb \out1_8888, \out1_8888, \s1_8888 + addu_s.qb \out2_8888, \out2_8888, \s2_8888 +.endm + .macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \ m_8, \ d_8888, \ diff --git a/pixman/pixman/pixman-mips-dspr2.c b/pixman/pixman/pixman-mips-dspr2.c index 1ea244576..e10c9df0a 100644 --- a/pixman/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman/pixman-mips-dspr2.c @@ -54,6 +54,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev, PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev, uint8_t, 3, uint16_t, 1) #endif +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888, + uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888, uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565, @@ -121,6 +125,13 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1, PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1, uint32_t, 1, 
uint32_t, 1) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER, + uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC, + uint16_t, uint32_t) + PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC, uint32_t, uint32_t) PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC, @@ -292,6 +303,10 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev), PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev), #endif + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, mips_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, mips_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888), PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mips_composite_src_n_8_8888), PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mips_composite_src_n_8_8888), PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mips_composite_src_n_8_8888), @@ -357,6 +372,22 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888), PIXMAN_STD_FAST_PATH (IN, solid, null, a8, mips_composite_in_n_8), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888), + + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565), + + 
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565), PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565), diff --git a/pixman/pixman/pixman-mips-dspr2.h b/pixman/pixman/pixman-mips-dspr2.h index 4ac9ff95d..955ed70b8 100644 --- a/pixman/pixman/pixman-mips-dspr2.h +++ b/pixman/pixman/pixman-mips-dspr2.h @@ -246,6 +246,48 @@ mips_composite_##name (pixman_implementation_t *imp, \ } \ } +/****************************************************************************/ + +#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \ + dst_type * dst, \ + const src_type * src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ + \ +static force_inline void \ +scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w, \ + vx, unit_x); \ +} \ + \ +FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, COVER) \ +FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, NONE) \ +FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \ + 
scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, PAD) + +/* Provide entries for the fast path table */ +#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) + + /*****************************************************************************/ #define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \ diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c index 14790c029..c94d282a9 100644 --- a/pixman/pixman/pixman-mmx.c +++ b/pixman/pixman/pixman-mmx.c @@ -301,6 +301,29 @@ negate (__m64 mask) return _mm_xor_si64 (mask, MC (4x00ff)); } +/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1 + * and maps its result to the same range. + * + * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner: + * Notation, Notation, Notation", the first of which is + * + * prod(a, b) = (a * b + 128) / 255. + * + * By approximating the division by 255 as 257/65536 it can be replaced by a + * multiply and a right shift. This is the implementation that we use in + * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended + * 3DNow!, and unavailable at the time of the book's publication) to perform + * the multiplication and right shift in a single operation. + * + * prod(a, b) = ((a * b + 128) * 257) >> 16. + * + * A third way (how pix_multiply() was implemented prior to 14208344) exists + * also that performs the multiplication by 257 with adds and shifts. + * + * Where temp = a * b + 128 + * + * prod(a, b) = (temp + (temp >> 8)) >> 8. 
+ */ static force_inline __m64 pix_multiply (__m64 a, __m64 b) { @@ -3899,52 +3922,23 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } -typedef struct -{ - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; -} fetcher_info_t; - -static const fetcher_info_t fetchers[] = -{ - { PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 }, - { PIXMAN_r5g6b5, mmx_fetch_r5g6b5 }, - { PIXMAN_a8, mmx_fetch_a8 }, - { PIXMAN_null } -}; - -static pixman_bool_t -mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) -{ - pixman_image_t *image = iter->image; - -#define FLAGS \ +#define IMAGE_FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - iter->get_scanline = f->get_scanline; - return TRUE; - } - } - } - - return FALSE; -} +static const pixman_iter_info_t mmx_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL + }, + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL + }, + { PIXMAN_null }, +}; static const pixman_fast_path_t mmx_fast_paths[] = { @@ -4074,7 +4068,7 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback) imp->blt = mmx_blt; imp->fill = mmx_fill; - imp->src_iter_init = mmx_src_iter_init; + imp->iter_info = mmx_iters; return imp; } diff --git a/pixman/pixman/pixman-noop.c b/pixman/pixman/pixman-noop.c index 
e39996d9d..e59890492 100644 --- a/pixman/pixman/pixman-noop.c +++ b/pixman/pixman/pixman-noop.c @@ -37,12 +37,6 @@ noop_composite (pixman_implementation_t *imp, return; } -static void -dest_write_back_direct (pixman_iter_t *iter) -{ - iter->buffer += iter->image->bits.rowstride; -} - static uint32_t * noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) { @@ -53,110 +47,102 @@ noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) return result; } -static uint32_t * -get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) -{ - return NULL; +static void +noop_init_solid_narrow (pixman_iter_t *iter, + const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + uint32_t *buffer = iter->buffer; + uint32_t *end = buffer + iter->width; + uint32_t color; + + if (iter->image->type == SOLID) + color = image->solid.color_32; + else + color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); + + while (buffer < end) + *(buffer++) = color; } -static pixman_bool_t -noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +noop_init_solid_wide (pixman_iter_t *iter, + const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; + argb_t *buffer = (argb_t *)iter->buffer; + argb_t *end = buffer + iter->width; + argb_t color; -#define FLAGS \ - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM) - - if (!image) - { - iter->get_scanline = get_scanline_null; - } - else if ((iter->iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == - (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) - { - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else if (image->common.extended_format_code == PIXMAN_solid && - (iter->image->type == SOLID || - (iter->image_flags & FAST_PATH_NO_ALPHA_MAP))) - { - if (iter->iter_flags & ITER_NARROW) - { - uint32_t *buffer = iter->buffer; - uint32_t *end = buffer + iter->width; - uint32_t color; - - if (image->type == SOLID) - color = image->solid.color_32; - else - color = 
image->bits.fetch_pixel_32 (&image->bits, 0, 0); - - while (buffer < end) - *(buffer++) = color; - } - else - { - argb_t *buffer = (argb_t *)iter->buffer; - argb_t *end = buffer + iter->width; - argb_t color; - - if (image->type == SOLID) - color = image->solid.color_float; - else - color = image->bits.fetch_pixel_float (&image->bits, 0, 0); - - while (buffer < end) - *(buffer++) = color; - } - - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else if (image->common.extended_format_code == PIXMAN_a8r8g8b8 && - (iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS && - iter->x >= 0 && iter->y >= 0 && - iter->x + iter->width <= image->bits.width && - iter->y + iter->height <= image->bits.height) - { - iter->buffer = - image->bits.bits + iter->y * image->bits.rowstride + iter->x; - - iter->get_scanline = noop_get_scanline; - } + if (iter->image->type == SOLID) + color = image->solid.color_float; else - { - return FALSE; - } + color = image->bits.fetch_pixel_float (&image->bits, 0, 0); - return TRUE; + while (buffer < end) + *(buffer++) = color; } -static pixman_bool_t -noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +noop_init_direct_buffer (pixman_iter_t *iter, const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; - uint32_t image_flags = iter->image_flags; - uint32_t iter_flags = iter->iter_flags; - - if ((image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS && - (iter_flags & ITER_NARROW) == ITER_NARROW && - ((image->common.extended_format_code == PIXMAN_a8r8g8b8) || - (image->common.extended_format_code == PIXMAN_x8r8g8b8 && - (iter_flags & (ITER_LOCALIZED_ALPHA))))) - { - iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x; - - iter->get_scanline = _pixman_iter_get_scanline_noop; - iter->write_back = dest_write_back_direct; - - return TRUE; - } - else - { - return FALSE; - } + + iter->buffer = + image->bits.bits + iter->y * 
image->bits.rowstride + iter->x; } +static void +dest_write_back_direct (pixman_iter_t *iter) +{ + iter->buffer += iter->image->bits.rowstride; +} + +static const pixman_iter_info_t noop_iters[] = +{ + /* Source iters */ + { PIXMAN_any, + 0, ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_SRC, + NULL, + _pixman_iter_get_scanline_noop, + NULL + }, + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, ITER_NARROW | ITER_SRC, + noop_init_solid_narrow, + _pixman_iter_get_scanline_noop, + NULL, + }, + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, ITER_WIDE | ITER_SRC, + noop_init_solid_wide, + _pixman_iter_get_scanline_noop, + NULL + }, + { PIXMAN_a8r8g8b8, + FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, + ITER_NARROW | ITER_SRC, + noop_init_direct_buffer, + noop_get_scanline, + NULL + }, + /* Dest iters */ + { PIXMAN_a8r8g8b8, + FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST, + noop_init_direct_buffer, + _pixman_iter_get_scanline_noop, + dest_write_back_direct + }, + { PIXMAN_x8r8g8b8, + FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_LOCALIZED_ALPHA, + noop_init_direct_buffer, + _pixman_iter_get_scanline_noop, + dest_write_back_direct + }, + { PIXMAN_null }, +}; + static const pixman_fast_path_t noop_fast_paths[] = { { PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite }, @@ -169,8 +155,7 @@ _pixman_implementation_create_noop (pixman_implementation_t *fallback) pixman_implementation_t *imp = _pixman_implementation_create (fallback, noop_fast_paths); - imp->src_iter_init = noop_src_iter_init; - imp->dest_iter_init = noop_dest_iter_init; + imp->iter_info = noop_iters; return imp; } diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index 6d9c05321..af4a0b6e0 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -212,7 +212,8 @@ typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter); typedef enum { - ITER_NARROW = (1 << 
0), + ITER_NARROW = (1 << 0), + ITER_WIDE = (1 << 1), /* "Localized alpha" is when the alpha channel is used only to compute * the alpha value of the destination. This means that the computation @@ -229,9 +230,15 @@ typedef enum * we can treat it as if it were ARGB, which means in some cases we can * avoid copying it to a temporary buffer. */ - ITER_LOCALIZED_ALPHA = (1 << 1), - ITER_IGNORE_ALPHA = (1 << 2), - ITER_IGNORE_RGB = (1 << 3) + ITER_LOCALIZED_ALPHA = (1 << 2), + ITER_IGNORE_ALPHA = (1 << 3), + ITER_IGNORE_RGB = (1 << 4), + + /* These indicate whether the iterator is for a source + * or a destination image + */ + ITER_SRC = (1 << 5), + ITER_DEST = (1 << 6) } iter_flags_t; struct pixman_iter_t @@ -255,6 +262,19 @@ struct pixman_iter_t int stride; }; +typedef struct pixman_iter_info_t pixman_iter_info_t; +typedef void (* pixman_iter_initializer_t) (pixman_iter_t *iter, + const pixman_iter_info_t *info); +struct pixman_iter_info_t +{ + pixman_format_code_t format; + uint32_t image_flags; + iter_flags_t iter_flags; + pixman_iter_initializer_t initializer; + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; +}; + void _pixman_bits_image_setup_accessors (bits_image_t *image); @@ -454,8 +474,6 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp, int width, int height, uint32_t filler); -typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp, - pixman_iter_t *iter); void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp); void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp); @@ -477,11 +495,10 @@ struct pixman_implementation_t pixman_implementation_t * toplevel; pixman_implementation_t * fallback; const pixman_fast_path_t * fast_paths; + const pixman_iter_info_t * iter_info; pixman_blt_func_t blt; pixman_fill_func_t fill; - pixman_iter_init_func_t src_iter_init; - pixman_iter_init_func_t dest_iter_init; pixman_combine_32_func_t 
combine_32[PIXMAN_N_OPERATORS]; pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS]; @@ -542,29 +559,17 @@ _pixman_implementation_fill (pixman_implementation_t *imp, int height, uint32_t filler); -pixman_bool_t -_pixman_implementation_src_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t flags, - uint32_t image_flags); - -pixman_bool_t -_pixman_implementation_dest_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t flags, - uint32_t image_flags); +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t flags, + uint32_t image_flags); /* Specific implementations */ pixman_implementation_t * @@ -647,6 +652,9 @@ _pixman_compute_composite_region32 (pixman_region32_t * region, uint32_t * _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info); + /* These "formats" all have depth 0, so they * will never clash with any real ones */ diff --git a/pixman/pixman/pixman-region.c b/pixman/pixman/pixman-region.c index 2d6f1571c..59bc9c797 100644 --- a/pixman/pixman/pixman-region.c +++ b/pixman/pixman/pixman-region.c @@ -1858,7 +1858,7 @@ pixman_region_subtract_o (region_type_t * region, else if (r2->x1 <= x1) { /* - * Subtrahend preceeds minuend: nuke left edge of minuend. + * Subtrahend precedes minuend: nuke left edge of minuend. 
*/ x1 = r2->x2; if (x1 >= r1->x2) @@ -1982,7 +1982,7 @@ PREFIX (_subtract) (region_type_t *reg_d, } /* Add those rectangles in region 1 that aren't in region 2, - do yucky substraction for overlaps, and + do yucky subtraction for overlaps, and just throw away rectangles in region 2 that aren't in region 1 */ if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE)) return FALSE; @@ -2042,7 +2042,7 @@ PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ } /* Add those rectangles in region 1 that aren't in region 2, - * do yucky substraction for overlaps, and + * do yucky subtraction for overlaps, and * just throw away rectangles in region 2 that aren't in region 1 */ inv_reg.extents = *inv_rect; diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index c7e9a4bb2..dde923524 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -5554,19 +5554,27 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, scaled_nearest_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE) -#define BMSK ((1 << BILINEAR_INTERPOLATION_BITS) - 1) - -#define BILINEAR_DECLARE_VARIABLES \ +#if BILINEAR_INTERPOLATION_BITS < 8 +# define BILINEAR_DECLARE_VARIABLES \ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ - const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);\ - const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ - const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);\ - const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_ux = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ + unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_zero = _mm_setzero_si128 (); \ + __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, 
-(vx + 1), \ + vx, -(vx + 1), vx, -(vx + 1)) +#else +# define BILINEAR_DECLARE_VARIABLES \ + const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ + const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \ - unit_x, unit_x, unit_x, unit_x); \ + -unit_x, -unit_x, -unit_x, -unit_x); \ const __m128i xmm_zero = _mm_setzero_si128 (); \ - __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx) + __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, \ + -(vx + 1), -(vx + 1), -(vx + 1), -(vx + 1)) +#endif #define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ do { \ @@ -5585,8 +5593,8 @@ do { \ if (BILINEAR_INTERPOLATION_BITS < 8) \ { \ /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, \ - _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ /* horizontal interpolation */ \ a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ @@ -5595,8 +5603,8 @@ do { \ else \ { \ /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, \ - _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ /* horizontal interpolation */ \ xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ @@ -6332,52 +6340,23 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } -typedef struct -{ - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; -} fetcher_info_t; - -static const fetcher_info_t fetchers[] = -{ - { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 }, - { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 }, - { PIXMAN_a8, 
sse2_fetch_a8 }, - { PIXMAN_null } -}; - -static pixman_bool_t -sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) -{ - pixman_image_t *image = iter->image; - -#define FLAGS \ +#define IMAGE_FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - iter->get_scanline = f->get_scanline; - return TRUE; - } - } - } - - return FALSE; -} +static const pixman_iter_info_t sse2_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL + }, + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL + }, + { PIXMAN_null }, +}; #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) __attribute__((__force_align_arg_pointer__)) @@ -6435,7 +6414,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback) imp->blt = sse2_blt; imp->fill = sse2_fill; - imp->src_iter_init = sse2_src_iter_init; + imp->iter_info = sse2_iters; return imp; } diff --git a/pixman/pixman/pixman-utils.c b/pixman/pixman/pixman-utils.c index f31171f6d..98723a800 100644 --- a/pixman/pixman/pixman-utils.c +++ b/pixman/pixman/pixman-utils.c @@ -214,6 +214,17 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; 
+ uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (info->format) / 8; + iter->stride = s; +} + #define N_TMP_BOXES (16) pixman_bool_t diff --git a/pixman/pixman/pixman-vmx.c b/pixman/pixman/pixman-vmx.c index 6868704a8..f629003ab 100644 --- a/pixman/pixman/pixman-vmx.c +++ b/pixman/pixman/pixman-vmx.c @@ -25,7 +25,9 @@ * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell */ +#ifdef HAVE_CONFIG_H #include +#endif #include "pixman-private.h" #include "pixman-combine32.h" #include diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c index 184f0c4e6..9555ceaaf 100644 --- a/pixman/pixman/pixman.c +++ b/pixman/pixman/pixman.c @@ -605,7 +605,7 @@ pixman_image_composite32 (pixman_op_t op, else { mask_format = PIXMAN_null; - info.mask_flags = FAST_PATH_IS_OPAQUE; + info.mask_flags = FAST_PATH_IS_OPAQUE | FAST_PATH_NO_ALPHA_MAP; } dest_format = dest->common.extended_format_code; diff --git a/pixman/pixman/refactor b/pixman/pixman/refactor deleted file mode 100644 index 52fceab17..000000000 --- a/pixman/pixman/refactor +++ /dev/null @@ -1,478 +0,0 @@ -Roadmap - -- Move all the fetchers etc. into pixman-image to make pixman-compose.c - less intimidating. - - DONE - -- Make combiners for unified alpha take a mask argument. That way - we won't need two separate paths for unified vs component in the - general compositing code. - - DONE, except that the Altivec code needs to be updated. Luca is - looking into that. - -- Delete separate 'unified alpha' path - - DONE - -- Split images into their own files - - DONE - -- Split the gradient walker code out into its own file - - DONE - -- Add scanline getters per image - - DONE - -- Generic 64 bit fetcher - - DONE - -- Split fast path tables into their respective architecture dependent - files. 
- -See "Render Algorithm" below for rationale - -Images will eventually have these virtual functions: - - get_scanline() - get_scanline_wide() - get_pixel() - get_pixel_wide() - get_untransformed_pixel() - get_untransformed_pixel_wide() - get_unfiltered_pixel() - get_unfiltered_pixel_wide() - - store_scanline() - store_scanline_wide() - -1. - -Initially we will just have get_scanline() and get_scanline_wide(); -these will be based on the ones in pixman-compose. Hopefully this will -reduce the complexity in pixman_composite_rect_general(). - -Note that there is access considerations - the compose function is -being compiled twice. - - -2. - -Split image types into their own source files. Export noop virtual -reinit() call. Call this whenever a property of the image changes. - - -3. - -Split the get_scanline() call into smaller functions that are -initialized by the reinit() call. - -The Render Algorithm: - (first repeat, then filter, then transform, then clip) - -Starting from a destination pixel (x, y), do - - 1 x = x - xDst + xSrc - y = y - yDst + ySrc - - 2 reject pixel that is outside the clip - - This treats clipping as something that happens after - transformation, which I think is correct for client clips. For - hierarchy clips it is wrong, but who really cares? Without - GraphicsExposes hierarchy clips are basically irrelevant. Yes, - you could imagine cases where the pixels of a subwindow of a - redirected, transformed window should be treated as - transparent. I don't really care - - Basically, I think the render spec should say that pixels that - are unavailable due to the hierarcy have undefined content, - and that GraphicsExposes are not generated. Ie., basically - that using non-redirected windows as sources is fail. This is - at least consistent with the current implementation and we can - update the spec later if someone makes it work. - - The implication for render is that it should stop passing the - hierarchy clip to pixman. 
In pixman, if a souce image has a - clip it should be used in computing the composite region and - nowhere else, regardless of what "has_client_clip" says. The - default should be for there to not be any clip. - - I would really like to get rid of the client clip as well for - source images, but unfortunately there is at least one - application in the wild that uses them. - - 3 Transform pixel: (x, y) = T(x, y) - - 4 Call p = GetUntransformedPixel (x, y) - - 5 If the image has an alpha map, then - - Call GetUntransformedPixel (x, y) on the alpha map - - add resulting alpha channel to p - - return p - - Where GetUnTransformedPixel is: - - 6 switch (filter) - { - case NEAREST: - return GetUnfilteredPixel (x, y); - break; - - case BILINEAR: - return GetUnfilteredPixel (...) // 4 times - break; - - case CONVOLUTION: - return GetUnfilteredPixel (...) // as many times as necessary. - break; - } - - Where GetUnfilteredPixel (x, y) is - - 7 switch (repeat) - { - case REPEAT_NORMAL: - case REPEAT_PAD: - case REPEAT_REFLECT: - // adjust x, y as appropriate - break; - - case REPEAT_NONE: - if (x, y) is outside image bounds - return 0; - break; - } - - return GetRawPixel(x, y) - - Where GetRawPixel (x, y) is - - 8 Compute the pixel in question, depending on image type. - -For gradients, repeat has a totally different meaning, so -UnfilteredPixel() and RawPixel() must be the same function so that -gradients can do their own repeat algorithm. - -So, the GetRawPixel - - for bits must deal with repeats - for gradients must deal with repeats (differently) - for solids, should ignore repeats. - - for polygons, when we add them, either ignore repeats or do - something similar to bits (in which case, we may want an extra - layer of indirection to modify the coordinates). - -It is then possible to build things like "get scanline" or "get tile" on -top of this. 
In the simplest case, just repeatedly calling GetPixel() -would work, but specialized get_scanline()s or get_tile()s could be -plugged in for common cases. - -By not plugging anything in for images with access functions, we only -have to compile the pixel functions twice, not the scanline functions. - -And we can get rid of fetchers for the bizarre formats that no one -uses. Such as b2g3r3 etc. r1g2b1? Seriously? It is also worth -considering a generic format based pixel fetcher for these edge cases. - -Since the actual routines depend on the image attributes, the images -must be notified when those change and update their function pointers -appropriately. So there should probably be a virtual function called -(* reinit) or something like that. - -There will also be wide fetchers for both pixels and lines. The line -fetcher will just call the wide pixel fetcher. The wide pixel fetcher -will just call expand, except for 10 bit formats. - -Rendering pipeline: - -Drawable: - 0. if (picture has alpha map) - 0.1. Position alpha map according to the alpha_x/alpha_y - 0.2. Where the two drawables intersect, the alpha channel - Replace the alpha channel of source with the one - from the alpha map. Replacement only takes place - in the intersection of the two drawables' geometries. - 1. Repeat the drawable according to the repeat attribute - 2. Reconstruct a continuous image according to the filter - 3. Transform according to the transform attribute - 4. Position image such that src_x, src_y is over dst_x, dst_y - 5. Sample once per destination pixel - 6. Clip. If a pixel is not within the source clip, then no - compositing takes place at that pixel. (Ie., it's *not* - treated as 0). - - Sampling a drawable: - - - If the channel does not have an alpha channel, the pixels in it - are treated as opaque. - - Note on reconstruction: - - - The top left pixel has coordinates (0.5, 0.5) and pixels are - spaced 1 apart. - -Gradient: - 1. 
Unless gradient type is conical, repeat the underlying (0, 1) - gradient according to the repeat attribute - 2. Integrate the gradient across the plane according to type. - 3. Transform according to transform attribute - 4. Position gradient - 5. Sample once per destination pixel. - 6. Clip - -Solid Fill: - 1. Repeat has no effect - 2. Image is already continuous and defined for the entire plane - 3. Transform has no effect - 4. Positioning has no effect - 5. Sample once per destination pixel. - 6. Clip - -Polygon: - 1. Repeat has no effect - 2. Image is already continuous and defined on the whole plane - 3. Transform according to transform attribute - 4. Position image - 5. Supersample 15x17 per destination pixel. - 6. Clip - -Possibly interesting additions: - - More general transformations, such as warping, or general - shading. - - - Shader image where a function is called to generate the - pixel (ie., uploading assembly code). - - - Resampling kernels - - In principle the polygon image uses a 15x17 box filter for - resampling. If we allow general resampling filters, then we - get all the various antialiasing types for free. - - Bilinear downsampling looks terrible and could be much - improved by a resampling filter. NEAREST reconstruction - combined with a box resampling filter is what GdkPixbuf - does, I believe. - - Useful for high frequency gradients as well. - - (Note that the difference between a reconstruction and a - resampling filter is mainly where in the pipeline they - occur. High quality resampling should use a correctly - oriented kernel so it should happen after transformation. - - An implementation can transform the resampling kernel and - convolve it with the reconstruction if it so desires, but it - will need to deal with the fact that the resampling kernel - will not necessarily be pixel aligned. 
- - "Output kernels" - - One could imagine doing the resampling after compositing, - ie., for each destination pixel sample each source image 16 - times, then composite those subpixels individually, then - finally apply a kernel. - - However, this is effectively the same as full screen - antialiasing, which is a simpler way to think about it. So - resampling kernels may make sense for individual images, but - not as a post-compositing step. - - Fullscreen AA is inefficient without chained compositing - though. Consider an (image scaled up to oversample size IN - some polygon) scaled down to screen size. With the current - implementation, there will be a huge temporary. With chained - compositing, the whole thing ends up being equivalent to the - output kernel from above. - - - Color space conversion - - The complete model here is that each surface has a color - space associated with it and that the compositing operation - also has one associated with it. Note also that gradients - should have associated colorspaces. - - - Dithering - - If people dither something that is already dithered, it will - look terrible, but don't do that, then. (Dithering happens - after resampling if at all - what is the relationship - with color spaces? Presumably dithering should happen in linear - intensity space). - - - Floating point surfaces, 16, 32 and possibly 64 bit per - channel. - - Maybe crack: - - - Glyph polygons - - If glyphs could be given as polygons, they could be - positioned and rasterized more accurately. The glyph - structure would need subpixel positioning though. - - - Luminance vs. coverage for the alpha channel - - Whether the alpha channel should be interpreted as luminance - modulation or as coverage (intensity modulation). This is a - bit of a departure from the rendering model though. It could - also be considered whether it should be possible to have - both channels in the same drawable.
- - - Alternative for component alpha - - - Set component-alpha on the output image. - - - This means each of the components is sampled - independently and composited in the corresponding - channel only. - - - Have 3 x oversampled mask - - - Scale it down by 3 horizontally, with [ 1/3, 1/3, 1/3 ] - resampling filter. - - Is this equivalent to just using a component alpha mask? - - Incompatible changes: - - - Gradients could be specified with premultiplied colors. (You - can use a mask to get things like gradients from solid red to - transparent red.) - -Refactoring pixman - -The pixman code is not particularly nice, to put it mildly. Among the -issues are - -- inconsistent naming style (fb vs Fb, camelCase vs - underscore_naming). Sometimes there is even inconsistency *within* - one name. - - fetchProc32 ACCESS(pixman_fetchProcForPicture32) - - may be one of the ugliest names ever created. - - coding style: - use the one from cairo except that pixman uses this brace style: - - while (blah) - { - } - - Format do while like this: - - do - { - - } - while (...); - -- PIXMAN_COMPOSITE_RECT_GENERAL() is horribly complex - -- switch case logic in pixman-access.c - - Instead it would be better to just store function pointers in the - image objects themselves, - - get_pixel() - get_scanline() - -- Much of the scanline fetching code is for formats that no one - ever uses. a2r2g2b2 anyone? - - It would probably be worthwhile having a generic fetcher for any - pixman format whatsoever. - -- Code related to particular image types should be split into individual - files. - - pixman-bits-image.c - pixman-linear-gradient-image.c - pixman-radial-gradient-image.c - pixman-solid-image.c - -- Fast path code should be split into files based on architecture: - - pixman-mmx-fastpath.c - pixman-sse2-fastpath.c - pixman-c-fastpath.c - - etc. - - Each of these files should then export a fastpath table, which would - be declared in pixman-private.h.
This should allow us to get rid - of the pixman-mmx.h files. - - The fast path table should describe each fast path. Ie., there should - be bitfields indicating what things the fast path can handle, rather than - like now where it is only allowed to take one format per src/mask/dest. Ie., - - { - FAST_a8r8g8b8 | FAST_x8r8g8b8, - FAST_null, - FAST_x8r8g8b8, - FAST_repeat_normal | FAST_repeat_none, - the_fast_path - } - -There should then be *one* file that implements pixman_image_composite(). -This should do this: - - optimize_operator(); - - convert 1x1 repeat to solid (actually this should be done at - image creation time). - - is there a useful fastpath? - -There should be a file called pixman-cpu.c that contains all the -architecture specific stuff to detect what CPU features we have. - -Issues that must be kept in mind: - - - we need accessor code to be preserved - - - maybe there should be a "store_scanline" too? - - Is this sufficient? - - We should preserve the optimization where the - compositing happens directly in the destination - whenever possible. - - - It should be possible to create GPU samplers from the - images. - -The "horizontal" classification should be a bit in the image, the -"vertical" classification should just happen inside the gradient -file. Note though that - - (a) these will change if the transformation/repeat changes. - - (b) at the moment the optimization for linear gradients - takes the source rectangle into account. Presumably - this is to also optimize the case where the gradient - is close enough to horizontal? - -Who is responsible for repeats? In principle it should be the scanline -fetch. Right now NORMAL repeats are handled by walk_composite_region() -while other repeats are handled by the scanline code. - - -(Random note on filtering: do you filter before or after -transformation? Hardware is going to filter after transformation; -this is also what pixman does currently).
It's not completely clear -what filtering *after* transformation means. One thing that might look -good would be to do *supersampling*, ie., compute multiple subpixels -per destination pixel, then average them together. diff --git a/pixman/test/blitters-test.c b/pixman/test/blitters-test.c index 8766fa800..a2c6ff4d8 100644 --- a/pixman/test/blitters-test.c +++ b/pixman/test/blitters-test.c @@ -46,7 +46,16 @@ create_random_image (pixman_format_code_t *allowed_formats, /* do the allocation */ buf = aligned_malloc (64, stride * height); - prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF); + if (prng_rand_n (4) == 0) + { + /* uniform distribution */ + prng_randmemset (buf, stride * height, 0); + } + else + { + /* significantly increased probability for 0x00 and 0xFF */ + prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF); + } img = pixman_image_create_bits (fmt, width, height, buf, stride); @@ -393,6 +402,6 @@ main (int argc, const char *argv[]) } return fuzzer_test_main("blitters", 2000000, - 0xD8265D5E, + 0x0CF3283B, test_composite, argc, argv); } diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c index 4e16f7ba1..1049e21e7 100644 --- a/pixman/test/lowlevel-blt-bench.c +++ b/pixman/test/lowlevel-blt-bench.c @@ -385,6 +385,7 @@ bench_composite (char * testname, double t1, t2, t3, pix_cnt; int64_t n, l1test_width, nlines; double bytes_per_pix = 0; + pixman_bool_t bench_pixbuf = FALSE; pixman_composite_func_t func = pixman_image_composite_wrapper; @@ -422,16 +423,20 @@ bench_composite (char * testname, mask_img = NULL; xmask_img = NULL; + if (strcmp (testname, "pixbuf") == 0 || strcmp (testname, "rpixbuf") == 0) + { + bench_pixbuf = TRUE; + } if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null) { bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0); mask_img = pixman_image_create_bits (mask_fmt, WIDTH, HEIGHT, - mask, + bench_pixbuf ? 
src : mask, WIDTH * 4); xmask_img = pixman_image_create_bits (mask_fmt, XWIDTH, XHEIGHT, - mask, + bench_pixbuf ? src : mask, XWIDTH * 4); } else if (mask_fmt != PIXMAN_null) @@ -643,6 +648,8 @@ tests_tbl[] = { "src_0888_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_0888_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, { "src_0888_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "src_0888_8888_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "src_0888_0565_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, { "src_x888_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, { "src_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, @@ -707,6 +714,8 @@ tests_tbl[] = { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 }, { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 }, { "over_reverse_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "pixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 }, + { "rpixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 }, }; int diff --git a/pixman/test/prng-test.c b/pixman/test/prng-test.c index 0a3ad5e8f..c1d9320cc 100644 --- a/pixman/test/prng-test.c +++ b/pixman/test/prng-test.c @@ -106,7 +106,10 @@ int main (int argc, char *argv[]) { const uint32_t ref_crc[RANDMEMSET_MORE_00_AND_FF + 1] = { - 0xBA06763D, 0x103FC550, 0x8B59ABA5, 0xD82A0F39 + 0xBA06763D, 0x103FC550, 0x8B59ABA5, 0xD82A0F39, + 0xD2321099, 0xFD8C5420, 0xD3B7C42A, 0xFC098093, + 0x85E01DE0, 0x6680F8F7, 0x4D32DD3C, 0xAE52382B, + 0x149E6CB5, 0x8B336987, 0x15DCB2B3, 0x8A71B781 }; uint32_t crc1, crc2; 
uint32_t ref, seed, seed0, seed1, seed2, seed3; diff --git a/pixman/test/utils-prng.c b/pixman/test/utils-prng.c index 967b8989a..7b32e3531 100644 --- a/pixman/test/utils-prng.c +++ b/pixman/test/utils-prng.c @@ -107,6 +107,7 @@ randmemset_internal (prng_t *prng, { prng_t local_prng = *prng; prng_rand_128_data_t randdata; + size_t i; while (size >= 16) { @@ -138,6 +139,22 @@ randmemset_internal (prng_t *prng, }; randdata.vb &= (t.vb >= const_40); } + if (flags & RANDMEMSET_MORE_FFFFFFFF) + { + const uint32x4 const_C0000000 = + { + 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000 + }; + randdata.vw |= ((t.vw << 30) >= const_C0000000); + } + if (flags & RANDMEMSET_MORE_00000000) + { + const uint32x4 const_40000000 = + { + 0x40000000, 0x40000000, 0x40000000, 0x40000000 + }; + randdata.vw &= ((t.vw << 30) >= const_40000000); + } #else #define PROCESS_ONE_LANE(i) \ if (flags & RANDMEMSET_MORE_FF) \ @@ -155,6 +172,18 @@ randmemset_internal (prng_t *prng, mask_00 |= mask_00 >> 2; \ mask_00 |= mask_00 >> 4; \ randdata.w[i] &= mask_00; \ + } \ + if (flags & RANDMEMSET_MORE_FFFFFFFF) \ + { \ + int32_t mask_ff = ((t.w[i] << 30) & (t.w[i] << 31)) & \ + 0x80000000; \ + randdata.w[i] |= mask_ff >> 31; \ + } \ + if (flags & RANDMEMSET_MORE_00000000) \ + { \ + int32_t mask_00 = ((t.w[i] << 30) | (t.w[i] << 31)) & \ + 0x80000000; \ + randdata.w[i] &= mask_00 >> 31; \ } PROCESS_ONE_LANE (0) @@ -198,7 +227,8 @@ randmemset_internal (prng_t *prng, } size -= 16; } - while (size > 0) + i = 0; + while (i < size) { uint8_t randbyte = prng_rand_r (&local_prng) & 0xFF; if (flags != 0) @@ -208,9 +238,25 @@ randmemset_internal (prng_t *prng, randbyte = 0xFF; if ((flags & RANDMEMSET_MORE_00) && (t < 0x40)) randbyte = 0x00; + if (i % 4 == 0 && i + 4 <= size) + { + t = prng_rand_r (&local_prng) & 0xFF; + if ((flags & RANDMEMSET_MORE_FFFFFFFF) && (t >= 0xC0)) + { + memset(&buf[i], 0xFF, 4); + i += 4; + continue; + } + if ((flags & RANDMEMSET_MORE_00000000) && (t < 0x40)) + { + memset(&buf[i], 0x00, 
4); + i += 4; + continue; + } + } } - *buf++ = randbyte; - size--; + buf[i] = randbyte; + i++; } *prng = local_prng; } @@ -218,8 +264,10 @@ randmemset_internal (prng_t *prng, /* * Fill memory buffer with random data. Flags argument may be used * to tweak some statistics properties: - * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 - * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF + * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 + * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF + * RANDMEMSET_MORE_00000000 - ~25% chance for 00000000 4-byte clusters + * RANDMEMSET_MORE_FFFFFFFF - ~25% chance for FFFFFFFF 4-byte clusters */ void prng_randmemset_r (prng_t *prng, void *voidbuf, diff --git a/pixman/test/utils-prng.h b/pixman/test/utils-prng.h index 285107f08..564ffcef1 100644 --- a/pixman/test/utils-prng.h +++ b/pixman/test/utils-prng.h @@ -153,7 +153,10 @@ typedef enum { RANDMEMSET_MORE_00 = 1, /* ~25% chance for 0x00 bytes */ RANDMEMSET_MORE_FF = 2, /* ~25% chance for 0xFF bytes */ - RANDMEMSET_MORE_00_AND_FF = (RANDMEMSET_MORE_00 | RANDMEMSET_MORE_FF) + RANDMEMSET_MORE_00000000 = 4, /* ~25% chance for 0x00000000 clusters */ + RANDMEMSET_MORE_FFFFFFFF = 8, /* ~25% chance for 0xFFFFFFFF clusters */ + RANDMEMSET_MORE_00_AND_FF = (RANDMEMSET_MORE_00 | RANDMEMSET_MORE_00000000 | + RANDMEMSET_MORE_FF | RANDMEMSET_MORE_FFFFFFFF) } prng_randmemset_flags_t; /* Set the 32-bit seed for PRNG */ -- cgit v1.2.3