diff options
Diffstat (limited to 'pixman')
25 files changed, 1063 insertions, 1112 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac index 38f89b31e..221179ff1 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -53,8 +53,8 @@ AC_PREREQ([2.57]) # m4_define([pixman_major], 0) -m4_define([pixman_minor], 29) -m4_define([pixman_micro], 3) +m4_define([pixman_minor], 31) +m4_define([pixman_micro], 1) m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) @@ -279,7 +279,7 @@ AC_MSG_CHECKING(whether to use Loongson MMI assembler) xserver_save_CFLAGS=$CFLAGS CFLAGS=" $LS_CFLAGS $CFLAGS -I$srcdir" -AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +AC_LINK_IFELSE([AC_LANG_SOURCE([[ #ifndef __mips_loongson_vector_rev #error "Loongson Multimedia Instructions are only available on Loongson" #endif @@ -845,6 +845,13 @@ if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then fi dnl ===================================== +dnl Check for missing sqrtf() as, e.g., for Solaris 9 + +AC_SEARCH_LIBS([sqrtf], [m], [], + [AC_DEFINE([sqrtf], [sqrt], + [Define to sqrt if you do not have the `sqrtf' function.])]) + +dnl ===================================== dnl Thread local storage AC_MSG_CHECKING(for thread local storage (TLS) support) diff --git a/pixman/pixman/pixman-arm-neon-asm.h b/pixman/pixman/pixman-arm-neon-asm.h index 1673b080f..d0d92d74c 100644 --- a/pixman/pixman/pixman-arm-neon-asm.h +++ b/pixman/pixman/pixman-arm-neon-asm.h @@ -385,7 +385,7 @@ * execute simultaneously with NEON and be completely shadowed by it. Thus * we get no performance overhead at all (*). This looks like a very nice * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher, - * but still can implement some rather advanced prefetch logic in sofware + * but still can implement some rather advanced prefetch logic in software * for almost zero cost! 
* * (*) The overhead of the prefetcher is visible when running some trivial diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index 247aea645..3982dce8b 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -2261,89 +2261,27 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter) } } -typedef struct -{ - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; - pixman_iter_write_back_t write_back; -} fetcher_info_t; - -static const fetcher_info_t fetchers[] = -{ - { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, - { PIXMAN_null } -}; - -static pixman_bool_t -fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) -{ - pixman_image_t *image = iter->image; - -#define FLAGS \ +#define IMAGE_FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - iter->get_scanline = f->get_scanline; - return TRUE; - } - } - } - - return FALSE; -} - -static pixman_bool_t -fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static const pixman_iter_info_t fast_iters[] = { - pixman_image_t *image = iter->image; - - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * 
iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == - (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) - { - iter->get_scanline = fast_dest_fetch_noop; - } - else - { - iter->get_scanline = f->get_scanline; - } - iter->write_back = f->write_back; - return TRUE; - } - } - } - return FALSE; -} - + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC, + _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL }, + + { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST, + _pixman_iter_init_bits_stride, + fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + + { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA, + _pixman_iter_init_bits_stride, + fast_dest_fetch_noop, fast_write_back_r5g6b5 }, + + { PIXMAN_null }, +}; pixman_implementation_t * _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) @@ -2351,8 +2289,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); imp->fill = fast_path_fill; - imp->src_iter_init = fast_src_iter_init; - imp->dest_iter_init = fast_dest_iter_init; + imp->iter_info = fast_iters; return imp; } diff --git a/pixman/pixman/pixman-filter.c b/pixman/pixman/pixman-filter.c index 26b39d571..5ff7b6eaa 100644 --- a/pixman/pixman/pixman-filter.c +++ b/pixman/pixman/pixman-filter.c @@ -28,7 +28,9 @@ #include <stdio.h> #include <math.h> #include <assert.h> +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include "pixman-private.h" typedef double (* kernel_func_t) (double x); diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c index 93a1b9acf..4da5da5e2 100644 --- a/pixman/pixman/pixman-general.c +++ b/pixman/pixman/pixman-general.c @@ -37,43 +37,47 @@ #include <string.h> #include "pixman-private.h" -static pixman_bool_t -general_src_iter_init 
(pixman_implementation_t *imp, pixman_iter_t *iter) +static void +general_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; - if (image->type == LINEAR) - _pixman_linear_gradient_iter_init (image, iter); - else if (image->type == RADIAL) + switch (image->type) + { + case BITS: + if ((iter->iter_flags & ITER_SRC) == ITER_SRC) + _pixman_bits_image_src_iter_init (image, iter); + else + _pixman_bits_image_dest_iter_init (image, iter); + break; + + case LINEAR: + _pixman_linear_gradient_iter_init (image, iter); + break; + + case RADIAL: _pixman_radial_gradient_iter_init (image, iter); - else if (image->type == CONICAL) + break; + + case CONICAL: _pixman_conical_gradient_iter_init (image, iter); - else if (image->type == BITS) - _pixman_bits_image_src_iter_init (image, iter); - else if (image->type == SOLID) + break; + + case SOLID: _pixman_log_error (FUNC, "Solid image not handled by noop"); - else - _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); + break; - return TRUE; + default: + _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); + break; + } } -static pixman_bool_t -general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static const pixman_iter_info_t general_iters[] = { - if (iter->image->type == BITS) - { - _pixman_bits_image_dest_iter_init (iter->image, iter); - - return TRUE; - } - else - { - _pixman_log_error (FUNC, "Trying to write to a non-writable image"); - - return FALSE; - } -} + { PIXMAN_any, 0, 0, general_iter_init, NULL, NULL }, + { PIXMAN_null }, +}; typedef struct op_info_t op_info_t; struct op_info_t @@ -116,7 +120,7 @@ general_composite_rect (pixman_implementation_t *imp, pixman_iter_t src_iter, mask_iter, dest_iter; pixman_combine_32_func_t compose; pixman_bool_t component_alpha; - iter_flags_t narrow, src_iter_flags; + iter_flags_t width_flag, src_iter_flags; int Bpp; int i; @@ -124,12 +128,12 @@ general_composite_rect (pixman_implementation_t 
*imp, (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) && (dest_image->common.flags & FAST_PATH_NARROW_FORMAT)) { - narrow = ITER_NARROW; + width_flag = ITER_NARROW; Bpp = 4; } else { - narrow = 0; + width_flag = ITER_WIDE; Bpp = 16; } @@ -145,7 +149,7 @@ general_composite_rect (pixman_implementation_t *imp, mask_buffer = src_buffer + width * Bpp; dest_buffer = mask_buffer + width * Bpp; - if (!narrow) + if (width_flag == ITER_WIDE) { /* To make sure there aren't any NANs in the buffers */ memset (src_buffer, 0, width * Bpp); @@ -154,11 +158,12 @@ general_composite_rect (pixman_implementation_t *imp, } /* src iter */ - src_iter_flags = narrow | op_flags[op].src; + src_iter_flags = width_flag | op_flags[op].src | ITER_SRC; - _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image, - src_x, src_y, width, height, - src_buffer, src_iter_flags, info->src_flags); + _pixman_implementation_iter_init (imp->toplevel, &src_iter, src_image, + src_x, src_y, width, height, + src_buffer, src_iter_flags, + info->src_flags); /* mask iter */ if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == @@ -176,17 +181,19 @@ general_composite_rect (pixman_implementation_t *imp, mask_image->common.component_alpha && PIXMAN_FORMAT_RGB (mask_image->bits.format); - _pixman_implementation_src_iter_init ( - imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height, - mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB), info->mask_flags); + _pixman_implementation_iter_init ( + imp->toplevel, &mask_iter, + mask_image, mask_x, mask_y, width, height, mask_buffer, + ITER_SRC | width_flag | (component_alpha? 
0 : ITER_IGNORE_RGB), + info->mask_flags); /* dest iter */ - _pixman_implementation_dest_iter_init ( + _pixman_implementation_iter_init ( imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height, - dest_buffer, narrow | op_flags[op].dst, info->dest_flags); + dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info->dest_flags); compose = _pixman_implementation_lookup_combiner ( - imp->toplevel, op, component_alpha, narrow); + imp->toplevel, op, component_alpha, width_flag != ITER_WIDE); for (i = 0; i < height; ++i) { @@ -219,8 +226,7 @@ _pixman_implementation_create_general (void) _pixman_setup_combiner_functions_32 (imp); _pixman_setup_combiner_functions_float (imp); - imp->src_iter_init = general_src_iter_init; - imp->dest_iter_init = general_dest_iter_init; + imp->iter_info = general_iters; return imp; } diff --git a/pixman/pixman/pixman-image.c b/pixman/pixman/pixman-image.c index 65041b43b..4f9c2f966 100644 --- a/pixman/pixman/pixman-image.c +++ b/pixman/pixman/pixman-image.c @@ -502,8 +502,10 @@ compute_image_info (pixman_image_t *image) break; } - /* Alpha map */ - if (!image->common.alpha_map) + /* Alpha maps are only supported for BITS images, so it's always + * safe to ignore their presense for non-BITS images + */ + if (!image->common.alpha_map || image->type != BITS) { flags |= FAST_PATH_NO_ALPHA_MAP; } @@ -918,10 +920,10 @@ _pixman_image_get_solid (pixman_implementation_t *imp, pixman_iter_t iter; otherwise: - _pixman_implementation_src_iter_init ( + _pixman_implementation_iter_init ( imp, &iter, image, 0, 0, 1, 1, (uint8_t *)&result, - ITER_NARROW, image->common.flags); + ITER_NARROW | ITER_SRC, image->common.flags); result = *iter.get_scanline (&iter, NULL); } diff --git a/pixman/pixman/pixman-implementation.c b/pixman/pixman/pixman-implementation.c index cfb82bb1f..160847ad0 100644 --- a/pixman/pixman/pixman-implementation.c +++ b/pixman/pixman/pixman-implementation.c @@ -285,18 +285,26 @@ _pixman_implementation_fill 
(pixman_implementation_t *imp, return FALSE; } -pixman_bool_t -_pixman_implementation_src_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t iter_flags, - uint32_t image_flags) +static uint32_t * +get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) { + return NULL; +} + +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t iter_flags, + uint32_t image_flags) +{ + pixman_format_code_t format; + iter->image = image; iter->buffer = (uint32_t *)buffer; iter->x = x; @@ -306,47 +314,38 @@ _pixman_implementation_src_iter_init (pixman_implementation_t *imp, iter->iter_flags = iter_flags; iter->image_flags = image_flags; - while (imp) + if (!iter->image) { - if (imp->src_iter_init && (*imp->src_iter_init) (imp, iter)) - return TRUE; - - imp = imp->fallback; + iter->get_scanline = get_scanline_null; + return; } - return FALSE; -} - -pixman_bool_t -_pixman_implementation_dest_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t iter_flags, - uint32_t image_flags) -{ - iter->image = image; - iter->buffer = (uint32_t *)buffer; - iter->x = x; - iter->y = y; - iter->width = width; - iter->height = height; - iter->iter_flags = iter_flags; - iter->image_flags = image_flags; + format = iter->image->common.extended_format_code; while (imp) { - if (imp->dest_iter_init && (*imp->dest_iter_init) (imp, iter)) - return TRUE; - - imp = imp->fallback; + if (imp->iter_info) + { + const pixman_iter_info_t *info; + + for (info = imp->iter_info; info->format != PIXMAN_null; ++info) + { + if ((info->format == PIXMAN_any || info->format == format) && + (info->image_flags & image_flags) == info->image_flags && + 
(info->iter_flags & iter_flags) == info->iter_flags) + { + iter->get_scanline = info->get_scanline; + iter->write_back = info->write_back; + + if (info->initializer) + info->initializer (iter, info); + return; + } + } + } + + imp = imp->fallback; } - - return FALSE; } pixman_bool_t diff --git a/pixman/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman/pixman-mips-dspr2-asm.S index 3adbb2afe..866e93e58 100644 --- a/pixman/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman/pixman-mips-dspr2-asm.S @@ -699,6 +699,127 @@ LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips) END(pixman_composite_src_0888_0565_rev_asm_mips) #endif +LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips) +/* + * a0 - dst (a8b8g8r8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + srl t2, t0, 24 + srl t3, t1, 24 + + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 + + sll t0, t0, 8 + sll t1, t1, 8 + andi t2, t2, 0xff + andi t3, t3, 0xff + or t0, t0, t2 + or t1, t1, t3 + wsbh t0, t0 + wsbh t1, t1 + rotr t0, t0, 16 + rotr t1, t1, 16 + sw t0, 0(a0) + sw t1, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + srl t1, t0, 24 + + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 + + sll t0, t0, 8 + andi t1, t1, 0xff + or t0, t0, t1 + wsbh t0, t0 + rotr t0, t0, 16 + sw t0, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_pixbuf_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + srl t2, t0, 24 + srl t3, t1, 24 + + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, 
t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 + + sll t0, t0, 8 + sll t1, t1, 8 + andi t2, t2, 0xff + andi t3, t3, 0xff + or t0, t0, t2 + or t1, t1, t3 + rotr t0, t0, 8 + rotr t1, t1, 8 + sw t0, 0(a0) + sw t1, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + srl t1, t0, 24 + + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 + + sll t0, t0, 8 + andi t1, t1, 0xff + or t0, t0, t1 + rotr t0, t0, 8 + sw t0, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_rpixbuf_8888_asm_mips) + LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips) /* * a0 - dst (a8r8g8b8) @@ -840,34 +961,35 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) * a3 - w */ - SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 - beqz a3, 4f + beqz a3, 8f nop + SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 + li t6, 0xff addiu t7, zero, -1 /* t7 = 0xffffffff */ srl t8, a1, 24 /* t8 = srca */ li t9, 0x00ff00ff + addiu t1, a3, -1 - beqz t1, 3f /* last pixel */ + beqz t1, 4f /* last pixel */ nop - beq t8, t6, 2f /* if (srca == 0xff) */ - nop -1: - /* a1 = src */ + +0: lw t0, 0(a2) /* t0 = mask */ lw t1, 4(a2) /* t1 = mask */ + addiu a3, a3, -2 /* w = w - 2 */ or t2, t0, t1 - beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */ + beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ addiu a2, a2, 8 - and t3, t0, t1 - move t4, a1 /* t4 = src */ - move t5, a1 /* t5 = src */ + and t2, t0, t1 + beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ + nop + +//if(ma) lw t2, 0(a0) /* t2 = dst */ - beq t3, t7, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - lw t3, 4(a0) /* t3 = dst */ + lw t3, 4(a0) /* t3 = dst */ MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 -11: not t0, t0 not t1, t1 MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 @@ -875,62 +997,79 @@ 
LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) addu_s.qb t3, t5, t3 sw t2, 0(a0) sw t3, 4(a0) -12: - addiu a3, a3, -2 addiu t1, a3, -1 - bgtz t1, 1b + bgtz t1, 0b addiu a0, a0, 8 - b 3f + b 4f + nop +1: +//if (t0 == 0xffffffff) && (t1 == 0xffffffff): + beq t8, t6, 2f /* if (srca == 0xff) */ nop -2: - /* a1 = src */ - lw t0, 0(a2) /* t0 = mask */ - lw t1, 4(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */ - addiu a2, a2, 8 - and t2, t0, t1 - move t4, a1 - beq t2, t7, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - move t5, a1 lw t2, 0(a0) /* t2 = dst */ lw t3, 4(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 - not t0, t0 - not t1, t1 - MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 - addu_s.qb t4, t4, t2 - addu_s.qb t5, t5, t3 -21: - sw t4, 0(a0) - sw t5, 4(a0) -22: - addiu a3, a3, -2 + not t0, a1 + not t1, a1 + srl t0, t0, 24 + srl t1, t1, 24 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, a1, t2 + addu_s.qb t3, a1, t3 + sw t2, 0(a0) + sw t3, 4(a0) addiu t1, a3, -1 - bgtz t1, 2b + bgtz t1, 0b addiu a0, a0, 8 + b 4f + nop +2: + sw a1, 0(a0) + sw a1, 4(a0) 3: - blez a3, 4f + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 8 + +4: + beqz a3, 7f nop /* a1 = src */ - lw t1, 0(a2) /* t1 = mask */ - beqz t1, 4f + lw t0, 0(a2) /* t0 = mask */ + beqz t0, 7f /* if (t0 == 0) */ nop - move t2, a1 /* t2 = src */ - beq t1, t7, 31f - lw t0, 0(a0) /* t0 = dst */ - - MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6 - MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5 -31: - not t1, t1 - MIPS_UN8x4_MUL_UN8x4 t0, t1, t0, t9, t3, t4, t5, t6 - addu_s.qb t0, t2, t0 - sw t0, 0(a0) -4: + beq t0, t7, 5f /* if (t0 == 0xffffffff) */ + nop +//if(ma) + lw t1, 0(a0) /* t1 = dst */ + MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 + MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 + not t0, t0 + MIPS_UN8x4_MUL_UN8x4 t1, 
t0, t1, t9, t3, t4, t5, s0 + addu_s.qb t1, t2, t1 + sw t1, 0(a0) RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 j ra nop +5: +//if (t0 == 0xffffffff) + beq t8, t6, 6f /* if (srca == 0xff) */ + nop + lw t1, 0(a0) /* t1 = dst */ + not t0, a1 + srl t0, t0, 24 + MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4 + addu_s.qb t1, a1, t1 + sw t1, 0(a0) + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 + j ra + nop +6: + sw a1, 0(a0) +7: + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 +8: + j ra + nop END(pixman_composite_over_n_8888_8888_ca_asm_mips) @@ -942,106 +1081,126 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips) * a3 - w */ - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 - beqz a3, 4f + beqz a3, 8f nop - li t5, 0xf800f800 - li t6, 0x07e007e0 - li t7, 0x001F001F - li t9, 0x00ff00ff + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + li t6, 0xff + addiu t7, zero, -1 /* t7 = 0xffffffff */ srl t8, a1, 24 /* t8 = srca */ + li t9, 0x00ff00ff + li s6, 0xf800f800 + li s7, 0x07e007e0 + li s8, 0x001F001F + addiu t1, a3, -1 - beqz t1, 3f /* last pixel */ + beqz t1, 4f /* last pixel */ nop - li s0, 0xff /* s0 = 0xff */ - addiu s1, zero, -1 /* s1 = 0xffffffff */ - beq t8, s0, 2f /* if (srca == 0xff) */ - nop -1: - /* a1 = src */ +0: lw t0, 0(a2) /* t0 = mask */ lw t1, 4(a2) /* t1 = mask */ + addiu a3, a3, -2 /* w = w - 2 */ or t2, t0, t1 - beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */ + beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ addiu a2, a2, 8 - and t3, t0, t1 - move s2, a1 /* s2 = src */ - move s3, a1 /* s3 = src */ + and t2, t0, t1 + beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ + nop + +//if(ma) lhu t2, 0(a0) /* t2 = dst */ - beq t3, s1, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - lhu t3, 2(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8 -11: + lhu t3, 2(a0) /* t3 = dst */ + 
MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 not t0, t0 not t1, t1 - CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1 - addu_s.qb s2, s2, s4 - addu_s.qb s3, s3, s5 - CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, t4, t2 + addu_s.qb t3, t5, t3 + CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 sh t2, 0(a0) sh t3, 2(a0) -12: - addiu a3, a3, -2 addiu t1, a3, -1 - bgtz t1, 1b + bgtz t1, 0b addiu a0, a0, 4 - b 3f + b 4f + nop +1: +//if (t0 == 0xffffffff) && (t1 == 0xffffffff): + beq t8, t6, 2f /* if (srca == 0xff) */ nop -2: - /* a1 = src */ - lw t0, 0(a2) /* t0 = mask */ - lw t1, 4(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */ - addiu a2, a2, 8 - and t3, t0, t1 - move t2, a1 - beq t3, s1, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - move t3, a1 lhu t2, 0(a0) /* t2 = dst */ lhu t3, 2(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8 - not t0, t0 - not t1, t1 - CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t2, t3 - addu_s.qb t2, s2, s4 - addu_s.qb t3, s3, s5 -21: - CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3 - sh t0, 0(a0) - sh t1, 2(a0) -22: - addiu a3, a3, -2 + not t0, a1 + not t1, a1 + srl t0, t0, 24 + srl t1, t1, 24 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, a1, t2 + addu_s.qb t3, a1, t3 + CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 + sh t2, 0(a0) + sh t3, 2(a0) 
addiu t1, a3, -1 - bgtz t1, 2b + bgtz t1, 0b addiu a0, a0, 4 + b 4f + nop +2: + CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1 + sh t2, 0(a0) + sh t2, 2(a0) 3: - blez a3, 4f + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 4 + +4: + beqz a3, 7f nop /* a1 = src */ - lw t1, 0(a2) /* t1 = mask */ - beqz t1, 4f + lw t0, 0(a2) /* t0 = mask */ + beqz t0, 7f /* if (t0 == 0) */ nop - move t2, a1 /* t2 = src */ - beq t1, t7, 31f - lhu t0, 0(a0) /* t0 = dst */ - - MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6 - MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5 -31: - not t1, t1 - CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3 - MIPS_UN8x4_MUL_UN8x4 s1, t1, t3, t9, t4, t5, t6, t7 - addu_s.qb t0, t2, t3 - CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3 - sh s1, 0(a0) -4: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + beq t0, t7, 5f /* if (t0 == 0xffffffff) */ + nop +//if(ma) + lhu t1, 0(a0) /* t1 = dst */ + MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 + MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 + not t0, t0 + CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 + MIPS_UN8x4_MUL_UN8x4 s1, t0, s1, t9, t3, t4, t5, s0 + addu_s.qb s1, t2, s1 + CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 + sh t1, 0(a0) + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop +5: +//if (t0 == 0xffffffff) + beq t8, t6, 6f /* if (srca == 0xff) */ + nop + lhu t1, 0(a0) /* t1 = dst */ + not t0, a1 + srl t0, t0, 24 + CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 + MIPS_UN8x4_MUL_UN8 s1, t0, s1, t9, t2, t3, t4 + addu_s.qb s1, a1, s1 + CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 + sh t1, 0(a0) + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop +6: + CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2 + sh t1, 0(a0) +7: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 +8: j ra nop @@ -2936,101 +3095,265 @@ END(pixman_composite_over_reverse_n_8888_asm_mips) LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips) /* * a0 - dst (a8) - * a1 - src (a8r8g8b8) + * a1 - src (32bit 
constant) * a2 - w */ - beqz a2, 5f + li t9, 0x00ff00ff + beqz a2, 3f nop - - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 - move t7, a1 - srl t5, t7, 24 - replv.ph t5, t5 - srl t9, a2, 2 /* t1 = how many multiples of 4 src pixels */ - beqz t9, 2f /* branch if less than 4 src pixels */ + srl t7, a2, 2 /* t7 = how many multiples of 4 dst pixels */ + beqz t7, 1f /* branch if less than 4 src pixels */ nop -1: - addiu t9, t9, -1 - addiu a2, a2, -4 + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz t7, 1f + addiu t7, t7, -1 lbu t0, 0(a0) lbu t1, 1(a0) lbu t2, 2(a0) lbu t3, 3(a0) - muleu_s.ph.qbl s0, t0, t5 - muleu_s.ph.qbr s1, t0, t5 - muleu_s.ph.qbl s2, t1, t5 - muleu_s.ph.qbr s3, t1, t5 - muleu_s.ph.qbl s4, t2, t5 - muleu_s.ph.qbr s5, t2, t5 - muleu_s.ph.qbl s6, t3, t5 - muleu_s.ph.qbr s7, t3, t5 - - shrl.ph t4, s0, 8 - shrl.ph t6, s1, 8 - shrl.ph t7, s2, 8 - shrl.ph t8, s3, 8 - addq.ph t0, s0, t4 - addq.ph t1, s1, t6 - addq.ph t2, s2, t7 - addq.ph t3, s3, t8 - shra_r.ph t0, t0, 8 - shra_r.ph t1, t1, 8 + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr.qb.ph t0, t3, t1 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 shra_r.ph t2, t2, 8 shra_r.ph t3, t3, 8 - shrl.ph t4, s4, 8 - shrl.ph t6, s5, 8 - shrl.ph t7, s6, 8 - shrl.ph t8, s7, 8 - addq.ph s0, s4, t4 - addq.ph s1, s5, t6 - addq.ph s2, s6, t7 - addq.ph s3, s7, t8 - shra_r.ph t4, s0, 8 - shra_r.ph t6, s1, 8 - shra_r.ph t7, s2, 8 - shra_r.ph t8, s3, 8 - - precr.qb.ph s0, t0, t1 - precr.qb.ph s1, t2, t3 - precr.qb.ph s2, t4, t6 - precr.qb.ph s3, t7, t8 + precr.qb.ph t2, t2, t3 - sb s0, 0(a0) - sb s1, 1(a0) - sb s2, 2(a0) - sb s3, 3(a0) - bgtz t9, 1b + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a2, a2, -4 + b 0b addiu a0, a0, 4 -2: - beqz a2, 4f + +1: + beqz a2, 3f nop -3: - lbu t1, 0(a0) + srl t8, a1, 24 
+2: + lbu t0, 0(a0) + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0x00ff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 - muleu_s.ph.qbl t4, t1, t5 - muleu_s.ph.qbr t7, t1, t5 - shrl.ph t6, t4, 8 - shrl.ph t0, t7, 8 - addq.ph t8, t4, t6 - addq.ph t9, t7, t0 - shra_r.ph t8, t8, 8 - shra_r.ph t9, t9, 8 - precr.qb.ph t2, t8, t9 sb t2, 0(a0) addiu a2, a2, -1 - bnez a2, 3b + bnez a2, 2b addiu a0, a0, 1 -4: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 -5: + +3: j ra nop END(pixman_composite_in_n_8_asm_mips) +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 + lw t8, 16(sp) /* t8 = unit_x */ + li t6, 0x00ff00ff + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + sra t1, a3, 16 /* t0 = vx >> 16 */ + sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + + OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3 + + sw t4, 0(a0) + sw t5, 4(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7 + + sw t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) + 
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1 + lw t8, 40(sp) /* t8 = unit_x */ + li t4, 0x00ff00ff + li t5, 0xf800f800 + li t6, 0x07e007e0 + li t7, 0x001F001F + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t0 = vx >> 16 */ + sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ + + CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3 + OVER_2x8888_2x8888 t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4 + CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2 + + sh v0, 0(a0) + sh v1, 2(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6 + OVER_8888_8888 t0, t2, t1, t4, t3, t5, t6, t7 + CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6 + + sh t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1 + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (r5g6b5) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, v0 + beqz a2, 3f + nop + + lw v0, 16(sp) /* v0 = unit_x */ + addiu t1, a2, -1 + beqz 
t1, 2f + nop + + li t4, 0x07e007e0 + li t5, 0x001F001F +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t1 = vx >> 16 */ + sll t1, t1, 1 /* t1 = t1 * 2 ((r5g6b5)) */ + addu t1, a1, t1 + lhu t1, 0(t1) /* t1 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + addiu a2, a2, -2 + + CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 + + sw t2, 0(a0) + sw t3, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + + CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 + + sw t1, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) + LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) /* * a0 - dst (r5g6b5) diff --git a/pixman/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman/pixman-mips-dspr2-asm.h index b330c0f0d..cab122d80 100644 --- a/pixman/pixman/pixman-mips-dspr2-asm.h +++ b/pixman/pixman/pixman-mips-dspr2-asm.h @@ -354,17 +354,16 @@ LEAF_MIPS32R2(symbol) \ out1_565, out2_565, \ maskR, maskG, maskB, \ scratch1, scratch2 - precrq.ph.w \scratch1, \in2_8888, \in1_8888 - precr_sra.ph.w \in2_8888, \in1_8888, 0 - shll.ph \scratch1, \scratch1, 8 - srl \in2_8888, \in2_8888, 3 - and \scratch2, \in2_8888, \maskB - and \scratch1, \scratch1, \maskR - srl \in2_8888, \in2_8888, 2 - and \out2_565, \in2_8888, \maskG - or \out2_565, \out2_565, \scratch2 - or \out1_565, \out2_565, \scratch1 - srl \out2_565, \out1_565, 16 + precr.qb.ph \scratch1, \in2_8888, \in1_8888 + precrq.qb.ph \in2_8888, \in2_8888, \in1_8888 + and \out1_565, \scratch1, \maskR + shrl.ph \scratch1, \scratch1, 3 + shll.ph \in2_8888, \in2_8888, 3 + and \scratch1, \scratch1, \maskB + or 
\out1_565, \out1_565, \scratch1 + and \in2_8888, \in2_8888, \maskG + or \out1_565, \out1_565, \in2_8888 + srl \out2_565, \out1_565, 16 .endm /* @@ -587,6 +586,36 @@ LEAF_MIPS32R2(symbol) \ addu_s.qb \out_8888, \out_8888, \s_8888 .endm +/* + * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two + * a8r8g8b8 destination pixels (d1_8888 and d2_8888). It also requires maskLSR + * needed for rounding process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_2x8888_2x8888 s1_8888, \ + s2_8888, \ + d1_8888, \ + d2_8888, \ + out1_8888, \ + out2_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, \ + scratch4, scratch5, scratch6 + not \scratch1, \s1_8888 + srl \scratch1, \scratch1, 24 + not \scratch2, \s2_8888 + srl \scratch2, \scratch2, 24 + MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \ + \scratch1, \scratch2, \ + \out1_8888, \out2_8888, \ + \maskLSR, \ + \scratch3, \scratch4, \scratch5, \ + \scratch6, \d1_8888, \d2_8888 + + addu_s.qb \out1_8888, \out1_8888, \s1_8888 + addu_s.qb \out2_8888, \out2_8888, \s2_8888 +.endm + .macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \ m_8, \ d_8888, \ diff --git a/pixman/pixman/pixman-mips-dspr2.c b/pixman/pixman/pixman-mips-dspr2.c index 1ea244576..e10c9df0a 100644 --- a/pixman/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman/pixman-mips-dspr2.c @@ -54,6 +54,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev, PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev, uint8_t, 3, uint16_t, 1) #endif +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888, + uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888, uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565, @@ -121,6 +125,13 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1, PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1, uint32_t, 1, 
uint32_t, 1) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER, + uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC, + uint16_t, uint32_t) + PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC, uint32_t, uint32_t) PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC, @@ -292,6 +303,10 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev), PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev), #endif + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, mips_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, mips_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888), PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mips_composite_src_n_8_8888), PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mips_composite_src_n_8_8888), PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mips_composite_src_n_8_8888), @@ -357,6 +372,22 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888), PIXMAN_STD_FAST_PATH (IN, solid, null, a8, mips_composite_in_n_8), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888), + + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565), + + 
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565), PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565), diff --git a/pixman/pixman/pixman-mips-dspr2.h b/pixman/pixman/pixman-mips-dspr2.h index 4ac9ff95d..955ed70b8 100644 --- a/pixman/pixman/pixman-mips-dspr2.h +++ b/pixman/pixman/pixman-mips-dspr2.h @@ -246,6 +246,48 @@ mips_composite_##name (pixman_implementation_t *imp, \ } \ } +/****************************************************************************/ + +#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \ + dst_type * dst, \ + const src_type * src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ + \ +static force_inline void \ +scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w, \ + vx, unit_x); \ +} \ + \ +FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, COVER) \ +FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, NONE) \ +FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \ + 
scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, PAD) + +/* Provide entries for the fast path table */ +#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) + + /*****************************************************************************/ #define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \ diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c index 14790c029..c94d282a9 100644 --- a/pixman/pixman/pixman-mmx.c +++ b/pixman/pixman/pixman-mmx.c @@ -301,6 +301,29 @@ negate (__m64 mask) return _mm_xor_si64 (mask, MC (4x00ff)); } +/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1 + * and maps its result to the same range. + * + * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner: + * Notation, Notation, Notation", the first of which is + * + * prod(a, b) = (a * b + 128) / 255. + * + * By approximating the division by 255 as 257/65536 it can be replaced by a + * multiply and a right shift. This is the implementation that we use in + * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended + * 3DNow!, and unavailable at the time of the book's publication) to perform + * the multiplication and right shift in a single operation. + * + * prod(a, b) = ((a * b + 128) * 257) >> 16. + * + * A third way (how pix_multiply() was implemented prior to 14208344) exists + * also that performs the multiplication by 257 with adds and shifts. + * + * Where temp = a * b + 128 + * + * prod(a, b) = (temp + (temp >> 8)) >> 8. 
+ */ static force_inline __m64 pix_multiply (__m64 a, __m64 b) { @@ -3899,52 +3922,23 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } -typedef struct -{ - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; -} fetcher_info_t; - -static const fetcher_info_t fetchers[] = -{ - { PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 }, - { PIXMAN_r5g6b5, mmx_fetch_r5g6b5 }, - { PIXMAN_a8, mmx_fetch_a8 }, - { PIXMAN_null } -}; - -static pixman_bool_t -mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) -{ - pixman_image_t *image = iter->image; - -#define FLAGS \ +#define IMAGE_FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - iter->get_scanline = f->get_scanline; - return TRUE; - } - } - } - - return FALSE; -} +static const pixman_iter_info_t mmx_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL + }, + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL + }, + { PIXMAN_null }, +}; static const pixman_fast_path_t mmx_fast_paths[] = { @@ -4074,7 +4068,7 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback) imp->blt = mmx_blt; imp->fill = mmx_fill; - imp->src_iter_init = mmx_src_iter_init; + imp->iter_info = mmx_iters; return imp; } diff --git a/pixman/pixman/pixman-noop.c b/pixman/pixman/pixman-noop.c index 
e39996d9d..e59890492 100644 --- a/pixman/pixman/pixman-noop.c +++ b/pixman/pixman/pixman-noop.c @@ -37,12 +37,6 @@ noop_composite (pixman_implementation_t *imp, return; } -static void -dest_write_back_direct (pixman_iter_t *iter) -{ - iter->buffer += iter->image->bits.rowstride; -} - static uint32_t * noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) { @@ -53,110 +47,102 @@ noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) return result; } -static uint32_t * -get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) -{ - return NULL; +static void +noop_init_solid_narrow (pixman_iter_t *iter, + const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + uint32_t *buffer = iter->buffer; + uint32_t *end = buffer + iter->width; + uint32_t color; + + if (iter->image->type == SOLID) + color = image->solid.color_32; + else + color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); + + while (buffer < end) + *(buffer++) = color; } -static pixman_bool_t -noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +noop_init_solid_wide (pixman_iter_t *iter, + const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; + argb_t *buffer = (argb_t *)iter->buffer; + argb_t *end = buffer + iter->width; + argb_t color; -#define FLAGS \ - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM) - - if (!image) - { - iter->get_scanline = get_scanline_null; - } - else if ((iter->iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == - (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) - { - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else if (image->common.extended_format_code == PIXMAN_solid && - (iter->image->type == SOLID || - (iter->image_flags & FAST_PATH_NO_ALPHA_MAP))) - { - if (iter->iter_flags & ITER_NARROW) - { - uint32_t *buffer = iter->buffer; - uint32_t *end = buffer + iter->width; - uint32_t color; - - if (image->type == SOLID) - color = image->solid.color_32; - else - color = 
image->bits.fetch_pixel_32 (&image->bits, 0, 0); - - while (buffer < end) - *(buffer++) = color; - } - else - { - argb_t *buffer = (argb_t *)iter->buffer; - argb_t *end = buffer + iter->width; - argb_t color; - - if (image->type == SOLID) - color = image->solid.color_float; - else - color = image->bits.fetch_pixel_float (&image->bits, 0, 0); - - while (buffer < end) - *(buffer++) = color; - } - - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else if (image->common.extended_format_code == PIXMAN_a8r8g8b8 && - (iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS && - iter->x >= 0 && iter->y >= 0 && - iter->x + iter->width <= image->bits.width && - iter->y + iter->height <= image->bits.height) - { - iter->buffer = - image->bits.bits + iter->y * image->bits.rowstride + iter->x; - - iter->get_scanline = noop_get_scanline; - } + if (iter->image->type == SOLID) + color = image->solid.color_float; else - { - return FALSE; - } + color = image->bits.fetch_pixel_float (&image->bits, 0, 0); - return TRUE; + while (buffer < end) + *(buffer++) = color; } -static pixman_bool_t -noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +noop_init_direct_buffer (pixman_iter_t *iter, const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; - uint32_t image_flags = iter->image_flags; - uint32_t iter_flags = iter->iter_flags; - - if ((image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS && - (iter_flags & ITER_NARROW) == ITER_NARROW && - ((image->common.extended_format_code == PIXMAN_a8r8g8b8) || - (image->common.extended_format_code == PIXMAN_x8r8g8b8 && - (iter_flags & (ITER_LOCALIZED_ALPHA))))) - { - iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x; - - iter->get_scanline = _pixman_iter_get_scanline_noop; - iter->write_back = dest_write_back_direct; - - return TRUE; - } - else - { - return FALSE; - } + + iter->buffer = + image->bits.bits + iter->y * 
image->bits.rowstride + iter->x; } +static void +dest_write_back_direct (pixman_iter_t *iter) +{ + iter->buffer += iter->image->bits.rowstride; +} + +static const pixman_iter_info_t noop_iters[] = +{ + /* Source iters */ + { PIXMAN_any, + 0, ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_SRC, + NULL, + _pixman_iter_get_scanline_noop, + NULL + }, + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, ITER_NARROW | ITER_SRC, + noop_init_solid_narrow, + _pixman_iter_get_scanline_noop, + NULL, + }, + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, ITER_WIDE | ITER_SRC, + noop_init_solid_wide, + _pixman_iter_get_scanline_noop, + NULL + }, + { PIXMAN_a8r8g8b8, + FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, + ITER_NARROW | ITER_SRC, + noop_init_direct_buffer, + noop_get_scanline, + NULL + }, + /* Dest iters */ + { PIXMAN_a8r8g8b8, + FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST, + noop_init_direct_buffer, + _pixman_iter_get_scanline_noop, + dest_write_back_direct + }, + { PIXMAN_x8r8g8b8, + FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_LOCALIZED_ALPHA, + noop_init_direct_buffer, + _pixman_iter_get_scanline_noop, + dest_write_back_direct + }, + { PIXMAN_null }, +}; + static const pixman_fast_path_t noop_fast_paths[] = { { PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite }, @@ -169,8 +155,7 @@ _pixman_implementation_create_noop (pixman_implementation_t *fallback) pixman_implementation_t *imp = _pixman_implementation_create (fallback, noop_fast_paths); - imp->src_iter_init = noop_src_iter_init; - imp->dest_iter_init = noop_dest_iter_init; + imp->iter_info = noop_iters; return imp; } diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index 6d9c05321..af4a0b6e0 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -212,7 +212,8 @@ typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter); typedef enum { - ITER_NARROW = (1 << 
0), + ITER_NARROW = (1 << 0), + ITER_WIDE = (1 << 1), /* "Localized alpha" is when the alpha channel is used only to compute * the alpha value of the destination. This means that the computation @@ -229,9 +230,15 @@ typedef enum * we can treat it as if it were ARGB, which means in some cases we can * avoid copying it to a temporary buffer. */ - ITER_LOCALIZED_ALPHA = (1 << 1), - ITER_IGNORE_ALPHA = (1 << 2), - ITER_IGNORE_RGB = (1 << 3) + ITER_LOCALIZED_ALPHA = (1 << 2), + ITER_IGNORE_ALPHA = (1 << 3), + ITER_IGNORE_RGB = (1 << 4), + + /* These indicate whether the iterator is for a source + * or a destination image + */ + ITER_SRC = (1 << 5), + ITER_DEST = (1 << 6) } iter_flags_t; struct pixman_iter_t @@ -255,6 +262,19 @@ struct pixman_iter_t int stride; }; +typedef struct pixman_iter_info_t pixman_iter_info_t; +typedef void (* pixman_iter_initializer_t) (pixman_iter_t *iter, + const pixman_iter_info_t *info); +struct pixman_iter_info_t +{ + pixman_format_code_t format; + uint32_t image_flags; + iter_flags_t iter_flags; + pixman_iter_initializer_t initializer; + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; +}; + void _pixman_bits_image_setup_accessors (bits_image_t *image); @@ -454,8 +474,6 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp, int width, int height, uint32_t filler); -typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp, - pixman_iter_t *iter); void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp); void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp); @@ -477,11 +495,10 @@ struct pixman_implementation_t pixman_implementation_t * toplevel; pixman_implementation_t * fallback; const pixman_fast_path_t * fast_paths; + const pixman_iter_info_t * iter_info; pixman_blt_func_t blt; pixman_fill_func_t fill; - pixman_iter_init_func_t src_iter_init; - pixman_iter_init_func_t dest_iter_init; pixman_combine_32_func_t 
combine_32[PIXMAN_N_OPERATORS]; pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS]; @@ -542,29 +559,17 @@ _pixman_implementation_fill (pixman_implementation_t *imp, int height, uint32_t filler); -pixman_bool_t -_pixman_implementation_src_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t flags, - uint32_t image_flags); - -pixman_bool_t -_pixman_implementation_dest_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t flags, - uint32_t image_flags); +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t flags, + uint32_t image_flags); /* Specific implementations */ pixman_implementation_t * @@ -647,6 +652,9 @@ _pixman_compute_composite_region32 (pixman_region32_t * region, uint32_t * _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info); + /* These "formats" all have depth 0, so they * will never clash with any real ones */ diff --git a/pixman/pixman/pixman-region.c b/pixman/pixman/pixman-region.c index 2d6f1571c..59bc9c797 100644 --- a/pixman/pixman/pixman-region.c +++ b/pixman/pixman/pixman-region.c @@ -1858,7 +1858,7 @@ pixman_region_subtract_o (region_type_t * region, else if (r2->x1 <= x1) { /* - * Subtrahend preceeds minuend: nuke left edge of minuend. + * Subtrahend precedes minuend: nuke left edge of minuend. 
*/ x1 = r2->x2; if (x1 >= r1->x2) @@ -1982,7 +1982,7 @@ PREFIX (_subtract) (region_type_t *reg_d, } /* Add those rectangles in region 1 that aren't in region 2, - do yucky substraction for overlaps, and + do yucky subtraction for overlaps, and just throw away rectangles in region 2 that aren't in region 1 */ if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE)) return FALSE; @@ -2042,7 +2042,7 @@ PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ } /* Add those rectangles in region 1 that aren't in region 2, - * do yucky substraction for overlaps, and + * do yucky subtraction for overlaps, and * just throw away rectangles in region 2 that aren't in region 1 */ inv_reg.extents = *inv_rect; diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index c7e9a4bb2..dde923524 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -5554,19 +5554,27 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, scaled_nearest_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE) -#define BMSK ((1 << BILINEAR_INTERPOLATION_BITS) - 1) - -#define BILINEAR_DECLARE_VARIABLES \ +#if BILINEAR_INTERPOLATION_BITS < 8 +# define BILINEAR_DECLARE_VARIABLES \ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ - const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);\ - const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ - const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);\ - const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_ux = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ + unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_zero = _mm_setzero_si128 (); \ + __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, 
-(vx + 1), \ + vx, -(vx + 1), vx, -(vx + 1)) +#else +# define BILINEAR_DECLARE_VARIABLES \ + const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ + const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \ - unit_x, unit_x, unit_x, unit_x); \ + -unit_x, -unit_x, -unit_x, -unit_x); \ const __m128i xmm_zero = _mm_setzero_si128 (); \ - __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx) + __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, \ + -(vx + 1), -(vx + 1), -(vx + 1), -(vx + 1)) +#endif #define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ do { \ @@ -5585,8 +5593,8 @@ do { \ if (BILINEAR_INTERPOLATION_BITS < 8) \ { \ /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, \ - _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ /* horizontal interpolation */ \ a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ @@ -5595,8 +5603,8 @@ do { \ else \ { \ /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, \ - _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ /* horizontal interpolation */ \ xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ @@ -6332,52 +6340,23 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } -typedef struct -{ - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; -} fetcher_info_t; - -static const fetcher_info_t fetchers[] = -{ - { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 }, - { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 }, - { PIXMAN_a8, 
sse2_fetch_a8 }, - { PIXMAN_null } -}; - -static pixman_bool_t -sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) -{ - pixman_image_t *image = iter->image; - -#define FLAGS \ +#define IMAGE_FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - iter->get_scanline = f->get_scanline; - return TRUE; - } - } - } - - return FALSE; -} +static const pixman_iter_info_t sse2_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL + }, + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL + }, + { PIXMAN_null }, +}; #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) __attribute__((__force_align_arg_pointer__)) @@ -6435,7 +6414,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback) imp->blt = sse2_blt; imp->fill = sse2_fill; - imp->src_iter_init = sse2_src_iter_init; + imp->iter_info = sse2_iters; return imp; } diff --git a/pixman/pixman/pixman-utils.c b/pixman/pixman/pixman-utils.c index f31171f6d..98723a800 100644 --- a/pixman/pixman/pixman-utils.c +++ b/pixman/pixman/pixman-utils.c @@ -214,6 +214,17 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; 
+ uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (info->format) / 8; + iter->stride = s; +} + #define N_TMP_BOXES (16) pixman_bool_t diff --git a/pixman/pixman/pixman-vmx.c b/pixman/pixman/pixman-vmx.c index 6868704a8..f629003ab 100644 --- a/pixman/pixman/pixman-vmx.c +++ b/pixman/pixman/pixman-vmx.c @@ -25,7 +25,9 @@ * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell */ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include "pixman-private.h" #include "pixman-combine32.h" #include <altivec.h> diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c index 184f0c4e6..9555ceaaf 100644 --- a/pixman/pixman/pixman.c +++ b/pixman/pixman/pixman.c @@ -605,7 +605,7 @@ pixman_image_composite32 (pixman_op_t op, else { mask_format = PIXMAN_null; - info.mask_flags = FAST_PATH_IS_OPAQUE; + info.mask_flags = FAST_PATH_IS_OPAQUE | FAST_PATH_NO_ALPHA_MAP; } dest_format = dest->common.extended_format_code; diff --git a/pixman/pixman/refactor b/pixman/pixman/refactor deleted file mode 100644 index 52fceab17..000000000 --- a/pixman/pixman/refactor +++ /dev/null @@ -1,478 +0,0 @@ -Roadmap - -- Move all the fetchers etc. into pixman-image to make pixman-compose.c - less intimidating. - - DONE - -- Make combiners for unified alpha take a mask argument. That way - we won't need two separate paths for unified vs component in the - general compositing code. - - DONE, except that the Altivec code needs to be updated. Luca is - looking into that. - -- Delete separate 'unified alpha' path - - DONE - -- Split images into their own files - - DONE - -- Split the gradient walker code out into its own file - - DONE - -- Add scanline getters per image - - DONE - -- Generic 64 bit fetcher - - DONE - -- Split fast path tables into their respective architecture dependent - files. 
- -See "Render Algorithm" below for rationale - -Images will eventually have these virtual functions: - - get_scanline() - get_scanline_wide() - get_pixel() - get_pixel_wide() - get_untransformed_pixel() - get_untransformed_pixel_wide() - get_unfiltered_pixel() - get_unfiltered_pixel_wide() - - store_scanline() - store_scanline_wide() - -1. - -Initially we will just have get_scanline() and get_scanline_wide(); -these will be based on the ones in pixman-compose. Hopefully this will -reduce the complexity in pixman_composite_rect_general(). - -Note that there are access considerations - the compose function is -being compiled twice. - - -2. - -Split image types into their own source files. Export noop virtual -reinit() call. Call this whenever a property of the image changes. - - -3. - -Split the get_scanline() call into smaller functions that are -initialized by the reinit() call. - -The Render Algorithm: - (first repeat, then filter, then transform, then clip) - -Starting from a destination pixel (x, y), do - - 1 x = x - xDst + xSrc - y = y - yDst + ySrc - - 2 reject pixel that is outside the clip - - This treats clipping as something that happens after - transformation, which I think is correct for client clips. For - hierarchy clips it is wrong, but who really cares? Without - GraphicsExposes hierarchy clips are basically irrelevant. Yes, - you could imagine cases where the pixels of a subwindow of a - redirected, transformed window should be treated as - transparent. I don't really care - - Basically, I think the render spec should say that pixels that - are unavailable due to the hierarchy have undefined content, - and that GraphicsExposes are not generated. Ie., basically - that using non-redirected windows as sources is fail. This is - at least consistent with the current implementation and we can - update the spec later if someone makes it work. - - The implication for render is that it should stop passing the - hierarchy clip to pixman.
In pixman, if a source image has a - clip it should be used in computing the composite region and - nowhere else, regardless of what "has_client_clip" says. The - default should be for there to not be any clip. - - I would really like to get rid of the client clip as well for - source images, but unfortunately there is at least one - application in the wild that uses them. - - 3 Transform pixel: (x, y) = T(x, y) - - 4 Call p = GetUntransformedPixel (x, y) - - 5 If the image has an alpha map, then - - Call GetUntransformedPixel (x, y) on the alpha map - - add resulting alpha channel to p - - return p - - Where GetUntransformedPixel is: - - 6 switch (filter) - { - case NEAREST: - return GetUnfilteredPixel (x, y); - break; - - case BILINEAR: - return GetUnfilteredPixel (...) // 4 times - break; - - case CONVOLUTION: - return GetUnfilteredPixel (...) // as many times as necessary. - break; - } - - Where GetUnfilteredPixel (x, y) is - - 7 switch (repeat) - { - case REPEAT_NORMAL: - case REPEAT_PAD: - case REPEAT_REFLECT: - // adjust x, y as appropriate - break; - - case REPEAT_NONE: - if (x, y) is outside image bounds - return 0; - break; - } - - return GetRawPixel(x, y) - - Where GetRawPixel (x, y) is - - 8 Compute the pixel in question, depending on image type. - -For gradients, repeat has a totally different meaning, so -UnfilteredPixel() and RawPixel() must be the same function so that -gradients can do their own repeat algorithm. - -So, the GetRawPixel - - for bits must deal with repeats - for gradients must deal with repeats (differently) - for solids, should ignore repeats. - - for polygons, when we add them, either ignore repeats or do - something similar to bits (in which case, we may want an extra - layer of indirection to modify the coordinates). - -It is then possible to build things like "get scanline" or "get tile" on -top of this.
In the simplest case, just repeatedly calling GetPixel() -would work, but specialized get_scanline()s or get_tile()s could be -plugged in for common cases. - -By not plugging anything in for images with access functions, we only -have to compile the pixel functions twice, not the scanline functions. - -And we can get rid of fetchers for the bizarre formats that no one -uses. Such as b2g3r3 etc. r1g2b1? Seriously? It is also worth -considering a generic format based pixel fetcher for these edge cases. - -Since the actual routines depend on the image attributes, the images -must be notified when those change and update their function pointers -appropriately. So there should probably be a virtual function called -(* reinit) or something like that. - -There will also be wide fetchers for both pixels and lines. The line -fetcher will just call the wide pixel fetcher. The wide pixel fetcher -will just call expand, except for 10 bit formats. - -Rendering pipeline: - -Drawable: - 0. if (picture has alpha map) - 0.1. Position alpha map according to the alpha_x/alpha_y - 0.2. Where the two drawables intersect, the alpha channel - Replace the alpha channel of source with the one - from the alpha map. Replacement only takes place - in the intersection of the two drawables' geometries. - 1. Repeat the drawable according to the repeat attribute - 2. Reconstruct a continuous image according to the filter - 3. Transform according to the transform attribute - 4. Position image such that src_x, src_y is over dst_x, dst_y - 5. Sample once per destination pixel - 6. Clip. If a pixel is not within the source clip, then no - compositing takes place at that pixel. (Ie., it's *not* - treated as 0). - - Sampling a drawable: - - - If the channel does not have an alpha channel, the pixels in it - are treated as opaque. - - Note on reconstruction: - - - The top left pixel has coordinates (0.5, 0.5) and pixels are - spaced 1 apart. - -Gradient: - 1. 
Unless gradient type is conical, repeat the underlying (0, 1) - gradient according to the repeat attribute - 2. Integrate the gradient across the plane according to type. - 3. Transform according to transform attribute - 4. Position gradient - 5. Sample once per destination pixel. - 6. Clip - -Solid Fill: - 1. Repeat has no effect - 2. Image is already continuous and defined for the entire plane - 3. Transform has no effect - 4. Positioning has no effect - 5. Sample once per destination pixel. - 6. Clip - -Polygon: - 1. Repeat has no effect - 2. Image is already continuous and defined on the whole plane - 3. Transform according to transform attribute - 4. Position image - 5. Supersample 15x17 per destination pixel. - 6. Clip - -Possibly interesting additions: - - More general transformations, such as warping, or general - shading. - - - Shader image where a function is called to generate the - pixel (ie., uploading assembly code). - - - Resampling kernels - - In principle the polygon image uses a 15x17 box filter for - resampling. If we allow general resampling filters, then we - get all the various antialiasing types for free. - - Bilinear downsampling looks terrible and could be much - improved by a resampling filter. NEAREST reconstruction - combined with a box resampling filter is what GdkPixbuf - does, I believe. - - Useful for high frequency gradients as well. - - (Note that the difference between a reconstruction and a - resampling filter is mainly where in the pipeline they - occur. High quality resampling should use a correctly - oriented kernel so it should happen after transformation. - - An implementation can transform the resampling kernel and - convolve it with the reconstruction if it so desires, but it - will need to deal with the fact that the resampling kernel - will not necessarily be pixel aligned. 
- - "Output kernels" - - One could imagine doing the resampling after compositing, - ie., for each destination pixel sample each source image 16 - times, then composite those subpixels individually, then - finally apply a kernel. - - However, this is effectively the same as full screen - antialiasing, which is a simpler way to think about it. So - resampling kernels may make sense for individual images, but - not as a post-compositing step. - - Fullscreen AA is inefficient without chained compositing - though. Consider an (image scaled up to oversample size IN - some polygon) scaled down to screen size. With the current - implementation, there will be a huge temporary. With chained - compositing, the whole thing ends up being equivalent to the - output kernel from above. - - - Color space conversion - - The complete model here is that each surface has a color - space associated with it and that the compositing operation - also has one associated with it. Note also that gradients - should have associated colorspaces. - - - Dithering - - If people dither something that is already dithered, it will - look terrible, but don't do that, then. (Dithering happens - after resampling if at all - what is the relationship - with color spaces? Presumably dithering should happen in linear - intensity space). - - - Floating point surfaces, 16, 32 and possibly 64 bit per - channel. - - Maybe crack: - - - Glyph polygons - - If glyphs could be given as polygons, they could be - positioned and rasterized more accurately. The glyph - structure would need subpixel positioning though. - - - Luminance vs. coverage for the alpha channel - - Whether the alpha channel should be interpreted as luminance - modulation or as coverage (intensity modulation). This is a - bit of a departure from the rendering model though. It could - also be considered whether it should be possible to have - both channels in the same drawable.
- - - Alternative for component alpha - - - Set component-alpha on the output image. - - - This means each of the components is sampled - independently and composited in the corresponding - channel only. - - - Have 3 x oversampled mask - - - Scale it down by 3 horizontally, with [ 1/3, 1/3, 1/3 ] - resampling filter. - - Is this equivalent to just using a component alpha mask? - - Incompatible changes: - - - Gradients could be specified with premultiplied colors. (You - can use a mask to get things like gradients from solid red to - transparent red.) - -Refactoring pixman - -The pixman code is not particularly nice to put it mildly. Among the -issues are - -- inconsistent naming style (fb vs Fb, camelCase vs - underscore_naming). Sometimes there is even inconsistency *within* - one name. - - fetchProc32 ACCESS(pixman_fetchProcForPicture32) - - may be one of the ugliest names ever created. - - coding style: - use the one from cairo except that pixman uses this brace style: - - while (blah) - { - } - - Format do while like this: - - do - { - - } - while (...); - -- PIXMAN_COMPOSITE_RECT_GENERAL() is horribly complex - -- switch case logic in pixman-access.c - - Instead it would be better to just store function pointers in the - image objects themselves, - - get_pixel() - get_scanline() - -- Much of the scanline fetching code is for formats that no one - ever uses. a2r2g2b2 anyone? - - It would probably be worthwhile having a generic fetcher for any - pixman format whatsoever. - -- Code related to particular image types should be split into individual - files. - - pixman-bits-image.c - pixman-linear-gradient-image.c - pixman-radial-gradient-image.c - pixman-solid-image.c - -- Fast path code should be split into files based on architecture: - - pixman-mmx-fastpath.c - pixman-sse2-fastpath.c - pixman-c-fastpath.c - - etc. - - Each of these files should then export a fastpath table, which would - be declared in pixman-private.h.
This should allow us to get rid - of the pixman-mmx.h files. - - The fast path table should describe each fast path. Ie there should - be bitfields indicating what things the fast path can handle, rather than - like now where it is only allowed to take one format per src/mask/dest. Ie., - - { - FAST_a8r8g8b8 | FAST_x8r8g8b8, - FAST_null, - FAST_x8r8g8b8, - FAST_repeat_normal | FAST_repeat_none, - the_fast_path - } - -There should then be *one* file that implements pixman_image_composite(). -This should do this: - - optimize_operator(); - - convert 1x1 repeat to solid (actually this should be done at - image creation time). - - is there a useful fastpath? - -There should be a file called pixman-cpu.c that contains all the -architecture specific stuff to detect what CPU features we have. - -Issues that must be kept in mind: - - - we need accessor code to be preserved - - - maybe there should be a "store_scanline" too? - - Is this sufficient? - - We should preserve the optimization where the - compositing happens directly in the destination - whenever possible. - - - It should be possible to create GPU samplers from the - images. - -The "horizontal" classification should be a bit in the image, the -"vertical" classification should just happen inside the gradient -file. Note though that - - (a) these will change if the transformation/repeat changes. - - (b) at the moment the optimization for linear gradients - takes the source rectangle into account. Presumably - this is to also optimize the case where the gradient - is close enough to horizontal? - -Who is responsible for repeats? In principle it should be the scanline -fetch. Right now NORMAL repeats are handled by walk_composite_region() -while other repeats are handled by the scanline code. - - -(Random note on filtering: do you filter before or after -transformation? Hardware is going to filter after transformation; -this is also what pixman does currently).
It's not completely clear -what filtering *after* transformation means. One thing that might look -good would be to do *supersampling*, ie., compute multiple subpixels -per destination pixel, then average them together. diff --git a/pixman/test/blitters-test.c b/pixman/test/blitters-test.c index 8766fa800..a2c6ff4d8 100644 --- a/pixman/test/blitters-test.c +++ b/pixman/test/blitters-test.c @@ -46,7 +46,16 @@ create_random_image (pixman_format_code_t *allowed_formats, /* do the allocation */ buf = aligned_malloc (64, stride * height); - prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF); + if (prng_rand_n (4) == 0) + { + /* uniform distribution */ + prng_randmemset (buf, stride * height, 0); + } + else + { + /* significantly increased probability for 0x00 and 0xFF */ + prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF); + } img = pixman_image_create_bits (fmt, width, height, buf, stride); @@ -393,6 +402,6 @@ main (int argc, const char *argv[]) } return fuzzer_test_main("blitters", 2000000, - 0xD8265D5E, + 0x0CF3283B, test_composite, argc, argv); } diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c index 4e16f7ba1..1049e21e7 100644 --- a/pixman/test/lowlevel-blt-bench.c +++ b/pixman/test/lowlevel-blt-bench.c @@ -385,6 +385,7 @@ bench_composite (char * testname, double t1, t2, t3, pix_cnt; int64_t n, l1test_width, nlines; double bytes_per_pix = 0; + pixman_bool_t bench_pixbuf = FALSE; pixman_composite_func_t func = pixman_image_composite_wrapper; @@ -422,16 +423,20 @@ bench_composite (char * testname, mask_img = NULL; xmask_img = NULL; + if (strcmp (testname, "pixbuf") == 0 || strcmp (testname, "rpixbuf") == 0) + { + bench_pixbuf = TRUE; + } if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null) { bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0); mask_img = pixman_image_create_bits (mask_fmt, WIDTH, HEIGHT, - mask, + bench_pixbuf ? 
src : mask, WIDTH * 4); xmask_img = pixman_image_create_bits (mask_fmt, XWIDTH, XHEIGHT, - mask, + bench_pixbuf ? src : mask, XWIDTH * 4); } else if (mask_fmt != PIXMAN_null) @@ -643,6 +648,8 @@ tests_tbl[] = { "src_0888_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_0888_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, { "src_0888_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "src_0888_8888_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "src_0888_0565_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, { "src_x888_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, { "src_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, @@ -707,6 +714,8 @@ tests_tbl[] = { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 }, { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 }, { "over_reverse_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "pixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 }, + { "rpixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 }, }; int diff --git a/pixman/test/prng-test.c b/pixman/test/prng-test.c index 0a3ad5e8f..c1d9320cc 100644 --- a/pixman/test/prng-test.c +++ b/pixman/test/prng-test.c @@ -106,7 +106,10 @@ int main (int argc, char *argv[]) { const uint32_t ref_crc[RANDMEMSET_MORE_00_AND_FF + 1] = { - 0xBA06763D, 0x103FC550, 0x8B59ABA5, 0xD82A0F39 + 0xBA06763D, 0x103FC550, 0x8B59ABA5, 0xD82A0F39, + 0xD2321099, 0xFD8C5420, 0xD3B7C42A, 0xFC098093, + 0x85E01DE0, 0x6680F8F7, 0x4D32DD3C, 0xAE52382B, + 0x149E6CB5, 0x8B336987, 0x15DCB2B3, 0x8A71B781 }; uint32_t crc1, crc2; 
uint32_t ref, seed, seed0, seed1, seed2, seed3; diff --git a/pixman/test/utils-prng.c b/pixman/test/utils-prng.c index 967b8989a..7b32e3531 100644 --- a/pixman/test/utils-prng.c +++ b/pixman/test/utils-prng.c @@ -107,6 +107,7 @@ randmemset_internal (prng_t *prng, { prng_t local_prng = *prng; prng_rand_128_data_t randdata; + size_t i; while (size >= 16) { @@ -138,6 +139,22 @@ randmemset_internal (prng_t *prng, }; randdata.vb &= (t.vb >= const_40); } + if (flags & RANDMEMSET_MORE_FFFFFFFF) + { + const uint32x4 const_C0000000 = + { + 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000 + }; + randdata.vw |= ((t.vw << 30) >= const_C0000000); + } + if (flags & RANDMEMSET_MORE_00000000) + { + const uint32x4 const_40000000 = + { + 0x40000000, 0x40000000, 0x40000000, 0x40000000 + }; + randdata.vw &= ((t.vw << 30) >= const_40000000); + } #else #define PROCESS_ONE_LANE(i) \ if (flags & RANDMEMSET_MORE_FF) \ @@ -155,6 +172,18 @@ randmemset_internal (prng_t *prng, mask_00 |= mask_00 >> 2; \ mask_00 |= mask_00 >> 4; \ randdata.w[i] &= mask_00; \ + } \ + if (flags & RANDMEMSET_MORE_FFFFFFFF) \ + { \ + int32_t mask_ff = ((t.w[i] << 30) & (t.w[i] << 31)) & \ + 0x80000000; \ + randdata.w[i] |= mask_ff >> 31; \ + } \ + if (flags & RANDMEMSET_MORE_00000000) \ + { \ + int32_t mask_00 = ((t.w[i] << 30) | (t.w[i] << 31)) & \ + 0x80000000; \ + randdata.w[i] &= mask_00 >> 31; \ } PROCESS_ONE_LANE (0) @@ -198,7 +227,8 @@ randmemset_internal (prng_t *prng, } size -= 16; } - while (size > 0) + i = 0; + while (i < size) { uint8_t randbyte = prng_rand_r (&local_prng) & 0xFF; if (flags != 0) @@ -208,9 +238,25 @@ randmemset_internal (prng_t *prng, randbyte = 0xFF; if ((flags & RANDMEMSET_MORE_00) && (t < 0x40)) randbyte = 0x00; + if (i % 4 == 0 && i + 4 <= size) + { + t = prng_rand_r (&local_prng) & 0xFF; + if ((flags & RANDMEMSET_MORE_FFFFFFFF) && (t >= 0xC0)) + { + memset(&buf[i], 0xFF, 4); + i += 4; + continue; + } + if ((flags & RANDMEMSET_MORE_00000000) && (t < 0x40)) + { + memset(&buf[i], 0x00, 
4); + i += 4; + continue; + } + } } - *buf++ = randbyte; - size--; + buf[i] = randbyte; + i++; } *prng = local_prng; } @@ -218,8 +264,10 @@ randmemset_internal (prng_t *prng, /* * Fill memory buffer with random data. Flags argument may be used * to tweak some statistics properties: - * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 - * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF + * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 + * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF + * RANDMEMSET_MORE_00000000 - ~25% chance for 00000000 4-byte clusters + * RANDMEMSET_MORE_FFFFFFFF - ~25% chance for FFFFFFFF 4-byte clusters */ void prng_randmemset_r (prng_t *prng, void *voidbuf, diff --git a/pixman/test/utils-prng.h b/pixman/test/utils-prng.h index 285107f08..564ffcef1 100644 --- a/pixman/test/utils-prng.h +++ b/pixman/test/utils-prng.h @@ -153,7 +153,10 @@ typedef enum { RANDMEMSET_MORE_00 = 1, /* ~25% chance for 0x00 bytes */ RANDMEMSET_MORE_FF = 2, /* ~25% chance for 0xFF bytes */ - RANDMEMSET_MORE_00_AND_FF = (RANDMEMSET_MORE_00 | RANDMEMSET_MORE_FF) + RANDMEMSET_MORE_00000000 = 4, /* ~25% chance for 0x00000000 clusters */ + RANDMEMSET_MORE_FFFFFFFF = 8, /* ~25% chance for 0xFFFFFFFF clusters */ + RANDMEMSET_MORE_00_AND_FF = (RANDMEMSET_MORE_00 | RANDMEMSET_MORE_00000000 | + RANDMEMSET_MORE_FF | RANDMEMSET_MORE_FFFFFFFF) } prng_randmemset_flags_t; /* Set the 32-bit seed for PRNG */ |