diff options
Diffstat (limited to 'pixman')
-rw-r--r-- | pixman/configure.ac | 16 | ||||
-rw-r--r-- | pixman/demos/scale.c | 7 | ||||
-rw-r--r-- | pixman/demos/scale.ui | 30 | ||||
-rw-r--r-- | pixman/pixman/pixman-fast-path.c | 446 | ||||
-rw-r--r-- | pixman/pixman/pixman-general.c | 3 | ||||
-rw-r--r-- | pixman/pixman/pixman-glyph.c | 8 | ||||
-rw-r--r-- | pixman/pixman/pixman-implementation.c | 28 | ||||
-rw-r--r-- | pixman/pixman/pixman-inlines.h | 37 | ||||
-rw-r--r-- | pixman/pixman/pixman-matrix.c | 408 | ||||
-rw-r--r-- | pixman/pixman/pixman-private.h | 25 | ||||
-rw-r--r-- | pixman/pixman/pixman-sse2.c | 284 | ||||
-rw-r--r-- | pixman/pixman/pixman.c | 87 | ||||
-rw-r--r-- | pixman/test/Makefile.sources | 1 | ||||
-rw-r--r-- | pixman/test/affine-test.c | 6 | ||||
-rw-r--r-- | pixman/test/lowlevel-blt-bench.c | 4 | ||||
-rw-r--r-- | pixman/test/matrix-test.c | 186 |
16 files changed, 1308 insertions, 268 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac index 515e31218..a93e2905b 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -968,6 +968,22 @@ fi AC_MSG_RESULT($support_for_attribute_constructor) AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR) +dnl ===================================== +dnl __float128 + +support_for_float128=no + +AC_MSG_CHECKING(for __float128) +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; } +]])], support_for_float128=yes) + +if test x$support_for_float128 = xyes; then + AC_DEFINE([HAVE_FLOAT128], [], [Whether the tool chain supports __float128]) +fi + +AC_MSG_RESULT($support_for_float128) + dnl ================== dnl libpng diff --git a/pixman/demos/scale.c b/pixman/demos/scale.c index 9100ff72a..869ada12b 100644 --- a/pixman/demos/scale.c +++ b/pixman/demos/scale.c @@ -39,6 +39,7 @@ typedef struct GtkAdjustment * scale_x_adjustment; GtkAdjustment * scale_y_adjustment; GtkAdjustment * rotate_adjustment; + GtkAdjustment * subsample_adjustment; int scaled_width; int scaled_height; } app_t; @@ -236,7 +237,8 @@ rescale (GtkWidget *may_be_null, app_t *app) get_value (app, filters, "reconstruct_y_combo_box"), get_value (app, filters, "sample_x_combo_box"), get_value (app, filters, "sample_y_combo_box"), - 4, 4); + gtk_adjustment_get_value (app->subsample_adjustment), + gtk_adjustment_get_value (app->subsample_adjustment)); pixman_image_set_filter (app->original, PIXMAN_FILTER_SEPARABLE_CONVOLUTION, params, n_params); @@ -360,10 +362,13 @@ app_new (pixman_image_t *original) GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_y_adjustment")); app->rotate_adjustment = GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "rotate_adjustment")); + app->subsample_adjustment = + GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "subsample_adjustment")); g_signal_connect (app->scale_x_adjustment, "value_changed", G_CALLBACK (rescale), app); g_signal_connect (app->scale_y_adjustment, "value_changed", G_CALLBACK (rescale), app); g_signal_connect (app->rotate_adjustment, "value_changed", G_CALLBACK (rescale), app); + g_signal_connect (app->subsample_adjustment, "value_changed", G_CALLBACK (rescale), app); widget = get_widget (app, "scale_x_scale"); gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL); diff --git a/pixman/demos/scale.ui b/pixman/demos/scale.ui index f7c0c805f..b3450d34d 100644 --- a/pixman/demos/scale.ui +++ b/pixman/demos/scale.ui @@ -23,6 +23,14 @@ <property name="page_increment">10</property> <property name="page_size">10</property> </object> + <object class="GtkAdjustment" id="subsample_adjustment"> + <property name="lower">1</property> + <property name="upper">12</property> + <property name="step_increment">1</property> + <property name="page_increment">1</property> + <property name="page_size">0</property> + <property name="value">4</property> + </object> <object class="GtkWindow" id="main"> <child> <object class="GtkHBox" id="u"> @@ -51,6 +59,7 @@ <child> <object class="GtkVBox" id="box1"> <property name="visible">True</property> + <property name="spacing">12</property> <child> <object class="GtkHBox" id="box2"> <property name="visible">True</property> @@ -234,6 +243,17 @@ </packing> </child> <child> + <object class="GtkLabel" id="label9"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Subsample:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="top_attach">5</property> + </packing> + </child> + <child> <object class="GtkComboBox" id="reconstruct_x_combo_box"> <property name="visible">True</property> </object> @@ -277,6 +297,16 @@ <property name="top_attach">4</property> </packing> </child> + <child> + <object class="GtkSpinButton" id="subsample_spin_button"> + <property name="visible">True</property> + <property name="adjustment">subsample_adjustment</property> + </object> + <packing> + <property name="left_attach">1</property> + <property name="top_attach">5</property> + </packing> + </child> </object> <packing> <property name="expand">False</property> diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index c625e0c4a..247aea645 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -739,36 +739,6 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp, } static void -fast_composite_src_x888_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - *dst = convert_8888_to_0565 (s); - dst++; - } - } -} - -static void fast_composite_add_8_8 (pixman_implementation_t *imp, pixman_composite_info_t *info) { @@ -1243,6 +1213,18 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, pixman_composite_func_t func; pixman_format_code_t mask_format; uint32_t src_flags, mask_flags; + int32_t sx, sy; + int32_t width_remain; + int32_t num_pixels; + int32_t src_width; + int32_t i, j; + pixman_image_t extended_src_image; + uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; + pixman_bool_t need_src_extension; + uint32_t *src_line; + int32_t src_stride; + int32_t src_bpp; + pixman_composite_info_t info2 = *info; src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; @@ -1258,149 +1240,131 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, mask_flags = FAST_PATH_IS_OPAQUE; } - if (_pixman_implementation_lookup_composite ( - imp->toplevel, info->op, - src_image->common.extended_format_code, src_flags, - mask_format, mask_flags, - dest_image->common.extended_format_code, info->dest_flags, - &imp, &func)) + _pixman_implementation_lookup_composite ( + imp->toplevel, info->op, + src_image->common.extended_format_code, src_flags, + mask_format, mask_flags, + dest_image->common.extended_format_code, info->dest_flags, + &imp, &func); + + src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); + + if (src_image->bits.width < REPEAT_MIN_WIDTH && + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && + !src_image->bits.indexed) { - int32_t sx, sy; - int32_t width_remain; - int32_t num_pixels; - int32_t src_width; - int32_t i, j; - pixman_image_t extended_src_image; - uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; - pixman_bool_t need_src_extension; - uint32_t *src_line; - int32_t src_stride; - int32_t src_bpp; - pixman_composite_info_t info2 = *info; - - src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); - - if (src_image->bits.width < REPEAT_MIN_WIDTH && - (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && - !src_image->bits.indexed) - { - sx = src_x; - sx = MOD (sx, src_image->bits.width); - sx += width; - src_width = 0; + sx = src_x; + sx = MOD (sx, src_image->bits.width); + sx += width; + src_width = 0; - while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) - src_width += src_image->bits.width; + while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) + src_width += src_image->bits.width; - src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); + src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); - /* Initialize/validate stack-allocated temporary image */ - _pixman_bits_image_init (&extended_src_image, src_image->bits.format, - src_width, 1, &extended_src[0], src_stride, - FALSE); - _pixman_image_validate (&extended_src_image); + /* Initialize/validate stack-allocated temporary image */ + _pixman_bits_image_init (&extended_src_image, src_image->bits.format, + src_width, 1, &extended_src[0], src_stride, + FALSE); + _pixman_image_validate (&extended_src_image); - info2.src_image = &extended_src_image; - need_src_extension = TRUE; - } - else - { - src_width = src_image->bits.width; - need_src_extension = FALSE; - } + info2.src_image = &extended_src_image; + need_src_extension = TRUE; + } + else + { + src_width = src_image->bits.width; + need_src_extension = FALSE; + } - sx = src_x; - sy = src_y; + sx = src_x; + sy = src_y; - while (--height >= 0) - { - sx = MOD (sx, src_width); - sy = MOD (sy, src_image->bits.height); + while (--height >= 0) + { + sx = MOD (sx, src_width); + sy = MOD (sy, src_image->bits.height); - if (need_src_extension) + if (need_src_extension) + { + if (src_bpp == 32) { - if (src_bpp == 32) - { - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - extended_src[i] = src_line[j]; - } - } - else if (src_bpp == 16) + for (i = 0; i < src_width; ) { - uint16_t *src_line_16; - - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, - src_line_16, 1); - src_line = (uint32_t*)src_line_16; - - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; - } + for (j = 0; j < src_image->bits.width; j++, i++) + extended_src[i] = src_line[j]; } - else if (src_bpp == 8) - { - uint8_t *src_line_8; + } + else if (src_bpp == 16) + { + uint16_t *src_line_16; - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, - src_line_8, 1); - src_line = (uint32_t*)src_line_8; + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, + src_line_16, 1); + src_line = (uint32_t*)src_line_16; - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; - } + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; } - - info2.src_y = 0; } - else + else if (src_bpp == 8) { - info2.src_y = sy; + uint8_t *src_line_8; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, + src_line_8, 1); + src_line = (uint32_t*)src_line_8; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; + } } - width_remain = width; + info2.src_y = 0; + } + else + { + info2.src_y = sy; + } - while (width_remain > 0) - { - num_pixels = src_width - sx; + width_remain = width; - if (num_pixels > width_remain) - num_pixels = width_remain; + while (width_remain > 0) + { + num_pixels = src_width - sx; - info2.src_x = sx; - info2.width = num_pixels; - info2.height = 1; + if (num_pixels > width_remain) + num_pixels = width_remain; - func (imp, &info2); + info2.src_x = sx; + info2.width = num_pixels; + info2.height = 1; - width_remain -= num_pixels; - info2.mask_x += num_pixels; - info2.dest_x += num_pixels; - sx = 0; - } + func (imp, &info2); - sx = src_x; - sy++; - info2.mask_x = info->mask_x; - info2.mask_y++; - info2.dest_x = info->dest_x; - info2.dest_y++; + width_remain -= num_pixels; + info2.mask_x += num_pixels; + info2.dest_x += num_pixels; + sx = 0; } - if (need_src_extension) - _pixman_image_fini (&extended_src_image); - } - else - { - _pixman_log_error (FUNC, "Didn't find a suitable function "); + sx = src_x; + sy++; + info2.mask_x = info->mask_x; + info2.mask_y++; + info2.dest_x = info->dest_x; + info2.dest_y++; } + + if (need_src_extension) + _pixman_image_fini (&extended_src_image); } /* Use more unrolling for src_0565_0565 because it is typically CPU bound */ @@ -1913,10 +1877,6 @@ static const pixman_fast_path_t c_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), @@ -2199,12 +2159,200 @@ fast_path_fill (pixman_implementation_t *imp, return TRUE; } +/*****************************************************************************/ + +static uint32_t * +fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ + int32_t w = iter->width; + uint32_t *dst = iter->buffer; + const uint16_t *src = (const uint16_t *)iter->bits; + + iter->bits += iter->stride; + + /* Align the source buffer at 4 bytes boundary */ + if (w > 0 && ((uintptr_t)src & 3)) + { + *dst++ = convert_0565_to_8888 (*src++); + w--; + } + /* Process two pixels per iteration */ + while ((w -= 2) >= 0) + { + uint32_t sr, sb, sg, t0, t1; + uint32_t s = *(const uint32_t *)src; + src += 2; + sr = (s >> 8) & 0x00F800F8; + sb = (s << 3) & 0x00F800F8; + sg = (s >> 3) & 0x00FC00FC; + sr |= sr >> 5; + sb |= sb >> 5; + sg |= sg >> 6; + t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) | + (sb & 0xFF) | 0xFF000000; + t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) | + (sb >> 16) | 0xFF000000; +#ifdef WORDS_BIGENDIAN + *dst++ = t1; + *dst++ = t0; +#else + *dst++ = t0; + *dst++ = t1; +#endif + } + if (w & 1) + { + *dst = convert_0565_to_8888 (*src); + } + + return iter->buffer; +} + +static uint32_t * +fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) +{ + iter->bits += iter->stride; + return iter->buffer; +} + +/* Helper function for a workaround, which tries to ensure that 0x1F001F + * constant is always allocated in a register on RISC architectures. + */ +static force_inline uint32_t +convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F) +{ + uint32_t a, b; + a = (s >> 3) & x1F001F; + b = s & 0xFC00; + a |= a >> 5; + a |= b >> 5; + return a; +} + +static void +fast_write_back_r5g6b5 (pixman_iter_t *iter) +{ + int32_t w = iter->width; + uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); + const uint32_t *src = iter->buffer; + /* Workaround to ensure that x1F001F variable is allocated in a register */ + static volatile uint32_t volatile_x1F001F = 0x1F001F; + uint32_t x1F001F = volatile_x1F001F; + + while ((w -= 4) >= 0) + { + uint32_t s1 = *src++; + uint32_t s2 = *src++; + uint32_t s3 = *src++; + uint32_t s4 = *src++; + *dst++ = convert_8888_to_0565_workaround (s1, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s2, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s3, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s4, x1F001F); + } + if (w & 2) + { + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + } + if (w & 1) + { + *dst = convert_8888_to_0565_workaround (*src, x1F001F); + } +} + +typedef struct +{ + pixman_format_code_t format; + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; +} fetcher_info_t; + +static const fetcher_info_t fetchers[] = +{ + { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + { PIXMAN_null } +}; + +static pixman_bool_t +fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + +#define FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FLAGS) == FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + iter->get_scanline = f->get_scanline; + return TRUE; + } + } + } + + return FALSE; +} + +static pixman_bool_t +fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) + { + iter->get_scanline = fast_dest_fetch_noop; + } + else + { + iter->get_scanline = f->get_scanline; + } + iter->write_back = f->write_back; + return TRUE; + } + } + } + return FALSE; +} + + pixman_implementation_t * _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) { pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); imp->fill = fast_path_fill; + imp->src_iter_init = fast_src_iter_init; + imp->dest_iter_init = fast_dest_iter_init; return imp; } diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c index f175d771e..93a1b9acf 100644 --- a/pixman/pixman/pixman-general.c +++ b/pixman/pixman/pixman-general.c @@ -188,9 +188,6 @@ general_composite_rect (pixman_implementation_t *imp, compose = _pixman_implementation_lookup_combiner ( imp->toplevel, op, component_alpha, narrow); - if (!compose) - return; - for (i = 0; i < height; ++i) { uint32_t *s, *m, *d; diff --git a/pixman/pixman/pixman-glyph.c b/pixman/pixman/pixman-glyph.c index 6d2c8bbb7..5a271b64b 100644 --- a/pixman/pixman/pixman-glyph.c +++ b/pixman/pixman/pixman-glyph.c @@ -463,16 +463,13 @@ pixman_composite_glyphs_no_mask (pixman_op_t op, { glyph_format = glyph_img->common.extended_format_code; glyph_flags = glyph_img->common.flags; - + _pixman_implementation_lookup_composite ( get_implementation(), op, src->common.extended_format_code, src->common.flags, glyph_format, glyph_flags | extra, dest_format, dest_flags, &implementation, &func); - - if (!func) - goto out; } info.src_x = src_x + composite_box.x1 - dest_x; @@ -582,9 +579,6 @@ add_glyphs (pixman_glyph_cache_t *cache, mask_format, info.mask_flags, dest_format, dest_flags, &implementation, &func); - - if (!func) - goto out; } glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x; diff --git a/pixman/pixman/pixman-implementation.c b/pixman/pixman/pixman-implementation.c index ec467a619..c0a643633 100644 --- a/pixman/pixman/pixman-implementation.c +++ b/pixman/pixman/pixman-implementation.c @@ -65,7 +65,13 @@ typedef struct PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); -pixman_bool_t +static void +dummy_composite_rect (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ +} + +void _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, pixman_op_t op, pixman_format_code_t src_format, @@ -142,7 +148,11 @@ _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, ++info; } } - return FALSE; + + /* We should never reach this point */ + _pixman_log_error (FUNC, "No known composite function\n"); + *out_imp = NULL; + *out_func = dummy_composite_rect; update_cache: if (i) @@ -160,8 +170,16 @@ update_cache: cache->cache[0].fast_path.dest_flags = dest_flags; cache->cache[0].fast_path.func = *out_func; } +} - return TRUE; +static void +dummy_combine (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ } pixman_combine_32_func_t @@ -199,7 +217,9 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp, imp = imp->fallback; } - return NULL; + /* We should never reach this point */ + _pixman_log_error (FUNC, "No known combine function\n"); + return dummy_combine; } pixman_bool_t diff --git a/pixman/pixman/pixman-inlines.h b/pixman/pixman/pixman-inlines.h index ab4def0dc..dd1c2f17f 100644 --- a/pixman/pixman/pixman-inlines.h +++ b/pixman/pixman/pixman-inlines.h @@ -88,6 +88,42 @@ pixman_fixed_to_bilinear_weight (pixman_fixed_t x) ((1 << BILINEAR_INTERPOLATION_BITS) - 1); } +#if BILINEAR_INTERPOLATION_BITS <= 4 +/* Inspired by Filter_32_opaque from Skia */ +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t lo, hi; + + distx <<= (4 - BILINEAR_INTERPOLATION_BITS); + disty <<= (4 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ + distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ + distixiy = + 16 * 16 - (disty << 4) - + (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ + + lo = (tl & 0xff00ff) * distixiy; + hi = ((tl >> 8) & 0xff00ff) * distixiy; + + lo += (tr & 0xff00ff) * distxiy; + hi += ((tr >> 8) & 0xff00ff) * distxiy; + + lo += (bl & 0xff00ff) * distixy; + hi += ((bl >> 8) & 0xff00ff) * distixy; + + lo += (br & 0xff00ff) * distxy; + hi += ((br >> 8) & 0xff00ff) * distxy; + + return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); +} + +#else #if SIZEOF_LONG > 4 static force_inline uint32_t @@ -184,6 +220,7 @@ bilinear_interpolation (uint32_t tl, uint32_t tr, } #endif +#endif // BILINEAR_INTERPOLATION_BITS <= 4 /* * For each scanline fetched from source image with PAD repeat: diff --git a/pixman/pixman/pixman-matrix.c b/pixman/pixman/pixman-matrix.c index cd2f1b5b8..89b96826b 100644 --- a/pixman/pixman/pixman-matrix.c +++ b/pixman/pixman/pixman-matrix.c @@ -34,6 +34,338 @@ #define F(x) pixman_int_to_fixed (x) +static force_inline int +count_leading_zeros (uint32_t x) +{ +#ifdef __GNUC__ + return __builtin_clz (x); +#else + int n = 0; + while (x) + { + n++; + x >>= 1; + } + return 32 - n; +#endif +} + +/* + * Large signed/unsigned integer division with rounding for the platforms with + * only 64-bit integer data type supported (no 128-bit data type). + * + * Arguments: + * hi, lo - high and low 64-bit parts of the dividend + * div - 48-bit divisor + * + * Returns: lowest 64 bits of the result as a return value and highest 64 + * bits of the result to "result_hi" pointer + */ + +/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */ +static force_inline uint64_t +rounded_udiv_128_by_48 (uint64_t hi, + uint64_t lo, + uint64_t div, + uint64_t *result_hi) +{ + uint64_t tmp, remainder, result_lo; + assert(div < ((uint64_t)1 << 48)); + + remainder = hi % div; + *result_hi = hi / div; + + tmp = (remainder << 16) + (lo >> 48); + result_lo = tmp / div; + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + (lo & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + /* round to nearest */ + if (remainder * 2 >= div && ++result_lo == 0) + *result_hi += 1; + + return result_lo; +} + +/* signed division (128-bit by 49-bit) with rounding to nearest */ +static inline int64_t +rounded_sdiv_128_by_49 (int64_t hi, + uint64_t lo, + int64_t div, + int64_t *signed_result_hi) +{ + uint64_t result_lo, result_hi; + int sign = 0; + if (div < 0) + { + div = -div; + sign ^= 1; + } + if (hi < 0) + { + if (lo != 0) + hi++; + hi = -hi; + lo = -lo; + sign ^= 1; + } + result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi); + if (sign) + { + if (result_lo != 0) + result_hi++; + result_hi = -result_hi; + result_lo = -result_lo; + } + if (signed_result_hi) + { + *signed_result_hi = result_hi; + } + return result_lo; +} + +/* + * Multiply 64.16 fixed point value by (2^scalebits) and convert + * to 128-bit integer. + */ +static force_inline void +fixed_64_16_to_int128 (int64_t hi, + int64_t lo, + int64_t *rhi, + int64_t *rlo, + int scalebits) +{ + /* separate integer and fractional parts */ + hi += lo >> 16; + lo &= 0xFFFF; + + if (scalebits <= 0) + { + *rlo = hi >> (-scalebits); + *rhi = *rlo >> 63; + } + else + { + *rhi = hi >> (64 - scalebits); + *rlo = (uint64_t)hi << scalebits; + if (scalebits < 16) + *rlo += lo >> (16 - scalebits); + else + *rlo += lo << (scalebits - 16); + } +} + +/* + * Convert 112.16 fixed point value to 48.16 with clamping for the out + * of range values. + */ +static force_inline pixman_fixed_48_16_t +fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag) +{ + if ((lo >> 63) != hi) + { + *clampflag = TRUE; + return hi >= 0 ? INT64_MAX : INT64_MIN; + } + else + { + return lo; + } +} + +/* + * Transform a point with 31.16 fixed point coordinates from the destination + * space to a point with 48.16 fixed point coordinates in the source space. + * No overflows are possible for affine transformations and the results are + * accurate including the least significant bit. Projective transformations + * may overflow, in this case the results are just clamped to return maximum + * or minimum 48.16 values (so that the caller can at least handle the NONE + * and PAD repeats correctly) and the return value is FALSE to indicate that + * such clamping has happened. + */ +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + pixman_bool_t clampflag = FALSE; + int i; + int64_t tmp[3][2], divint; + uint16_t divfrac; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + /* + * separate 64-bit integer and 16-bit fractional parts for the divisor, + * which is also scaled by 65536 after fixed point multiplication. + */ + divint = tmp[2][0] + (tmp[2][1] >> 16); + divfrac = tmp[2][1] & 0xFFFF; + + if (divint == pixman_fixed_1 && divfrac == 0) + { + /* + * this is a simple affine transformation + */ + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; + } + else if (divint == 0 && divfrac == 0) + { + /* + * handle zero divisor (if the values are non-zero, set the + * results to maximum positive or minimum negative) + */ + clampflag = TRUE; + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + + if (result->v[0] > 0) + result->v[0] = INT64_MAX; + else if (result->v[0] < 0) + result->v[0] = INT64_MIN; + + if (result->v[1] > 0) + result->v[1] = INT64_MAX; + else if (result->v[1] < 0) + result->v[1] = INT64_MIN; + } + else + { + /* + * projective transformation, analyze the top 32 bits of the divisor + */ + int32_t hi32divbits = divint >> 32; + if (hi32divbits < 0) + hi32divbits = ~hi32divbits; + + if (hi32divbits == 0) + { + /* the divisor is small, we can actually keep all the bits */ + int64_t hi, rhi, lo, rlo; + int64_t div = (divint << 16) + divfrac; + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + else + { + /* the divisor needs to be reduced to 48 bits */ + int64_t hi, rhi, lo, rlo, div; + int shift = 32 - count_leading_zeros (hi32divbits); + fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift); + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + } + result->v[2] = pixman_fixed_1; + return !clampflag; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int64_t hi0, lo0, hi1, lo1; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16); + lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16); + lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][2]; + + hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16); + lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16); + lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][2]; + + result->v[0] = hi0 + ((lo0 + 0x8000) >> 16); + result->v[1] = hi1 + ((lo1 + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int i; + int64_t tmp[3][2]; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16); +} + PIXMAN_EXPORT void pixman_transform_init_identity (struct pixman_transform *matrix) { @@ -50,69 +382,41 @@ PIXMAN_EXPORT pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform, struct pixman_vector * vector) { - struct pixman_vector result; - pixman_fixed_32_32_t partial; - pixman_fixed_48_16_t v; - int i, j; + pixman_vector_48_16_t tmp; + tmp.v[0] = vector->vector[0]; + tmp.v[1] = vector->vector[1]; + tmp.v[2] = vector->vector[2]; - for (j = 0; j < 3; j++) - { - v = 0; - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] * - (pixman_fixed_48_16_t) vector->vector[i]); - v += (partial + 0x8000) >> 16; - } - - if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) - return FALSE; - - result.vector[j] = (pixman_fixed_t) v; - } - - *vector = result; + pixman_transform_point_31_16_3d (transform, &tmp, &tmp); - if (!result.vector[2]) - return FALSE; + vector->vector[0] = tmp.v[0]; + vector->vector[1] = tmp.v[1]; + vector->vector[2] = tmp.v[2]; - return TRUE; + return vector->vector[0] == tmp.v[0] && + vector->vector[1] == tmp.v[1] && + vector->vector[2] == tmp.v[2]; } PIXMAN_EXPORT pixman_bool_t pixman_transform_point (const struct pixman_transform *transform, struct pixman_vector * vector) { - pixman_fixed_32_32_t partial; - pixman_fixed_34_30_t v[3]; - pixman_fixed_48_16_t quo; - int i, j; + pixman_vector_48_16_t tmp; + tmp.v[0] = vector->vector[0]; + tmp.v[1] = vector->vector[1]; + tmp.v[2] = vector->vector[2]; - for (j = 0; j < 3; j++) - { - v[j] = 0; - - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] * - (pixman_fixed_32_32_t) vector->vector[i]); - v[j] += (partial + 2) >> 2; - } - } - - if (!((v[2] + 0x8000) >> 16)) - return FALSE; + if (!pixman_transform_point_31_16 (transform, &tmp, &tmp)) + return FALSE; - for (j = 0; j < 2; j++) - { - quo = v[j] / ((v[2] + 0x8000) >> 16); - if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16) - return FALSE; - vector->vector[j] = (pixman_fixed_t) quo; - } - - vector->vector[2] = pixman_fixed_1; - return TRUE; + vector->vector[0] = tmp.v[0]; + vector->vector[1] = tmp.v[1]; + vector->vector[2] = tmp.v[2]; + + return vector->vector[0] == tmp.v[0] && + vector->vector[1] == tmp.v[1] && + vector->vector[2] == tmp.v[2]; } PIXMAN_EXPORT pixman_bool_t diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index e5ab873ed..cb78a2ed8 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -497,7 +497,7 @@ pixman_implementation_t * _pixman_implementation_create (pixman_implementation_t *fallback, const pixman_fast_path_t *fast_paths); -pixman_bool_t +void _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, pixman_op_t op, pixman_format_code_t src_format, @@ -1052,7 +1052,7 @@ _pixman_log_error (const char *function, const char *message); #else -#define _pixman_log_error(f,m) do { } while (0) \ +#define _pixman_log_error(f,m) do { } while (0) #define return_if_fail(expr) \ do \ @@ -1078,6 +1078,27 @@ _pixman_log_error (const char *function, const char *message); #endif /* + * Matrix + */ + +typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t; + +pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + +void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + +void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); + +/* * Timers */ diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index 5a0e0626a..fc873cc96 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -4523,7 +4523,163 @@ sse2_composite_add_8888_8888 (pixman_implementation_t *imp, sse2_combine_add_u (imp, op, dst, src, NULL, width); } +} + +static void +sse2_composite_add_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst, src; + int dst_stride; + + __m128i xmm_src; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + + if (src == ~0) + { + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, + dest_x, dest_y, width, height, ~0); + + return; + } + + xmm_src = _mm_set_epi32 (src, src, src, src); + while (height--) + { + int w = width; + uint32_t d; + + dst = dst_line; + dst_line += dst_stride; + + while (w && (unsigned long)dst & 15) + { + d = *dst; + *dst++ = + _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d))); + w--; + } + + while (w >= 4) + { + save_128_aligned + ((__m128i*)dst, + _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); + + dst += 4; + w -= 4; + } + + while (w--) + { + d = *dst; + *dst++ = + _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src, + _mm_cvtsi32_si128 (d))); + } + } +} + +static void +sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + + __m128i xmm_src; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + xmm_src = expand_pixel_32_1x128 (src); + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && ((unsigned long)dst & 15)) + { + uint8_t m = *mask++; + if (m) + { + *dst = pack_1x128_32 + (_mm_adds_epu16 + (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), + unpack_32_1x128 (*dst))); + } + dst++; + w--; + } + + while (w >= 4) + { + uint32_t m = *(uint32_t*)mask; + if (m) + { + __m128i xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + __m128i xmm_dst = load_128_aligned ((__m128i*)dst); + __m128i xmm_mask = + _mm_unpacklo_epi8 (unpack_32_1x128(m), + _mm_setzero_si128 ()); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); + xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + if (m) + { + *dst = pack_1x128_32 + (_mm_adds_epu16 + (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), + unpack_32_1x128 (*dst))); + } + dst++; + w--; + } + } } static pixman_bool_t @@ -5786,6 +5942,121 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER, uint32_t, uint8_t, uint32_t, NORMAL, FLAG_HAVE_NON_SOLID_MASK) +static force_inline void +scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2, pix3, pix4; + __m128i xmm_mask; + + if (zero_src || (*mask >> 24) == 0) + return; + + xmm_mask = create_mask_16_128 (*mask >> 24); + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + if (pix1) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (pix1); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 + (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + + dst++; + w--; + } + + while (w >= 4) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + + if (pix1 | pix2 | pix3 | pix4) + { + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned + ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + w -= 4; + } + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + if (pix1) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (pix1); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 + (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + + dst++; + w--; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_HAVE_SOLID_MASK) + static const pixman_fast_path_t sse2_fast_paths[] = { /* PIXMAN_OP_OVER */ @@ -5848,6 +6119,14 @@ static const pixman_fast_path_t sse2_fast_paths[] = PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8), PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8), + PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888), /* PIXMAN_OP_SRC */ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888), @@ -5912,6 +6191,11 @@ static const pixman_fast_path_t sse2_fast_paths[] = SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888), diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c index 3fabed161..184f0c4e6 100644 --- a/pixman/pixman/pixman.c +++ b/pixman/pixman/pixman.c @@ -581,11 +581,13 @@ pixman_image_composite32 (pixman_op_t op, int32_t height) { pixman_format_code_t src_format, mask_format, dest_format; - uint32_t src_flags, mask_flags, dest_flags; pixman_region32_t region; pixman_box32_t extents; pixman_implementation_t *imp; pixman_composite_func_t func; + pixman_composite_info_t info; + const pixman_box32_t *pbox; + int n; _pixman_image_validate (src); if (mask) @@ -593,27 +595,27 @@ pixman_image_composite32 (pixman_op_t op, _pixman_image_validate (dest); src_format = src->common.extended_format_code; - src_flags = src->common.flags; + info.src_flags = src->common.flags; if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE)) { mask_format = mask->common.extended_format_code; - mask_flags = mask->common.flags; + info.mask_flags = mask->common.flags; } else { mask_format = PIXMAN_null; - mask_flags = FAST_PATH_IS_OPAQUE; + info.mask_flags = FAST_PATH_IS_OPAQUE; } dest_format = dest->common.extended_format_code; - dest_flags = dest->common.flags; + info.dest_flags = dest->common.flags; /* Check for pixbufs */ if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) && (src->type == BITS && src->bits.bits == mask->bits.bits) && (src->common.repeat == mask->common.repeat) && - (src_flags & mask_flags & FAST_PATH_ID_TRANSFORM) && + (info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM) && (src_x == mask_x && src_y == mask_y)) { if (src_format == PIXMAN_x8b8g8r8) @@ -638,7 +640,7 @@ pixman_image_composite32 (pixman_op_t op, extents.x2 -= dest_x - src_x; extents.y2 -= dest_y - src_y; - if (!analyze_extent (src, &extents, &src_flags)) + if (!analyze_extent (src, &extents, &info.src_flags)) goto out; extents.x1 -= src_x - mask_x; @@ -646,7 +648,7 @@ pixman_image_composite32 (pixman_op_t op, extents.x2 -= src_x - mask_x; extents.y2 -= src_y - mask_y; - if (!analyze_extent (mask, &extents, &mask_flags)) + if (!analyze_extent (mask, &extents, &info.mask_flags)) goto out; /* If the clip is within the source samples, and the samples are @@ -659,16 +661,16 @@ pixman_image_composite32 (pixman_op_t op, FAST_PATH_BILINEAR_FILTER | \ FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR) - if ((src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || - (src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) + if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || + (info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) { - src_flags |= FAST_PATH_IS_OPAQUE; + info.src_flags |= FAST_PATH_IS_OPAQUE; } - if ((mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || - (mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) + if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || + (info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) { - mask_flags |= FAST_PATH_IS_OPAQUE; + info.mask_flags |= FAST_PATH_IS_OPAQUE; } /* @@ -676,42 +678,35 @@ pixman_image_composite32 (pixman_op_t op, * if the src or dest are opaque. The output operator should be * mathematically equivalent to the source. */ - op = optimize_operator (op, src_flags, mask_flags, dest_flags); + info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags); - if (_pixman_implementation_lookup_composite ( - get_implementation (), op, - src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags, - &imp, &func)) - { - pixman_composite_info_t info; - const pixman_box32_t *pbox; - int n; + _pixman_implementation_lookup_composite ( + get_implementation (), info.op, + src_format, info.src_flags, + mask_format, info.mask_flags, + dest_format, info.dest_flags, + &imp, &func); - info.op = op; - info.src_image = src; - info.mask_image = mask; - info.dest_image = dest; - info.src_flags = src_flags; - info.mask_flags = mask_flags; - info.dest_flags = dest_flags; + info.src_image = src; + info.mask_image = mask; + info.dest_image = dest; - pbox = pixman_region32_rectangles (®ion, &n); + pbox = pixman_region32_rectangles (®ion, &n); - while (n--) - { - info.src_x = pbox->x1 + src_x - dest_x; - info.src_y = pbox->y1 + src_y - dest_y; - info.mask_x = pbox->x1 + mask_x - dest_x; - info.mask_y = pbox->y1 + mask_y - dest_y; - info.dest_x = pbox->x1; - info.dest_y = pbox->y1; - info.width = pbox->x2 - pbox->x1; - info.height = pbox->y2 - pbox->y1; - - func (imp, &info); - - pbox++; - } + while (n--) + { + info.src_x = pbox->x1 + src_x - dest_x; + info.src_y = pbox->y1 + src_y - dest_y; + info.mask_x = pbox->x1 + mask_x - dest_x; + info.mask_y = pbox->y1 + mask_y - dest_y; + info.dest_x = pbox->x1; + info.dest_y = pbox->y1; + info.width = pbox->x2 - pbox->x1; + info.height = pbox->y2 - pbox->y1; + + func (imp, &info); + + pbox++; } out: diff --git a/pixman/test/Makefile.sources b/pixman/test/Makefile.sources index 8c0b505df..e323a8e8c 100644 --- a/pixman/test/Makefile.sources +++ b/pixman/test/Makefile.sources @@ -17,6 +17,7 @@ TESTPROGRAMS = \ gradient-crash-test \ region-contains-test \ alphamap \ + matrix-test \ stress-test \ composite-traps-test \ blitters-test \ diff --git a/pixman/test/affine-test.c b/pixman/test/affine-test.c index 678fbe844..2506250db 100644 --- a/pixman/test/affine-test.c +++ b/pixman/test/affine-test.c @@ -307,11 +307,11 @@ test_composite (int testnum, } #if BILINEAR_INTERPOLATION_BITS == 8 -#define CHECKSUM 0x97097336 +#define CHECKSUM 0x2CDF1F07 #elif BILINEAR_INTERPOLATION_BITS == 7 -#define CHECKSUM 0x31D2DC21 +#define CHECKSUM 0xBC00B1DF #elif BILINEAR_INTERPOLATION_BITS == 4 -#define CHECKSUM 0x8B925154 +#define CHECKSUM 0xA227306B #else #define CHECKSUM 0x00000000 #endif diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c index 2f97b7b24..7336fa0d5 100644 --- a/pixman/test/lowlevel-blt-bench.c +++ b/pixman/test/lowlevel-blt-bench.c @@ -630,6 +630,8 @@ tests_tbl[] = { "src_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_1555_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_0565_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, + { "src_8_8", PIXMAN_a8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 }, + { "src_n_8", PIXMAN_a8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 }, { "src_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, { "src_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, { "src_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 }, @@ -772,7 +774,7 @@ main (int argc, char *argv[]) for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++) { - if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern)) + if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0) { bench_composite (tests_tbl[i].testname, tests_tbl[i].src_fmt, diff --git a/pixman/test/matrix-test.c b/pixman/test/matrix-test.c new file mode 100644 index 000000000..8437dd291 --- /dev/null +++ b/pixman/test/matrix-test.c @@ -0,0 +1,186 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "utils.h" +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> + +#ifdef HAVE_FLOAT128 + +#define pixman_fixed_to_float128(x) (((__float128)(x)) / 65536.0Q) + +typedef struct { __float128 v[3]; } pixman_vector_f128_t; +typedef struct { __float128 m[3][3]; } pixman_transform_f128_t; + +pixman_bool_t +pixman_transform_point_f128 (const pixman_transform_f128_t *t, + const pixman_vector_f128_t *v, + pixman_vector_f128_t *result) +{ + int i; + for (i = 0; i < 3; i++) + { + result->v[i] = t->m[i][0] * v->v[0] + + t->m[i][1] * v->v[1] + + t->m[i][2] * v->v[2]; + } + if (result->v[2] != 0) + { + result->v[0] /= result->v[2]; + result->v[1] /= result->v[2]; + result->v[2] = 1; + return TRUE; + } + else + { + return FALSE; + } +} + +pixman_bool_t does_it_fit_fixed_48_16 (__float128 x) +{ + if (x >= 65536.0Q * 65536.0Q * 32768.0Q) + return FALSE; + if (x <= -65536.0Q * 65536.0Q * 32768.0Q) + return FALSE; + return TRUE; +} + +#endif + +uint32_t +test_matrix (int testnum, int verbose) +{ + uint32_t crc32 = 0; + int i, j, k; + pixman_bool_t is_affine; + + prng_srand (testnum); + + for (i = 0; i < 100; i++) + { + pixman_bool_t transform_ok; + pixman_transform_t ti; + pixman_vector_48_16_t vi, result_i; +#ifdef HAVE_FLOAT128 + pixman_transform_f128_t tf; + pixman_vector_f128_t vf, result_f; +#endif + prng_randmemset (&ti, sizeof(ti), 0); + prng_randmemset (&vi, sizeof(vi), 0); + + for (j = 0; j < 3; j++) + { + /* make sure that "vi" contains 31.16 fixed point data */ + vi.v[j] >>= 17; + /* and apply random shift */ + if (prng_rand_n (3) == 0) + vi.v[j] >>= prng_rand_n (46); + } + + if (prng_rand_n (2)) + { + /* random shift for the matrix */ + for (j = 0; j < 3; j++) + for (k = 0; k < 3; k++) + ti.matrix[j][k] >>= prng_rand_n (30); + } + + if (prng_rand_n (2)) + { + /* affine matrix */ + ti.matrix[2][0] = 0; + ti.matrix[2][1] = 0; + ti.matrix[2][2] = pixman_fixed_1; + } + + if (prng_rand_n (2)) + { + /* cartesian coordinates */ + vi.v[2] = pixman_fixed_1; + } + + is_affine = (ti.matrix[2][0] == 0 && ti.matrix[2][1] == 0 && + ti.matrix[2][2] == pixman_fixed_1 && + vi.v[2] == pixman_fixed_1); + + transform_ok = TRUE; + if (is_affine && prng_rand_n (2)) + pixman_transform_point_31_16_affine (&ti, &vi, &result_i); + else + transform_ok = pixman_transform_point_31_16 (&ti, &vi, &result_i); + + crc32 = compute_crc32 (crc32, &result_i, sizeof(result_i)); + +#ifdef HAVE_FLOAT128 + /* compare with a reference 128-bit floating point implementation */ + for (j = 0; j < 3; j++) + { + vf.v[j] = pixman_fixed_to_float128 (vi.v[j]); + for (k = 0; k < 3; k++) + { + tf.m[j][k] = pixman_fixed_to_float128 (ti.matrix[j][k]); + } + } + + if (pixman_transform_point_f128 (&tf, &vf, &result_f)) + { + if (transform_ok || + (does_it_fit_fixed_48_16 (result_f.v[0]) && + does_it_fit_fixed_48_16 (result_f.v[1]) && + does_it_fit_fixed_48_16 (result_f.v[2]))) + { + for (j = 0; j < 3; j++) + { + double diff = fabs (result_f.v[j] - + pixman_fixed_to_float128 (result_i.v[j])); + + if (is_affine && diff > (0.51 / 65536.0)) + { + printf ("%d:%d: bad precision for affine (%.12f)\n", + testnum, i, diff); + abort (); + } + else if (diff > (0.71 / 65536.0)) + { + printf ("%d:%d: bad precision for projective (%.12f)\n", + testnum, i, diff); + abort (); + } + } + } + } +#endif + } + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main ("matrix", 20000, + 0xBEBF98C3, + test_matrix, argc, argv); +} |