aboutsummaryrefslogtreecommitdiff
path: root/pixman
diff options
context:
space:
mode:
Diffstat (limited to 'pixman')
-rw-r--r--pixman/configure.ac16
-rw-r--r--pixman/demos/scale.c7
-rw-r--r--pixman/demos/scale.ui30
-rw-r--r--pixman/pixman/pixman-fast-path.c446
-rw-r--r--pixman/pixman/pixman-general.c3
-rw-r--r--pixman/pixman/pixman-glyph.c8
-rw-r--r--pixman/pixman/pixman-implementation.c28
-rw-r--r--pixman/pixman/pixman-inlines.h37
-rw-r--r--pixman/pixman/pixman-matrix.c408
-rw-r--r--pixman/pixman/pixman-private.h25
-rwxr-xr-xpixman/pixman/pixman-sse2.c284
-rw-r--r--pixman/pixman/pixman.c87
-rw-r--r--pixman/test/Makefile.sources1
-rw-r--r--pixman/test/affine-test.c6
-rw-r--r--pixman/test/lowlevel-blt-bench.c4
-rw-r--r--pixman/test/matrix-test.c186
16 files changed, 1308 insertions, 268 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 515e31218..a93e2905b 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -968,6 +968,22 @@ fi
AC_MSG_RESULT($support_for_attribute_constructor)
AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR)
+dnl =====================================
+dnl __float128
+
+support_for_float128=no
+
+AC_MSG_CHECKING(for __float128)
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
+__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; }
+]])], support_for_float128=yes)
+
+if test x$support_for_float128 = xyes; then
+ AC_DEFINE([HAVE_FLOAT128], [], [Whether the tool chain supports __float128])
+fi
+
+AC_MSG_RESULT($support_for_float128)
+
dnl ==================
dnl libpng
diff --git a/pixman/demos/scale.c b/pixman/demos/scale.c
index 9100ff72a..869ada12b 100644
--- a/pixman/demos/scale.c
+++ b/pixman/demos/scale.c
@@ -39,6 +39,7 @@ typedef struct
GtkAdjustment * scale_x_adjustment;
GtkAdjustment * scale_y_adjustment;
GtkAdjustment * rotate_adjustment;
+ GtkAdjustment * subsample_adjustment;
int scaled_width;
int scaled_height;
} app_t;
@@ -236,7 +237,8 @@ rescale (GtkWidget *may_be_null, app_t *app)
get_value (app, filters, "reconstruct_y_combo_box"),
get_value (app, filters, "sample_x_combo_box"),
get_value (app, filters, "sample_y_combo_box"),
- 4, 4);
+ gtk_adjustment_get_value (app->subsample_adjustment),
+ gtk_adjustment_get_value (app->subsample_adjustment));
pixman_image_set_filter (app->original, PIXMAN_FILTER_SEPARABLE_CONVOLUTION, params, n_params);
@@ -360,10 +362,13 @@ app_new (pixman_image_t *original)
GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_y_adjustment"));
app->rotate_adjustment =
GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "rotate_adjustment"));
+ app->subsample_adjustment =
+ GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "subsample_adjustment"));
g_signal_connect (app->scale_x_adjustment, "value_changed", G_CALLBACK (rescale), app);
g_signal_connect (app->scale_y_adjustment, "value_changed", G_CALLBACK (rescale), app);
g_signal_connect (app->rotate_adjustment, "value_changed", G_CALLBACK (rescale), app);
+ g_signal_connect (app->subsample_adjustment, "value_changed", G_CALLBACK (rescale), app);
widget = get_widget (app, "scale_x_scale");
gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL);
diff --git a/pixman/demos/scale.ui b/pixman/demos/scale.ui
index f7c0c805f..b3450d34d 100644
--- a/pixman/demos/scale.ui
+++ b/pixman/demos/scale.ui
@@ -23,6 +23,14 @@
<property name="page_increment">10</property>
<property name="page_size">10</property>
</object>
+ <object class="GtkAdjustment" id="subsample_adjustment">
+ <property name="lower">1</property>
+ <property name="upper">12</property>
+ <property name="step_increment">1</property>
+ <property name="page_increment">1</property>
+ <property name="page_size">0</property>
+ <property name="value">4</property>
+ </object>
<object class="GtkWindow" id="main">
<child>
<object class="GtkHBox" id="u">
@@ -51,6 +59,7 @@
<child>
<object class="GtkVBox" id="box1">
<property name="visible">True</property>
+ <property name="spacing">12</property>
<child>
<object class="GtkHBox" id="box2">
<property name="visible">True</property>
@@ -234,6 +243,17 @@
</packing>
</child>
<child>
+ <object class="GtkLabel" id="label9">
+ <property name="visible">True</property>
+ <property name="xalign">1</property>
+ <property name="label" translatable="yes">&lt;b&gt;Subsample:&lt;/b&gt;</property>
+ <property name="use_markup">True</property>
+ </object>
+ <packing>
+ <property name="top_attach">5</property>
+ </packing>
+ </child>
+ <child>
<object class="GtkComboBox" id="reconstruct_x_combo_box">
<property name="visible">True</property>
</object>
@@ -277,6 +297,16 @@
<property name="top_attach">4</property>
</packing>
</child>
+ <child>
+ <object class="GtkSpinButton" id="subsample_spin_button">
+ <property name="visible">True</property>
+ <property name="adjustment">subsample_adjustment</property>
+ </object>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="top_attach">5</property>
+ </packing>
+ </child>
</object>
<packing>
<property name="expand">False</property>
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c
index c625e0c4a..247aea645 100644
--- a/pixman/pixman/pixman-fast-path.c
+++ b/pixman/pixman/pixman-fast-path.c
@@ -739,36 +739,6 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp,
}
static void
-fast_composite_src_x888_0565 (pixman_implementation_t *imp,
- pixman_composite_info_t *info)
-{
- PIXMAN_COMPOSITE_ARGS (info);
- uint16_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- *dst = convert_8888_to_0565 (s);
- dst++;
- }
- }
-}
-
-static void
fast_composite_add_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
@@ -1243,6 +1213,18 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp,
pixman_composite_func_t func;
pixman_format_code_t mask_format;
uint32_t src_flags, mask_flags;
+ int32_t sx, sy;
+ int32_t width_remain;
+ int32_t num_pixels;
+ int32_t src_width;
+ int32_t i, j;
+ pixman_image_t extended_src_image;
+ uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
+ pixman_bool_t need_src_extension;
+ uint32_t *src_line;
+ int32_t src_stride;
+ int32_t src_bpp;
+ pixman_composite_info_t info2 = *info;
src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
@@ -1258,149 +1240,131 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp,
mask_flags = FAST_PATH_IS_OPAQUE;
}
- if (_pixman_implementation_lookup_composite (
- imp->toplevel, info->op,
- src_image->common.extended_format_code, src_flags,
- mask_format, mask_flags,
- dest_image->common.extended_format_code, info->dest_flags,
- &imp, &func))
+ _pixman_implementation_lookup_composite (
+ imp->toplevel, info->op,
+ src_image->common.extended_format_code, src_flags,
+ mask_format, mask_flags,
+ dest_image->common.extended_format_code, info->dest_flags,
+ &imp, &func);
+
+ src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
+
+ if (src_image->bits.width < REPEAT_MIN_WIDTH &&
+ (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) &&
+ !src_image->bits.indexed)
{
- int32_t sx, sy;
- int32_t width_remain;
- int32_t num_pixels;
- int32_t src_width;
- int32_t i, j;
- pixman_image_t extended_src_image;
- uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
- pixman_bool_t need_src_extension;
- uint32_t *src_line;
- int32_t src_stride;
- int32_t src_bpp;
- pixman_composite_info_t info2 = *info;
-
- src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
-
- if (src_image->bits.width < REPEAT_MIN_WIDTH &&
- (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) &&
- !src_image->bits.indexed)
- {
- sx = src_x;
- sx = MOD (sx, src_image->bits.width);
- sx += width;
- src_width = 0;
+ sx = src_x;
+ sx = MOD (sx, src_image->bits.width);
+ sx += width;
+ src_width = 0;
- while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
- src_width += src_image->bits.width;
+ while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
+ src_width += src_image->bits.width;
- src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
+ src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
- /* Initialize/validate stack-allocated temporary image */
- _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
- src_width, 1, &extended_src[0], src_stride,
- FALSE);
- _pixman_image_validate (&extended_src_image);
+ /* Initialize/validate stack-allocated temporary image */
+ _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
+ src_width, 1, &extended_src[0], src_stride,
+ FALSE);
+ _pixman_image_validate (&extended_src_image);
- info2.src_image = &extended_src_image;
- need_src_extension = TRUE;
- }
- else
- {
- src_width = src_image->bits.width;
- need_src_extension = FALSE;
- }
+ info2.src_image = &extended_src_image;
+ need_src_extension = TRUE;
+ }
+ else
+ {
+ src_width = src_image->bits.width;
+ need_src_extension = FALSE;
+ }
- sx = src_x;
- sy = src_y;
+ sx = src_x;
+ sy = src_y;
- while (--height >= 0)
- {
- sx = MOD (sx, src_width);
- sy = MOD (sy, src_image->bits.height);
+ while (--height >= 0)
+ {
+ sx = MOD (sx, src_width);
+ sy = MOD (sy, src_image->bits.height);
- if (need_src_extension)
+ if (need_src_extension)
+ {
+ if (src_bpp == 32)
{
- if (src_bpp == 32)
- {
- PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
- for (i = 0; i < src_width; )
- {
- for (j = 0; j < src_image->bits.width; j++, i++)
- extended_src[i] = src_line[j];
- }
- }
- else if (src_bpp == 16)
+ for (i = 0; i < src_width; )
{
- uint16_t *src_line_16;
-
- PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
- src_line_16, 1);
- src_line = (uint32_t*)src_line_16;
-
- for (i = 0; i < src_width; )
- {
- for (j = 0; j < src_image->bits.width; j++, i++)
- ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
- }
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ extended_src[i] = src_line[j];
}
- else if (src_bpp == 8)
- {
- uint8_t *src_line_8;
+ }
+ else if (src_bpp == 16)
+ {
+ uint16_t *src_line_16;
- PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
- src_line_8, 1);
- src_line = (uint32_t*)src_line_8;
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
+ src_line_16, 1);
+ src_line = (uint32_t*)src_line_16;
- for (i = 0; i < src_width; )
- {
- for (j = 0; j < src_image->bits.width; j++, i++)
- ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
- }
+ for (i = 0; i < src_width; )
+ {
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
}
-
- info2.src_y = 0;
}
- else
+ else if (src_bpp == 8)
{
- info2.src_y = sy;
+ uint8_t *src_line_8;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
+ src_line_8, 1);
+ src_line = (uint32_t*)src_line_8;
+
+ for (i = 0; i < src_width; )
+ {
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
+ }
}
- width_remain = width;
+ info2.src_y = 0;
+ }
+ else
+ {
+ info2.src_y = sy;
+ }
- while (width_remain > 0)
- {
- num_pixels = src_width - sx;
+ width_remain = width;
- if (num_pixels > width_remain)
- num_pixels = width_remain;
+ while (width_remain > 0)
+ {
+ num_pixels = src_width - sx;
- info2.src_x = sx;
- info2.width = num_pixels;
- info2.height = 1;
+ if (num_pixels > width_remain)
+ num_pixels = width_remain;
- func (imp, &info2);
+ info2.src_x = sx;
+ info2.width = num_pixels;
+ info2.height = 1;
- width_remain -= num_pixels;
- info2.mask_x += num_pixels;
- info2.dest_x += num_pixels;
- sx = 0;
- }
+ func (imp, &info2);
- sx = src_x;
- sy++;
- info2.mask_x = info->mask_x;
- info2.mask_y++;
- info2.dest_x = info->dest_x;
- info2.dest_y++;
+ width_remain -= num_pixels;
+ info2.mask_x += num_pixels;
+ info2.dest_x += num_pixels;
+ sx = 0;
}
- if (need_src_extension)
- _pixman_image_fini (&extended_src_image);
- }
- else
- {
- _pixman_log_error (FUNC, "Didn't find a suitable function ");
+ sx = src_x;
+ sy++;
+ info2.mask_x = info->mask_x;
+ info2.mask_y++;
+ info2.dest_x = info->dest_x;
+ info2.dest_y++;
}
+
+ if (need_src_extension)
+ _pixman_image_fini (&extended_src_image);
}
/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
@@ -1913,10 +1877,6 @@ static const pixman_fast_path_t c_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
@@ -2199,12 +2159,200 @@ fast_path_fill (pixman_implementation_t *imp,
return TRUE;
}
+/*****************************************************************************/
+
+static uint32_t *
+fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int32_t w = iter->width;
+ uint32_t *dst = iter->buffer;
+ const uint16_t *src = (const uint16_t *)iter->bits;
+
+ iter->bits += iter->stride;
+
+ /* Align the source buffer at 4 bytes boundary */
+ if (w > 0 && ((uintptr_t)src & 3))
+ {
+ *dst++ = convert_0565_to_8888 (*src++);
+ w--;
+ }
+ /* Process two pixels per iteration */
+ while ((w -= 2) >= 0)
+ {
+ uint32_t sr, sb, sg, t0, t1;
+ uint32_t s = *(const uint32_t *)src;
+ src += 2;
+ sr = (s >> 8) & 0x00F800F8;
+ sb = (s << 3) & 0x00F800F8;
+ sg = (s >> 3) & 0x00FC00FC;
+ sr |= sr >> 5;
+ sb |= sb >> 5;
+ sg |= sg >> 6;
+ t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
+ (sb & 0xFF) | 0xFF000000;
+ t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
+ (sb >> 16) | 0xFF000000;
+#ifdef WORDS_BIGENDIAN
+ *dst++ = t1;
+ *dst++ = t0;
+#else
+ *dst++ = t0;
+ *dst++ = t1;
+#endif
+ }
+ if (w & 1)
+ {
+ *dst = convert_0565_to_8888 (*src);
+ }
+
+ return iter->buffer;
+}
+
+static uint32_t *
+fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
+{
+ iter->bits += iter->stride;
+ return iter->buffer;
+}
+
+/* Helper function for a workaround, which tries to ensure that 0x1F001F
+ * constant is always allocated in a register on RISC architectures.
+ */
+static force_inline uint32_t
+convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
+{
+ uint32_t a, b;
+ a = (s >> 3) & x1F001F;
+ b = s & 0xFC00;
+ a |= a >> 5;
+ a |= b >> 5;
+ return a;
+}
+
+static void
+fast_write_back_r5g6b5 (pixman_iter_t *iter)
+{
+ int32_t w = iter->width;
+ uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
+ const uint32_t *src = iter->buffer;
+ /* Workaround to ensure that x1F001F variable is allocated in a register */
+ static volatile uint32_t volatile_x1F001F = 0x1F001F;
+ uint32_t x1F001F = volatile_x1F001F;
+
+ while ((w -= 4) >= 0)
+ {
+ uint32_t s1 = *src++;
+ uint32_t s2 = *src++;
+ uint32_t s3 = *src++;
+ uint32_t s4 = *src++;
+ *dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
+ }
+ if (w & 2)
+ {
+ *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
+ }
+ if (w & 1)
+ {
+ *dst = convert_8888_to_0565_workaround (*src, x1F001F);
+ }
+}
+
+typedef struct
+{
+ pixman_format_code_t format;
+ pixman_iter_get_scanline_t get_scanline;
+ pixman_iter_write_back_t write_back;
+} fetcher_info_t;
+
+static const fetcher_info_t fetchers[] =
+{
+ { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
+ { PIXMAN_null }
+};
+
+static pixman_bool_t
+fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+{
+ pixman_image_t *image = iter->image;
+
+#define FLAGS \
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
+ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+
+ if ((iter->iter_flags & ITER_NARROW) &&
+ (iter->image_flags & FLAGS) == FLAGS)
+ {
+ const fetcher_info_t *f;
+
+ for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+ {
+ if (image->common.extended_format_code == f->format)
+ {
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4;
+
+ iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ iter->stride = s;
+
+ iter->get_scanline = f->get_scanline;
+ return TRUE;
+ }
+ }
+ }
+
+ return FALSE;
+}
+
+static pixman_bool_t
+fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+{
+ pixman_image_t *image = iter->image;
+
+ if ((iter->iter_flags & ITER_NARROW) &&
+ (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
+ {
+ const fetcher_info_t *f;
+
+ for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+ {
+ if (image->common.extended_format_code == f->format)
+ {
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4;
+
+ iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ iter->stride = s;
+
+ if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
+ (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
+ {
+ iter->get_scanline = fast_dest_fetch_noop;
+ }
+ else
+ {
+ iter->get_scanline = f->get_scanline;
+ }
+ iter->write_back = f->write_back;
+ return TRUE;
+ }
+ }
+ }
+ return FALSE;
+}
+
+
pixman_implementation_t *
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
imp->fill = fast_path_fill;
+ imp->src_iter_init = fast_src_iter_init;
+ imp->dest_iter_init = fast_dest_iter_init;
return imp;
}
diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c
index f175d771e..93a1b9acf 100644
--- a/pixman/pixman/pixman-general.c
+++ b/pixman/pixman/pixman-general.c
@@ -188,9 +188,6 @@ general_composite_rect (pixman_implementation_t *imp,
compose = _pixman_implementation_lookup_combiner (
imp->toplevel, op, component_alpha, narrow);
- if (!compose)
- return;
-
for (i = 0; i < height; ++i)
{
uint32_t *s, *m, *d;
diff --git a/pixman/pixman/pixman-glyph.c b/pixman/pixman/pixman-glyph.c
index 6d2c8bbb7..5a271b64b 100644
--- a/pixman/pixman/pixman-glyph.c
+++ b/pixman/pixman/pixman-glyph.c
@@ -463,16 +463,13 @@ pixman_composite_glyphs_no_mask (pixman_op_t op,
{
glyph_format = glyph_img->common.extended_format_code;
glyph_flags = glyph_img->common.flags;
-
+
_pixman_implementation_lookup_composite (
get_implementation(), op,
src->common.extended_format_code, src->common.flags,
glyph_format, glyph_flags | extra,
dest_format, dest_flags,
&implementation, &func);
-
- if (!func)
- goto out;
}
info.src_x = src_x + composite_box.x1 - dest_x;
@@ -582,9 +579,6 @@ add_glyphs (pixman_glyph_cache_t *cache,
mask_format, info.mask_flags,
dest_format, dest_flags,
&implementation, &func);
-
- if (!func)
- goto out;
}
glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x;
diff --git a/pixman/pixman/pixman-implementation.c b/pixman/pixman/pixman-implementation.c
index ec467a619..c0a643633 100644
--- a/pixman/pixman/pixman-implementation.c
+++ b/pixman/pixman/pixman-implementation.c
@@ -65,7 +65,13 @@ typedef struct
PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
-pixman_bool_t
+static void
+dummy_composite_rect (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+}
+
+void
_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
pixman_op_t op,
pixman_format_code_t src_format,
@@ -142,7 +148,11 @@ _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
++info;
}
}
- return FALSE;
+
+ /* We should never reach this point */
+ _pixman_log_error (FUNC, "No known composite function\n");
+ *out_imp = NULL;
+ *out_func = dummy_composite_rect;
update_cache:
if (i)
@@ -160,8 +170,16 @@ update_cache:
cache->cache[0].fast_path.dest_flags = dest_flags;
cache->cache[0].fast_path.func = *out_func;
}
+}
- return TRUE;
+static void
+dummy_combine (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
}
pixman_combine_32_func_t
@@ -199,7 +217,9 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
imp = imp->fallback;
}
- return NULL;
+ /* We should never reach this point */
+ _pixman_log_error (FUNC, "No known combine function\n");
+ return dummy_combine;
}
pixman_bool_t
diff --git a/pixman/pixman/pixman-inlines.h b/pixman/pixman/pixman-inlines.h
index ce94e5248..ccb29b093 100644
--- a/pixman/pixman/pixman-inlines.h
+++ b/pixman/pixman/pixman-inlines.h
@@ -88,6 +88,42 @@ pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
((1 << BILINEAR_INTERPOLATION_BITS) - 1);
}
+#if BILINEAR_INTERPOLATION_BITS <= 4
+/* Inspired by Filter_32_opaque from Skia */
+static force_inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+ uint32_t bl, uint32_t br,
+ int distx, int disty)
+{
+ int distxy, distxiy, distixy, distixiy;
+ uint32_t lo, hi;
+
+ distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
+ disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
+
+ distxy = distx * disty;
+ distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */
+ distixy = (disty << 4) - distxy; /* disty * (16 - distx) */
+ distixiy =
+ 16 * 16 - (disty << 4) -
+ (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
+
+ lo = (tl & 0xff00ff) * distixiy;
+ hi = ((tl >> 8) & 0xff00ff) * distixiy;
+
+ lo += (tr & 0xff00ff) * distxiy;
+ hi += ((tr >> 8) & 0xff00ff) * distxiy;
+
+ lo += (bl & 0xff00ff) * distixy;
+ hi += ((bl >> 8) & 0xff00ff) * distixy;
+
+ lo += (br & 0xff00ff) * distxy;
+ hi += ((br >> 8) & 0xff00ff) * distxy;
+
+ return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
+}
+
+#else
#if SIZEOF_LONG > 4
static force_inline uint32_t
@@ -184,6 +220,7 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
}
#endif
+#endif // BILINEAR_INTERPOLATION_BITS <= 4
/*
* For each scanline fetched from source image with PAD repeat:
diff --git a/pixman/pixman/pixman-matrix.c b/pixman/pixman/pixman-matrix.c
index cd2f1b5b8..89b96826b 100644
--- a/pixman/pixman/pixman-matrix.c
+++ b/pixman/pixman/pixman-matrix.c
@@ -34,6 +34,338 @@
#define F(x) pixman_int_to_fixed (x)
+static force_inline int
+count_leading_zeros (uint32_t x)
+{
+#ifdef __GNUC__
+ return __builtin_clz (x);
+#else
+ int n = 0;
+ while (x)
+ {
+ n++;
+ x >>= 1;
+ }
+ return 32 - n;
+#endif
+}
+
+/*
+ * Large signed/unsigned integer division with rounding for the platforms with
+ * only 64-bit integer data type supported (no 128-bit data type).
+ *
+ * Arguments:
+ * hi, lo - high and low 64-bit parts of the dividend
+ * div - 48-bit divisor
+ *
+ * Returns: lowest 64 bits of the result as a return value and highest 64
+ * bits of the result to "result_hi" pointer
+ */
+
+/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */
+static force_inline uint64_t
+rounded_udiv_128_by_48 (uint64_t hi,
+ uint64_t lo,
+ uint64_t div,
+ uint64_t *result_hi)
+{
+ uint64_t tmp, remainder, result_lo;
+ assert(div < ((uint64_t)1 << 48));
+
+ remainder = hi % div;
+ *result_hi = hi / div;
+
+ tmp = (remainder << 16) + (lo >> 48);
+ result_lo = tmp / div;
+ remainder = tmp % div;
+
+ tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF);
+ result_lo = (result_lo << 16) + (tmp / div);
+ remainder = tmp % div;
+
+ tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF);
+ result_lo = (result_lo << 16) + (tmp / div);
+ remainder = tmp % div;
+
+ tmp = (remainder << 16) + (lo & 0xFFFF);
+ result_lo = (result_lo << 16) + (tmp / div);
+ remainder = tmp % div;
+
+ /* round to nearest */
+ if (remainder * 2 >= div && ++result_lo == 0)
+ *result_hi += 1;
+
+ return result_lo;
+}
+
+/* signed division (128-bit by 49-bit) with rounding to nearest */
+static inline int64_t
+rounded_sdiv_128_by_49 (int64_t hi,
+ uint64_t lo,
+ int64_t div,
+ int64_t *signed_result_hi)
+{
+ uint64_t result_lo, result_hi;
+ int sign = 0;
+ if (div < 0)
+ {
+ div = -div;
+ sign ^= 1;
+ }
+ if (hi < 0)
+ {
+ if (lo != 0)
+ hi++;
+ hi = -hi;
+ lo = -lo;
+ sign ^= 1;
+ }
+ result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi);
+ if (sign)
+ {
+ if (result_lo != 0)
+ result_hi++;
+ result_hi = -result_hi;
+ result_lo = -result_lo;
+ }
+ if (signed_result_hi)
+ {
+ *signed_result_hi = result_hi;
+ }
+ return result_lo;
+}
+
+/*
+ * Multiply 64.16 fixed point value by (2^scalebits) and convert
+ * to 128-bit integer.
+ */
+static force_inline void
+fixed_64_16_to_int128 (int64_t hi,
+ int64_t lo,
+ int64_t *rhi,
+ int64_t *rlo,
+ int scalebits)
+{
+ /* separate integer and fractional parts */
+ hi += lo >> 16;
+ lo &= 0xFFFF;
+
+ if (scalebits <= 0)
+ {
+ *rlo = hi >> (-scalebits);
+ *rhi = *rlo >> 63;
+ }
+ else
+ {
+ *rhi = hi >> (64 - scalebits);
+ *rlo = (uint64_t)hi << scalebits;
+ if (scalebits < 16)
+ *rlo += lo >> (16 - scalebits);
+ else
+ *rlo += lo << (scalebits - 16);
+ }
+}
+
+/*
+ * Convert 112.16 fixed point value to 48.16 with clamping for the out
+ * of range values.
+ */
+static force_inline pixman_fixed_48_16_t
+fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag)
+{
+ if ((lo >> 63) != hi)
+ {
+ *clampflag = TRUE;
+ return hi >= 0 ? INT64_MAX : INT64_MIN;
+ }
+ else
+ {
+ return lo;
+ }
+}
+
+/*
+ * Transform a point with 31.16 fixed point coordinates from the destination
+ * space to a point with 48.16 fixed point coordinates in the source space.
+ * No overflows are possible for affine transformations and the results are
+ * accurate including the least significant bit. Projective transformations
+ * may overflow, in this case the results are just clamped to return maximum
+ * or minimum 48.16 values (so that the caller can at least handle the NONE
+ * and PAD repeats correctly) and the return value is FALSE to indicate that
+ * such clamping has happened.
+ */
+PIXMAN_EXPORT pixman_bool_t
+pixman_transform_point_31_16 (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result)
+{
+ pixman_bool_t clampflag = FALSE;
+ int i;
+ int64_t tmp[3][2], divint;
+ uint16_t divfrac;
+
+ /* input vector values must have no more than 31 bits (including sign)
+ * in the integer part */
+ assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+
+ for (i = 0; i < 3; i++)
+ {
+ tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
+ tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
+ tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
+ tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
+ tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
+ tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
+ }
+
+ /*
+ * separate 64-bit integer and 16-bit fractional parts for the divisor,
+ * which is also scaled by 65536 after fixed point multiplication.
+ */
+ divint = tmp[2][0] + (tmp[2][1] >> 16);
+ divfrac = tmp[2][1] & 0xFFFF;
+
+ if (divint == pixman_fixed_1 && divfrac == 0)
+ {
+ /*
+ * this is a simple affine transformation
+ */
+ result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
+ result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
+ result->v[2] = pixman_fixed_1;
+ }
+ else if (divint == 0 && divfrac == 0)
+ {
+ /*
+ * handle zero divisor (if the values are non-zero, set the
+ * results to maximum positive or minimum negative)
+ */
+ clampflag = TRUE;
+
+ result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
+ result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
+
+ if (result->v[0] > 0)
+ result->v[0] = INT64_MAX;
+ else if (result->v[0] < 0)
+ result->v[0] = INT64_MIN;
+
+ if (result->v[1] > 0)
+ result->v[1] = INT64_MAX;
+ else if (result->v[1] < 0)
+ result->v[1] = INT64_MIN;
+ }
+ else
+ {
+ /*
+ * projective transformation, analyze the top 32 bits of the divisor
+ */
+ int32_t hi32divbits = divint >> 32;
+ if (hi32divbits < 0)
+ hi32divbits = ~hi32divbits;
+
+ if (hi32divbits == 0)
+ {
+ /* the divisor is small, we can actually keep all the bits */
+ int64_t hi, rhi, lo, rlo;
+ int64_t div = (divint << 16) + divfrac;
+
+ fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32);
+ rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+ result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+
+ fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32);
+ rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+ result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+ }
+ else
+ {
+ /* the divisor needs to be reduced to 48 bits */
+ int64_t hi, rhi, lo, rlo, div;
+ int shift = 32 - count_leading_zeros (hi32divbits);
+ fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift);
+
+ fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift);
+ rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+ result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+
+ fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift);
+ rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+ result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+ }
+ }
+ result->v[2] = pixman_fixed_1;
+ return !clampflag;
+}
+
+PIXMAN_EXPORT void
+pixman_transform_point_31_16_affine (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result)
+{
+ int64_t hi0, lo0, hi1, lo1;
+
+ /* input vector values must have no more than 31 bits (including sign)
+ * in the integer part */
+ assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+
+ hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16);
+ lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF);
+ hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16);
+ lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF);
+ hi0 += (int64_t)t->matrix[0][2];
+
+ hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16);
+ lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF);
+ hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16);
+ lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF);
+ hi1 += (int64_t)t->matrix[1][2];
+
+ result->v[0] = hi0 + ((lo0 + 0x8000) >> 16);
+ result->v[1] = hi1 + ((lo1 + 0x8000) >> 16);
+ result->v[2] = pixman_fixed_1;
+}
+
+PIXMAN_EXPORT void
+pixman_transform_point_31_16_3d (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result)
+{
+ int i;
+ int64_t tmp[3][2];
+
+ /* input vector values must have no more than 31 bits (including sign)
+ * in the integer part */
+ assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
+ assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+
+ for (i = 0; i < 3; i++)
+ {
+ tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
+ tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
+ tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
+ tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
+ tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
+ tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
+ }
+
+ result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
+ result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
+ result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16);
+}
+
PIXMAN_EXPORT void
pixman_transform_init_identity (struct pixman_transform *matrix)
{
@@ -50,69 +382,41 @@ PIXMAN_EXPORT pixman_bool_t
pixman_transform_point_3d (const struct pixman_transform *transform,
struct pixman_vector * vector)
{
- struct pixman_vector result;
- pixman_fixed_32_32_t partial;
- pixman_fixed_48_16_t v;
- int i, j;
+ pixman_vector_48_16_t tmp;
+ tmp.v[0] = vector->vector[0];
+ tmp.v[1] = vector->vector[1];
+ tmp.v[2] = vector->vector[2];
- for (j = 0; j < 3; j++)
- {
- v = 0;
- for (i = 0; i < 3; i++)
- {
- partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] *
- (pixman_fixed_48_16_t) vector->vector[i]);
- v += (partial + 0x8000) >> 16;
- }
-
- if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
- return FALSE;
-
- result.vector[j] = (pixman_fixed_t) v;
- }
-
- *vector = result;
+ pixman_transform_point_31_16_3d (transform, &tmp, &tmp);
- if (!result.vector[2])
- return FALSE;
+ vector->vector[0] = tmp.v[0];
+ vector->vector[1] = tmp.v[1];
+ vector->vector[2] = tmp.v[2];
- return TRUE;
+ return vector->vector[0] == tmp.v[0] &&
+ vector->vector[1] == tmp.v[1] &&
+ vector->vector[2] == tmp.v[2];
}
PIXMAN_EXPORT pixman_bool_t
pixman_transform_point (const struct pixman_transform *transform,
struct pixman_vector * vector)
{
- pixman_fixed_32_32_t partial;
- pixman_fixed_34_30_t v[3];
- pixman_fixed_48_16_t quo;
- int i, j;
+ pixman_vector_48_16_t tmp;
+ tmp.v[0] = vector->vector[0];
+ tmp.v[1] = vector->vector[1];
+ tmp.v[2] = vector->vector[2];
- for (j = 0; j < 3; j++)
- {
- v[j] = 0;
-
- for (i = 0; i < 3; i++)
- {
- partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] *
- (pixman_fixed_32_32_t) vector->vector[i]);
- v[j] += (partial + 2) >> 2;
- }
- }
-
- if (!((v[2] + 0x8000) >> 16))
- return FALSE;
+ if (!pixman_transform_point_31_16 (transform, &tmp, &tmp))
+ return FALSE;
- for (j = 0; j < 2; j++)
- {
- quo = v[j] / ((v[2] + 0x8000) >> 16);
- if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16)
- return FALSE;
- vector->vector[j] = (pixman_fixed_t) quo;
- }
-
- vector->vector[2] = pixman_fixed_1;
- return TRUE;
+ vector->vector[0] = tmp.v[0];
+ vector->vector[1] = tmp.v[1];
+ vector->vector[2] = tmp.v[2];
+
+ return vector->vector[0] == tmp.v[0] &&
+ vector->vector[1] == tmp.v[1] &&
+ vector->vector[2] == tmp.v[2];
}
PIXMAN_EXPORT pixman_bool_t
diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h
index e5ab873ed..cb78a2ed8 100644
--- a/pixman/pixman/pixman-private.h
+++ b/pixman/pixman/pixman-private.h
@@ -497,7 +497,7 @@ pixman_implementation_t *
_pixman_implementation_create (pixman_implementation_t *fallback,
const pixman_fast_path_t *fast_paths);
-pixman_bool_t
+void
_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
pixman_op_t op,
pixman_format_code_t src_format,
@@ -1052,7 +1052,7 @@ _pixman_log_error (const char *function, const char *message);
#else
-#define _pixman_log_error(f,m) do { } while (0) \
+#define _pixman_log_error(f,m) do { } while (0)
#define return_if_fail(expr) \
do \
@@ -1078,6 +1078,27 @@ _pixman_log_error (const char *function, const char *message);
#endif
/*
+ * Matrix
+ */
+
+typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;
+
+pixman_bool_t
+pixman_transform_point_31_16 (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result);
+
+void
+pixman_transform_point_31_16_3d (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result);
+
+void
+pixman_transform_point_31_16_affine (const pixman_transform_t *t,
+ const pixman_vector_48_16_t *v,
+ pixman_vector_48_16_t *result);
+
+/*
* Timers
*/
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index 97d63d1ad..f04ea92d9 100755
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -4523,7 +4523,163 @@ sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
sse2_combine_add_u (imp, op, dst, src, NULL, width);
}
+}
+
+static void
+sse2_composite_add_n_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t *dst_line, *dst, src;
+ int dst_stride;
+
+ __m128i xmm_src;
+
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+ if (src == 0)
+ return;
+
+ if (src == ~0)
+ {
+ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32,
+ dest_x, dest_y, width, height, ~0);
+
+ return;
+ }
+
+ xmm_src = _mm_set_epi32 (src, src, src, src);
+ while (height--)
+ {
+ int w = width;
+ uint32_t d;
+
+ dst = dst_line;
+ dst_line += dst_stride;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ d = *dst;
+ *dst++ =
+ _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d)));
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ save_128_aligned
+ ((__m128i*)dst,
+ _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
+
+ dst += 4;
+ w -= 4;
+ }
+
+ while (w--)
+ {
+ d = *dst;
+ *dst++ =
+ _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src,
+ _mm_cvtsi32_si128 (d)));
+ }
+ }
+}
+
+static void
+sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
+ pixman_composite_info_t *info)
+{
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t *dst_line, *dst;
+ uint8_t *mask_line, *mask;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint32_t src;
+
+ __m128i xmm_src;
+
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+ if (src == 0)
+ return;
+ xmm_src = expand_pixel_32_1x128 (src);
+
+ PIXMAN_IMAGE_GET_LINE (
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ uint8_t m = *mask++;
+ if (m)
+ {
+ *dst = pack_1x128_32
+ (_mm_adds_epu16
+ (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
+ unpack_32_1x128 (*dst)));
+ }
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ uint32_t m = *(uint32_t*)mask;
+ if (m)
+ {
+ __m128i xmm_mask_lo, xmm_mask_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+
+ __m128i xmm_dst = load_128_aligned ((__m128i*)dst);
+ __m128i xmm_mask =
+ _mm_unpacklo_epi8 (unpack_32_1x128(m),
+ _mm_setzero_si128 ());
+
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ pix_multiply_2x128 (&xmm_src, &xmm_src,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
+ xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+ w -= 4;
+ dst += 4;
+ mask += 4;
+ }
+
+ while (w)
+ {
+ uint8_t m = *mask++;
+ if (m)
+ {
+ *dst = pack_1x128_32
+ (_mm_adds_epu16
+ (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
+ unpack_32_1x128 (*dst)));
+ }
+ dst++;
+ w--;
+ }
+ }
}
static pixman_bool_t
@@ -5786,6 +5942,121 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
uint32_t, uint8_t, uint32_t,
NORMAL, FLAG_HAVE_NON_SOLID_MASK)
+static force_inline void
+scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst,
+ const uint32_t * mask,
+ const uint32_t * src_top,
+ const uint32_t * src_bottom,
+ int32_t w,
+ int wt,
+ int wb,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1, pix2, pix3, pix4;
+ __m128i xmm_mask;
+
+ if (zero_src || (*mask >> 24) == 0)
+ return;
+
+ xmm_mask = create_mask_16_128 (*mask >> 24);
+
+ while (w && ((uintptr_t)dst & 15))
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ if (pix1)
+ {
+ uint32_t d = *dst;
+
+ __m128i ms = unpack_32_1x128 (pix1);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i dest = xmm_mask;
+ __m128i alpha_dst = unpack_32_1x128 (d);
+
+ *dst = pack_1x128_32
+ (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
+ }
+
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+
+ if (pix1 | pix2 | pix3 | pix4)
+ {
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+ xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
+
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_mask, &xmm_mask,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned
+ ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ dst += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ if (pix1)
+ {
+ uint32_t d = *dst;
+
+ __m128i ms = unpack_32_1x128 (pix1);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i dest = xmm_mask;
+ __m128i alpha_dst = unpack_32_1x128 (d);
+
+ *dst = pack_1x128_32
+ (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
+ }
+
+ dst++;
+ w--;
+ }
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ COVER, FLAG_HAVE_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ PAD, FLAG_HAVE_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NONE, FLAG_HAVE_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
+ scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NORMAL, FLAG_HAVE_SOLID_MASK)
+
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
@@ -5848,6 +6119,14 @@ static const pixman_fast_path_t sse2_fast_paths[] =
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
+ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888),
/* PIXMAN_OP_SRC */
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
@@ -5912,6 +6191,11 @@ static const pixman_fast_path_t sse2_fast_paths[] =
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
+
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
diff --git a/pixman/pixman/pixman.c b/pixman/pixman/pixman.c
index 3fabed161..184f0c4e6 100644
--- a/pixman/pixman/pixman.c
+++ b/pixman/pixman/pixman.c
@@ -581,11 +581,13 @@ pixman_image_composite32 (pixman_op_t op,
int32_t height)
{
pixman_format_code_t src_format, mask_format, dest_format;
- uint32_t src_flags, mask_flags, dest_flags;
pixman_region32_t region;
pixman_box32_t extents;
pixman_implementation_t *imp;
pixman_composite_func_t func;
+ pixman_composite_info_t info;
+ const pixman_box32_t *pbox;
+ int n;
_pixman_image_validate (src);
if (mask)
@@ -593,27 +595,27 @@ pixman_image_composite32 (pixman_op_t op,
_pixman_image_validate (dest);
src_format = src->common.extended_format_code;
- src_flags = src->common.flags;
+ info.src_flags = src->common.flags;
if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE))
{
mask_format = mask->common.extended_format_code;
- mask_flags = mask->common.flags;
+ info.mask_flags = mask->common.flags;
}
else
{
mask_format = PIXMAN_null;
- mask_flags = FAST_PATH_IS_OPAQUE;
+ info.mask_flags = FAST_PATH_IS_OPAQUE;
}
dest_format = dest->common.extended_format_code;
- dest_flags = dest->common.flags;
+ info.dest_flags = dest->common.flags;
/* Check for pixbufs */
if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) &&
(src->type == BITS && src->bits.bits == mask->bits.bits) &&
(src->common.repeat == mask->common.repeat) &&
- (src_flags & mask_flags & FAST_PATH_ID_TRANSFORM) &&
+ (info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM) &&
(src_x == mask_x && src_y == mask_y))
{
if (src_format == PIXMAN_x8b8g8r8)
@@ -638,7 +640,7 @@ pixman_image_composite32 (pixman_op_t op,
extents.x2 -= dest_x - src_x;
extents.y2 -= dest_y - src_y;
- if (!analyze_extent (src, &extents, &src_flags))
+ if (!analyze_extent (src, &extents, &info.src_flags))
goto out;
extents.x1 -= src_x - mask_x;
@@ -646,7 +648,7 @@ pixman_image_composite32 (pixman_op_t op,
extents.x2 -= src_x - mask_x;
extents.y2 -= src_y - mask_y;
- if (!analyze_extent (mask, &extents, &mask_flags))
+ if (!analyze_extent (mask, &extents, &info.mask_flags))
goto out;
/* If the clip is within the source samples, and the samples are
@@ -659,16 +661,16 @@ pixman_image_composite32 (pixman_op_t op,
FAST_PATH_BILINEAR_FILTER | \
FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR)
- if ((src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
- (src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+ if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+ (info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
{
- src_flags |= FAST_PATH_IS_OPAQUE;
+ info.src_flags |= FAST_PATH_IS_OPAQUE;
}
- if ((mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
- (mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+ if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+ (info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
{
- mask_flags |= FAST_PATH_IS_OPAQUE;
+ info.mask_flags |= FAST_PATH_IS_OPAQUE;
}
/*
@@ -676,42 +678,35 @@ pixman_image_composite32 (pixman_op_t op,
* if the src or dest are opaque. The output operator should be
* mathematically equivalent to the source.
*/
- op = optimize_operator (op, src_flags, mask_flags, dest_flags);
+ info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags);
- if (_pixman_implementation_lookup_composite (
- get_implementation (), op,
- src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags,
- &imp, &func))
- {
- pixman_composite_info_t info;
- const pixman_box32_t *pbox;
- int n;
+ _pixman_implementation_lookup_composite (
+ get_implementation (), info.op,
+ src_format, info.src_flags,
+ mask_format, info.mask_flags,
+ dest_format, info.dest_flags,
+ &imp, &func);
- info.op = op;
- info.src_image = src;
- info.mask_image = mask;
- info.dest_image = dest;
- info.src_flags = src_flags;
- info.mask_flags = mask_flags;
- info.dest_flags = dest_flags;
+ info.src_image = src;
+ info.mask_image = mask;
+ info.dest_image = dest;
- pbox = pixman_region32_rectangles (&region, &n);
+ pbox = pixman_region32_rectangles (&region, &n);
- while (n--)
- {
- info.src_x = pbox->x1 + src_x - dest_x;
- info.src_y = pbox->y1 + src_y - dest_y;
- info.mask_x = pbox->x1 + mask_x - dest_x;
- info.mask_y = pbox->y1 + mask_y - dest_y;
- info.dest_x = pbox->x1;
- info.dest_y = pbox->y1;
- info.width = pbox->x2 - pbox->x1;
- info.height = pbox->y2 - pbox->y1;
-
- func (imp, &info);
-
- pbox++;
- }
+ while (n--)
+ {
+ info.src_x = pbox->x1 + src_x - dest_x;
+ info.src_y = pbox->y1 + src_y - dest_y;
+ info.mask_x = pbox->x1 + mask_x - dest_x;
+ info.mask_y = pbox->y1 + mask_y - dest_y;
+ info.dest_x = pbox->x1;
+ info.dest_y = pbox->y1;
+ info.width = pbox->x2 - pbox->x1;
+ info.height = pbox->y2 - pbox->y1;
+
+ func (imp, &info);
+
+ pbox++;
}
out:
diff --git a/pixman/test/Makefile.sources b/pixman/test/Makefile.sources
index 8c0b505df..e323a8e8c 100644
--- a/pixman/test/Makefile.sources
+++ b/pixman/test/Makefile.sources
@@ -17,6 +17,7 @@ TESTPROGRAMS = \
gradient-crash-test \
region-contains-test \
alphamap \
+ matrix-test \
stress-test \
composite-traps-test \
blitters-test \
diff --git a/pixman/test/affine-test.c b/pixman/test/affine-test.c
index 678fbe844..2506250db 100644
--- a/pixman/test/affine-test.c
+++ b/pixman/test/affine-test.c
@@ -307,11 +307,11 @@ test_composite (int testnum,
}
#if BILINEAR_INTERPOLATION_BITS == 8
-#define CHECKSUM 0x97097336
+#define CHECKSUM 0x2CDF1F07
#elif BILINEAR_INTERPOLATION_BITS == 7
-#define CHECKSUM 0x31D2DC21
+#define CHECKSUM 0xBC00B1DF
#elif BILINEAR_INTERPOLATION_BITS == 4
-#define CHECKSUM 0x8B925154
+#define CHECKSUM 0xA227306B
#else
#define CHECKSUM 0x00000000
#endif
diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c
index 2f97b7b24..7336fa0d5 100644
--- a/pixman/test/lowlevel-blt-bench.c
+++ b/pixman/test/lowlevel-blt-bench.c
@@ -630,6 +630,8 @@ tests_tbl[] =
{ "src_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
{ "src_1555_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
{ "src_0565_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
+ { "src_8_8", PIXMAN_a8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 },
+ { "src_n_8", PIXMAN_a8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 },
{ "src_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
{ "src_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
{ "src_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 },
@@ -772,7 +774,7 @@ main (int argc, char *argv[])
for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++)
{
- if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern))
+ if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0)
{
bench_composite (tests_tbl[i].testname,
tests_tbl[i].src_fmt,
diff --git a/pixman/test/matrix-test.c b/pixman/test/matrix-test.c
new file mode 100644
index 000000000..8437dd291
--- /dev/null
+++ b/pixman/test/matrix-test.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "utils.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef HAVE_FLOAT128
+
+#define pixman_fixed_to_float128(x) (((__float128)(x)) / 65536.0Q)
+
+typedef struct { __float128 v[3]; } pixman_vector_f128_t;
+typedef struct { __float128 m[3][3]; } pixman_transform_f128_t;
+
+pixman_bool_t
+pixman_transform_point_f128 (const pixman_transform_f128_t *t,
+ const pixman_vector_f128_t *v,
+ pixman_vector_f128_t *result)
+{
+ int i;
+ for (i = 0; i < 3; i++)
+ {
+ result->v[i] = t->m[i][0] * v->v[0] +
+ t->m[i][1] * v->v[1] +
+ t->m[i][2] * v->v[2];
+ }
+ if (result->v[2] != 0)
+ {
+ result->v[0] /= result->v[2];
+ result->v[1] /= result->v[2];
+ result->v[2] = 1;
+ return TRUE;
+ }
+ else
+ {
+ return FALSE;
+ }
+}
+
+pixman_bool_t does_it_fit_fixed_48_16 (__float128 x)
+{
+ if (x >= 65536.0Q * 65536.0Q * 32768.0Q)
+ return FALSE;
+ if (x <= -65536.0Q * 65536.0Q * 32768.0Q)
+ return FALSE;
+ return TRUE;
+}
+
+#endif
+
+uint32_t
+test_matrix (int testnum, int verbose)
+{
+ uint32_t crc32 = 0;
+ int i, j, k;
+ pixman_bool_t is_affine;
+
+ prng_srand (testnum);
+
+ for (i = 0; i < 100; i++)
+ {
+ pixman_bool_t transform_ok;
+ pixman_transform_t ti;
+ pixman_vector_48_16_t vi, result_i;
+#ifdef HAVE_FLOAT128
+ pixman_transform_f128_t tf;
+ pixman_vector_f128_t vf, result_f;
+#endif
+ prng_randmemset (&ti, sizeof(ti), 0);
+ prng_randmemset (&vi, sizeof(vi), 0);
+
+ for (j = 0; j < 3; j++)
+ {
+ /* make sure that "vi" contains 31.16 fixed point data */
+ vi.v[j] >>= 17;
+ /* and apply random shift */
+ if (prng_rand_n (3) == 0)
+ vi.v[j] >>= prng_rand_n (46);
+ }
+
+ if (prng_rand_n (2))
+ {
+ /* random shift for the matrix */
+ for (j = 0; j < 3; j++)
+ for (k = 0; k < 3; k++)
+ ti.matrix[j][k] >>= prng_rand_n (30);
+ }
+
+ if (prng_rand_n (2))
+ {
+ /* affine matrix */
+ ti.matrix[2][0] = 0;
+ ti.matrix[2][1] = 0;
+ ti.matrix[2][2] = pixman_fixed_1;
+ }
+
+ if (prng_rand_n (2))
+ {
+ /* cartesian coordinates */
+ vi.v[2] = pixman_fixed_1;
+ }
+
+ is_affine = (ti.matrix[2][0] == 0 && ti.matrix[2][1] == 0 &&
+ ti.matrix[2][2] == pixman_fixed_1 &&
+ vi.v[2] == pixman_fixed_1);
+
+ transform_ok = TRUE;
+ if (is_affine && prng_rand_n (2))
+ pixman_transform_point_31_16_affine (&ti, &vi, &result_i);
+ else
+ transform_ok = pixman_transform_point_31_16 (&ti, &vi, &result_i);
+
+ crc32 = compute_crc32 (crc32, &result_i, sizeof(result_i));
+
+#ifdef HAVE_FLOAT128
+ /* compare with a reference 128-bit floating point implementation */
+ for (j = 0; j < 3; j++)
+ {
+ vf.v[j] = pixman_fixed_to_float128 (vi.v[j]);
+ for (k = 0; k < 3; k++)
+ {
+ tf.m[j][k] = pixman_fixed_to_float128 (ti.matrix[j][k]);
+ }
+ }
+
+ if (pixman_transform_point_f128 (&tf, &vf, &result_f))
+ {
+ if (transform_ok ||
+ (does_it_fit_fixed_48_16 (result_f.v[0]) &&
+ does_it_fit_fixed_48_16 (result_f.v[1]) &&
+ does_it_fit_fixed_48_16 (result_f.v[2])))
+ {
+ for (j = 0; j < 3; j++)
+ {
+ double diff = fabs (result_f.v[j] -
+ pixman_fixed_to_float128 (result_i.v[j]));
+
+ if (is_affine && diff > (0.51 / 65536.0))
+ {
+ printf ("%d:%d: bad precision for affine (%.12f)\n",
+ testnum, i, diff);
+ abort ();
+ }
+ else if (diff > (0.71 / 65536.0))
+ {
+ printf ("%d:%d: bad precision for projective (%.12f)\n",
+ testnum, i, diff);
+ abort ();
+ }
+ }
+ }
+ }
+#endif
+ }
+ return crc32;
+}
+
+int
+main (int argc, const char *argv[])
+{
+ return fuzzer_test_main ("matrix", 20000,
+ 0xBEBF98C3,
+ test_matrix, argc, argv);
+}