aboutsummaryrefslogtreecommitdiff
path: root/pixman/pixman/pixman-fast-path.c
diff options
context:
space:
mode:
Diffstat (limited to 'pixman/pixman/pixman-fast-path.c')
-rw-r--r--pixman/pixman/pixman-fast-path.c446
1 files changed, 297 insertions, 149 deletions
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c
index c625e0c4a..247aea645 100644
--- a/pixman/pixman/pixman-fast-path.c
+++ b/pixman/pixman/pixman-fast-path.c
@@ -739,36 +739,6 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp,
}
static void
-fast_composite_src_x888_0565 (pixman_implementation_t *imp,
- pixman_composite_info_t *info)
-{
- PIXMAN_COMPOSITE_ARGS (info);
- uint16_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- *dst = convert_8888_to_0565 (s);
- dst++;
- }
- }
-}
-
-static void
fast_composite_add_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
@@ -1243,6 +1213,18 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp,
pixman_composite_func_t func;
pixman_format_code_t mask_format;
uint32_t src_flags, mask_flags;
+ int32_t sx, sy;
+ int32_t width_remain;
+ int32_t num_pixels;
+ int32_t src_width;
+ int32_t i, j;
+ pixman_image_t extended_src_image;
+ uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
+ pixman_bool_t need_src_extension;
+ uint32_t *src_line;
+ int32_t src_stride;
+ int32_t src_bpp;
+ pixman_composite_info_t info2 = *info;
src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
@@ -1258,149 +1240,131 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp,
mask_flags = FAST_PATH_IS_OPAQUE;
}
- if (_pixman_implementation_lookup_composite (
- imp->toplevel, info->op,
- src_image->common.extended_format_code, src_flags,
- mask_format, mask_flags,
- dest_image->common.extended_format_code, info->dest_flags,
- &imp, &func))
+ _pixman_implementation_lookup_composite (
+ imp->toplevel, info->op,
+ src_image->common.extended_format_code, src_flags,
+ mask_format, mask_flags,
+ dest_image->common.extended_format_code, info->dest_flags,
+ &imp, &func);
+
+ src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
+
+ if (src_image->bits.width < REPEAT_MIN_WIDTH &&
+ (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) &&
+ !src_image->bits.indexed)
{
- int32_t sx, sy;
- int32_t width_remain;
- int32_t num_pixels;
- int32_t src_width;
- int32_t i, j;
- pixman_image_t extended_src_image;
- uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
- pixman_bool_t need_src_extension;
- uint32_t *src_line;
- int32_t src_stride;
- int32_t src_bpp;
- pixman_composite_info_t info2 = *info;
-
- src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
-
- if (src_image->bits.width < REPEAT_MIN_WIDTH &&
- (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) &&
- !src_image->bits.indexed)
- {
- sx = src_x;
- sx = MOD (sx, src_image->bits.width);
- sx += width;
- src_width = 0;
+ sx = src_x;
+ sx = MOD (sx, src_image->bits.width);
+ sx += width;
+ src_width = 0;
- while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
- src_width += src_image->bits.width;
+ while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
+ src_width += src_image->bits.width;
- src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
+ src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
- /* Initialize/validate stack-allocated temporary image */
- _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
- src_width, 1, &extended_src[0], src_stride,
- FALSE);
- _pixman_image_validate (&extended_src_image);
+ /* Initialize/validate stack-allocated temporary image */
+ _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
+ src_width, 1, &extended_src[0], src_stride,
+ FALSE);
+ _pixman_image_validate (&extended_src_image);
- info2.src_image = &extended_src_image;
- need_src_extension = TRUE;
- }
- else
- {
- src_width = src_image->bits.width;
- need_src_extension = FALSE;
- }
+ info2.src_image = &extended_src_image;
+ need_src_extension = TRUE;
+ }
+ else
+ {
+ src_width = src_image->bits.width;
+ need_src_extension = FALSE;
+ }
- sx = src_x;
- sy = src_y;
+ sx = src_x;
+ sy = src_y;
- while (--height >= 0)
- {
- sx = MOD (sx, src_width);
- sy = MOD (sy, src_image->bits.height);
+ while (--height >= 0)
+ {
+ sx = MOD (sx, src_width);
+ sy = MOD (sy, src_image->bits.height);
- if (need_src_extension)
+ if (need_src_extension)
+ {
+ if (src_bpp == 32)
{
- if (src_bpp == 32)
- {
- PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
- for (i = 0; i < src_width; )
- {
- for (j = 0; j < src_image->bits.width; j++, i++)
- extended_src[i] = src_line[j];
- }
- }
- else if (src_bpp == 16)
+ for (i = 0; i < src_width; )
{
- uint16_t *src_line_16;
-
- PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
- src_line_16, 1);
- src_line = (uint32_t*)src_line_16;
-
- for (i = 0; i < src_width; )
- {
- for (j = 0; j < src_image->bits.width; j++, i++)
- ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
- }
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ extended_src[i] = src_line[j];
}
- else if (src_bpp == 8)
- {
- uint8_t *src_line_8;
+ }
+ else if (src_bpp == 16)
+ {
+ uint16_t *src_line_16;
- PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
- src_line_8, 1);
- src_line = (uint32_t*)src_line_8;
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
+ src_line_16, 1);
+ src_line = (uint32_t*)src_line_16;
- for (i = 0; i < src_width; )
- {
- for (j = 0; j < src_image->bits.width; j++, i++)
- ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
- }
+ for (i = 0; i < src_width; )
+ {
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
}
-
- info2.src_y = 0;
}
- else
+ else if (src_bpp == 8)
{
- info2.src_y = sy;
+ uint8_t *src_line_8;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
+ src_line_8, 1);
+ src_line = (uint32_t*)src_line_8;
+
+ for (i = 0; i < src_width; )
+ {
+ for (j = 0; j < src_image->bits.width; j++, i++)
+ ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
+ }
}
- width_remain = width;
+ info2.src_y = 0;
+ }
+ else
+ {
+ info2.src_y = sy;
+ }
- while (width_remain > 0)
- {
- num_pixels = src_width - sx;
+ width_remain = width;
- if (num_pixels > width_remain)
- num_pixels = width_remain;
+ while (width_remain > 0)
+ {
+ num_pixels = src_width - sx;
- info2.src_x = sx;
- info2.width = num_pixels;
- info2.height = 1;
+ if (num_pixels > width_remain)
+ num_pixels = width_remain;
- func (imp, &info2);
+ info2.src_x = sx;
+ info2.width = num_pixels;
+ info2.height = 1;
- width_remain -= num_pixels;
- info2.mask_x += num_pixels;
- info2.dest_x += num_pixels;
- sx = 0;
- }
+ func (imp, &info2);
- sx = src_x;
- sy++;
- info2.mask_x = info->mask_x;
- info2.mask_y++;
- info2.dest_x = info->dest_x;
- info2.dest_y++;
+ width_remain -= num_pixels;
+ info2.mask_x += num_pixels;
+ info2.dest_x += num_pixels;
+ sx = 0;
}
- if (need_src_extension)
- _pixman_image_fini (&extended_src_image);
- }
- else
- {
- _pixman_log_error (FUNC, "Didn't find a suitable function ");
+ sx = src_x;
+ sy++;
+ info2.mask_x = info->mask_x;
+ info2.mask_y++;
+ info2.dest_x = info->dest_x;
+ info2.dest_y++;
}
+
+ if (need_src_extension)
+ _pixman_image_fini (&extended_src_image);
}
/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
@@ -1913,10 +1877,6 @@ static const pixman_fast_path_t c_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
@@ -2199,12 +2159,200 @@ fast_path_fill (pixman_implementation_t *imp,
return TRUE;
}
+/*****************************************************************************/
+
+static uint32_t *
+fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) /* Scanline fetcher: expands iter->width 16-bit 5-6-5 pixels read from iter->bits into 32-bit pixels in iter->buffer and returns that buffer; mask is not used. */
+{
+ int32_t w = iter->width; /* pixels still to convert */
+ uint32_t *dst = iter->buffer;
+ const uint16_t *src = (const uint16_t *)iter->bits;
+
+ iter->bits += iter->stride; /* step to the next row for the following call */
+
+ /* Align the source pointer to a 4-byte boundary by converting one leading pixel, so the 32-bit loads below are aligned */
+ if (w > 0 && ((uintptr_t)src & 3))
+ {
+ *dst++ = convert_0565_to_8888 (*src++);
+ w--;
+ }
+ /* Process two pixels per iteration; the loop exits with w == -1 if one pixel is left and w == -2 if none */
+ while ((w -= 2) >= 0)
+ {
+ uint32_t sr, sb, sg, t0, t1;
+ uint32_t s = *(const uint32_t *)src; /* two packed 16-bit pixels in one load. NOTE(review): reads uint16_t data through a uint32_t pointer - confirm the build tolerates this aliasing */
+ src += 2;
+ sr = (s >> 8) & 0x00F800F8; /* 5 red bits of each pixel, moved to bits 3-7 of its 16-bit half */
+ sb = (s << 3) & 0x00F800F8; /* 5 blue bits of each pixel, moved to bits 3-7 of its 16-bit half */
+ sg = (s >> 3) & 0x00FC00FC; /* 6 green bits of each pixel, moved to bits 2-7 of its 16-bit half */
+ sr |= sr >> 5; /* copy the top bits into the empty low bits to widen the channel to 8 bits */
+ sb |= sb >> 5;
+ sg |= sg >> 6;
+ t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
+ (sb & 0xFF) | 0xFF000000; /* pixel from the low 16 bits of s, top byte forced to 0xFF */
+ t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
+ (sb >> 16) | 0xFF000000; /* pixel from the high 16 bits of s, top byte forced to 0xFF */
+#ifdef WORDS_BIGENDIAN
+ *dst++ = t1;
+ *dst++ = t0;
+#else
+ *dst++ = t0;
+ *dst++ = t1;
+#endif
+ }
+ if (w & 1) /* true only for w == -1, i.e. exactly one trailing pixel remains */
+ {
+ *dst = convert_0565_to_8888 (*src);
+ }
+
+ return iter->buffer;
+}
+
+static uint32_t *
+fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) /* Scanline callback that reads no pixels: it only advances the row pointer and hands back iter->buffer with its contents untouched; mask is not used. */
+{
+ iter->bits += iter->stride; /* keep the row pointer in step, as the real fetcher does */
+ return iter->buffer;
+}
+
+/* Packs a 32-bit pixel s into 5-6-5 form in the low 16 bits of the result. The 0x1F001F mask is supplied by the caller as part of
+ * a workaround, which tries to ensure that this constant is always allocated in a register on RISC architectures.
+ */
+static force_inline uint32_t
+convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
+{
+ uint32_t a, b;
+ a = (s >> 3) & x1F001F; /* with the mask 0x1F001F: top 5 bits of byte 2 land in bits 16-20, top 5 bits of byte 0 in bits 0-4 */
+ b = s & 0xFC00; /* top 6 bits of byte 1, still at bits 10-15 */
+ a |= a >> 5; /* copies the field at bits 16-20 down to bits 11-15 */
+ a |= b >> 5; /* places the 6-bit field at bits 5-10 */
+ return a; /* bits 16-20 are not cleared; callers in this file store the result into a uint16_t */
+}
+
+static void
+fast_write_back_r5g6b5 (pixman_iter_t *iter) /* Write-back callback: converts iter->width 32-bit pixels from iter->buffer to 16-bit 5-6-5 and stores them into the row one stride before iter->bits. */
+{
+ int32_t w = iter->width; /* pixels still to store */
+ uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); /* step back one stride: the scanline fetchers in this file advance iter->bits before returning */
+ const uint32_t *src = iter->buffer;
+ /* Workaround to ensure that x1F001F variable is allocated in a register: the constant is read once from a volatile so it cannot be folded into an immediate */
+ static volatile uint32_t volatile_x1F001F = 0x1F001F;
+ uint32_t x1F001F = volatile_x1F001F;
+
+ while ((w -= 4) >= 0) /* four pixels per iteration; exits with w in -4..-1 */
+ {
+ uint32_t s1 = *src++;
+ uint32_t s2 = *src++;
+ uint32_t s3 = *src++;
+ uint32_t s4 = *src++;
+ *dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
+ }
+ if (w & 2) /* the low two bits of w now equal the 0-3 leftover pixels; this handles two of them */
+ {
+ *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
+ *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
+ }
+ if (w & 1) /* final odd pixel, if any */
+ {
+ *dst = convert_8888_to_0565_workaround (*src, x1F001F);
+ }
+}
+
+typedef struct /* One row of the format-to-callback table used by the iterator init functions in this file */
+{
+ pixman_format_code_t format; /* compared against image->common.extended_format_code */
+ pixman_iter_get_scanline_t get_scanline; /* callback installed as iter->get_scanline */
+ pixman_iter_write_back_t write_back; /* callback installed as iter->write_back (destination iterators only) */
+} fetcher_info_t;
+
+static const fetcher_info_t fetchers[] = /* Formats that have dedicated scanline callbacks in this file */
+{
+ { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
+ { PIXMAN_null } /* end-of-table sentinel: the lookup loops stop at format == PIXMAN_null */
+};
+
+static pixman_bool_t
+fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) /* Source-iterator setup: returns TRUE after installing a fetcher from fetchers[] when the iterator is narrow, the image has every bit in FLAGS and its format is listed; otherwise returns FALSE and leaves iter unchanged. imp is not used. */
+{
+ pixman_image_t *image = iter->image;
+
+#define FLAGS \
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
+ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+
+ if ((iter->iter_flags & ITER_NARROW) &&
+ (iter->image_flags & FLAGS) == FLAGS) /* every required flag must be set, not just some */
+ {
+ const fetcher_info_t *f;
+
+ for (f = &fetchers[0]; f->format != PIXMAN_null; f++) /* linear scan up to the sentinel entry */
+ {
+ if (image->common.extended_format_code == f->format)
+ {
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4; /* presumably rowstride counts 32-bit words, making s a byte stride - TODO confirm */
+
+ iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; /* address of the pixel at (iter->x, iter->y) */
+ iter->stride = s;
+
+ iter->get_scanline = f->get_scanline; /* source iterators get no write_back callback here */
+ return TRUE;
+ }
+ }
+ }
+
+ return FALSE;
+}
+
+static pixman_bool_t
+fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) /* Destination-iterator setup: returns TRUE after installing fetch and write-back callbacks from fetchers[] when the iterator is narrow, the image has all FAST_PATH_STD_DEST_FLAGS and its format is listed; otherwise returns FALSE. imp is not used. */
+{
+ pixman_image_t *image = iter->image;
+
+ if ((iter->iter_flags & ITER_NARROW) &&
+ (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
+ {
+ const fetcher_info_t *f;
+
+ for (f = &fetchers[0]; f->format != PIXMAN_null; f++) /* linear scan up to the sentinel entry */
+ {
+ if (image->common.extended_format_code == f->format)
+ {
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4; /* presumably rowstride counts 32-bit words, making s a byte stride - TODO confirm */
+
+ iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; /* address of the pixel at (iter->x, iter->y) */
+ iter->stride = s;
+
+ if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
+ (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) /* both ignore flags set: the existing destination pixels are not needed */
+ {
+ iter->get_scanline = fast_dest_fetch_noop; /* skip the conversion, only advance the row pointer */
+ }
+ else
+ {
+ iter->get_scanline = f->get_scanline;
+ }
+ iter->write_back = f->write_back; /* installed in both cases */
+ return TRUE;
+ }
+ }
+ }
+ return FALSE;
+}
+
+
pixman_implementation_t *
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback) /* Builds the implementation object for this file: created from fallback and the c_fast_paths table, then given this file's fill and iterator-init hooks. */
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); /* NOTE(review): imp is dereferenced below without a NULL check - confirm the constructor cannot fail */
imp->fill = fast_path_fill;
+ imp->src_iter_init = fast_src_iter_init; /* hook for source iterators */
+ imp->dest_iter_init = fast_dest_iter_init; /* hook for destination iterators */
return imp;
}