diff options
Diffstat (limited to 'pixman')
-rw-r--r-- | pixman/configure.ac | 23 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-neon.c | 865 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-simd.c | 847 | ||||
-rw-r--r-- | pixman/pixman/pixman-cpu.c | 30 | ||||
-rw-r--r-- | pixman/pixman/pixman-fast-path.c | 3871 | ||||
-rw-r--r-- | pixman/pixman/pixman-mmx.c | 7 | ||||
-rw-r--r-- | pixman/pixman/pixman-private.h | 12 | ||||
-rw-r--r-- | pixman/pixman/pixman-sse2.c | 7 | ||||
-rw-r--r-- | pixman/pixman/pixman-vmx.c | 5 |
9 files changed, 2842 insertions, 2825 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac index e2f73dc06..ab2ecde1b 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -795,3 +795,26 @@ AC_OUTPUT([pixman-1.pc pixman/Makefile pixman/pixman-version.h test/Makefile]) + +m4_if(m4_eval(pixman_minor % 2), [1], [ + echo + echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + echo + echo " Thanks for testing this development snapshot of pixman. Please" + echo " report any problems you find, either by sending email to " + echo + echo " pixman@lists.freedesktop.org" + echo + echo " or by filing a bug at " + echo + echo " https://bugs.freedesktop.org/enter_bug.cgi?product=pixman " + echo + echo " If you are looking for a stable release of pixman, please note " + echo " that stable releases have _even_ minor version numbers. Ie., " + echo " pixman-0.]m4_eval(pixman_minor & ~1)[.x are stable releases, whereas pixman-$PIXMAN_VERSION_MAJOR.$PIXMAN_VERSION_MINOR.$PIXMAN_VERSION_MICRO is a " + echo " development snapshot that may contain bugs and experimental " + echo " features. " + echo + echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + echo +]) diff --git a/pixman/pixman/pixman-arm-neon.c b/pixman/pixman/pixman-arm-neon.c index e89cc3215..7d6c83775 100644 --- a/pixman/pixman/pixman-arm-neon.c +++ b/pixman/pixman/pixman-arm-neon.c @@ -1,435 +1,430 @@ -/*
- * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of ARM Ltd not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission. ARM Ltd makes no
- * representations about the suitability of this software for any purpose. It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author: Ian Rickards (ian.rickards@arm.com)
- * Author: Jonathan Morton (jonathan.morton@movial.com)
- * Author: Markku Vire (markku.vire@movial.com)
- *
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <string.h>
-#include "pixman-private.h"
-#include "pixman-arm-common.h"
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
- uint16_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
- uint8_t, 3, uint8_t, 3)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
- uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
- uint16_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
- uint8_t, 3, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
- uint8_t, 3, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
- uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
- uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
- uint8_t, 1, uint16_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
- uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
- uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
- uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
- uint8_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
- uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
- uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
- uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
- uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
- uint8_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
- uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
- uint16_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
- uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
- uint8_t, 1, uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
- uint16_t, 1, uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
- uint32_t, 1, uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
- uint32_t, 1, uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
- uint32_t, 1, uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
- uint32_t, 1, uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
- uint32_t, 1, uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
- uint16_t, 1, uint8_t, 1, uint16_t, 1)
-
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
- uint32_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
- uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
- uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
- uint16_t, uint32_t)
-
-void
-pixman_composite_src_n_8_asm_neon (int32_t w,
- int32_t h,
- uint8_t *dst,
- int32_t dst_stride,
- uint8_t src);
-
-void
-pixman_composite_src_n_0565_asm_neon (int32_t w,
- int32_t h,
- uint16_t *dst,
- int32_t dst_stride,
- uint16_t src);
-
-void
-pixman_composite_src_n_8888_asm_neon (int32_t w,
- int32_t h,
- uint32_t *dst,
- int32_t dst_stride,
- uint32_t src);
-
-static pixman_bool_t
-pixman_fill_neon (uint32_t *bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t _xor)
-{
- /* stride is always multiple of 32bit units in pixman */
- uint32_t byte_stride = stride * sizeof(uint32_t);
-
- switch (bpp)
- {
- case 8:
- pixman_composite_src_n_8_asm_neon (
- width,
- height,
- (uint8_t *)(((char *) bits) + y * byte_stride + x),
- byte_stride,
- _xor & 0xff);
- return TRUE;
- case 16:
- pixman_composite_src_n_0565_asm_neon (
- width,
- height,
- (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
- byte_stride / 2,
- _xor & 0xffff);
- return TRUE;
- case 32:
- pixman_composite_src_n_8888_asm_neon (
- width,
- height,
- (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
- byte_stride / 4,
- _xor);
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-static pixman_bool_t
-pixman_blt_neon (uint32_t *src_bits,
- uint32_t *dst_bits,
- int src_stride,
- int dst_stride,
- int src_bpp,
- int dst_bpp,
- int src_x,
- int src_y,
- int dst_x,
- int dst_y,
- int width,
- int height)
-{
- if (src_bpp != dst_bpp)
- return FALSE;
-
- switch (src_bpp)
- {
- case 16:
- pixman_composite_src_0565_0565_asm_neon (
- width, height,
- (uint16_t *)(((char *) dst_bits) +
- dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
- (uint16_t *)(((char *) src_bits) +
- src_y * src_stride * 4 + src_x * 2), src_stride * 2);
- return TRUE;
- case 32:
- pixman_composite_src_8888_8888_asm_neon (
- width, height,
- (uint32_t *)(((char *) dst_bits) +
- dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
- (uint32_t *)(((char *) src_bits) +
- src_y * src_stride * 4 + src_x * 4), src_stride);
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-static const pixman_fast_path_t arm_neon_fast_paths[] =
-{
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev),
- PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888),
- PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888),
- PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888),
- PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
- PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565),
- PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8),
- PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
- PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
- PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565),
- PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
- /* Note: NONE repeat is not supported yet */
- SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
- SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
-
- { PIXMAN_OP_NONE },
-};
-
-static pixman_bool_t
-arm_neon_blt (pixman_implementation_t *imp,
- uint32_t * src_bits,
- uint32_t * dst_bits,
- int src_stride,
- int dst_stride,
- int src_bpp,
- int dst_bpp,
- int src_x,
- int src_y,
- int dst_x,
- int dst_y,
- int width,
- int height)
-{
- if (!pixman_blt_neon (
- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height))
-
- {
- return _pixman_implementation_blt (
- imp->delegate,
- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height);
- }
-
- return TRUE;
-}
-
-static pixman_bool_t
-arm_neon_fill (pixman_implementation_t *imp,
- uint32_t * bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
- return TRUE;
-
- return _pixman_implementation_fill (
- imp->delegate, bits, stride, bpp, x, y, width, height, xor);
-}
-
-#define BIND_COMBINE_U(name) \
-void \
-pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \
- const uint32_t *dst, \
- const uint32_t *src, \
- const uint32_t *mask); \
- \
-void \
-pixman_composite_scanline_##name##_asm_neon (int32_t w, \
- const uint32_t *dst, \
- const uint32_t *src); \
- \
-static void \
-neon_combine_##name##_u (pixman_implementation_t *imp, \
- pixman_op_t op, \
- uint32_t * dest, \
- const uint32_t * src, \
- const uint32_t * mask, \
- int width) \
-{ \
- if (mask) \
- pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \
- src, mask); \
- else \
- pixman_composite_scanline_##name##_asm_neon (width, dest, src); \
-}
-
-BIND_COMBINE_U (over)
-BIND_COMBINE_U (add)
-BIND_COMBINE_U (out_reverse)
-
-pixman_implementation_t *
-_pixman_implementation_create_arm_neon (void)
-{
-#ifdef USE_ARM_SIMD
- pixman_implementation_t *fallback = _pixman_implementation_create_arm_simd ();
-#else
- pixman_implementation_t *fallback = _pixman_implementation_create_fast_path ();
-#endif
- pixman_implementation_t *imp =
- _pixman_implementation_create (fallback, arm_neon_fast_paths);
-
- imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
- imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
- imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
-
- imp->blt = arm_neon_blt;
- imp->fill = arm_neon_fill;
-
- return imp;
-}
+/* + * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of ARM Ltd not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. ARM Ltd makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. 
+ * + * Author: Ian Rickards (ian.rickards@arm.com) + * Author: Jonathan Morton (jonathan.morton@movial.com) + * Author: Markku Vire (markku.vire@movial.com) + * + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <string.h> +#include "pixman-private.h" +#include "pixman-arm-common.h" + +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888, + uint8_t, 3, uint8_t, 3) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888, + uint16_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev, + uint8_t, 3, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev, + uint8_t, 3, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, + uint8_t, 1, uint16_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, + uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, + uint8_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, + uint8_t, 
1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, + uint8_t, 1, uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, + uint32_t, 1, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, + uint32_t, 1, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565, + uint32_t, 1, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST 
(neon, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, + uint16_t, uint32_t) + +void +pixman_composite_src_n_8_asm_neon (int32_t w, + int32_t h, + uint8_t *dst, + int32_t dst_stride, + uint8_t src); + +void +pixman_composite_src_n_0565_asm_neon (int32_t w, + int32_t h, + uint16_t *dst, + int32_t dst_stride, + uint16_t src); + +void +pixman_composite_src_n_8888_asm_neon (int32_t w, + int32_t h, + uint32_t *dst, + int32_t dst_stride, + uint32_t src); + +static pixman_bool_t +pixman_fill_neon (uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor) +{ + /* stride is always multiple of 32bit units in pixman */ + uint32_t byte_stride = stride * sizeof(uint32_t); + + switch (bpp) + { + case 8: + pixman_composite_src_n_8_asm_neon ( + width, + height, + (uint8_t *)(((char *) bits) + y * byte_stride + x), + byte_stride, + _xor & 0xff); + return TRUE; + case 16: + pixman_composite_src_n_0565_asm_neon ( + width, + height, + (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), + byte_stride / 2, + _xor & 0xffff); + return TRUE; + case 32: + pixman_composite_src_n_8888_asm_neon ( + width, + height, + (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), + byte_stride / 4, + _xor); + return TRUE; + default: + return FALSE; + } +} + +static pixman_bool_t +pixman_blt_neon (uint32_t *src_bits, + uint32_t *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (src_bpp != dst_bpp) + return FALSE; + + switch (src_bpp) + { + case 16: + pixman_composite_src_0565_0565_asm_neon ( + width, height, + (uint16_t *)(((char *) dst_bits) + + dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, + (uint16_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 2), src_stride * 2); + return TRUE; + case 32: + pixman_composite_src_8888_8888_asm_neon ( + width, height, + (uint32_t 
*)(((char *) dst_bits) + + dst_y * dst_stride * 4 + dst_x * 4), dst_stride, + (uint32_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 4), src_stride); + return TRUE; + default: + return FALSE; + } +} + +static const pixman_fast_path_t arm_neon_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), + PIXMAN_STD_FAST_PATH (SRC, 
b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, 
r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, 
neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), + 
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + + { PIXMAN_OP_NONE }, +}; + +static pixman_bool_t +arm_neon_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (!pixman_blt_neon ( + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height)) + + { + return _pixman_implementation_blt ( + imp->delegate, + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height); + } + + return TRUE; +} + +static pixman_bool_t +arm_neon_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor)) + return TRUE; + + return _pixman_implementation_fill ( + imp->delegate, bits, stride, bpp, x, y, width, height, xor); +} + +#define BIND_COMBINE_U(name) \ +void \ +pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \ + const uint32_t *dst, \ + const uint32_t *src, \ + const uint32_t *mask); \ + \ +void \ +pixman_composite_scanline_##name##_asm_neon (int32_t w, \ + const uint32_t *dst, \ + const uint32_t *src); \ + \ +static void \ +neon_combine_##name##_u (pixman_implementation_t *imp, \ + pixman_op_t op, \ + uint32_t * dest, \ + const 
uint32_t * src, \ + const uint32_t * mask, \ + int width) \ +{ \ + if (mask) \ + pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \ + src, mask); \ + else \ + pixman_composite_scanline_##name##_asm_neon (width, dest, src); \ +} + +BIND_COMBINE_U (over) +BIND_COMBINE_U (add) +BIND_COMBINE_U (out_reverse) + +pixman_implementation_t * +_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, arm_neon_fast_paths); + + imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u; + imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; + + imp->blt = arm_neon_blt; + imp->fill = arm_neon_fill; + + return imp; +} diff --git a/pixman/pixman/pixman-arm-simd.c b/pixman/pixman/pixman-arm-simd.c index e03dfe020..6bbc1094d 100644 --- a/pixman/pixman/pixman-arm-simd.c +++ b/pixman/pixman/pixman-arm-simd.c @@ -1,424 +1,423 @@ -/*
- * Copyright © 2008 Mozilla Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Mozilla Corporation not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission. Mozilla Corporation makes no
- * representations about the suitability of this software for any purpose. It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author: Jeff Muizelaar (jeff@infidigm.net)
- *
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include "pixman-private.h"
-#include "pixman-arm-common.h"
-#include "pixman-fast-path.h"
-
-#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
-
-void
-pixman_composite_add_8_8_asm_armv6 (int32_t width,
- int32_t height,
- uint8_t *dst_line,
- int32_t dst_stride,
- uint8_t *src_line,
- int32_t src_stride)
-{
- uint8_t *dst, *src;
- int32_t w;
- uint8_t s, d;
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- /* ensure both src and dst are properly aligned before doing 32 bit reads
- * we'll stay in this loop if src and dst have differing alignments
- */
- while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3)))
- {
- s = *src;
- d = *dst;
- asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s));
- *dst = d;
-
- dst++;
- src++;
- w--;
- }
-
- while (w >= 4)
- {
- asm ("uqadd8 %0, %1, %2"
- : "=r" (*(uint32_t*)dst)
- : "r" (*(uint32_t*)src), "r" (*(uint32_t*)dst));
- dst += 4;
- src += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *src;
- d = *dst;
- asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s));
- *dst = d;
-
- dst++;
- src++;
- w--;
- }
- }
-
-}
-
-void
-pixman_composite_over_8888_8888_asm_armv6 (int32_t width,
- int32_t height,
- uint32_t *dst_line,
- int32_t dst_stride,
- uint32_t *src_line,
- int32_t src_stride)
-{
- uint32_t *dst;
- uint32_t *src;
- int32_t w;
- uint32_t component_half = 0x800080;
- uint32_t upper_component_mask = 0xff00ff00;
- uint32_t alpha_mask = 0xff;
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
-/* #define inner_branch */
- asm volatile (
- "cmp %[w], #0\n\t"
- "beq 2f\n\t"
- "1:\n\t"
- /* load src */
- "ldr r5, [%[src]], #4\n\t"
-#ifdef inner_branch
- /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
- * The 0x0 case also allows us to avoid doing an unecessary data
- * write which is more valuable so we only check for that
- */
- "cmp r5, #0\n\t"
- "beq 3f\n\t"
-
- /* = 255 - alpha */
- "sub r8, %[alpha_mask], r5, lsr #24\n\t"
-
- "ldr r4, [%[dest]] \n\t"
-
-#else
- "ldr r4, [%[dest]] \n\t"
-
- /* = 255 - alpha */
- "sub r8, %[alpha_mask], r5, lsr #24\n\t"
-#endif
- "uxtb16 r6, r4\n\t"
- "uxtb16 r7, r4, ror #8\n\t"
-
- /* multiply by 257 and divide by 65536 */
- "mla r6, r6, r8, %[component_half]\n\t"
- "mla r7, r7, r8, %[component_half]\n\t"
-
- "uxtab16 r6, r6, r6, ror #8\n\t"
- "uxtab16 r7, r7, r7, ror #8\n\t"
-
- /* recombine the 0xff00ff00 bytes of r6 and r7 */
- "and r7, r7, %[upper_component_mask]\n\t"
- "uxtab16 r6, r7, r6, ror #8\n\t"
-
- "uqadd8 r5, r6, r5\n\t"
-
-#ifdef inner_branch
- "3:\n\t"
-
-#endif
- "str r5, [%[dest]], #4\n\t"
- /* increment counter and jmp to top */
- "subs %[w], %[w], #1\n\t"
- "bne 1b\n\t"
- "2:\n\t"
- : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
- : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
- [alpha_mask] "r" (alpha_mask)
- : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
- );
- }
-}
-
-void
-pixman_composite_over_8888_n_8888_asm_armv6 (int32_t width,
- int32_t height,
- uint32_t *dst_line,
- int32_t dst_stride,
- uint32_t *src_line,
- int32_t src_stride,
- uint32_t mask)
-{
- uint32_t *dst;
- uint32_t *src;
- int32_t w;
- uint32_t component_half = 0x800080;
- uint32_t alpha_mask = 0xff;
-
- mask = (mask) >> 24;
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
-/* #define inner_branch */
- asm volatile (
- "cmp %[w], #0\n\t"
- "beq 2f\n\t"
- "1:\n\t"
- /* load src */
- "ldr r5, [%[src]], #4\n\t"
-#ifdef inner_branch
- /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
- * The 0x0 case also allows us to avoid doing an unecessary data
- * write which is more valuable so we only check for that
- */
- "cmp r5, #0\n\t"
- "beq 3f\n\t"
-
-#endif
- "ldr r4, [%[dest]] \n\t"
-
- "uxtb16 r6, r5\n\t"
- "uxtb16 r7, r5, ror #8\n\t"
-
- /* multiply by alpha (r8) then by 257 and divide by 65536 */
- "mla r6, r6, %[mask_alpha], %[component_half]\n\t"
- "mla r7, r7, %[mask_alpha], %[component_half]\n\t"
-
- "uxtab16 r6, r6, r6, ror #8\n\t"
- "uxtab16 r7, r7, r7, ror #8\n\t"
-
- "uxtb16 r6, r6, ror #8\n\t"
- "uxtb16 r7, r7, ror #8\n\t"
-
- /* recombine */
- "orr r5, r6, r7, lsl #8\n\t"
-
- "uxtb16 r6, r4\n\t"
- "uxtb16 r7, r4, ror #8\n\t"
-
- /* 255 - alpha */
- "sub r8, %[alpha_mask], r5, lsr #24\n\t"
-
- /* multiply by alpha (r8) then by 257 and divide by 65536 */
- "mla r6, r6, r8, %[component_half]\n\t"
- "mla r7, r7, r8, %[component_half]\n\t"
-
- "uxtab16 r6, r6, r6, ror #8\n\t"
- "uxtab16 r7, r7, r7, ror #8\n\t"
-
- "uxtb16 r6, r6, ror #8\n\t"
- "uxtb16 r7, r7, ror #8\n\t"
-
- /* recombine */
- "orr r6, r6, r7, lsl #8\n\t"
-
- "uqadd8 r5, r6, r5\n\t"
-
-#ifdef inner_branch
- "3:\n\t"
-
-#endif
- "str r5, [%[dest]], #4\n\t"
- /* increment counter and jmp to top */
- "subs %[w], %[w], #1\n\t"
- "bne 1b\n\t"
- "2:\n\t"
- : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
- : [component_half] "r" (component_half), [mask_alpha] "r" (mask),
- [alpha_mask] "r" (alpha_mask)
- : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
- );
- }
-}
-
-void
-pixman_composite_over_n_8_8888_asm_armv6 (int32_t width,
- int32_t height,
- uint32_t *dst_line,
- int32_t dst_stride,
- uint32_t src,
- int32_t unused,
- uint8_t *mask_line,
- int32_t mask_stride)
-{
- uint32_t srca;
- uint32_t *dst;
- uint8_t *mask;
- int32_t w;
-
- srca = src >> 24;
-
- uint32_t component_mask = 0xff00ff;
- uint32_t component_half = 0x800080;
-
- uint32_t src_hi = (src >> 8) & component_mask;
- uint32_t src_lo = src & component_mask;
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
-/* #define inner_branch */
- asm volatile (
- "cmp %[w], #0\n\t"
- "beq 2f\n\t"
- "1:\n\t"
- /* load mask */
- "ldrb r5, [%[mask]], #1\n\t"
-#ifdef inner_branch
- /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
- * The 0x0 case also allows us to avoid doing an unecessary data
- * write which is more valuable so we only check for that
- */
- "cmp r5, #0\n\t"
- "beq 3f\n\t"
-
-#endif
- "ldr r4, [%[dest]] \n\t"
-
- /* multiply by alpha (r8) then by 257 and divide by 65536 */
- "mla r6, %[src_lo], r5, %[component_half]\n\t"
- "mla r7, %[src_hi], r5, %[component_half]\n\t"
-
- "uxtab16 r6, r6, r6, ror #8\n\t"
- "uxtab16 r7, r7, r7, ror #8\n\t"
-
- "uxtb16 r6, r6, ror #8\n\t"
- "uxtb16 r7, r7, ror #8\n\t"
-
- /* recombine */
- "orr r5, r6, r7, lsl #8\n\t"
-
- "uxtb16 r6, r4\n\t"
- "uxtb16 r7, r4, ror #8\n\t"
-
- /* we could simplify this to use 'sub' if we were
- * willing to give up a register for alpha_mask
- */
- "mvn r8, r5\n\t"
- "mov r8, r8, lsr #24\n\t"
-
- /* multiply by alpha (r8) then by 257 and divide by 65536 */
- "mla r6, r6, r8, %[component_half]\n\t"
- "mla r7, r7, r8, %[component_half]\n\t"
-
- "uxtab16 r6, r6, r6, ror #8\n\t"
- "uxtab16 r7, r7, r7, ror #8\n\t"
-
- "uxtb16 r6, r6, ror #8\n\t"
- "uxtb16 r7, r7, ror #8\n\t"
-
- /* recombine */
- "orr r6, r6, r7, lsl #8\n\t"
-
- "uqadd8 r5, r6, r5\n\t"
-
-#ifdef inner_branch
- "3:\n\t"
-
-#endif
- "str r5, [%[dest]], #4\n\t"
- /* increment counter and jmp to top */
- "subs %[w], %[w], #1\n\t"
- "bne 1b\n\t"
- "2:\n\t"
- : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask)
- : [component_half] "r" (component_half),
- [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
- : "r4", "r5", "r6", "r7", "r8", "cc", "memory");
- }
-}
-
-#endif
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
- uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
- uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
- uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
- uint8_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
- uint16_t, uint16_t)
-
-static const pixman_fast_path_t arm_simd_fast_paths[] =
-{
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
-
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
-
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
-
- { PIXMAN_OP_NONE },
-};
-
-pixman_implementation_t *
-_pixman_implementation_create_arm_simd (void)
-{
- pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
- pixman_implementation_t *imp = _pixman_implementation_create (general, arm_simd_fast_paths);
-
- return imp;
-}
+/* + * Copyright © 2008 Mozilla Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. 
+ * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" +#include "pixman-arm-common.h" +#include "pixman-fast-path.h" + +#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */ + +void +pixman_composite_add_8_8_asm_armv6 (int32_t width, + int32_t height, + uint8_t *dst_line, + int32_t dst_stride, + uint8_t *src_line, + int32_t src_stride) +{ + uint8_t *dst, *src; + int32_t w; + uint8_t s, d; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + /* ensure both src and dst are properly aligned before doing 32 bit reads + * we'll stay in this loop if src and dst have differing alignments + */ + while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3))) + { + s = *src; + d = *dst; + asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); + *dst = d; + + dst++; + src++; + w--; + } + + while (w >= 4) + { + asm ("uqadd8 %0, %1, %2" + : "=r" (*(uint32_t*)dst) + : "r" (*(uint32_t*)src), "r" (*(uint32_t*)dst)); + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + s = *src; + d = *dst; + asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); + *dst = d; + + dst++; + src++; + w--; + } + } + +} + +void +pixman_composite_over_8888_8888_asm_armv6 (int32_t width, + int32_t height, + uint32_t *dst_line, + int32_t dst_stride, + uint32_t *src_line, + int32_t src_stride) +{ + uint32_t *dst; + uint32_t *src; + int32_t w; + uint32_t component_half = 0x800080; + uint32_t upper_component_mask = 0xff00ff00; + uint32_t alpha_mask = 0xff; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + +/* #define inner_branch */ + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. 
+ * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" + + "ldr r4, [%[dest]] \n\t" + +#else + "ldr r4, [%[dest]] \n\t" + + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" +#endif + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* multiply by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + /* recombine the 0xff00ff00 bytes of r6 and r7 */ + "and r7, r7, %[upper_component_mask]\n\t" + "uxtab16 r6, r7, r6, ror #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory" + ); + } +} + +void +pixman_composite_over_8888_n_8888_asm_armv6 (int32_t width, + int32_t height, + uint32_t *dst_line, + int32_t dst_stride, + uint32_t *src_line, + int32_t src_stride, + uint32_t mask) +{ + uint32_t *dst; + uint32_t *src; + int32_t w; + uint32_t component_half = 0x800080; + uint32_t alpha_mask = 0xff; + + mask = (mask) >> 24; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + +/* #define inner_branch */ + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. 
+ * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + +#endif + "ldr r4, [%[dest]] \n\t" + + "uxtb16 r6, r5\n\t" + "uxtb16 r7, r5, ror #8\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, %[mask_alpha], %[component_half]\n\t" + "mla r7, r7, %[mask_alpha], %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" + + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [mask_alpha] "r" (mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" + ); + } +} + +void +pixman_composite_over_n_8_8888_asm_armv6 (int32_t width, + int32_t height, + uint32_t *dst_line, + int32_t dst_stride, + uint32_t src, + int32_t unused, + uint8_t *mask_line, + int32_t mask_stride) +{ + uint32_t srca; + uint32_t *dst; + uint8_t *mask; + int32_t w; + + srca = src >> 24; + + uint32_t component_mask = 0xff00ff; + uint32_t component_half = 0x800080; + + uint32_t src_hi = (src >> 8) & component_mask; + uint32_t src_lo = src & component_mask; + + while (height--) + { + dst = dst_line; + 
dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + +/* #define inner_branch */ + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load mask */ + "ldrb r5, [%[mask]], #1\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + +#endif + "ldr r4, [%[dest]] \n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, %[src_lo], r5, %[component_half]\n\t" + "mla r7, %[src_hi], r5, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" + + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* we could simplify this to use 'sub' if we were + * willing to give up a register for alpha_mask + */ + "mvn r8, r5\n\t" + "mov r8, r8, lsr #24\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) + : [component_half] "r" (component_half), + [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory"); + } +} + +#endif + +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, + uint32_t, 1, 
uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, + uint16_t, uint16_t) + +static const pixman_fast_path_t arm_simd_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888), + + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8), + + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), + + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths); + + return imp; +} diff --git a/pixman/pixman/pixman-cpu.c b/pixman/pixman/pixman-cpu.c index 
70253d1ea..0e14ecb6f 100644 --- a/pixman/pixman/pixman-cpu.c +++ b/pixman/pixman/pixman-cpu.c @@ -576,28 +576,36 @@ pixman_have_sse2 (void) pixman_implementation_t * _pixman_choose_implementation (void) { -#ifdef USE_SSE2 - if (pixman_have_sse2 ()) - return _pixman_implementation_create_sse2 (); -#endif + pixman_implementation_t *imp; + + imp = _pixman_implementation_create_general(); + imp = _pixman_implementation_create_fast_path (imp); + #ifdef USE_MMX if (pixman_have_mmx ()) - return _pixman_implementation_create_mmx (); + imp = _pixman_implementation_create_mmx (imp); #endif -#ifdef USE_ARM_NEON - if (pixman_have_arm_neon ()) - return _pixman_implementation_create_arm_neon (); +#ifdef USE_SSE2 + if (pixman_have_sse2 ()) + imp = _pixman_implementation_create_sse2 (imp); #endif + #ifdef USE_ARM_SIMD if (pixman_have_arm_simd ()) - return _pixman_implementation_create_arm_simd (); + imp = _pixman_implementation_create_arm_simd (imp); +#endif + +#ifdef USE_ARM_NEON + if (pixman_have_arm_neon ()) + imp = _pixman_implementation_create_arm_neon (imp); #endif + #ifdef USE_VMX if (pixman_have_vmx ()) - return _pixman_implementation_create_vmx (); + imp = _pixman_implementation_create_vmx (imp); #endif - return _pixman_implementation_create_fast_path (); + return imp; } diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index 736e302c6..eb0971586 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -1,1936 +1,1935 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Keith Packard, SuSE, Inc.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#include <string.h>
-#include <stdlib.h>
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-fast-path.h"
-
-static force_inline uint32_t
-fetch_24 (uint8_t *a)
-{
- if (((unsigned long)a) & 1)
- {
-#ifdef WORDS_BIGENDIAN
- return (*a << 16) | (*(uint16_t *)(a + 1));
-#else
- return *a | (*(uint16_t *)(a + 1) << 8);
-#endif
- }
- else
- {
-#ifdef WORDS_BIGENDIAN
- return (*(uint16_t *)a << 8) | *(a + 2);
-#else
- return *(uint16_t *)a | (*(a + 2) << 16);
-#endif
- }
-}
-
-static force_inline void
-store_24 (uint8_t *a,
- uint32_t v)
-{
- if (((unsigned long)a) & 1)
- {
-#ifdef WORDS_BIGENDIAN
- *a = (uint8_t) (v >> 16);
- *(uint16_t *)(a + 1) = (uint16_t) (v);
-#else
- *a = (uint8_t) (v);
- *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
-#endif
- }
- else
- {
-#ifdef WORDS_BIGENDIAN
- *(uint16_t *)a = (uint16_t)(v >> 8);
- *(a + 2) = (uint8_t)v;
-#else
- *(uint16_t *)a = (uint16_t)v;
- *(a + 2) = (uint8_t)(v >> 16);
-#endif
- }
-}
-
-static force_inline uint32_t
-over (uint32_t src,
- uint32_t dest)
-{
- uint32_t a = ~src >> 24;
-
- UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
-
- return dest;
-}
-
-static uint32_t
-in (uint32_t x,
- uint8_t y)
-{
- uint16_t a = y;
-
- UN8x4_MUL_UN8 (x, a);
-
- return x;
-}
-
-/*
- * Naming convention:
- *
- * op_src_mask_dest
- */
-static void
-fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *src, *src_line;
- uint32_t *dst, *dst_line;
- uint8_t *mask, *mask_line;
- int src_stride, mask_stride, dst_stride;
- uint8_t m;
- uint32_t s, d;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- src = src_line;
- src_line += src_stride;
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
-
- w = width;
- while (w--)
- {
- m = *mask++;
- if (m)
- {
- s = *src | 0xff000000;
-
- if (m == 0xff)
- {
- *dst = s;
- }
- else
- {
- d = in (s, m);
- *dst = over (d, *dst);
- }
- }
- src++;
- dst++;
- }
- }
-}
-
-static void
-fast_composite_in_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dest_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
- uint16_t t;
-
- src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
- srca = src >> 24;
-
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- if (srca == 0xff)
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
-
- if (m == 0)
- *dst = 0;
- else if (m != 0xff)
- *dst = MUL_UN8 (m, *dst, t);
-
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- m = MUL_UN8 (m, srca, t);
-
- if (m == 0)
- *dst = 0;
- else if (m != 0xff)
- *dst = MUL_UN8 (m, *dst, t);
-
- dst++;
- }
- }
- }
-}
-
-static void
-fast_composite_in_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dest_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint8_t s;
- uint16_t t;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
-
- if (s == 0)
- *dst = 0;
- else if (s != 0xff)
- *dst = MUL_UN8 (s, *dst, t);
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst_line, *dst, d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- *dst = src;
- else
- *dst = over (src, *dst);
- }
- else if (m)
- {
- d = in (src, m);
- *dst = over (d, *dst);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, s;
- uint32_t *dst_line, *dst, d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
-
- if (ma)
- {
- d = *dst;
- s = src;
-
- UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
-
- *dst = s;
- }
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca, s;
- uint32_t *dst_line, *dst, d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
- if (ma == 0xffffffff)
- {
- if (srca == 0xff)
- *dst = src;
- else
- *dst = over (src, *dst);
- }
- else if (ma)
- {
- d = *dst;
- s = src;
-
- UN8x4_MUL_UN8x4 (s, ma);
- UN8x4_MUL_UN8 (ma, srca);
- ma = ~ma;
- UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
-
- *dst = d;
- }
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint8_t *dst_line, *dst;
- uint32_t d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- {
- d = src;
- }
- else
- {
- d = fetch_24 (dst);
- d = over (src, d);
- }
- store_24 (dst, d);
- }
- else if (m)
- {
- d = over (in (src, m), fetch_24 (dst));
- store_24 (dst, d);
- }
- dst += 3;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- {
- d = src;
- }
- else
- {
- d = *dst;
- d = over (src, CONVERT_0565_TO_0888 (d));
- }
- *dst = CONVERT_8888_TO_0565 (d);
- }
- else if (m)
- {
- d = *dst;
- d = over (in (src, m), CONVERT_0565_TO_0888 (d));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca, s;
- uint16_t src16;
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- src16 = CONVERT_8888_TO_0565 (src);
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
- if (ma == 0xffffffff)
- {
- if (srca == 0xff)
- {
- *dst = src16;
- }
- else
- {
- d = *dst;
- d = over (src, CONVERT_0565_TO_0888 (d));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- }
- else if (ma)
- {
- d = *dst;
- d = CONVERT_0565_TO_0888 (d);
-
- s = src;
-
- UN8x4_MUL_UN8x4 (s, ma);
- UN8x4_MUL_UN8 (ma, srca);
- ma = ~ma;
- UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
-
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- uint8_t a;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (a == 0xff)
- *dst = s;
- else if (s)
- *dst = over (s, *dst);
- dst++;
- }
- }
-}
-
-static void
-fast_composite_src_x888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- *dst++ = (*src++) | 0xff000000;
- }
-}
-
-#if 0
-static void
-fast_composite_over_8888_0888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint32_t d;
- uint32_t *src_line, *src, s;
- uint8_t a;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (a)
- {
- if (a == 0xff)
- d = s;
- else
- d = over (s, fetch_24 (dst));
-
- store_24 (dst, d);
- }
- dst += 3;
- }
- }
-}
-#endif
-
-static void
-fast_composite_over_8888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint32_t *src_line, *src, s;
- uint8_t a;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (s)
- {
- if (a == 0xff)
- {
- d = s;
- }
- else
- {
- d = *dst;
- d = over (s, CONVERT_0565_TO_0888 (d));
- }
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_src_x888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- *dst = CONVERT_8888_TO_0565 (s);
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint8_t s, d;
- uint16_t t;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- if (s)
- {
- if (s != 0xff)
- {
- d = *dst;
- t = d + s;
- s = t | (0 - (t >> 8));
- }
- *dst = s;
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint32_t s, d;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- if (s)
- {
- if (s != 0xffffffff)
- {
- d = *dst;
- if (d)
- UN8x4_ADD_UN8x4 (s, d);
- }
- *dst = s;
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- int32_t w;
- uint32_t src;
- uint8_t sa;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- sa = (src >> 24);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- uint16_t tmp;
- uint16_t a;
- uint32_t m, d;
- uint32_t r;
-
- a = *mask++;
- d = *dst;
-
- m = MUL_UN8 (sa, a, tmp);
- r = ADD_UN8 (m, d, tmp);
-
- *dst++ = r;
- }
- }
-}
-
-#ifdef WORDS_BIGENDIAN
-#define CREATE_BITMASK(n) (0x80000000 >> (n))
-#define UPDATE_BITMASK(n) ((n) >> 1)
-#else
-#define CREATE_BITMASK(n) (1 << (n))
-#define UPDATE_BITMASK(n) ((n) << 1)
-#endif
-
-#define TEST_BIT(p, n) \
- (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
-#define SET_BIT(p, n) \
- do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
-
-static void
-fast_composite_add_1000_1000 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
- src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t,
- dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- /*
- * TODO: improve performance by processing uint32_t data instead
- * of individual bits
- */
- if (TEST_BIT (src, src_x + w))
- SET_BIT (dst, dest_x + w);
- }
- }
-}
-
-static void
-fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst, *dst_line;
- uint32_t *mask, *mask_line;
- int mask_stride, dst_stride;
- uint32_t bitcache, bitmask;
- int32_t w;
-
- if (width <= 0)
- return;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
- dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
- mask_stride, mask_line, 1);
- mask_line += mask_x >> 5;
-
- if (srca == 0xff)
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = src;
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = over (src, *dst);
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
-}
-
-static void
-fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst, *dst_line;
- uint32_t *mask, *mask_line;
- int mask_stride, dst_stride;
- uint32_t bitcache, bitmask;
- int32_t w;
- uint32_t d;
- uint16_t src565;
-
- if (width <= 0)
- return;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
- dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
- mask_stride, mask_line, 1);
- mask_line += mask_x >> 5;
-
- if (srca == 0xff)
- {
- src565 = CONVERT_8888_TO_0565 (src);
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = src565;
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- {
- d = over (src, CONVERT_0565_TO_0888 (*dst));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
-}
-
-/*
- * Simple bitblt
- */
-
-static void
-fast_composite_solid_fill (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (dst_image->bits.format == PIXMAN_a1)
- {
- src = src >> 31;
- }
- else if (dst_image->bits.format == PIXMAN_a8)
- {
- src = src >> 24;
- }
- else if (dst_image->bits.format == PIXMAN_r5g6b5 ||
- dst_image->bits.format == PIXMAN_b5g6r5)
- {
- src = CONVERT_8888_TO_0565 (src);
- }
-
- pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
- PIXMAN_FORMAT_BPP (dst_image->bits.format),
- dest_x, dest_y,
- width, height,
- src);
-}
-
-static void
-fast_composite_src_memcpy (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8;
- uint32_t n_bytes = width * bpp;
- int dst_stride, src_stride;
- uint8_t *dst;
- uint8_t *src;
-
- src_stride = src_image->bits.rowstride * 4;
- dst_stride = dst_image->bits.rowstride * 4;
-
- src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
- dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
-
- while (height--)
- {
- memcpy (dst, src, n_bytes);
-
- dst += dst_stride;
- src += src_stride;
- }
-}
-
-FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
-FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
-FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
-FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
-FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
-FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
-FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
-FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
-FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
-FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
-FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
-FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
-FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
-FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
-FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
-FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
-FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
-
-/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
-static force_inline void
-scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
- uint16_t * src,
- int32_t w,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx)
-{
- uint16_t tmp1, tmp2, tmp3, tmp4;
- while ((w -= 4) >= 0)
- {
- tmp1 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp2 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp3 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp4 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- *dst++ = tmp1;
- *dst++ = tmp2;
- *dst++ = tmp3;
- *dst++ = tmp4;
- }
- if (w & 2)
- {
- tmp1 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp2 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- *dst++ = tmp1;
- *dst++ = tmp2;
- }
- if (w & 1)
- *dst++ = src[pixman_fixed_to_int (vx)];
-}
-
-FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, COVER)
-FAST_NEAREST_MAINLOOP (565_565_none_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, NONE)
-FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, PAD)
-
-static force_inline uint32_t
-fetch_nearest (pixman_repeat_t src_repeat,
- pixman_format_code_t format,
- uint32_t *src, int x, int src_width)
-{
- if (repeat (src_repeat, &x, src_width))
- {
- if (format == PIXMAN_x8r8g8b8)
- return *(src + x) | 0xff000000;
- else
- return *(src + x);
- }
- else
- {
- return 0;
- }
-}
-
-static force_inline void
-combine_over (uint32_t s, uint32_t *dst)
-{
- if (s)
- {
- uint8_t ia = 0xff - (s >> 24);
-
- if (ia)
- UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
- else
- *dst = s;
- }
-}
-
-static force_inline void
-combine_src (uint32_t s, uint32_t *dst)
-{
- *dst = s;
-}
-
-static void
-fast_composite_scaled_nearest (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line;
- uint32_t *src_line;
- int dst_stride, src_stride;
- int src_width, src_height;
- pixman_repeat_t src_repeat;
- pixman_fixed_t unit_x, unit_y;
- pixman_format_code_t src_format;
- pixman_vector_t v;
- pixman_fixed_t vy;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
- * transformed from destination space to source space
- */
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
-
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- if (!pixman_transform_point_3d (src_image->common.transform, &v))
- return;
-
- unit_x = src_image->common.transform->matrix[0][0];
- unit_y = src_image->common.transform->matrix[1][1];
-
- /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
- v.vector[0] -= pixman_fixed_e;
- v.vector[1] -= pixman_fixed_e;
-
- src_height = src_image->bits.height;
- src_width = src_image->bits.width;
- src_repeat = src_image->common.repeat;
- src_format = src_image->bits.format;
-
- vy = v.vector[1];
- while (height--)
- {
- pixman_fixed_t vx = v.vector[0];
- int y = pixman_fixed_to_int (vy);
- uint32_t *dst = dst_line;
-
- dst_line += dst_stride;
-
- /* adjust the y location by a unit vector in the y direction
- * this is equivalent to transforming y+1 of the destination point to source space */
- vy += unit_y;
-
- if (!repeat (src_repeat, &y, src_height))
- {
- if (op == PIXMAN_OP_SRC)
- memset (dst, 0, sizeof (*dst) * width);
- }
- else
- {
- int w = width;
-
- uint32_t *src = src_line + y * src_stride;
-
- while (w >= 2)
- {
- uint32_t s1, s2;
- int x1, x2;
-
- x1 = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- x2 = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- w -= 2;
-
- s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
- s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
-
- if (op == PIXMAN_OP_OVER)
- {
- combine_over (s1, dst++);
- combine_over (s2, dst++);
- }
- else
- {
- combine_src (s1, dst++);
- combine_src (s2, dst++);
- }
- }
-
- while (w--)
- {
- uint32_t s;
- int x;
-
- x = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- s = fetch_nearest (src_repeat, src_format, src, x, src_width);
-
- if (op == PIXMAN_OP_OVER)
- combine_over (s, dst++);
- else
- combine_src (s, dst++);
- }
- }
- }
-}
-
-static const pixman_fast_path_t c_fast_paths[] =
-{
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
- PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
- PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
- PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
-
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
-/*
- * NEAREST_FAST_PATH (op, s, d) expands to one brace-enclosed initializer for
- * the table it is used in.  Token pasting turns the arguments into
- * PIXMAN_OP_<op>, PIXMAN_<s> and PIXMAN_<d>; the entry pairs PIXMAN_<s> with
- * SCALED_NEAREST_FLAGS, PIXMAN_null with 0 and PIXMAN_<d> with
- * FAST_PATH_STD_DEST_FLAGS, and names fast_composite_scaled_nearest as its
- * last member.
- * NOTE(review): the three format/flag pairs look like source, mask and
- * destination, in that order -- confirm against the entry struct.
- */
-#define NEAREST_FAST_PATH(op,s,d) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest, \
- }
-
- NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
- NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
-
- NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
- NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
-
- NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
- NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
-
- NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
- NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
-
- { PIXMAN_OP_NONE },
-};
-
-/*
- * A1_FILL_MASK (n, offs): mask selecting n consecutive 1-bpp pixels that
- * start at pixel index offs inside one 32-bit word.  Big-endian builds shift
- * the run by (32 - offs - n), all other builds shift it by offs.
- *
- * The shifted constant is a uint32_t so that building a 31-bit run, or moving
- * a bit into position 31, cannot overflow a signed int.  n must stay in the
- * range 0..31 (n == 32 would shift by the full width of the type) and the
- * final shift count must stay below 32 as well.
- */
-#ifdef WORDS_BIGENDIAN
-#define A1_FILL_MASK(n, offs) ((((uint32_t) 1 << (n)) - 1) << (32 - (offs) - (n)))
-#else
-#define A1_FILL_MASK(n, offs) ((((uint32_t) 1 << (n)) - 1) << (offs))
-#endif
-
-/*
- * Set (v != 0) or clear (v == 0) `width` consecutive 1-bpp pixels of a single
- * scanline.  `dst` points at the 32-bit word holding the first pixel and
- * `offs` is that pixel's index inside the word.
- */
-static force_inline void
-pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
-{
-    /* Partial word at the start of the run. */
-    if (offs)
-    {
-        int head = 32 - offs;
-        int count = (head >= width) ? width : head;
-
-        if (v)
-            *dst |= A1_FILL_MASK (count, offs);
-        else
-            *dst &= ~A1_FILL_MASK (count, offs);
-
-        /* The whole run fitted inside the first word. */
-        if (head >= width)
-            return;
-
-        dst++;
-        width -= head;
-    }
-
-    /* Whole words in the middle are overwritten outright. */
-    for (; width >= 32; width -= 32)
-        *dst++ = v ? 0xFFFFFFFF : 0;
-
-    /* Partial word at the end of the run. */
-    if (width > 0)
-    {
-        if (v)
-            *dst |= A1_FILL_MASK (width, 0);
-        else
-            *dst &= ~A1_FILL_MASK (width, 0);
-    }
-}
-
-/*
- * Fill a width x height rectangle of a 1-bpp buffer whose top-left pixel is
- * (x, y).  `stride` is counted in uint32_t units.  Only bit 0 of `xor` is
- * looked at: when it is set the pixels are set, otherwise they are cleared.
- */
-static void
-pixman_fill1 (uint32_t *bits,
-              int       stride,
-              int       x,
-              int       y,
-              int       width,
-              int       height,
-              uint32_t  xor)
-{
-    uint32_t *row = bits + y * stride + (x >> 5);
-    int first_bit = x & 31;
-
-    /*
-     * Each branch passes a literal 0 or 1 to the inlined helper --
-     * presumably so the helper's tests on `v` can be folded away.
-     */
-    if (xor & 1)
-    {
-        for (; height--; row += stride)
-            pixman_fill1_line (row, first_bit, width, 1);
-    }
-    else
-    {
-        for (; height--; row += stride)
-            pixman_fill1_line (row, first_bit, width, 0);
-    }
-}
-
-/*
- * Fill a width x height rectangle of an 8-bpp buffer whose top-left pixel is
- * (x, y) with the low byte of `xor`.  `stride` is counted in uint32_t units
- * and converted to bytes here.
- */
-static void
-pixman_fill8 (uint32_t *bits,
-              int       stride,
-              int       x,
-              int       y,
-              int       width,
-              int       height,
-              uint32_t  xor)
-{
-    const int row_bytes = stride * (int) sizeof (uint32_t);
-    const uint8_t value = xor & 0xff;
-    uint8_t *row = (uint8_t *) bits + y * row_bytes + x;
-
-    for (; height--; row += row_bytes)
-    {
-        uint8_t *p = row;
-        int remaining = width;
-
-        while (remaining-- > 0)
-            *p++ = value;
-    }
-}
-
-/*
- * Fill a width x height rectangle of a 16-bpp buffer whose top-left pixel is
- * (x, y) with the low 16 bits of `xor`.  `stride` is counted in uint32_t
- * units and converted to uint16_t units here.
- */
-static void
-pixman_fill16 (uint32_t *bits,
-               int       stride,
-               int       x,
-               int       y,
-               int       width,
-               int       height,
-               uint32_t  xor)
-{
-    const int row_shorts =
-        (stride * (int) sizeof (uint32_t)) / (int) sizeof (uint16_t);
-    const uint16_t value = xor & 0xffff;
-    uint16_t *row = (uint16_t *) bits + y * row_shorts + x;
-
-    for (; height--; row += row_shorts)
-    {
-        uint16_t *p = row;
-        int remaining = width;
-
-        while (remaining-- > 0)
-            *p++ = value;
-    }
-}
-
-/*
- * Fill a width x height rectangle of a 32-bpp buffer whose top-left pixel is
- * (x, y) with the full value of `xor`.  `stride` is counted in uint32_t
- * units.
- */
-static void
-pixman_fill32 (uint32_t *bits,
-               int       stride,
-               int       x,
-               int       y,
-               int       width,
-               int       height,
-               uint32_t  xor)
-{
-    uint32_t *row = bits + y * stride + x;
-
-    for (; height--; row += stride)
-    {
-        uint32_t *p = row;
-        int remaining = width;
-
-        while (remaining-- > 0)
-            *p++ = xor;
-    }
-}
-
-/*
- * Fill entry point of this implementation.  Depths of 1, 8, 16 and 32 bits
- * per pixel are handled by the local pixman_fill* routines and report TRUE;
- * every other depth is forwarded to imp->delegate and that call's result is
- * returned unchanged.
- */
-static pixman_bool_t
-fast_path_fill (pixman_implementation_t *imp,
-                uint32_t *               bits,
-                int                      stride,
-                int                      bpp,
-                int                      x,
-                int                      y,
-                int                      width,
-                int                      height,
-                uint32_t                 xor)
-{
-    if (bpp == 1)
-    {
-        pixman_fill1 (bits, stride, x, y, width, height, xor);
-    }
-    else if (bpp == 8)
-    {
-        pixman_fill8 (bits, stride, x, y, width, height, xor);
-    }
-    else if (bpp == 16)
-    {
-        pixman_fill16 (bits, stride, x, y, width, height, xor);
-    }
-    else if (bpp == 32)
-    {
-        pixman_fill32 (bits, stride, x, y, width, height, xor);
-    }
-    else
-    {
-        /* No local routine for this depth: let the delegate decide. */
-        return _pixman_implementation_fill (
-            imp->delegate, bits, stride, bpp, x, y, width, height, xor);
-    }
-
-    return TRUE;
-}
-
-/*
- * Create the fast-path implementation: a new implementation built from the
- * c_fast_paths table with the general implementation passed as its first
- * argument, and with fast_path_fill installed as its fill hook.
- */
-pixman_implementation_t *
-_pixman_implementation_create_fast_path (void)
-{
-    pixman_implementation_t *fast_path;
-
-    fast_path = _pixman_implementation_create (
-        _pixman_implementation_create_general (), c_fast_paths);
-    fast_path->fill = fast_path_fill;
-
-    return fast_path;
-}
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. 
+ */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <string.h> +#include <stdlib.h> +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-fast-path.h" + +static force_inline uint32_t +fetch_24 (uint8_t *a) +{ + if (((unsigned long)a) & 1) + { +#ifdef WORDS_BIGENDIAN + return (*a << 16) | (*(uint16_t *)(a + 1)); +#else + return *a | (*(uint16_t *)(a + 1) << 8); +#endif + } + else + { +#ifdef WORDS_BIGENDIAN + return (*(uint16_t *)a << 8) | *(a + 2); +#else + return *(uint16_t *)a | (*(a + 2) << 16); +#endif + } +} + +static force_inline void +store_24 (uint8_t *a, + uint32_t v) +{ + if (((unsigned long)a) & 1) + { +#ifdef WORDS_BIGENDIAN + *a = (uint8_t) (v >> 16); + *(uint16_t *)(a + 1) = (uint16_t) (v); +#else + *a = (uint8_t) (v); + *(uint16_t *)(a + 1) = (uint16_t) (v >> 8); +#endif + } + else + { +#ifdef WORDS_BIGENDIAN + *(uint16_t *)a = (uint16_t)(v >> 8); + *(a + 2) = (uint8_t)v; +#else + *(uint16_t *)a = (uint16_t)v; + *(a + 2) = (uint8_t)(v >> 16); +#endif + } +} + +static force_inline uint32_t +over (uint32_t src, + uint32_t dest) +{ + uint32_t a = ~src >> 24; + + UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src); + + return dest; +} + +static uint32_t +in (uint32_t x, + uint8_t y) +{ + uint16_t a = y; + + UN8x4_MUL_UN8 (x, a); + + return x; +} + +/* + * Naming convention: + * + * op_src_mask_dest + */ +static void +fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *src, *src_line; + uint32_t *dst, *dst_line; + uint8_t *mask, *mask_line; + int src_stride, mask_stride, dst_stride; + uint8_t m; + uint32_t s, d; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, 
mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + while (w--) + { + m = *mask++; + if (m) + { + s = *src | 0xff000000; + + if (m == 0xff) + { + *dst = s; + } + else + { + d = in (s, m); + *dst = over (d, *dst); + } + } + src++; + dst++; + } + } +} + +static void +fast_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + uint16_t t; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + if (srca == 0xff) + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + + if (m == 0) + *dst = 0; + else if (m != 0xff) + *dst = MUL_UN8 (m, *dst, t); + + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + m = MUL_UN8 (m, srca, t); + + if (m == 0) + *dst = 0; + else if (m != 0xff) + *dst = MUL_UN8 (m, *dst, t); + + dst++; + } + } + } +} + +static void +fast_composite_in_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, 
+ pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + + if (s == 0) + *dst = 0; + else if (s != 0xff) + *dst = MUL_UN8 (s, *dst, t); + + dst++; + } + } +} + +static void +fast_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst_line, *dst, d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + *dst = src; + else + *dst = over (src, *dst); + } + else if (m) + { + d = in (src, m); + *dst = over (d, *dst); + } + dst++; + } + } +} + +static void +fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * 
mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + + if (ma) + { + d = *dst; + s = src; + + UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d); + + *dst = s; + } + + dst++; + } + } +} + +static void +fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + if (ma == 0xffffffff) + { + if (srca == 0xff) + *dst = src; + else + *dst = over (src, *dst); + } + else if (ma) + { + d = *dst; + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + 
UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = d; + } + + dst++; + } + } +} + +static void +fast_composite_over_n_8_0888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + { + d = src; + } + else + { + d = fetch_24 (dst); + d = over (src, d); + } + store_24 (dst, d); + } + else if (m) + { + d = over (in (src, m), fetch_24 (dst)); + store_24 (dst, d); + } + dst += 3; + } + } +} + +static void +fast_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + 
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + { + d = src; + } + else + { + d = *dst; + d = over (src, CONVERT_0565_TO_0888 (d)); + } + *dst = CONVERT_8888_TO_0565 (d); + } + else if (m) + { + d = *dst; + d = over (in (src, m), CONVERT_0565_TO_0888 (d)); + *dst = CONVERT_8888_TO_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca, s; + uint16_t src16; + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + src16 = CONVERT_8888_TO_0565 (src); + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + if (ma == 0xffffffff) + { + if (srca == 0xff) + { + *dst = src16; + } + else + { + d = *dst; + d = over (src, CONVERT_0565_TO_0888 (d)); + *dst = CONVERT_8888_TO_0565 (d); + } + } + else if (ma) + { + d = *dst; + d = CONVERT_0565_TO_0888 (d); + + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = CONVERT_8888_TO_0565 (d); + } + dst++; + } + } 
+} + +static void +fast_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + uint8_t a; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (a == 0xff) + *dst = s; + else if (s) + *dst = over (s, *dst); + dst++; + } + } +} + +static void +fast_composite_src_x888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + *dst++ = (*src++) | 0xff000000; + } +} + +#if 0 +static void +fast_composite_over_8888_0888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t 
height) +{ + uint8_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (a) + { + if (a == 0xff) + d = s; + else + d = over (s, fetch_24 (dst)); + + store_24 (dst, d); + } + dst += 3; + } + } +} +#endif + +static void +fast_composite_over_8888_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (s) + { + if (a == 0xff) + { + d = s; + } + else + { + d = *dst; + d = over (s, CONVERT_0565_TO_0888 (d)); + } + *dst = CONVERT_8888_TO_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_src_x888_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *dst; + uint32_t *src_line, *src, s; + 
int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + *dst = CONVERT_8888_TO_0565 (s); + dst++; + } + } +} + +static void +fast_composite_add_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s, d; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + if (s) + { + if (s != 0xff) + { + d = *dst; + t = d + s; + s = t | (0 - (t >> 8)); + } + *dst = s; + } + dst++; + } + } +} + +static void +fast_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint32_t s, d; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = 
dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + if (s) + { + if (s != 0xffffffff) + { + d = *dst; + if (d) + UN8x4_ADD_UN8x4 (s, d); + } + *dst = s; + } + dst++; + } + } +} + +static void +fast_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + uint8_t sa; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + sa = (src >> 24); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; + + a = *mask++; + d = *dst; + + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); + + *dst++ = r; + } + } +} + +#ifdef WORDS_BIGENDIAN +#define CREATE_BITMASK(n) (0x80000000 >> (n)) +#define UPDATE_BITMASK(n) ((n) >> 1) +#else +#define CREATE_BITMASK(n) (1 << (n)) +#define UPDATE_BITMASK(n) ((n) << 1) +#endif + +#define TEST_BIT(p, n) \ + (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31)) +#define SET_BIT(p, n) \ + do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); + +static void +fast_composite_add_1000_1000 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + 
int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, + src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t, + dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + /* + * TODO: improve performance by processing uint32_t data instead + * of individual bits + */ + if (TEST_BIT (src, src_x + w)) + SET_BIT (dst, dest_x + w); + } + } +} + +static void +fast_composite_over_n_1_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + if (srca == 0xff) + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = src; + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + 
w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = over (src, *dst); + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } +} + +static void +fast_composite_over_n_1_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + uint32_t d; + uint16_t src565; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + if (srca == 0xff) + { + src565 = CONVERT_8888_TO_0565 (src); + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = src565; + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + { + d = over (src, 
CONVERT_0565_TO_0888 (*dst)); + *dst = CONVERT_8888_TO_0565 (d); + } + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } +} + +/* + * Simple bitblt + */ + +static void +fast_composite_solid_fill (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + if (dst_image->bits.format == PIXMAN_a1) + { + src = src >> 31; + } + else if (dst_image->bits.format == PIXMAN_a8) + { + src = src >> 24; + } + else if (dst_image->bits.format == PIXMAN_r5g6b5 || + dst_image->bits.format == PIXMAN_b5g6r5) + { + src = CONVERT_8888_TO_0565 (src); + } + + pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dst_image->bits.format), + dest_x, dest_y, + width, height, + src); +} + +static void +fast_composite_src_memcpy (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8; + uint32_t n_bytes = width * bpp; + int dst_stride, src_stride; + uint8_t *dst; + uint8_t *src; + + src_stride = src_image->bits.rowstride * 4; + dst_stride = dst_image->bits.rowstride * 4; + + src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; + dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp; + + while (height--) + { + memcpy (dst, src, n_bytes); + + dst += dst_stride; + src += src_stride; + } +} + +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, 
uint32_t, SRC, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL) +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) + +/* Use more unrolling for src_0565_0565 because it is typically CPU bound */ +static force_inline void +scaled_nearest_scanline_565_565_SRC (uint16_t * dst, + uint16_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx) +{ + uint16_t tmp1, tmp2, tmp3, tmp4; + while ((w -= 4) >= 0) + { + tmp1 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp2 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp3 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp4 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + *dst++ = tmp1; + *dst++ = tmp2; + *dst++ = tmp3; + *dst++ = tmp4; + } + if (w & 2) + { + tmp1 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp2 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + *dst++ = tmp1; + *dst++ = tmp2; + } + if (w & 1) + *dst++ = src[pixman_fixed_to_int (vx)]; +} + 
+FAST_NEAREST_MAINLOOP (565_565_cover_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, COVER) +FAST_NEAREST_MAINLOOP (565_565_none_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, NONE) +FAST_NEAREST_MAINLOOP (565_565_pad_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, PAD) + +static force_inline uint32_t +fetch_nearest (pixman_repeat_t src_repeat, + pixman_format_code_t format, + uint32_t *src, int x, int src_width) +{ + if (repeat (src_repeat, &x, src_width)) + { + if (format == PIXMAN_x8r8g8b8) + return *(src + x) | 0xff000000; + else + return *(src + x); + } + else + { + return 0; + } +} + +static force_inline void +combine_over (uint32_t s, uint32_t *dst) +{ + if (s) + { + uint8_t ia = 0xff - (s >> 24); + + if (ia) + UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s); + else + *dst = s; + } +} + +static force_inline void +combine_src (uint32_t s, uint32_t *dst) +{ + *dst = s; +} + +static void +fast_composite_scaled_nearest (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line; + uint32_t *src_line; + int dst_stride, src_stride; + int src_width, src_height; + pixman_repeat_t src_repeat; + pixman_fixed_t unit_x, unit_y; + pixman_format_code_t src_format; + pixman_vector_t v; + pixman_fixed_t vy; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be + * transformed from destination space to source space + */ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1); + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; 
+ v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (src_image->common.transform, &v)) + return; + + unit_x = src_image->common.transform->matrix[0][0]; + unit_y = src_image->common.transform->matrix[1][1]; + + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ + v.vector[0] -= pixman_fixed_e; + v.vector[1] -= pixman_fixed_e; + + src_height = src_image->bits.height; + src_width = src_image->bits.width; + src_repeat = src_image->common.repeat; + src_format = src_image->bits.format; + + vy = v.vector[1]; + while (height--) + { + pixman_fixed_t vx = v.vector[0]; + int y = pixman_fixed_to_int (vy); + uint32_t *dst = dst_line; + + dst_line += dst_stride; + + /* adjust the y location by a unit vector in the y direction + * this is equivalent to transforming y+1 of the destination point to source space */ + vy += unit_y; + + if (!repeat (src_repeat, &y, src_height)) + { + if (op == PIXMAN_OP_SRC) + memset (dst, 0, sizeof (*dst) * width); + } + else + { + int w = width; + + uint32_t *src = src_line + y * src_stride; + + while (w >= 2) + { + uint32_t s1, s2; + int x1, x2; + + x1 = pixman_fixed_to_int (vx); + vx += unit_x; + + x2 = pixman_fixed_to_int (vx); + vx += unit_x; + + w -= 2; + + s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width); + s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width); + + if (op == PIXMAN_OP_OVER) + { + combine_over (s1, dst++); + combine_over (s2, dst++); + } + else + { + combine_src (s1, dst++); + combine_src (s2, dst++); + } + } + + while (w--) + { + uint32_t s; + int x; + + x = pixman_fixed_to_int (vx); + vx += unit_x; + + s = fetch_nearest (src_repeat, src_format, src, x, src_width); + + if (op == PIXMAN_OP_OVER) + combine_over (s, dst++); + else + combine_src (s, dst++); + } + } + } +} + +static const pixman_fast_path_t c_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, 
fast_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, 
fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000), + PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), + 
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), + + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565), + + SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, 
r5g6b5, 565_565), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), + +#define NEAREST_FAST_PATH(op,s,d) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest, \ + } + + NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8), + NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8), + + NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8), + NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8), + + NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8), + NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8), + + NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8), + NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8), + + { PIXMAN_OP_NONE }, +}; + +#ifdef WORDS_BIGENDIAN +#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n))) +#else +#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs)) +#endif + +static force_inline void +pixman_fill1_line (uint32_t *dst, int offs, int width, int v) +{ + if (offs) + { + int leading_pixels = 32 - offs; + if (leading_pixels >= width) + { + if (v) + *dst |= A1_FILL_MASK (width, offs); + else + *dst &= ~A1_FILL_MASK (width, offs); + return; + } + else + { + if (v) + *dst++ |= A1_FILL_MASK (leading_pixels, offs); + else + *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); + width -= leading_pixels; + } + } + while (width >= 32) + { + 
if (v) + *dst++ = 0xFFFFFFFF; + else + *dst++ = 0; + width -= 32; + } + if (width > 0) + { + if (v) + *dst |= A1_FILL_MASK (width, 0); + else + *dst &= ~A1_FILL_MASK (width, 0); + } +} + +static void +pixman_fill1 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + uint32_t *dst = bits + y * stride + (x >> 5); + int offs = x & 31; + + if (xor & 1) + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 1); + dst += stride; + } + } + else + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 0); + dst += stride; + } + } +} + +static void +pixman_fill8 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + int byte_stride = stride * (int) sizeof (uint32_t); + uint8_t *dst = (uint8_t *) bits; + uint8_t v = xor & 0xff; + int i; + + dst = dst + y * byte_stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + dst[i] = v; + + dst += byte_stride; + } +} + +static void +pixman_fill16 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + int short_stride = + (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); + uint16_t *dst = (uint16_t *)bits; + uint16_t v = xor & 0xffff; + int i; + + dst = dst + y * short_stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + dst[i] = v; + + dst += short_stride; + } +} + +static void +pixman_fill32 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + int i; + + bits = bits + y * stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + bits[i] = xor; + + bits += stride; + } +} + +static pixman_bool_t +fast_path_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + switch (bpp) + { + case 1: + pixman_fill1 (bits, stride, x, y, width, height, xor); + break; + + case 8: + pixman_fill8 (bits, stride, x, y, width, 
height, xor); + break; + + case 16: + pixman_fill16 (bits, stride, x, y, width, height, xor); + break; + + case 32: + pixman_fill32 (bits, stride, x, y, width, height, xor); + break; + + default: + return _pixman_implementation_fill ( + imp->delegate, bits, stride, bpp, x, y, width, height, xor); + break; + } + + return TRUE; +} + +pixman_implementation_t * +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); + + imp->fill = fast_path_fill; + + return imp; +} diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c index 6ca86aa42..88442f70e 100644 --- a/pixman/pixman/pixman-mmx.c +++ b/pixman/pixman/pixman-mmx.c @@ -464,7 +464,7 @@ mmx_combine_over_u (pixman_implementation_t *imp, while (dest < end) { - uint32_t ssrc = combine( src, mask); + uint32_t ssrc = combine (src, mask); uint32_t a = ssrc >> 24; if (a == 0xff) @@ -3356,10 +3356,9 @@ mmx_fill (pixman_implementation_t *imp, } pixman_implementation_t * -_pixman_implementation_create_mmx (void) +_pixman_implementation_create_mmx (pixman_implementation_t *fallback) { - pixman_implementation_t *general = _pixman_implementation_create_fast_path (); - pixman_implementation_t *imp = _pixman_implementation_create (general, mmx_fast_paths); + pixman_implementation_t *imp = _pixman_implementation_create (fallback, mmx_fast_paths); imp->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u; imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u; diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index 1662d2c46..664260b9f 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -534,31 +534,31 @@ pixman_implementation_t * _pixman_implementation_create_general (void); pixman_implementation_t * -_pixman_implementation_create_fast_path (void); +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback); #ifdef USE_MMX 
pixman_implementation_t * -_pixman_implementation_create_mmx (void); +_pixman_implementation_create_mmx (pixman_implementation_t *fallback); #endif #ifdef USE_SSE2 pixman_implementation_t * -_pixman_implementation_create_sse2 (void); +_pixman_implementation_create_sse2 (pixman_implementation_t *fallback); #endif #ifdef USE_ARM_SIMD pixman_implementation_t * -_pixman_implementation_create_arm_simd (void); +_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback); #endif #ifdef USE_ARM_NEON pixman_implementation_t * -_pixman_implementation_create_arm_neon (void); +_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback); #endif #ifdef USE_VMX pixman_implementation_t * -_pixman_implementation_create_vmx (void); +_pixman_implementation_create_vmx (pixman_implementation_t *fallback); #endif pixman_implementation_t * diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index 3c0a42f41..ae5545604 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -5957,13 +5957,8 @@ sse2_fill (pixman_implementation_t *imp, __attribute__((__force_align_arg_pointer__)) #endif pixman_implementation_t * -_pixman_implementation_create_sse2 (void) +_pixman_implementation_create_sse2 (pixman_implementation_t *fallback) { -#ifdef USE_MMX - pixman_implementation_t *fallback = _pixman_implementation_create_mmx (); -#else - pixman_implementation_t *fallback = _pixman_implementation_create_fast_path (); -#endif pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths); /* SSE2 constants */ diff --git a/pixman/pixman/pixman-vmx.c b/pixman/pixman/pixman-vmx.c index e811cf733..6868704a8 100644 --- a/pixman/pixman/pixman-vmx.c +++ b/pixman/pixman/pixman-vmx.c @@ -1613,10 +1613,9 @@ static const pixman_fast_path_t vmx_fast_paths[] = }; pixman_implementation_t * -_pixman_implementation_create_vmx (void) +_pixman_implementation_create_vmx (pixman_implementation_t *fallback) { - 
pixman_implementation_t *fast = _pixman_implementation_create_fast_path (); - pixman_implementation_t *imp = _pixman_implementation_create (fast, vmx_fast_paths); + pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths); /* Set up function pointers */ |