diff options
author | marha <marha@users.sourceforge.net> | 2011-04-27 06:58:32 +0000 |
---|---|---|
committer | marha <marha@users.sourceforge.net> | 2011-04-27 06:58:32 +0000 |
commit | 96d6df5da9cddedf4931bf8e17f96e242467c661 (patch) | |
tree | 07977c913b04e80b7dbd302e7a5890422aeacb1b /pixman | |
parent | 71372d36e1a3f0230b88808f70d35446fda12260 (diff) | |
download | vcxsrv-96d6df5da9cddedf4931bf8e17f96e242467c661.tar.gz vcxsrv-96d6df5da9cddedf4931bf8e17f96e242467c661.tar.bz2 vcxsrv-96d6df5da9cddedf4931bf8e17f96e242467c661.zip |
xserver libX11 libxtrans mesa pixman xkeyboard-config git update 27 Apr 2011
Diffstat (limited to 'pixman')
-rw-r--r-- | pixman/configure.ac | 51 | ||||
-rw-r--r-- | pixman/demos/tri-test.c | 96 | ||||
-rw-r--r-- | pixman/pixman/Makefile.am | 1 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-common.h | 863 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-neon-asm-bilinear.S | 768 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-neon-asm.S | 169 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-neon.c | 963 | ||||
-rw-r--r-- | pixman/pixman/pixman-trap.c | 1325 | ||||
-rw-r--r-- | pixman/test/composite-traps-test.c | 514 |
9 files changed, 2897 insertions, 1853 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac index db9a883b6..ef8162fc3 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -54,7 +54,7 @@ AC_PREREQ([2.57]) m4_define([pixman_major], 0) m4_define([pixman_minor], 21) -m4_define([pixman_micro], 7) +m4_define([pixman_micro], 9) m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) @@ -119,7 +119,7 @@ for w in -Werror -errwarn; do [CFLAGS=$w], [int main(int c, char **v) { (void)c; (void)v; return 0; }], [WERROR=$w; yesno=yes], [yesno=no]) - AC_MSG_RESULT($_yesno) + AC_MSG_RESULT($yesno) fi done @@ -192,35 +192,43 @@ dnl ========================================================================= dnl OpenMP for the test suite? dnl -# Check for OpenMP support (only supported by autoconf >=2.62) +# Check for OpenMP support only when autoconf support that (require autoconf >=2.62) OPENMP_CFLAGS= m4_ifdef([AC_OPENMP], [AC_OPENMP]) -m4_define([openmp_test_program],[dnl -#include <stdio.h> +if test "x$enable_openmp" = "xyes" && test "x$ac_cv_prog_c_openmp" = "xunsupported" ; then + AC_MSG_WARN([OpenMP support requested but found unsupported]) +fi -extern unsigned int lcg_seed; -#pragma omp threadprivate(lcg_seed) -unsigned int lcg_seed; +dnl May not fail to link without -Wall -Werror added +dnl So try to link only when openmp is supported +dnl ac_cv_prog_c_openmp is not defined when --disable-openmp is used +if test "x$ac_cv_prog_c_openmp" != "xunsupported" && test "x$ac_cv_prog_c_openmp" != "x"; then + m4_define([openmp_test_program],[dnl + #include <stdio.h> -unsigned function(unsigned a, unsigned b) -{ + extern unsigned int lcg_seed; + #pragma omp threadprivate(lcg_seed) + unsigned int lcg_seed; + + unsigned function(unsigned a, unsigned b) + { lcg_seed ^= b; return ((a + b) ^ a ) + lcg_seed; -} + } -int main(int argc, char **argv) -{ + int main(int argc, char **argv) + { int i; int n1 = 0, n2 = argc; unsigned checksum = 0; int verbose = argv != NULL; unsigned (*test_function)(unsigned, unsigned); test_function = function; - #pragma omp parallel for reduction(+:checksum) default(none) \ + #pragma omp parallel for reduction(+:checksum) default(none) \ shared(n1, n2, test_function, verbose) for (i = n1; i < n2; i++) - { + { unsigned crc = test_function (i, 0); if (verbose) printf ("%d: %08X\n", i, crc); @@ -228,18 +236,17 @@ int main(int argc, char **argv) } printf("%u\n", checksum); return 0; -} -]) + } + ]) -PIXMAN_LINK_WITH_ENV( + PIXMAN_LINK_WITH_ENV( [CFLAGS="$OPENMP_CFLAGS" LDFLAGS="$OPENMP_CFLAGS"], [openmp_test_program], [have_openmp=yes], [have_openmp=no]) -if test "x$have_openmp" = "xyes"; then - AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite]) -else - OPENMP_CFLAGS="" + if test "x$have_openmp" = "xyes" ; then + AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite]) + fi fi AC_SUBST(OPENMP_CFLAGS) diff --git a/pixman/demos/tri-test.c b/pixman/demos/tri-test.c index 23ea18cb3..a71869a6a 100644 --- a/pixman/demos/tri-test.c +++ b/pixman/demos/tri-test.c @@ -1,48 +1,48 @@ -#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "../test/utils.h"
-#include "gtk-utils.h"
-
-int
-main (int argc, char **argv)
-{
-#define WIDTH 200
-#define HEIGHT 200
-
-#define POINT(x,y) \
- { pixman_double_to_fixed ((x)), pixman_double_to_fixed ((y)) }
-
- pixman_image_t *src_img, *dest_img;
- pixman_triangle_t tris[4] =
- {
- { POINT (100, 100), POINT (10, 50), POINT (110, 10) },
- { POINT (100, 100), POINT (150, 10), POINT (200, 50) },
- { POINT (100, 100), POINT (10, 170), POINT (90, 175) },
- { POINT (100, 100), POINT (170, 150), POINT (120, 190) },
- };
- pixman_color_t color = { 0x4444, 0x4444, 0xffff, 0xffff };
- uint32_t *bits = malloc (WIDTH * HEIGHT * 4);
- int i;
-
- for (i = 0; i < WIDTH * HEIGHT; ++i)
- bits[i] = (i / HEIGHT) * 0x01010000;
-
- src_img = pixman_image_create_solid_fill (&color);
- dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4);
-
- pixman_composite_triangles (PIXMAN_OP_ATOP_REVERSE,
- src_img,
- dest_img,
- PIXMAN_a8,
- 200, 200,
- 35, 5,
- ARRAY_LENGTH (tris), tris);
- show_image (dest_img);
-
- pixman_image_unref (src_img);
- pixman_image_unref (dest_img);
- free (bits);
-
- return 0;
-}
+#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "../test/utils.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 200 +#define HEIGHT 200 + +#define POINT(x,y) \ + { pixman_double_to_fixed ((x)), pixman_double_to_fixed ((y)) } + + pixman_image_t *src_img, *dest_img; + pixman_triangle_t tris[4] = + { + { POINT (100, 100), POINT (10, 50), POINT (110, 10) }, + { POINT (100, 100), POINT (150, 10), POINT (200, 50) }, + { POINT (100, 100), POINT (10, 170), POINT (90, 175) }, + { POINT (100, 100), POINT (170, 150), POINT (120, 190) }, + }; + pixman_color_t color = { 0x4444, 0x4444, 0xffff, 0xffff }; + uint32_t *bits = malloc (WIDTH * HEIGHT * 4); + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + bits[i] = (i / HEIGHT) * 0x01010000; + + src_img = pixman_image_create_solid_fill (&color); + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4); + + pixman_composite_triangles (PIXMAN_OP_ATOP_REVERSE, + src_img, + dest_img, + PIXMAN_a8, + 200, 200, + -5, 5, + ARRAY_LENGTH (tris), tris); + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (dest_img); + free (bits); + + return 0; +} diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am index d016e9f25..be0826680 100644 --- a/pixman/pixman/Makefile.am +++ b/pixman/pixman/Makefile.am @@ -115,6 +115,7 @@ libpixman_arm_neon_la_SOURCES = \ pixman-arm-neon.c \ pixman-arm-common.h \ pixman-arm-neon-asm.S \ + pixman-arm-neon-asm-bilinear.S \ pixman-arm-neon-asm.h libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS) libpixman_arm_neon_la_LIBADD = $(DEP_LIBS) diff --git a/pixman/pixman/pixman-arm-common.h b/pixman/pixman/pixman-arm-common.h index 2c435041a..6cd8be506 100644 --- a/pixman/pixman/pixman-arm-common.h +++ b/pixman/pixman/pixman-arm-common.h @@ -1,409 +1,454 @@ -/*
- * Copyright © 2010 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-#ifndef PIXMAN_ARM_COMMON_H
-#define PIXMAN_ARM_COMMON_H
-
-#include "pixman-fast-path.h"
-
-/* Define some macros which can expand into proxy functions between
- * ARM assembly optimized functions and the rest of pixman fast path API.
- *
- * All the low level ARM assembly functions have to use ARM EABI
- * calling convention and take up to 8 arguments:
- * width, height, dst, dst_stride, src, src_stride, mask, mask_stride
- *
- * The arguments are ordered with the most important coming first (the
- * first 4 arguments are passed to function in registers, the rest are
- * on stack). The last arguments are optional, for example if the
- * function is not using mask, then 'mask' and 'mask_stride' can be
- * omitted when doing a function call.
- *
- * Arguments 'src' and 'mask' contain either a pointer to the top left
- * pixel of the composited rectangle or a pixel color value depending
- * on the function type. In the case of just a color value (solid source
- * or mask), the corresponding stride argument is unused.
- */
-
-#define SKIP_ZERO_SRC 1
-#define SKIP_ZERO_MASK 2
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \
- src_type, src_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_##cputype (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride); \
- \
-static void \
-cputype##_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- int32_t dst_stride, src_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- \
- pixman_composite_##name##_asm_##cputype (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride); \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_##cputype (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- uint32_t src); \
- \
-static void \
-cputype##_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- int32_t dst_stride; \
- uint32_t src; \
- \
- src = _pixman_image_get_solid ( \
- imp, src_image, dst_image->bits.format); \
- \
- if ((flags & SKIP_ZERO_SRC) && src == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- \
- pixman_composite_##name##_asm_##cputype (width, height, \
- dst_line, dst_stride, \
- src); \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \
- mask_type, mask_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_##cputype (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- uint32_t src, \
- int32_t unused, \
- mask_type *mask, \
- int32_t mask_stride); \
- \
-static void \
-cputype##_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- mask_type *mask_line; \
- int32_t dst_stride, mask_stride; \
- uint32_t src; \
- \
- src = _pixman_image_get_solid ( \
- imp, src_image, dst_image->bits.format); \
- \
- if ((flags & SKIP_ZERO_SRC) && src == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
- mask_stride, mask_line, mask_cnt); \
- \
- pixman_composite_##name##_asm_##cputype (width, height, \
- dst_line, dst_stride, \
- src, 0, \
- mask_line, mask_stride); \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \
- src_type, src_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_##cputype (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride, \
- uint32_t mask); \
- \
-static void \
-cputype##_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- int32_t dst_stride, src_stride; \
- uint32_t mask; \
- \
- mask = _pixman_image_get_solid ( \
- imp, mask_image, dst_image->bits.format); \
- \
- if ((flags & SKIP_ZERO_MASK) && mask == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- \
- pixman_composite_##name##_asm_##cputype (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride, \
- mask); \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \
- src_type, src_cnt, \
- mask_type, mask_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_##cputype (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride, \
- mask_type *mask, \
- int32_t mask_stride); \
- \
-static void \
-cputype##_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- mask_type *mask_line; \
- int32_t dst_stride, src_stride, mask_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
- mask_stride, mask_line, mask_cnt); \
- \
- pixman_composite_##name##_asm_##cputype (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride, \
- mask_line, mask_stride); \
-}
-
-#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op, \
- src_type, dst_type) \
-void \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \
- int32_t w, \
- dst_type * dst, \
- const src_type * src, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x); \
- \
-static force_inline void \
-scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \
- const src_type * ps, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t zero_src) \
-{ \
- pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \
- vx, unit_x);\
-} \
- \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op, \
- src_type, dst_type, COVER) \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op, \
- src_type, dst_type, NONE) \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op, \
- src_type, dst_type, PAD)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
-
-#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \
- src_type, dst_type) \
-void \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \
- int32_t w, \
- dst_type * dst, \
- const src_type * src, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- const uint8_t * mask); \
- \
-static force_inline void \
-scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \
- dst_type * pd, \
- const src_type * ps, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t zero_src) \
-{ \
- if ((flags & SKIP_ZERO_SRC) && zero_src) \
- return; \
- pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \
- vx, unit_x, \
- mask); \
-} \
- \
-FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op,\
- src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
-FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op,\
- src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
-FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op,\
- src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
-/*****************************************************************************/
-
-#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \
- src_type, dst_type) \
-void \
-pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
- dst_type * dst, \
- const src_type * top, \
- const src_type * bottom, \
- int wt, \
- int wb, \
- pixman_fixed_t x, \
- pixman_fixed_t ux, \
- int width); \
- \
-static force_inline void \
-scaled_bilinear_scanline_##cputype##_##name##_##op ( \
- dst_type * dst, \
- const uint32_t * mask, \
- const src_type * src_top, \
- const src_type * src_bottom, \
- int32_t w, \
- int wt, \
- int wb, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t zero_src) \
-{ \
- if ((flags & SKIP_ZERO_SRC) && zero_src) \
- return; \
- pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
- dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
-} \
- \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
- scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
- scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
- scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint32_t, dst_type, PAD, FALSE, FALSE)
-
-#endif
+/* + * Copyright © 2010 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +#ifndef PIXMAN_ARM_COMMON_H +#define PIXMAN_ARM_COMMON_H + +#include "pixman-fast-path.h" + +/* Define some macros which can expand into proxy functions between + * ARM assembly optimized functions and the rest of pixman fast path API. + * + * All the low level ARM assembly functions have to use ARM EABI + * calling convention and take up to 8 arguments: + * width, height, dst, dst_stride, src, src_stride, mask, mask_stride + * + * The arguments are ordered with the most important coming first (the + * first 4 arguments are passed to function in registers, the rest are + * on stack). The last arguments are optional, for example if the + * function is not using mask, then 'mask' and 'mask_stride' can be + * omitted when doing a function call. + * + * Arguments 'src' and 'mask' contain either a pointer to the top left + * pixel of the composited rectangle or a pixel color value depending + * on the function type. In the case of just a color value (solid source + * or mask), the corresponding stride argument is unused. + */ + +#define SKIP_ZERO_SRC 1 +#define SKIP_ZERO_MASK 2 + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + src_type *src_line; \ + int32_t dst_stride, src_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + uint32_t src); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + int32_t dst_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dst_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + uint32_t src, \ + int32_t unused, \ + mask_type *mask, \ + int32_t mask_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + mask_type *mask_line; \ + int32_t dst_stride, mask_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dst_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src, 0, \ + mask_line, mask_stride); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride, \ + uint32_t mask); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + src_type *src_line; \ + int32_t dst_stride, src_stride; \ + uint32_t mask; \ + \ + mask = _pixman_image_get_solid ( \ + imp, mask_image, dst_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_MASK) && mask == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride, \ + mask); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \ + src_type, src_cnt, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride, \ + mask_type *mask, \ + int32_t mask_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + src_type *src_line; \ + mask_type *mask_line; \ + int32_t dst_stride, src_stride, mask_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride, \ + mask_line, mask_stride); \ +} + +#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ + \ +static force_inline void \ +scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ + vx, unit_x);\ +} \ + \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, COVER) \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, NONE) \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, PAD) + +/* Provide entries for the fast path table */ +#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) + +#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + const uint8_t * mask); \ + \ +static force_inline void \ +scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \ + dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ + vx, unit_x, \ + mask); \ +} \ + \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, PAD, TRUE, FALSE) + +/* Provide entries for the fast path table */ +#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + +/*****************************************************************************/ + +#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst_type * dst, \ + const src_type * top, \ + const src_type * bottom, \ + int wt, \ + int wb, \ + pixman_fixed_t x, \ + pixman_fixed_t ux, \ + int width); \ + \ +static force_inline void \ +scaled_bilinear_scanline_##cputype##_##name##_##op ( \ + dst_type * dst, \ + const uint32_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, PAD, FALSE, FALSE) + + +#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst_type * dst, \ + const uint8_t * mask, \ + const src_type * top, \ + const src_type * bottom, \ + int wt, \ + int wb, \ + pixman_fixed_t x, \ + pixman_fixed_t ux, \ + int width); \ + \ +static force_inline void \ +scaled_bilinear_scanline_##cputype##_##name##_##op ( \ + dst_type * dst, \ + const uint8_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, COVER, TRUE, FALSE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, PAD, TRUE, FALSE) + +#endif diff --git a/pixman/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman/pixman-arm-neon-asm-bilinear.S new file mode 100644 index 000000000..9a4a1ffba --- /dev/null +++ b/pixman/pixman/pixman-arm-neon-asm-bilinear.S @@ -0,0 +1,768 @@ +/* + * Copyright © 2011 SCore Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + * Author: Taekyun Kim (tkq.kim@samsung.com) + */ + +/* + * This file contains scaled bilinear scanline functions implemented + * using older siarhei's bilinear macro template. + * + * << General scanline function procedures >> + * 1. bilinear interpolate source pixels + * 2. load mask pixels + * 3. load destination pixels + * 4. duplicate mask to fill whole register + * 5. interleave source & destination pixels + * 6. apply mask to source pixels + * 7. combine source & destination pixels + * 8, Deinterleave final result + * 9. store destination pixels + * + * All registers with single number (i.e. src0, tmp0) are 64-bits registers. + * Registers with double numbers(src01, dst01) are 128-bits registers. + * All temp registers can be used freely outside the code block. + * Assume that symbol(register .req) OUT and MASK are defined at caller of these macro blocks. + * + * TODOs + * Support 0565 pixel format + * Optimization for two and last pixel cases + * + * Remarks + * There can be lots of pipeline stalls inside code block and between code blocks. + * Further optimizations will be done by new macro templates using head/tail_head/tail scheme. + */ + +/* Prevent the stack from becoming executable for no reason... */ +#if defined(__linux__) && defined (__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +.text +.fpu neon +.arch armv7a +.object_arch armv4 +.eabi_attribute 10, 0 +.eabi_attribute 12, 0 +.arm +.altmacro + +#include "pixman-arm-neon-asm.h" + +/* + * Bilinear macros from pixman-arm-neon-asm.S + */ + +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname + .global fname +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: +.endm + +/* + * Bilinear scaling support code which tries to provide pixel fetching, color + * format conversion, and interpolation as separate macros which can be used + * as the basic building blocks for constructing bilinear scanline functions. + */ + +.macro bilinear_load_8888 reg1, reg2, tmp + mov TMP2, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #2 + add TMP2, BOTTOM, TMP2, asl #2 + vld1.32 {reg1}, [TMP1] + vld1.32 {reg2}, [TMP2] +.endm + +.macro bilinear_load_0565 reg1, reg2, tmp + mov TMP2, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + vld1.32 {reg2[0]}, [TMP1] + vld1.32 {reg2[1]}, [TMP2] + convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp +.endm + +.macro bilinear_load_and_vertical_interpolate_two_8888 \ + acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 + + bilinear_load_8888 reg1, reg2, tmp1 + vmull.u8 acc1, reg1, d28 + vmlal.u8 acc1, reg2, d29 + bilinear_load_8888 reg3, reg4, tmp2 + vmull.u8 acc2, reg3, d28 + vmlal.u8 acc2, reg4, d29 +.endm + +.macro bilinear_load_and_vertical_interpolate_four_8888 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + + bilinear_load_and_vertical_interpolate_two_8888 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi + bilinear_load_and_vertical_interpolate_two_8888 \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi +.endm + +.macro bilinear_load_and_vertical_interpolate_two_0565 \ + acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi + + mov TMP2, X, asr #16 + add X, X, UX + mov TMP4, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + add TMP3, TOP, TMP4, asl #1 + add TMP4, BOTTOM, TMP4, asl #1 + vld1.32 {acc2lo[0]}, [TMP1] + vld1.32 {acc2hi[0]}, [TMP3] + vld1.32 {acc2lo[1]}, [TMP2] + vld1.32 {acc2hi[1]}, [TMP4] + convert_0565_to_x888 acc2, reg3, reg2, reg1 + vzip.u8 reg1, reg3 + vzip.u8 reg2, reg4 + vzip.u8 reg3, reg4 + vzip.u8 reg1, reg2 + vmull.u8 acc1, reg1, d28 + vmlal.u8 acc1, reg2, d29 + vmull.u8 acc2, reg3, d28 + vmlal.u8 acc2, reg4, d29 +.endm + +.macro bilinear_load_and_vertical_interpolate_four_0565 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + + mov TMP2, X, asr #16 + add X, X, UX + mov TMP4, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + add TMP3, TOP, TMP4, asl #1 + add TMP4, BOTTOM, TMP4, asl #1 + vld1.32 {xacc2lo[0]}, [TMP1] + vld1.32 {xacc2hi[0]}, [TMP3] + vld1.32 {xacc2lo[1]}, [TMP2] + vld1.32 {xacc2hi[1]}, [TMP4] + convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 + mov TMP2, X, asr #16 + add X, X, UX + mov TMP4, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + add TMP3, TOP, TMP4, asl #1 + add TMP4, BOTTOM, TMP4, asl #1 + vld1.32 {yacc2lo[0]}, [TMP1] + vzip.u8 xreg1, xreg3 + vld1.32 {yacc2hi[0]}, [TMP3] + vzip.u8 xreg2, xreg4 + vld1.32 {yacc2lo[1]}, [TMP2] + vzip.u8 xreg3, xreg4 + vld1.32 {yacc2hi[1]}, [TMP4] + vzip.u8 xreg1, xreg2 + convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 + vmull.u8 xacc1, xreg1, d28 + vzip.u8 yreg1, yreg3 + vmlal.u8 xacc1, xreg2, d29 + vzip.u8 yreg2, yreg4 + vmull.u8 xacc2, xreg3, d28 + vzip.u8 yreg3, yreg4 + vmlal.u8 xacc2, xreg4, d29 + vzip.u8 yreg1, yreg2 + vmull.u8 yacc1, yreg1, d28 + vmlal.u8 yacc1, yreg2, d29 + vmull.u8 yacc2, yreg3, d28 + vmlal.u8 yacc2, yreg4, d29 +.endm + +.macro bilinear_store_8888 numpix, tmp1, tmp2 +.if numpix == 4 + vst1.32 {d0, d1}, [OUT]! +.elseif numpix == 2 + vst1.32 {d0}, [OUT]! +.elseif numpix == 1 + vst1.32 {d0[0]}, [OUT, :32]! +.else + .error bilinear_store_8888 numpix is unsupported +.endif +.endm + +.macro bilinear_store_0565 numpix, tmp1, tmp2 + vuzp.u8 d0, d1 + vuzp.u8 d2, d3 + vuzp.u8 d1, d3 + vuzp.u8 d0, d2 + convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2 +.if numpix == 4 + vst1.16 {d2}, [OUT]! +.elseif numpix == 2 + vst1.32 {d2[0]}, [OUT]! +.elseif numpix == 1 + vst1.16 {d2[0]}, [OUT]! +.else + .error bilinear_store_0565 numpix is unsupported +.endif +.endm + + +/* + * Macros for loading mask pixels into register 'mask'. + * vdup must be done in somewhere else. + */ +.macro bilinear_load_mask_x numpix, mask +.endm + +.macro bilinear_load_mask_8 numpix, mask +.if numpix == 4 + vld1.32 {mask[0]}, [MASK]! +.elseif numpix == 2 + vld1.16 {mask[0]}, [MASK]! +.elseif numpix == 1 + vld1.8 {mask[0]}, [MASK]! +.else + .error bilinear_load_mask_8 numpix is unsupported +.endif +.endm + +.macro bilinear_load_mask mask_fmt, numpix, mask + bilinear_load_mask_&mask_fmt numpix, mask +.endm + + +/* + * Macros for loading destination pixels into register 'dst0' and 'dst1'. + * Interleave should be done somewhere else. + */ +.macro bilinear_load_dst_0565_src numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst_8888_src numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01 +.if numpix == 4 + vld1.32 {dst0, dst1}, [OUT] +.elseif numpix == 2 + vld1.32 {dst0}, [OUT] +.elseif numpix == 1 + vld1.32 {dst0[0]}, [OUT] +.else + .error bilinear_load_dst_8888 numpix is unsupported +.endif +.endm + +.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01 + bilinear_load_dst_8888 numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01 + bilinear_load_dst_8888 numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01 + bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01 +.endm + +/* + * Macros for duplicating partially loaded mask to fill entire register. + * We will apply mask to interleaved source pixels, that is + * (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3) + * (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3) + * So, we need to duplicate loaded mask into whole register. + * + * For two pixel case + * (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) + * (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) + * We can do some optimizations for this including one pixel cases. + */ +.macro bilinear_duplicate_mask_x numpix, mask +.endm + +.macro bilinear_duplicate_mask_8 numpix, mask +.if numpix == 4 + vdup.32 mask, mask[0] +.elseif numpix == 2 + vdup.16 mask, mask[0] +.elseif numpix == 1 + vdup.8 mask, mask[0] +.else + .error bilinear_duplicate_mask_8 is unsupported +.endif +.endm + +.macro bilinear_duplicate_mask mask_fmt, numpix, mask + bilinear_duplicate_mask_&mask_fmt numpix, mask +.endm + +/* + * Macros for interleaving src and dst pixels to rrrr gggg bbbb aaaa form. + * Interleave should be done when maks is enabled or operator is 'over'. + */ +.macro bilinear_interleave src0, src1, dst0, dst1 + vuzp.8 src0, src1 + vuzp.8 dst0, dst1 + vuzp.8 src0, src1 + vuzp.8 dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_x_src \ + numpix, src0, src1, src01, dst0, dst1, dst01 +.endm + +.macro bilinear_interleave_src_dst_x_over \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_x_add \ + numpix, src0, src1, src01, dst0, dst1, dst01 +.endm + +.macro bilinear_interleave_src_dst_8_src \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_8_over \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_8_add \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst \ + mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave_src_dst_&mask_fmt&_&op \ + numpix, src0, src1, src01, dst0, dst1, dst01 +.endm + + +/* + * Macros for applying masks to src pixels. (see combine_mask_u() function) + * src, dst should be in interleaved form. + * mask register should be in form (m0, m1, m2, m3). + */ +.macro bilinear_apply_mask_to_src_x \ + numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 +.endm + +.macro bilinear_apply_mask_to_src_8 \ + numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 + + vmull.u8 tmp01, src0, mask + vmull.u8 tmp23, src1, mask + /* bubbles */ + vrshr.u16 tmp45, tmp01, #8 + vrshr.u16 tmp67, tmp23, #8 + /* bubbles */ + vraddhn.u16 src0, tmp45, tmp01 + vraddhn.u16 src1, tmp67, tmp23 +.endm + +.macro bilinear_apply_mask_to_src \ + mask_fmt, numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 + + bilinear_apply_mask_to_src_&mask_fmt \ + numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 +.endm + + +/* + * Macros for combining src and destination pixels. + * Interleave or not is depending on operator 'op'. + */ +.macro bilinear_combine_src \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 +.endm + +.macro bilinear_combine_over \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 + + vdup.32 tmp8, src1[1] + /* bubbles */ + vmvn.8 tmp8, tmp8 + /* bubbles */ + vmull.u8 tmp01, dst0, tmp8 + /* bubbles */ + vmull.u8 tmp23, dst1, tmp8 + /* bubbles */ + vrshr.u16 tmp45, tmp01, #8 + vrshr.u16 tmp67, tmp23, #8 + /* bubbles */ + vraddhn.u16 dst0, tmp45, tmp01 + vraddhn.u16 dst1, tmp67, tmp23 + /* bubbles */ + vqadd.u8 src01, dst01, src01 +.endm + +.macro bilinear_combine_add \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 + + vqadd.u8 src01, dst01, src01 +.endm + +.macro bilinear_combine \ + op, numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 + + bilinear_combine_&op \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 +.endm + +/* + * Macros for final deinterleaving of destination pixels if needed. + */ +.macro bilinear_deinterleave numpix, dst0, dst1, dst01 + vuzp.8 dst0, dst1 + /* bubbles */ + vuzp.8 dst0, dst1 +.endm + +.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01 + bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01 +.endm + + +.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op + bilinear_load_&src_fmt d0, d1, d2 + bilinear_load_mask mask_fmt, 1, d4 + bilinear_load_dst dst_fmt, op, 1, d18, d19, q9 + vmull.u8 q1, d0, d28 + vmlal.u8 q1, d1, d29 + vshr.u16 d30, d24, #8 + /* 4 cycles bubble */ + vshll.u16 q0, d2, #8 + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + /* 5 cycles bubble */ + bilinear_duplicate_mask mask_fmt, 1, d4 + vshrn.u32 d0, q0, #16 + /* 3 cycles bubble */ + vmovn.u16 d0, q0 + /* 1 cycle bubble */ + bilinear_interleave_src_dst \ + mask_fmt, op, 1, d0, d1, q0, d18, d19, q9 + bilinear_apply_mask_to_src \ + mask_fmt, 1, d0, d1, q0, d4, \ + q3, q8, q10, q11 + bilinear_combine \ + op, 1, d0, d1, q0, d18, d19, q9, \ + q3, q8, q10, q11, d5 + bilinear_deinterleave_dst mask_fmt, op, 1, d0, d1, q0 + bilinear_store_&dst_fmt 1, q2, q3 +.endm + +.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op + bilinear_load_and_vertical_interpolate_two_&src_fmt \ + q1, q11, d0, d1, d20, d21, d22, d23 + bilinear_load_mask mask_fmt, 2, d4 + bilinear_load_dst dst_fmt, op, 2, d18, d19, q9 + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vshll.u16 q0, d2, #8 + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + vshll.u16 q10, d22, #8 + vmlsl.u16 q10, d22, d31 + vmlal.u16 q10, d23, d31 + vshrn.u32 d30, q0, #16 + vshrn.u32 d31, q10, #16 + bilinear_duplicate_mask mask_fmt, 2, d4 + vmovn.u16 d0, q15 + bilinear_interleave_src_dst \ + mask_fmt, op, 2, d0, d1, q0, d18, d19, q9 + bilinear_apply_mask_to_src \ + mask_fmt, 2, d0, d1, q0, d4, \ + q3, q8, q10, q11 + bilinear_combine \ + op, 2, d0, d1, q0, d18, d19, q9, \ + q3, q8, q10, q11, d5 + bilinear_deinterleave_dst mask_fmt, op, 2, d0, d1, q0 + bilinear_store_&dst_fmt 2, q2, q3 +.endm + +.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op + bilinear_load_and_vertical_interpolate_four_&src_fmt \ + q1, q11, d0, d1, d20, d21, d22, d23 \ + q3, q9, d4, d5, d16, d17, d18, d19 + pld [TMP1, PF_OFFS] + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vshll.u16 q0, d2, #8 + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + vshll.u16 q10, d22, #8 + vmlsl.u16 q10, d22, d31 + vmlal.u16 q10, d23, d31 + vshr.u16 q15, q12, #8 + vshll.u16 q2, d6, #8 + vmlsl.u16 q2, d6, d30 + vmlal.u16 q2, d7, d30 + vshll.u16 q8, d18, #8 + bilinear_load_mask mask_fmt, 4, d30 + bilinear_load_dst dst_fmt, op, 4, d2, d3, q1 + pld [TMP2, PF_OFFS] + vmlsl.u16 q8, d18, d31 + vmlal.u16 q8, d19, d31 + vadd.u16 q12, q12, q13 + vshrn.u32 d0, q0, #16 + vshrn.u32 d1, q10, #16 + vshrn.u32 d4, q2, #16 + vshrn.u32 d5, q8, #16 + bilinear_duplicate_mask mask_fmt, 4, d30 + vmovn.u16 d0, q0 + vmovn.u16 d1, q2 + bilinear_interleave_src_dst \ + mask_fmt, op, 4, d0, d1, q0, d2, d3, q1 + bilinear_apply_mask_to_src \ + mask_fmt, 4, d0, d1, q0, d30, \ + q3, q8, q9, q10 + bilinear_combine \ + op, 4, d0, d1, q0, d2, d3, q1, \ + q3, q8, q9, q10, d22 + bilinear_deinterleave_dst mask_fmt, op, 4, d0, d1, q0 + bilinear_store_&dst_fmt 4, q2, q3 +.endm + +.macro generate_bilinear_scanline_func_src_dst \ + fname, src_fmt, dst_fmt, op, \ + bpp_shift, prefetch_distance + +pixman_asm_function fname + OUT .req r0 + TOP .req r1 + BOTTOM .req r2 + WT .req r3 + WB .req r4 + X .req r5 + UX .req r6 + WIDTH .req ip + TMP1 .req r3 + TMP2 .req r4 + PF_OFFS .req r7 + TMP3 .req r8 + TMP4 .req r9 + + mov ip, sp + push {r4, r5, r6, r7, r8, r9} + mov PF_OFFS, #prefetch_distance + ldmia ip, {WB, X, UX, WIDTH} + mul PF_OFFS, PF_OFFS, UX + + cmp WIDTH, #0 + ble 3f + + vdup.u16 q12, X + vdup.u16 q13, UX + vdup.u8 d28, WT + vdup.u8 d29, WB + vadd.u16 d25, d25, d26 + vadd.u16 q13, q13, q13 + + subs WIDTH, WIDTH, #4 + blt 1f + mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) +0: + bilinear_interpolate_four_pixels src_fmt, x, dst_fmt, op + subs WIDTH, WIDTH, #4 + bge 0b +1: + tst WIDTH, #2 + beq 2f + bilinear_interpolate_two_pixels src_fmt, x, dst_fmt, op +2: + tst WIDTH, #1 + beq 3f + bilinear_interpolate_last_pixel src_fmt, x, dst_fmt, op +3: + pop {r4, r5, r6, r7, r8, r9} + bx lr + + .unreq OUT + .unreq TOP + .unreq BOTTOM + .unreq WT + .unreq WB + .unreq X + .unreq UX + .unreq WIDTH + .unreq TMP1 + .unreq TMP2 + .unreq PF_OFFS + .unreq TMP3 + .unreq TMP4 +.endfunc + +.endm + +.macro generate_bilinear_scanline_func_src_a8_dst \ + fname, src_fmt, dst_fmt, op, \ + bpp_shift, prefetch_distance + +pixman_asm_function fname + OUT .req r0 + MASK .req r1 + TOP .req r2 + BOTTOM .req r3 + WT .req r4 + WB .req r5 + X .req r6 + UX .req r7 + WIDTH .req ip + TMP1 .req r4 + TMP2 .req r5 + PF_OFFS .req r8 + TMP3 .req r9 + TMP4 .req r10 + + mov ip, sp + push {r4, r5, r6, r7, r8, r9, r10, ip} + mov PF_OFFS, #prefetch_distance + ldmia ip, {WT, WB, X, UX, WIDTH} + mul PF_OFFS, PF_OFFS, UX + + cmp WIDTH, #0 + ble 3f + + vdup.u16 q12, X + vdup.u16 q13, UX + vdup.u8 d28, WT + vdup.u8 d29, WB + vadd.u16 d25, d25, d26 + vadd.u16 q13, q13, q13 + + subs WIDTH, WIDTH, #4 + blt 1f + mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) +0: + bilinear_interpolate_four_pixels src_fmt, 8, dst_fmt, op + subs WIDTH, WIDTH, #4 + bge 0b +1: + tst WIDTH, #2 + beq 2f + bilinear_interpolate_two_pixels src_fmt, 8, dst_fmt, op +2: + tst WIDTH, #1 + beq 3f + bilinear_interpolate_last_pixel src_fmt, 8, dst_fmt, op +3: + pop {r4, r5, r6, r7, r8, r9, r10, ip} + bx lr + + .unreq OUT + .unreq TOP + .unreq BOTTOM + .unreq WT + .unreq WB + .unreq X + .unreq UX + .unreq WIDTH + .unreq MASK + .unreq TMP1 + .unreq TMP2 + .unreq PF_OFFS + .unreq TMP3 + .unreq TMP4 +.endfunc + +.endm + +generate_bilinear_scanline_func_src_dst \ + pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \ + 8888, 8888, over, 2, 28 + +generate_bilinear_scanline_func_src_dst \ + pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \ + 8888, 8888, add, 2, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \ + 8888, 8888, src, 2, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \ + 8888, 0565, src, 2, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \ + 0565, 8888, src, 1, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \ + 0565, 0565, src, 1, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \ + 8888, 8888, over, 2, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \ + 8888, 8888, add, 2, 28 diff --git a/pixman/pixman/pixman-arm-neon-asm.S b/pixman/pixman/pixman-arm-neon-asm.S index 5e9fda34e..833f18c2e 100644 --- a/pixman/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman/pixman-arm-neon-asm.S @@ -1426,6 +1426,175 @@ generate_composite_function \ /******************************************************************************/ +.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_head + /* + * 'combine_mask_ca' replacement + * + * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A] + * mask in {d24, d25, d26} [B, G, R] + * output: updated src in {d0, d1, d2 } [B, G, R] + * updated mask in {d24, d25, d26} [B, G, R] + */ + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d9 + vmull.u8 q6, d26, d10 + vmull.u8 q9, d11, d25 + vmull.u8 q12, d11, d24 + vmull.u8 q13, d11, d26 + vrshr.u16 q8, q0, #8 + vrshr.u16 q10, q1, #8 + vrshr.u16 q11, q6, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q10 + vraddhn.u16 d2, q6, q11 + vrshr.u16 q11, q12, #8 + vrshr.u16 q8, q9, #8 + vrshr.u16 q6, q13, #8 + vraddhn.u16 d24, q12, q11 + vraddhn.u16 d25, q9, q8 + /* + * convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + * and put data into d16 - blue, d17 - green, d18 - red + */ + vshrn.u16 d17, q2, #3 + vshrn.u16 d18, q2, #8 + vraddhn.u16 d26, q13, q6 + vsli.u16 q2, q2, #5 + vsri.u8 d18, d18, #5 + vsri.u8 d17, d17, #6 + /* + * 'combine_over_ca' replacement + * + * output: updated dest in d16 - blue, d17 - green, d18 - red + */ + vmvn.8 q12, q12 + vshrn.u16 d16, q2, #2 + vmvn.8 d26, d26 + vmull.u8 q6, d16, d24 + vmull.u8 q7, d17, d25 + vmull.u8 q11, d18, d26 +.endm + +.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail + /* ... continue 'combine_over_ca' replacement */ + vrshr.u16 q10, q6, #8 + vrshr.u16 q14, q7, #8 + vrshr.u16 q15, q11, #8 + vraddhn.u16 d16, q10, q6 + vraddhn.u16 d17, q14, q7 + vraddhn.u16 d18, q15, q11 + vqadd.u8 q8, q0, q8 + vqadd.u8 d18, d2, d18 + /* + * convert the results in d16, d17, d18 to r5g6b5 and store + * them into {d28, d29} + */ + vshll.u8 q14, d18, #8 + vshll.u8 q10, d17, #8 + vshll.u8 q15, d16, #8 + vsri.u16 q14, q10, #5 + vsri.u16 q14, q15, #11 +.endm + +.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head + fetch_mask_pixblock + vrshr.u16 q10, q6, #8 + vrshr.u16 q14, q7, #8 + vld1.16 {d4, d5}, [DST_R, :128]! + vrshr.u16 q15, q11, #8 + vraddhn.u16 d16, q10, q6 + vraddhn.u16 d17, q14, q7 + vraddhn.u16 d22, q15, q11 + /* process_pixblock_head */ + /* + * 'combine_mask_ca' replacement + * + * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A] + * mask in {d24, d25, d26} [B, G, R] + * output: updated src in {d0, d1, d2 } [B, G, R] + * updated mask in {d24, d25, d26} [B, G, R] + */ + vmull.u8 q1, d25, d9 + vqadd.u8 q8, q0, q8 + vmull.u8 q0, d24, d8 + vqadd.u8 d22, d2, d22 + vmull.u8 q6, d26, d10 + /* + * convert the result in d16, d17, d22 to r5g6b5 and store + * it into {d28, d29} + */ + vshll.u8 q14, d22, #8 + vshll.u8 q10, d17, #8 + vshll.u8 q15, d16, #8 + vmull.u8 q9, d11, d25 + vsri.u16 q14, q10, #5 + vmull.u8 q12, d11, d24 + vmull.u8 q13, d11, d26 + vsri.u16 q14, q15, #11 + cache_preload 8, 8 + vrshr.u16 q8, q0, #8 + vrshr.u16 q10, q1, #8 + vrshr.u16 q11, q6, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q10 + vraddhn.u16 d2, q6, q11 + vrshr.u16 q11, q12, #8 + vrshr.u16 q8, q9, #8 + vrshr.u16 q6, q13, #8 + vraddhn.u16 d25, q9, q8 + /* + * convert 8 r5g6b5 pixel data from {d4, d5} to planar + * 8-bit format and put data into d16 - blue, d17 - green, + * d18 - red + */ + vshrn.u16 d17, q2, #3 + vshrn.u16 d18, q2, #8 + vraddhn.u16 d24, q12, q11 + vraddhn.u16 d26, q13, q6 + vsli.u16 q2, q2, #5 + vsri.u8 d18, d18, #5 + vsri.u8 d17, d17, #6 + /* + * 'combine_over_ca' replacement + * + * output: updated dest in d16 - blue, d17 - green, d18 - red + */ + vmvn.8 q12, q12 + vshrn.u16 d16, q2, #2 + vmvn.8 d26, d26 + vmull.u8 q7, d17, d25 + vmull.u8 q6, d16, d24 + vmull.u8 q11, d18, d26 + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_n_8888_0565_ca_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8888_0565_ca_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_0565_ca_asm_neon, 0, 32, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_0565_ca_init, \ + pixman_composite_over_n_8888_0565_ca_cleanup, \ + pixman_composite_over_n_8888_0565_ca_process_pixblock_head, \ + pixman_composite_over_n_8888_0565_ca_process_pixblock_tail, \ + pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head + +/******************************************************************************/ + .macro pixman_composite_in_n_8_process_pixblock_head /* expecting source data in {d0, d1, d2, d3} */ /* and destination data in {d4, d5, d6, d7} */ diff --git a/pixman/pixman/pixman-arm-neon.c b/pixman/pixman/pixman-arm-neon.c index 5213a2007..e5127a65f 100644 --- a/pixman/pixman/pixman-arm-neon.c +++ b/pixman/pixman/pixman-arm-neon.c @@ -1,460 +1,503 @@ -/*
- * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of ARM Ltd not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission. ARM Ltd makes no
- * representations about the suitability of this software for any purpose. It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author: Ian Rickards (ian.rickards@arm.com)
- * Author: Jonathan Morton (jonathan.morton@movial.com)
- * Author: Markku Vire (markku.vire@movial.com)
- *
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <string.h>
-#include "pixman-private.h"
-#include "pixman-arm-common.h"
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
- uint16_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
- uint8_t, 3, uint8_t, 3)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
- uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
- uint16_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
- uint8_t, 3, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
- uint8_t, 3, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
- uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
- uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
- uint8_t, 1, uint16_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
- uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
- uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
- uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
- uint8_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
- uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
- uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
- uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
- uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
- uint8_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
- uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
- uint16_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
- uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
- uint8_t, 1, uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
- uint16_t, 1, uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
- uint32_t, 1, uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
- uint32_t, 1, uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
- uint32_t, 1, uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
- uint32_t, 1, uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
- uint32_t, 1, uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
- uint16_t, 1, uint8_t, 1, uint16_t, 1)
-
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
- uint32_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
- uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
- uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
- uint16_t, uint32_t)
-
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
- OVER, uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
- OVER, uint16_t, uint16_t)
-
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
- uint32_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
- uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
- uint16_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
- uint16_t, uint16_t)
-
-void
-pixman_composite_src_n_8_asm_neon (int32_t w,
- int32_t h,
- uint8_t *dst,
- int32_t dst_stride,
- uint8_t src);
-
-void
-pixman_composite_src_n_0565_asm_neon (int32_t w,
- int32_t h,
- uint16_t *dst,
- int32_t dst_stride,
- uint16_t src);
-
-void
-pixman_composite_src_n_8888_asm_neon (int32_t w,
- int32_t h,
- uint32_t *dst,
- int32_t dst_stride,
- uint32_t src);
-
-static pixman_bool_t
-pixman_fill_neon (uint32_t *bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t _xor)
-{
- /* stride is always multiple of 32bit units in pixman */
- uint32_t byte_stride = stride * sizeof(uint32_t);
-
- switch (bpp)
- {
- case 8:
- pixman_composite_src_n_8_asm_neon (
- width,
- height,
- (uint8_t *)(((char *) bits) + y * byte_stride + x),
- byte_stride,
- _xor & 0xff);
- return TRUE;
- case 16:
- pixman_composite_src_n_0565_asm_neon (
- width,
- height,
- (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
- byte_stride / 2,
- _xor & 0xffff);
- return TRUE;
- case 32:
- pixman_composite_src_n_8888_asm_neon (
- width,
- height,
- (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
- byte_stride / 4,
- _xor);
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-static pixman_bool_t
-pixman_blt_neon (uint32_t *src_bits,
- uint32_t *dst_bits,
- int src_stride,
- int dst_stride,
- int src_bpp,
- int dst_bpp,
- int src_x,
- int src_y,
- int dst_x,
- int dst_y,
- int width,
- int height)
-{
- if (src_bpp != dst_bpp)
- return FALSE;
-
- switch (src_bpp)
- {
- case 16:
- pixman_composite_src_0565_0565_asm_neon (
- width, height,
- (uint16_t *)(((char *) dst_bits) +
- dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
- (uint16_t *)(((char *) src_bits) +
- src_y * src_stride * 4 + src_x * 2), src_stride * 2);
- return TRUE;
- case 32:
- pixman_composite_src_8888_8888_asm_neon (
- width, height,
- (uint32_t *)(((char *) dst_bits) +
- dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
- (uint32_t *)(((char *) src_bits) +
- src_y * src_stride * 4 + src_x * 4), src_stride);
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-static const pixman_fast_path_t arm_neon_fast_paths[] =
-{
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev),
- PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888),
- PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888),
- PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888),
- PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
- PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565),
- PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8),
- PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
- PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
- PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565),
- PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
- /* Note: NONE repeat is not supported yet */
- SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
- SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
-
- PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
-
- SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
- SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
- SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
-
- SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
- SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
-
- SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
- SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
-
- { PIXMAN_OP_NONE },
-};
-
-static pixman_bool_t
-arm_neon_blt (pixman_implementation_t *imp,
- uint32_t * src_bits,
- uint32_t * dst_bits,
- int src_stride,
- int dst_stride,
- int src_bpp,
- int dst_bpp,
- int src_x,
- int src_y,
- int dst_x,
- int dst_y,
- int width,
- int height)
-{
- if (!pixman_blt_neon (
- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height))
-
- {
- return _pixman_implementation_blt (
- imp->delegate,
- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height);
- }
-
- return TRUE;
-}
-
-static pixman_bool_t
-arm_neon_fill (pixman_implementation_t *imp,
- uint32_t * bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
- return TRUE;
-
- return _pixman_implementation_fill (
- imp->delegate, bits, stride, bpp, x, y, width, height, xor);
-}
-
-#define BIND_COMBINE_U(name) \
-void \
-pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \
- const uint32_t *dst, \
- const uint32_t *src, \
- const uint32_t *mask); \
- \
-void \
-pixman_composite_scanline_##name##_asm_neon (int32_t w, \
- const uint32_t *dst, \
- const uint32_t *src); \
- \
-static void \
-neon_combine_##name##_u (pixman_implementation_t *imp, \
- pixman_op_t op, \
- uint32_t * dest, \
- const uint32_t * src, \
- const uint32_t * mask, \
- int width) \
-{ \
- if (mask) \
- pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \
- src, mask); \
- else \
- pixman_composite_scanline_##name##_asm_neon (width, dest, src); \
-}
-
-BIND_COMBINE_U (over)
-BIND_COMBINE_U (add)
-BIND_COMBINE_U (out_reverse)
-
-pixman_implementation_t *
-_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback)
-{
- pixman_implementation_t *imp =
- _pixman_implementation_create (fallback, arm_neon_fast_paths);
-
- imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
- imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
- imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
-
- imp->blt = arm_neon_blt;
- imp->fill = arm_neon_fill;
-
- return imp;
-}
+/* + * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of ARM Ltd not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. ARM Ltd makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Ian Rickards (ian.rickards@arm.com) + * Author: Jonathan Morton (jonathan.morton@movial.com) + * Author: Markku Vire (markku.vire@movial.com) + * + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <string.h> +#include "pixman-private.h" +#include "pixman-arm-common.h" + +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888, + uint8_t, 3, uint8_t, 3) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888, + uint16_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev, + uint8_t, 3, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev, + uint8_t, 3, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, + uint8_t, 1, uint16_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, + uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, + uint8_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, + uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, + uint8_t, 1, uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, + uint32_t, 1, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, + uint32_t, 1, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565, + uint32_t, 1, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, + uint16_t, uint32_t) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, + OVER, uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, + OVER, uint16_t, uint16_t) + +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC, + uint16_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC, + uint16_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD, + uint32_t, uint32_t) + +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC, + uint16_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC, + uint16_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD, + uint32_t, uint32_t) + +void +pixman_composite_src_n_8_asm_neon (int32_t w, + int32_t h, + uint8_t *dst, + int32_t dst_stride, + uint8_t src); + +void +pixman_composite_src_n_0565_asm_neon (int32_t w, + int32_t h, + uint16_t *dst, + int32_t dst_stride, + uint16_t src); + +void +pixman_composite_src_n_8888_asm_neon (int32_t w, + int32_t h, + uint32_t *dst, + int32_t dst_stride, + uint32_t src); + +static pixman_bool_t +pixman_fill_neon (uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor) +{ + /* stride is always multiple of 32bit units in pixman */ + uint32_t byte_stride = stride * sizeof(uint32_t); + + switch (bpp) + { + case 8: + pixman_composite_src_n_8_asm_neon ( + width, + height, + (uint8_t *)(((char *) bits) + y * byte_stride + x), + byte_stride, + _xor & 0xff); + return TRUE; + case 16: + pixman_composite_src_n_0565_asm_neon ( + width, + height, + (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), + byte_stride / 2, + _xor & 0xffff); + return TRUE; + case 32: + pixman_composite_src_n_8888_asm_neon ( + width, + height, + (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), + byte_stride / 4, + _xor); + return TRUE; + default: + return FALSE; + } +} + +static pixman_bool_t +pixman_blt_neon (uint32_t *src_bits, + uint32_t *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (src_bpp != dst_bpp) + return FALSE; + + switch (src_bpp) + { + case 16: + pixman_composite_src_0565_0565_asm_neon ( + width, height, + (uint16_t *)(((char *) dst_bits) + + dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, + (uint16_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 2), src_stride * 2); + return TRUE; + case 32: + pixman_composite_src_8888_8888_asm_neon ( + width, height, + (uint32_t *)(((char *) dst_bits) + + dst_y * dst_stride * 4 + dst_x * 4), dst_stride, + (uint32_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 4), src_stride); + return TRUE; + default: + return FALSE; + } +} + +static const pixman_fast_path_t arm_neon_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, neon_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, neon_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888), + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), + + { PIXMAN_OP_NONE }, +}; + +static pixman_bool_t +arm_neon_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (!pixman_blt_neon ( + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height)) + + { + return _pixman_implementation_blt ( + imp->delegate, + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height); + } + + return TRUE; +} + +static pixman_bool_t +arm_neon_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor)) + return TRUE; + + return _pixman_implementation_fill ( + imp->delegate, bits, stride, bpp, x, y, width, height, xor); +} + +#define BIND_COMBINE_U(name) \ +void \ +pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \ + const uint32_t *dst, \ + const uint32_t *src, \ + const uint32_t *mask); \ + \ +void \ +pixman_composite_scanline_##name##_asm_neon (int32_t w, \ + const uint32_t *dst, \ + const uint32_t *src); \ + \ +static void \ +neon_combine_##name##_u (pixman_implementation_t *imp, \ + pixman_op_t op, \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ + int width) \ +{ \ + if (mask) \ + pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \ + src, mask); \ + else \ + pixman_composite_scanline_##name##_asm_neon (width, dest, src); \ +} + +BIND_COMBINE_U (over) +BIND_COMBINE_U (add) +BIND_COMBINE_U (out_reverse) + +pixman_implementation_t * +_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, arm_neon_fast_paths); + + imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u; + imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; + + imp->blt = arm_neon_blt; + imp->fill = arm_neon_fill; + + return imp; +} diff --git a/pixman/pixman/pixman-trap.c b/pixman/pixman/pixman-trap.c index 6e85acd49..c99f03ecc 100644 --- a/pixman/pixman/pixman-trap.c +++ b/pixman/pixman/pixman-trap.c @@ -1,657 +1,668 @@ -/*
- * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
- * Copyright © 2004 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission. Keith Packard makes no
- * representations about the suitability of this software for any purpose. It
- * is provided "as is" without express or implied warranty.
- *
- * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "pixman-private.h"
-
-/*
- * Compute the smallest value greater than or equal to y which is on a
- * grid row.
- */
-
-PIXMAN_EXPORT pixman_fixed_t
-pixman_sample_ceil_y (pixman_fixed_t y, int n)
-{
- pixman_fixed_t f = pixman_fixed_frac (y);
- pixman_fixed_t i = pixman_fixed_floor (y);
-
- f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
- Y_FRAC_FIRST (n);
-
- if (f > Y_FRAC_LAST (n))
- {
- if (pixman_fixed_to_int (i) == 0x7fff)
- {
- f = 0xffff; /* saturate */
- }
- else
- {
- f = Y_FRAC_FIRST (n);
- i += pixman_fixed_1;
- }
- }
- return (i | f);
-}
-
-/*
- * Compute the largest value strictly less than y which is on a
- * grid row.
- */
-PIXMAN_EXPORT pixman_fixed_t
-pixman_sample_floor_y (pixman_fixed_t y,
- int n)
-{
- pixman_fixed_t f = pixman_fixed_frac (y);
- pixman_fixed_t i = pixman_fixed_floor (y);
-
- f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
- Y_FRAC_FIRST (n);
-
- if (f < Y_FRAC_FIRST (n))
- {
- if (pixman_fixed_to_int (i) == 0x8000)
- {
- f = 0; /* saturate */
- }
- else
- {
- f = Y_FRAC_LAST (n);
- i -= pixman_fixed_1;
- }
- }
- return (i | f);
-}
-
-/*
- * Step an edge by any amount (including negative values)
- */
-PIXMAN_EXPORT void
-pixman_edge_step (pixman_edge_t *e,
- int n)
-{
- pixman_fixed_48_16_t ne;
-
- e->x += n * e->stepx;
-
- ne = e->e + n * (pixman_fixed_48_16_t) e->dx;
-
- if (n >= 0)
- {
- if (ne > 0)
- {
- int nx = (ne + e->dy - 1) / e->dy;
- e->e = ne - nx * (pixman_fixed_48_16_t) e->dy;
- e->x += nx * e->signdx;
- }
- }
- else
- {
- if (ne <= -e->dy)
- {
- int nx = (-ne) / e->dy;
- e->e = ne + nx * (pixman_fixed_48_16_t) e->dy;
- e->x -= nx * e->signdx;
- }
- }
-}
-
-/*
- * A private routine to initialize the multi-step
- * elements of an edge structure
- */
-static void
-_pixman_edge_multi_init (pixman_edge_t * e,
- int n,
- pixman_fixed_t *stepx_p,
- pixman_fixed_t *dx_p)
-{
- pixman_fixed_t stepx;
- pixman_fixed_48_16_t ne;
-
- ne = n * (pixman_fixed_48_16_t) e->dx;
- stepx = n * e->stepx;
-
- if (ne > 0)
- {
- int nx = ne / e->dy;
- ne -= nx * e->dy;
- stepx += nx * e->signdx;
- }
-
- *dx_p = ne;
- *stepx_p = stepx;
-}
-
-/*
- * Initialize one edge structure given the line endpoints and a
- * starting y value
- */
-PIXMAN_EXPORT void
-pixman_edge_init (pixman_edge_t *e,
- int n,
- pixman_fixed_t y_start,
- pixman_fixed_t x_top,
- pixman_fixed_t y_top,
- pixman_fixed_t x_bot,
- pixman_fixed_t y_bot)
-{
- pixman_fixed_t dx, dy;
-
- e->x = x_top;
- e->e = 0;
- dx = x_bot - x_top;
- dy = y_bot - y_top;
- e->dy = dy;
- e->dx = 0;
-
- if (dy)
- {
- if (dx >= 0)
- {
- e->signdx = 1;
- e->stepx = dx / dy;
- e->dx = dx % dy;
- e->e = -dy;
- }
- else
- {
- e->signdx = -1;
- e->stepx = -(-dx / dy);
- e->dx = -dx % dy;
- e->e = 0;
- }
-
- _pixman_edge_multi_init (e, STEP_Y_SMALL (n),
- &e->stepx_small, &e->dx_small);
-
- _pixman_edge_multi_init (e, STEP_Y_BIG (n),
- &e->stepx_big, &e->dx_big);
- }
- pixman_edge_step (e, y_start - y_top);
-}
-
-/*
- * Initialize one edge structure given a line, starting y value
- * and a pixel offset for the line
- */
-PIXMAN_EXPORT void
-pixman_line_fixed_edge_init (pixman_edge_t * e,
- int n,
- pixman_fixed_t y,
- const pixman_line_fixed_t *line,
- int x_off,
- int y_off)
-{
- pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off);
- pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off);
- const pixman_point_fixed_t *top, *bot;
-
- if (line->p1.y <= line->p2.y)
- {
- top = &line->p1;
- bot = &line->p2;
- }
- else
- {
- top = &line->p2;
- bot = &line->p1;
- }
-
- pixman_edge_init (e, n, y,
- top->x + x_off_fixed,
- top->y + y_off_fixed,
- bot->x + x_off_fixed,
- bot->y + y_off_fixed);
-}
-
-PIXMAN_EXPORT void
-pixman_add_traps (pixman_image_t * image,
- int16_t x_off,
- int16_t y_off,
- int ntrap,
- pixman_trap_t * traps)
-{
- int bpp;
- int height;
-
- pixman_fixed_t x_off_fixed;
- pixman_fixed_t y_off_fixed;
- pixman_edge_t l, r;
- pixman_fixed_t t, b;
-
- _pixman_image_validate (image);
-
- height = image->bits.height;
- bpp = PIXMAN_FORMAT_BPP (image->bits.format);
-
- x_off_fixed = pixman_int_to_fixed (x_off);
- y_off_fixed = pixman_int_to_fixed (y_off);
-
- while (ntrap--)
- {
- t = traps->top.y + y_off_fixed;
- if (t < 0)
- t = 0;
- t = pixman_sample_ceil_y (t, bpp);
-
- b = traps->bot.y + y_off_fixed;
- if (pixman_fixed_to_int (b) >= height)
- b = pixman_int_to_fixed (height) - 1;
- b = pixman_sample_floor_y (b, bpp);
-
- if (b >= t)
- {
- /* initialize edge walkers */
- pixman_edge_init (&l, bpp, t,
- traps->top.l + x_off_fixed,
- traps->top.y + y_off_fixed,
- traps->bot.l + x_off_fixed,
- traps->bot.y + y_off_fixed);
-
- pixman_edge_init (&r, bpp, t,
- traps->top.r + x_off_fixed,
- traps->top.y + y_off_fixed,
- traps->bot.r + x_off_fixed,
- traps->bot.y + y_off_fixed);
-
- pixman_rasterize_edges (image, &l, &r, t, b);
- }
-
- traps++;
- }
-}
-
-#if 0
-static void
-dump_image (pixman_image_t *image,
- const char * title)
-{
- int i, j;
-
- if (!image->type == BITS)
- printf ("%s is not a regular image\n", title);
-
- if (!image->bits.format == PIXMAN_a8)
- printf ("%s is not an alpha mask\n", title);
-
- printf ("\n\n\n%s: \n", title);
-
- for (i = 0; i < image->bits.height; ++i)
- {
- uint8_t *line =
- (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]);
-
- for (j = 0; j < image->bits.width; ++j)
- printf ("%c", line[j] ? '#' : ' ');
-
- printf ("\n");
- }
-}
-#endif
-
-PIXMAN_EXPORT void
-pixman_add_trapezoids (pixman_image_t * image,
- int16_t x_off,
- int y_off,
- int ntraps,
- const pixman_trapezoid_t *traps)
-{
- int i;
-
-#if 0
- dump_image (image, "before");
-#endif
-
- for (i = 0; i < ntraps; ++i)
- {
- const pixman_trapezoid_t *trap = &(traps[i]);
-
- if (!pixman_trapezoid_valid (trap))
- continue;
-
- pixman_rasterize_trapezoid (image, trap, x_off, y_off);
- }
-
-#if 0
- dump_image (image, "after");
-#endif
-}
-
-PIXMAN_EXPORT void
-pixman_rasterize_trapezoid (pixman_image_t * image,
- const pixman_trapezoid_t *trap,
- int x_off,
- int y_off)
-{
- int bpp;
- int height;
-
- pixman_fixed_t y_off_fixed;
- pixman_edge_t l, r;
- pixman_fixed_t t, b;
-
- return_if_fail (image->type == BITS);
-
- _pixman_image_validate (image);
-
- if (!pixman_trapezoid_valid (trap))
- return;
-
- height = image->bits.height;
- bpp = PIXMAN_FORMAT_BPP (image->bits.format);
-
- y_off_fixed = pixman_int_to_fixed (y_off);
-
- t = trap->top + y_off_fixed;
- if (t < 0)
- t = 0;
- t = pixman_sample_ceil_y (t, bpp);
-
- b = trap->bottom + y_off_fixed;
- if (pixman_fixed_to_int (b) >= height)
- b = pixman_int_to_fixed (height) - 1;
- b = pixman_sample_floor_y (b, bpp);
-
- if (b >= t)
- {
- /* initialize edge walkers */
- pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off);
- pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off);
-
- pixman_rasterize_edges (image, &l, &r, t, b);
- }
-}
-
-PIXMAN_EXPORT void
-pixman_composite_trapezoids (pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * dst,
- pixman_format_code_t mask_format,
- int x_src,
- int y_src,
- int x_dst,
- int y_dst,
- int n_traps,
- const pixman_trapezoid_t * traps)
-{
- int i;
-
- if (n_traps <= 0)
- return;
-
- _pixman_image_validate (src);
- _pixman_image_validate (dst);
-
- if (op == PIXMAN_OP_ADD &&
- (src->common.flags & FAST_PATH_IS_OPAQUE) &&
- (mask_format == dst->common.extended_format_code) &&
- !(dst->common.have_clip_region))
- {
- for (i = 0; i < n_traps; ++i)
- {
- const pixman_trapezoid_t *trap = &(traps[i]);
-
- if (!pixman_trapezoid_valid (trap))
- continue;
-
- pixman_rasterize_trapezoid (dst, trap, 0, 0);
- }
- }
- else
- {
- pixman_image_t *tmp;
- pixman_box32_t box;
- int x_rel, y_rel;
-
- box.x1 = INT32_MAX;
- box.y1 = INT32_MAX;
- box.x2 = INT32_MIN;
- box.y2 = INT32_MIN;
-
- for (i = 0; i < n_traps; ++i)
- {
- const pixman_trapezoid_t *trap = &(traps[i]);
- int y1, y2;
-
- if (!pixman_trapezoid_valid (trap))
- continue;
-
- y1 = pixman_fixed_to_int (trap->top);
- if (y1 < box.y1)
- box.y1 = y1;
-
- y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom));
- if (y2 > box.y2)
- box.y2 = y2;
-
-#define EXTEND_MIN(x) \
- if (pixman_fixed_to_int ((x)) < box.x1) \
- box.x1 = pixman_fixed_to_int ((x));
-#define EXTEND_MAX(x) \
- if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box.x2) \
- box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x)));
-
-#define EXTEND(x) \
- EXTEND_MIN(x); \
- EXTEND_MAX(x);
-
- EXTEND(trap->left.p1.x);
- EXTEND(trap->left.p2.x);
- EXTEND(trap->right.p1.x);
- EXTEND(trap->right.p2.x);
- }
-
- if (box.x1 >= box.x2 || box.y1 >= box.y2)
- return;
-
- tmp = pixman_image_create_bits (
- mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1);
-
- for (i = 0; i < n_traps; ++i)
- {
- const pixman_trapezoid_t *trap = &(traps[i]);
-
- if (!pixman_trapezoid_valid (trap))
- continue;
-
- pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1);
- }
-
- x_rel = box.x1 + x_src - x_dst;
- y_rel = box.y1 + y_src - y_dst;
-
- pixman_image_composite (op, src, tmp, dst,
- x_rel, y_rel, 0, 0, box.x1, box.y1,
- box.x2 - box.x1, box.y2 - box.y1);
-
- pixman_image_unref (tmp);
- }
-}
-
-static int
-greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b)
-{
- if (a->y == b->y)
- return a->x > b->x;
- return a->y > b->y;
-}
-
-/*
- * Note that the definition of this function is a bit odd because
- * of the X coordinate space (y increasing downwards).
- */
-static int
-clockwise (const pixman_point_fixed_t *ref,
- const pixman_point_fixed_t *a,
- const pixman_point_fixed_t *b)
-{
- pixman_point_fixed_t ad, bd;
-
- ad.x = a->x - ref->x;
- ad.y = a->y - ref->y;
- bd.x = b->x - ref->x;
- bd.y = b->y - ref->y;
-
- return ((pixman_fixed_32_32_t) bd.y * ad.x -
- (pixman_fixed_32_32_t) ad.y * bd.x) < 0;
-}
-
-static void
-triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps)
-{
- const pixman_point_fixed_t *top, *left, *right, *tmp;
-
- top = &tri->p1;
- left = &tri->p2;
- right = &tri->p3;
-
- if (greater_y (top, left))
- {
- tmp = left;
- left = top;
- top = tmp;
- }
-
- if (greater_y (top, right))
- {
- tmp = right;
- right = top;
- top = tmp;
- }
-
- if (clockwise (top, right, left))
- {
- tmp = right;
- right = left;
- left = tmp;
- }
-
- /*
- * Two cases:
- *
- * + +
- * / \ / \
- * / \ / \
- * / + + \
- * / -- -- \
- * / -- -- \
- * / --- --- \
- * +-- --+
- */
-
- traps->top = top->y;
- traps->left.p1 = *top;
- traps->left.p2 = *left;
- traps->right.p1 = *top;
- traps->right.p2 = *right;
-
- if (right->y < left->y)
- traps->bottom = right->y;
- else
- traps->bottom = left->y;
-
- traps++;
-
- *traps = *(traps - 1);
-
- if (right->y < left->y)
- {
- traps->top = right->y;
- traps->bottom = left->y;
- traps->right.p1 = *right;
- traps->right.p2 = *left;
- }
- else
- {
- traps->top = left->y;
- traps->bottom = right->y;
- traps->left.p1 = *left;
- traps->left.p2 = *right;
- }
-}
-
-static pixman_trapezoid_t *
-convert_triangles (int n_tris, const pixman_triangle_t *tris)
-{
- pixman_trapezoid_t *traps;
- int i;
-
- if (n_tris <= 0)
- return NULL;
-
- traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t));
- if (!traps)
- return NULL;
-
- for (i = 0; i < n_tris; ++i)
- triangle_to_trapezoids (&(tris[i]), traps + 2 * i);
-
- return traps;
-}
-
-PIXMAN_EXPORT void
-pixman_composite_triangles (pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * dst,
- pixman_format_code_t mask_format,
- int x_src,
- int y_src,
- int x_dst,
- int y_dst,
- int n_tris,
- const pixman_triangle_t * tris)
-{
- pixman_trapezoid_t *traps;
-
- if ((traps = convert_triangles (n_tris, tris)))
- {
- pixman_composite_trapezoids (op, src, dst, mask_format,
- x_src, y_src, x_dst, y_dst,
- n_tris * 2, traps);
-
- free (traps);
- }
-}
-
-PIXMAN_EXPORT void
-pixman_add_triangles (pixman_image_t *image,
- int32_t x_off,
- int32_t y_off,
- int n_tris,
- const pixman_triangle_t *tris)
-{
- pixman_trapezoid_t *traps;
-
- if ((traps = convert_triangles (n_tris, tris)))
- {
- pixman_add_trapezoids (image, x_off, y_off,
- n_tris * 2, traps);
-
- free (traps);
- }
-}
+/* + * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc. + * Copyright © 2004 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include "pixman-private.h" + +/* + * Compute the smallest value greater than or equal to y which is on a + * grid row. + */ + +PIXMAN_EXPORT pixman_fixed_t +pixman_sample_ceil_y (pixman_fixed_t y, int n) +{ + pixman_fixed_t f = pixman_fixed_frac (y); + pixman_fixed_t i = pixman_fixed_floor (y); + + f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + + Y_FRAC_FIRST (n); + + if (f > Y_FRAC_LAST (n)) + { + if (pixman_fixed_to_int (i) == 0x7fff) + { + f = 0xffff; /* saturate */ + } + else + { + f = Y_FRAC_FIRST (n); + i += pixman_fixed_1; + } + } + return (i | f); +} + +/* + * Compute the largest value strictly less than y which is on a + * grid row. + */ +PIXMAN_EXPORT pixman_fixed_t +pixman_sample_floor_y (pixman_fixed_t y, + int n) +{ + pixman_fixed_t f = pixman_fixed_frac (y); + pixman_fixed_t i = pixman_fixed_floor (y); + + f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + + Y_FRAC_FIRST (n); + + if (f < Y_FRAC_FIRST (n)) + { + if (pixman_fixed_to_int (i) == 0x8000) + { + f = 0; /* saturate */ + } + else + { + f = Y_FRAC_LAST (n); + i -= pixman_fixed_1; + } + } + return (i | f); +} + +/* + * Step an edge by any amount (including negative values) + */ +PIXMAN_EXPORT void +pixman_edge_step (pixman_edge_t *e, + int n) +{ + pixman_fixed_48_16_t ne; + + e->x += n * e->stepx; + + ne = e->e + n * (pixman_fixed_48_16_t) e->dx; + + if (n >= 0) + { + if (ne > 0) + { + int nx = (ne + e->dy - 1) / e->dy; + e->e = ne - nx * (pixman_fixed_48_16_t) e->dy; + e->x += nx * e->signdx; + } + } + else + { + if (ne <= -e->dy) + { + int nx = (-ne) / e->dy; + e->e = ne + nx * (pixman_fixed_48_16_t) e->dy; + e->x -= nx * e->signdx; + } + } +} + +/* + * A private routine to initialize the multi-step + * elements of an edge structure + */ +static void +_pixman_edge_multi_init (pixman_edge_t * e, + int n, + pixman_fixed_t *stepx_p, + pixman_fixed_t *dx_p) +{ + pixman_fixed_t stepx; + pixman_fixed_48_16_t ne; + + ne = n * (pixman_fixed_48_16_t) e->dx; + stepx = n * e->stepx; + + if (ne > 0) + { + int nx = ne / e->dy; + ne -= nx * e->dy; + stepx += nx * e->signdx; + } + + *dx_p = ne; + *stepx_p = stepx; +} + +/* + * Initialize one edge structure given the line endpoints and a + * starting y value + */ +PIXMAN_EXPORT void +pixman_edge_init (pixman_edge_t *e, + int n, + pixman_fixed_t y_start, + pixman_fixed_t x_top, + pixman_fixed_t y_top, + pixman_fixed_t x_bot, + pixman_fixed_t y_bot) +{ + pixman_fixed_t dx, dy; + + e->x = x_top; + e->e = 0; + dx = x_bot - x_top; + dy = y_bot - y_top; + e->dy = dy; + e->dx = 0; + + if (dy) + { + if (dx >= 0) + { + e->signdx = 1; + e->stepx = dx / dy; + e->dx = dx % dy; + e->e = -dy; + } + else + { + e->signdx = -1; + e->stepx = -(-dx / dy); + e->dx = -dx % dy; + e->e = 0; + } + + _pixman_edge_multi_init (e, STEP_Y_SMALL (n), + &e->stepx_small, &e->dx_small); + + _pixman_edge_multi_init (e, STEP_Y_BIG (n), + &e->stepx_big, &e->dx_big); + } + pixman_edge_step (e, y_start - y_top); +} + +/* + * Initialize one edge structure given a line, starting y value + * and a pixel offset for the line + */ +PIXMAN_EXPORT void +pixman_line_fixed_edge_init (pixman_edge_t * e, + int n, + pixman_fixed_t y, + const pixman_line_fixed_t *line, + int x_off, + int y_off) +{ + pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off); + pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off); + const pixman_point_fixed_t *top, *bot; + + if (line->p1.y <= line->p2.y) + { + top = &line->p1; + bot = &line->p2; + } + else + { + top = &line->p2; + bot = &line->p1; + } + + pixman_edge_init (e, n, y, + top->x + x_off_fixed, + top->y + y_off_fixed, + bot->x + x_off_fixed, + bot->y + y_off_fixed); +} + +PIXMAN_EXPORT void +pixman_add_traps (pixman_image_t * image, + int16_t x_off, + int16_t y_off, + int ntrap, + pixman_trap_t * traps) +{ + int bpp; + int height; + + pixman_fixed_t x_off_fixed; + pixman_fixed_t y_off_fixed; + pixman_edge_t l, r; + pixman_fixed_t t, b; + + _pixman_image_validate (image); + + height = image->bits.height; + bpp = PIXMAN_FORMAT_BPP (image->bits.format); + + x_off_fixed = pixman_int_to_fixed (x_off); + y_off_fixed = pixman_int_to_fixed (y_off); + + while (ntrap--) + { + t = traps->top.y + y_off_fixed; + if (t < 0) + t = 0; + t = pixman_sample_ceil_y (t, bpp); + + b = traps->bot.y + y_off_fixed; + if (pixman_fixed_to_int (b) >= height) + b = pixman_int_to_fixed (height) - 1; + b = pixman_sample_floor_y (b, bpp); + + if (b >= t) + { + /* initialize edge walkers */ + pixman_edge_init (&l, bpp, t, + traps->top.l + x_off_fixed, + traps->top.y + y_off_fixed, + traps->bot.l + x_off_fixed, + traps->bot.y + y_off_fixed); + + pixman_edge_init (&r, bpp, t, + traps->top.r + x_off_fixed, + traps->top.y + y_off_fixed, + traps->bot.r + x_off_fixed, + traps->bot.y + y_off_fixed); + + pixman_rasterize_edges (image, &l, &r, t, b); + } + + traps++; + } +} + +#if 0 +static void +dump_image (pixman_image_t *image, + const char * title) +{ + int i, j; + + if (!image->type == BITS) + printf ("%s is not a regular image\n", title); + + if (!image->bits.format == PIXMAN_a8) + printf ("%s is not an alpha mask\n", title); + + printf ("\n\n\n%s: \n", title); + + for (i = 0; i < image->bits.height; ++i) + { + uint8_t *line = + (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]); + + for (j = 0; j < image->bits.width; ++j) + printf ("%c", line[j] ? '#' : ' '); + + printf ("\n"); + } +} +#endif + +PIXMAN_EXPORT void +pixman_add_trapezoids (pixman_image_t * image, + int16_t x_off, + int y_off, + int ntraps, + const pixman_trapezoid_t *traps) +{ + int i; + +#if 0 + dump_image (image, "before"); +#endif + + for (i = 0; i < ntraps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (image, trap, x_off, y_off); + } + +#if 0 + dump_image (image, "after"); +#endif +} + +PIXMAN_EXPORT void +pixman_rasterize_trapezoid (pixman_image_t * image, + const pixman_trapezoid_t *trap, + int x_off, + int y_off) +{ + int bpp; + int height; + + pixman_fixed_t y_off_fixed; + pixman_edge_t l, r; + pixman_fixed_t t, b; + + return_if_fail (image->type == BITS); + + _pixman_image_validate (image); + + if (!pixman_trapezoid_valid (trap)) + return; + + height = image->bits.height; + bpp = PIXMAN_FORMAT_BPP (image->bits.format); + + y_off_fixed = pixman_int_to_fixed (y_off); + + t = trap->top + y_off_fixed; + if (t < 0) + t = 0; + t = pixman_sample_ceil_y (t, bpp); + + b = trap->bottom + y_off_fixed; + if (pixman_fixed_to_int (b) >= height) + b = pixman_int_to_fixed (height) - 1; + b = pixman_sample_floor_y (b, bpp); + + if (b >= t) + { + /* initialize edge walkers */ + pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off); + pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off); + + pixman_rasterize_edges (image, &l, &r, t, b); + } +} + +/* + * pixman_composite_trapezoids() + * + * All the trapezoids are conceptually rendered to an infinitely big image. + * The (0, 0) coordinates of this image are then aligned with the (x, y) + * coordinates of the source image, and then both images are aligned with + * the (x, y) coordinates of the destination. Then, in principle, compositing + * of these three images takes place across the entire destination. + * + * FIXME: However, there is currently a bug, where we restrict this compositing + * to the bounding box of the trapezoids. This is incorrect for operators such + * as SRC and IN where blank source pixels do have an effect on the destination. + */ +PIXMAN_EXPORT void +pixman_composite_trapezoids (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_traps, + const pixman_trapezoid_t * traps) +{ + int i; + + if (n_traps <= 0) + return; + + _pixman_image_validate (src); + _pixman_image_validate (dst); + + if (op == PIXMAN_OP_ADD && + (src->common.flags & FAST_PATH_IS_OPAQUE) && + (mask_format == dst->common.extended_format_code) && + !(dst->common.have_clip_region)) + { + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst); + } + } + else + { + pixman_image_t *tmp; + pixman_box32_t box; + + box.x1 = INT32_MAX; + box.y1 = INT32_MAX; + box.x2 = INT32_MIN; + box.y2 = INT32_MIN; + + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + int y1, y2; + + if (!pixman_trapezoid_valid (trap)) + continue; + + y1 = pixman_fixed_to_int (trap->top); + if (y1 < box.y1) + box.y1 = y1; + + y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom)); + if (y2 > box.y2) + box.y2 = y2; + +#define EXTEND_MIN(x) \ + if (pixman_fixed_to_int ((x)) < box.x1) \ + box.x1 = pixman_fixed_to_int ((x)); +#define EXTEND_MAX(x) \ + if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box.x2) \ + box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x))); + +#define EXTEND(x) \ + EXTEND_MIN(x); \ + EXTEND_MAX(x); + + EXTEND(trap->left.p1.x); + EXTEND(trap->left.p2.x); + EXTEND(trap->right.p1.x); + EXTEND(trap->right.p2.x); + } + + if (box.x1 >= box.x2 || box.y1 >= box.y2) + return; + + tmp = pixman_image_create_bits ( + mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1); + + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1); + } + + pixman_image_composite (op, src, tmp, dst, + x_src + box.x1, y_src + box.y1, + 0, 0, + x_dst + box.x1, y_dst + box.y1, + box.x2 - box.x1, box.y2 - box.y1); + + pixman_image_unref (tmp); + } +} + +static int +greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b) +{ + if (a->y == b->y) + return a->x > b->x; + return a->y > b->y; +} + +/* + * Note that the definition of this function is a bit odd because + * of the X coordinate space (y increasing downwards). + */ +static int +clockwise (const pixman_point_fixed_t *ref, + const pixman_point_fixed_t *a, + const pixman_point_fixed_t *b) +{ + pixman_point_fixed_t ad, bd; + + ad.x = a->x - ref->x; + ad.y = a->y - ref->y; + bd.x = b->x - ref->x; + bd.y = b->y - ref->y; + + return ((pixman_fixed_32_32_t) bd.y * ad.x - + (pixman_fixed_32_32_t) ad.y * bd.x) < 0; +} + +static void +triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps) +{ + const pixman_point_fixed_t *top, *left, *right, *tmp; + + top = &tri->p1; + left = &tri->p2; + right = &tri->p3; + + if (greater_y (top, left)) + { + tmp = left; + left = top; + top = tmp; + } + + if (greater_y (top, right)) + { + tmp = right; + right = top; + top = tmp; + } + + if (clockwise (top, right, left)) + { + tmp = right; + right = left; + left = tmp; + } + + /* + * Two cases: + * + * + + + * / \ / \ + * / \ / \ + * / + + \ + * / -- -- \ + * / -- -- \ + * / --- --- \ + * +-- --+ + */ + + traps->top = top->y; + traps->left.p1 = *top; + traps->left.p2 = *left; + traps->right.p1 = *top; + traps->right.p2 = *right; + + if (right->y < left->y) + traps->bottom = right->y; + else + traps->bottom = left->y; + + traps++; + + *traps = *(traps - 1); + + if (right->y < left->y) + { + traps->top = right->y; + traps->bottom = left->y; + traps->right.p1 = *right; + traps->right.p2 = *left; + } + else + { + traps->top = left->y; + traps->bottom = right->y; + traps->left.p1 = *left; + traps->left.p2 = *right; + } +} + +static pixman_trapezoid_t * +convert_triangles (int n_tris, const pixman_triangle_t *tris) +{ + pixman_trapezoid_t *traps; + int i; + + if (n_tris <= 0) + return NULL; + + traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t)); + if (!traps) + return NULL; + + for (i = 0; i < n_tris; ++i) + triangle_to_trapezoids (&(tris[i]), traps + 2 * i); + + return traps; +} + +PIXMAN_EXPORT void +pixman_composite_triangles (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_tris, + const pixman_triangle_t * tris) +{ + pixman_trapezoid_t *traps; + + if ((traps = convert_triangles (n_tris, tris))) + { + pixman_composite_trapezoids (op, src, dst, mask_format, + x_src, y_src, x_dst, y_dst, + n_tris * 2, traps); + + free (traps); + } +} + +PIXMAN_EXPORT void +pixman_add_triangles (pixman_image_t *image, + int32_t x_off, + int32_t y_off, + int n_tris, + const pixman_triangle_t *tris) +{ + pixman_trapezoid_t *traps; + + if ((traps = convert_triangles (n_tris, tris))) + { + pixman_add_trapezoids (image, x_off, y_off, + n_tris * 2, traps); + + free (traps); + } +} diff --git a/pixman/test/composite-traps-test.c b/pixman/test/composite-traps-test.c index 9ea7293ce..fa6d8a988 100644 --- a/pixman/test/composite-traps-test.c +++ b/pixman/test/composite-traps-test.c @@ -1,257 +1,257 @@ -/* Based loosely on scaling-test */
-
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "utils.h"
-
-#define MAX_SRC_WIDTH 48
-#define MAX_SRC_HEIGHT 48
-#define MAX_DST_WIDTH 48
-#define MAX_DST_HEIGHT 48
-#define MAX_STRIDE 4
-
-static pixman_format_code_t formats[] =
-{
- PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_r5g6b5, PIXMAN_a1, PIXMAN_a4
-};
-
-static pixman_format_code_t mask_formats[] =
-{
- PIXMAN_a1, PIXMAN_a4, PIXMAN_a8,
-};
-
-static pixman_op_t operators[] =
-{
- PIXMAN_OP_OVER, PIXMAN_OP_ADD, PIXMAN_OP_SRC, PIXMAN_OP_IN
-};
-
-#define RANDOM_ELT(array) \
- ((array)[lcg_rand_n(ARRAY_LENGTH((array)))])
-
-static void
-destroy_bits (pixman_image_t *image, void *data)
-{
- fence_free (data);
-}
-
-static pixman_fixed_t
-random_fixed (int n)
-{
- return lcg_rand_N (n << 16);
-}
-
-/*
- * Composite operation with pseudorandom images
- */
-uint32_t
-test_composite (int testnum,
- int verbose)
-{
- int i;
- pixman_image_t * src_img;
- pixman_image_t * dst_img;
- pixman_region16_t clip;
- int dst_width, dst_height;
- int dst_stride;
- int dst_x, dst_y;
- int dst_bpp;
- pixman_op_t op;
- uint32_t * dst_bits;
- uint32_t crc32;
- pixman_format_code_t mask_format, dst_format;
- pixman_trapezoid_t *traps;
- int src_x, src_y;
- int n_traps;
-
- static pixman_color_t colors[] =
- {
- { 0xffff, 0xffff, 0xffff, 0xffff },
- { 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0xabcd, 0xabcd, 0x0000, 0xabcd },
- { 0x0000, 0x0000, 0x0000, 0xffff },
- { 0x0101, 0x0101, 0x0101, 0x0101 },
- { 0x7777, 0x6666, 0x5555, 0x9999 },
- };
-
- FLOAT_REGS_CORRUPTION_DETECTOR_START ();
-
- lcg_srand (testnum);
-
- op = RANDOM_ELT (operators);
- mask_format = RANDOM_ELT (mask_formats);
-
- /* Create source image */
-
- if (lcg_rand_n (4) == 0)
- {
- src_img = pixman_image_create_solid_fill (
- &(colors[lcg_rand_n (ARRAY_LENGTH (colors))]));
-
- src_x = 10;
- src_y = 234;
- }
- else
- {
- pixman_format_code_t src_format = RANDOM_ELT(formats);
- int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8;
- int src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
- int src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
- int src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
- uint32_t *bits;
-
- src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
- src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
-
- src_stride = (src_stride + 3) & ~3;
-
- bits = (uint32_t *)make_random_bytes (src_stride * src_height);
-
- src_img = pixman_image_create_bits (
- src_format, src_width, src_height, bits, src_stride);
-
- pixman_image_set_destroy_function (src_img, destroy_bits, bits);
-
- if (lcg_rand_n (8) == 0)
- {
- pixman_box16_t clip_boxes[2];
- int n = lcg_rand_n (2) + 1;
-
- for (i = 0; i < n; i++)
- {
- clip_boxes[i].x1 = lcg_rand_n (src_width);
- clip_boxes[i].y1 = lcg_rand_n (src_height);
- clip_boxes[i].x2 =
- clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
- clip_boxes[i].y2 =
- clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
-
- if (verbose)
- {
- printf ("source clip box: [%d,%d-%d,%d]\n",
- clip_boxes[i].x1, clip_boxes[i].y1,
- clip_boxes[i].x2, clip_boxes[i].y2);
- }
- }
-
- pixman_region_init_rects (&clip, clip_boxes, n);
- pixman_image_set_clip_region (src_img, &clip);
- pixman_image_set_source_clipping (src_img, 1);
- pixman_region_fini (&clip);
- }
-
- image_endian_swap (src_img);
- }
-
- /* Create destination image */
- {
- dst_format = RANDOM_ELT(formats);
- dst_bpp = (PIXMAN_FORMAT_BPP (dst_format) + 7) / 8;
- dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
- dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
- dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
- dst_stride = (dst_stride + 3) & ~3;
-
- dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height);
-
- dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
- dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
-
- dst_img = pixman_image_create_bits (
- dst_format, dst_width, dst_height, dst_bits, dst_stride);
-
- image_endian_swap (dst_img);
- }
-
- /* Create traps */
- {
- int i;
-
- n_traps = lcg_rand_n (25);
- traps = fence_malloc (n_traps * sizeof (pixman_trapezoid_t));
-
- for (i = 0; i < n_traps; ++i)
- {
- pixman_trapezoid_t *t = &(traps[i]);
-
- t->top = random_fixed (MAX_DST_HEIGHT) - MAX_DST_HEIGHT / 2;
- t->bottom = t->top + random_fixed (MAX_DST_HEIGHT);
- t->left.p1.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2;
- t->left.p1.y = t->top - random_fixed (50);
- t->left.p2.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2;
- t->left.p2.y = t->bottom + random_fixed (50);
- t->right.p1.x = t->left.p1.x + random_fixed (MAX_DST_WIDTH);
- t->right.p1.y = t->top - random_fixed (50);
- t->right.p2.x = t->left.p2.x + random_fixed (MAX_DST_WIDTH);
- t->right.p2.y = t->bottom - random_fixed (50);
- }
- }
-
- if (lcg_rand_n (8) == 0)
- {
- pixman_box16_t clip_boxes[2];
- int n = lcg_rand_n (2) + 1;
- for (i = 0; i < n; i++)
- {
- clip_boxes[i].x1 = lcg_rand_n (dst_width);
- clip_boxes[i].y1 = lcg_rand_n (dst_height);
- clip_boxes[i].x2 =
- clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
- clip_boxes[i].y2 =
- clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
-
- if (verbose)
- {
- printf ("destination clip box: [%d,%d-%d,%d]\n",
- clip_boxes[i].x1, clip_boxes[i].y1,
- clip_boxes[i].x2, clip_boxes[i].y2);
- }
- }
- pixman_region_init_rects (&clip, clip_boxes, n);
- pixman_image_set_clip_region (dst_img, &clip);
- pixman_region_fini (&clip);
- }
-
- pixman_composite_trapezoids (op, src_img, dst_img, mask_format,
- src_x, src_y, dst_x, dst_y, n_traps, traps);
-
- if (dst_format == PIXMAN_x8r8g8b8)
- {
- /* ignore unused part */
- for (i = 0; i < dst_stride * dst_height / 4; i++)
- dst_bits[i] &= 0xFFFFFF;
- }
-
- image_endian_swap (dst_img);
-
- if (verbose)
- {
- int j;
-
- for (i = 0; i < dst_height; i++)
- {
- for (j = 0; j < dst_stride; j++)
- printf ("%02X ", *((uint8_t *)dst_bits + i * dst_stride + j));
-
- printf ("\n");
- }
- }
-
- crc32 = compute_crc32 (0, dst_bits, dst_stride * dst_height);
-
- fence_free (dst_bits);
-
- pixman_image_unref (src_img);
- pixman_image_unref (dst_img);
- fence_free (traps);
-
- FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
- return crc32;
-}
-
-int
-main (int argc, const char *argv[])
-{
- return fuzzer_test_main("composite traps", 40000, 0xA34F95C7,
- test_composite, argc, argv);
-}
+/* Based loosely on scaling-test */ + +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include "utils.h" + +#define MAX_SRC_WIDTH 48 +#define MAX_SRC_HEIGHT 48 +#define MAX_DST_WIDTH 48 +#define MAX_DST_HEIGHT 48 +#define MAX_STRIDE 4 + +static pixman_format_code_t formats[] = +{ + PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_r5g6b5, PIXMAN_a1, PIXMAN_a4 +}; + +static pixman_format_code_t mask_formats[] = +{ + PIXMAN_a1, PIXMAN_a4, PIXMAN_a8, +}; + +static pixman_op_t operators[] = +{ + PIXMAN_OP_OVER, PIXMAN_OP_ADD, PIXMAN_OP_SRC, PIXMAN_OP_IN +}; + +#define RANDOM_ELT(array) \ + ((array)[lcg_rand_n(ARRAY_LENGTH((array)))]) + +static void +destroy_bits (pixman_image_t *image, void *data) +{ + fence_free (data); +} + +static pixman_fixed_t +random_fixed (int n) +{ + return lcg_rand_N (n << 16); +} + +/* + * Composite operation with pseudorandom images + */ +uint32_t +test_composite (int testnum, + int verbose) +{ + int i; + pixman_image_t * src_img; + pixman_image_t * dst_img; + pixman_region16_t clip; + int dst_width, dst_height; + int dst_stride; + int dst_x, dst_y; + int dst_bpp; + pixman_op_t op; + uint32_t * dst_bits; + uint32_t crc32; + pixman_format_code_t mask_format, dst_format; + pixman_trapezoid_t *traps; + int src_x, src_y; + int n_traps; + + static pixman_color_t colors[] = + { + { 0xffff, 0xffff, 0xffff, 0xffff }, + { 0x0000, 0x0000, 0x0000, 0x0000 }, + { 0xabcd, 0xabcd, 0x0000, 0xabcd }, + { 0x0000, 0x0000, 0x0000, 0xffff }, + { 0x0101, 0x0101, 0x0101, 0x0101 }, + { 0x7777, 0x6666, 0x5555, 0x9999 }, + }; + + FLOAT_REGS_CORRUPTION_DETECTOR_START (); + + lcg_srand (testnum); + + op = RANDOM_ELT (operators); + mask_format = RANDOM_ELT (mask_formats); + + /* Create source image */ + + if (lcg_rand_n (4) == 0) + { + src_img = pixman_image_create_solid_fill ( + &(colors[lcg_rand_n (ARRAY_LENGTH (colors))])); + + src_x = 10; + src_y = 234; + } + else + { + pixman_format_code_t src_format = RANDOM_ELT(formats); + int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8; + int src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; + int src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; + int src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; + uint32_t *bits; + + src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); + + src_stride = (src_stride + 3) & ~3; + + bits = (uint32_t *)make_random_bytes (src_stride * src_height); + + src_img = pixman_image_create_bits ( + src_format, src_width, src_height, bits, src_stride); + + pixman_image_set_destroy_function (src_img, destroy_bits, bits); + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (src_width); + clip_boxes[i].y1 = lcg_rand_n (src_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("source clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (src_img, &clip); + pixman_image_set_source_clipping (src_img, 1); + pixman_region_fini (&clip); + } + + image_endian_swap (src_img); + } + + /* Create destination image */ + { + dst_format = RANDOM_ELT(formats); + dst_bpp = (PIXMAN_FORMAT_BPP (dst_format) + 7) / 8; + dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1; + dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1; + dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp; + dst_stride = (dst_stride + 3) & ~3; + + dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height); + + dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2); + + dst_img = pixman_image_create_bits ( + dst_format, dst_width, dst_height, dst_bits, dst_stride); + + image_endian_swap (dst_img); + } + + /* Create traps */ + { + int i; + + n_traps = lcg_rand_n (25); + traps = fence_malloc (n_traps * sizeof (pixman_trapezoid_t)); + + for (i = 0; i < n_traps; ++i) + { + pixman_trapezoid_t *t = &(traps[i]); + + t->top = random_fixed (MAX_DST_HEIGHT) - MAX_DST_HEIGHT / 2; + t->bottom = t->top + random_fixed (MAX_DST_HEIGHT); + t->left.p1.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2; + t->left.p1.y = t->top - random_fixed (50); + t->left.p2.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2; + t->left.p2.y = t->bottom + random_fixed (50); + t->right.p1.x = t->left.p1.x + random_fixed (MAX_DST_WIDTH); + t->right.p1.y = t->top - random_fixed (50); + t->right.p2.x = t->left.p2.x + random_fixed (MAX_DST_WIDTH); + t->right.p2.y = t->bottom - random_fixed (50); + } + } + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (dst_width); + clip_boxes[i].y1 = lcg_rand_n (dst_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("destination clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (dst_img, &clip); + pixman_region_fini (&clip); + } + + pixman_composite_trapezoids (op, src_img, dst_img, mask_format, + src_x, src_y, dst_x, dst_y, n_traps, traps); + + if (dst_format == PIXMAN_x8r8g8b8) + { + /* ignore unused part */ + for (i = 0; i < dst_stride * dst_height / 4; i++) + dst_bits[i] &= 0xFFFFFF; + } + + image_endian_swap (dst_img); + + if (verbose) + { + int j; + + for (i = 0; i < dst_height; i++) + { + for (j = 0; j < dst_stride; j++) + printf ("%02X ", *((uint8_t *)dst_bits + i * dst_stride + j)); + + printf ("\n"); + } + } + + crc32 = compute_crc32 (0, dst_bits, dst_stride * dst_height); + + fence_free (dst_bits); + + pixman_image_unref (src_img); + pixman_image_unref (dst_img); + fence_free (traps); + + FLOAT_REGS_CORRUPTION_DETECTOR_FINISH (); + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main("composite traps", 40000, 0xE3112106, + test_composite, argc, argv); +} |