From 96d6df5da9cddedf4931bf8e17f96e242467c661 Mon Sep 17 00:00:00 2001 From: marha Date: Wed, 27 Apr 2011 06:58:32 +0000 Subject: xserver libX11 libxtrans mesa pixman xkeyboard-config git update 27 Apr 2011 --- pixman/configure.ac | 51 +- pixman/demos/tri-test.c | 96 +- pixman/pixman/Makefile.am | 1 + pixman/pixman/pixman-arm-common.h | 863 +++++++++-------- pixman/pixman/pixman-arm-neon-asm-bilinear.S | 768 +++++++++++++++ pixman/pixman/pixman-arm-neon-asm.S | 169 ++++ pixman/pixman/pixman-arm-neon.c | 963 ++++++++++--------- pixman/pixman/pixman-trap.c | 1325 +++++++++++++------------- pixman/test/composite-traps-test.c | 514 +++++----- 9 files changed, 2897 insertions(+), 1853 deletions(-) create mode 100644 pixman/pixman/pixman-arm-neon-asm-bilinear.S (limited to 'pixman') diff --git a/pixman/configure.ac b/pixman/configure.ac index db9a883b6..ef8162fc3 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -54,7 +54,7 @@ AC_PREREQ([2.57]) m4_define([pixman_major], 0) m4_define([pixman_minor], 21) -m4_define([pixman_micro], 7) +m4_define([pixman_micro], 9) m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) @@ -119,7 +119,7 @@ for w in -Werror -errwarn; do [CFLAGS=$w], [int main(int c, char **v) { (void)c; (void)v; return 0; }], [WERROR=$w; yesno=yes], [yesno=no]) - AC_MSG_RESULT($_yesno) + AC_MSG_RESULT($yesno) fi done @@ -192,35 +192,43 @@ dnl ========================================================================= dnl OpenMP for the test suite? dnl -# Check for OpenMP support (only supported by autoconf >=2.62) +# Check for OpenMP support only when autoconf support that (require autoconf >=2.62) OPENMP_CFLAGS= m4_ifdef([AC_OPENMP], [AC_OPENMP]) -m4_define([openmp_test_program],[dnl -#include +if test "x$enable_openmp" = "xyes" && test "x$ac_cv_prog_c_openmp" = "xunsupported" ; then + AC_MSG_WARN([OpenMP support requested but found unsupported]) +fi -extern unsigned int lcg_seed; -#pragma omp threadprivate(lcg_seed) -unsigned int lcg_seed; +dnl May not fail to link without -Wall -Werror added +dnl So try to link only when openmp is supported +dnl ac_cv_prog_c_openmp is not defined when --disable-openmp is used +if test "x$ac_cv_prog_c_openmp" != "xunsupported" && test "x$ac_cv_prog_c_openmp" != "x"; then + m4_define([openmp_test_program],[dnl + #include -unsigned function(unsigned a, unsigned b) -{ + extern unsigned int lcg_seed; + #pragma omp threadprivate(lcg_seed) + unsigned int lcg_seed; + + unsigned function(unsigned a, unsigned b) + { lcg_seed ^= b; return ((a + b) ^ a ) + lcg_seed; -} + } -int main(int argc, char **argv) -{ + int main(int argc, char **argv) + { int i; int n1 = 0, n2 = argc; unsigned checksum = 0; int verbose = argv != NULL; unsigned (*test_function)(unsigned, unsigned); test_function = function; - #pragma omp parallel for reduction(+:checksum) default(none) \ + #pragma omp parallel for reduction(+:checksum) default(none) \ shared(n1, n2, test_function, verbose) for (i = n1; i < n2; i++) - { + { unsigned crc = test_function (i, 0); if (verbose) printf ("%d: %08X\n", i, crc); @@ -228,18 +236,17 @@ int main(int argc, char **argv) } printf("%u\n", checksum); return 0; -} -]) + } + ]) -PIXMAN_LINK_WITH_ENV( + PIXMAN_LINK_WITH_ENV( [CFLAGS="$OPENMP_CFLAGS" LDFLAGS="$OPENMP_CFLAGS"], [openmp_test_program], [have_openmp=yes], [have_openmp=no]) -if test "x$have_openmp" = "xyes"; then - AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite]) -else - OPENMP_CFLAGS="" + if test "x$have_openmp" = "xyes" ; then + AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite]) + fi fi AC_SUBST(OPENMP_CFLAGS) diff --git a/pixman/demos/tri-test.c b/pixman/demos/tri-test.c index 23ea18cb3..a71869a6a 100644 --- a/pixman/demos/tri-test.c +++ b/pixman/demos/tri-test.c @@ -1,48 +1,48 @@ -#include -#include -#include -#include "../test/utils.h" -#include "gtk-utils.h" - -int -main (int argc, char **argv) -{ -#define WIDTH 200 -#define HEIGHT 200 - -#define POINT(x,y) \ - { pixman_double_to_fixed ((x)), pixman_double_to_fixed ((y)) } - - pixman_image_t *src_img, *dest_img; - pixman_triangle_t tris[4] = - { - { POINT (100, 100), POINT (10, 50), POINT (110, 10) }, - { POINT (100, 100), POINT (150, 10), POINT (200, 50) }, - { POINT (100, 100), POINT (10, 170), POINT (90, 175) }, - { POINT (100, 100), POINT (170, 150), POINT (120, 190) }, - }; - pixman_color_t color = { 0x4444, 0x4444, 0xffff, 0xffff }; - uint32_t *bits = malloc (WIDTH * HEIGHT * 4); - int i; - - for (i = 0; i < WIDTH * HEIGHT; ++i) - bits[i] = (i / HEIGHT) * 0x01010000; - - src_img = pixman_image_create_solid_fill (&color); - dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4); - - pixman_composite_triangles (PIXMAN_OP_ATOP_REVERSE, - src_img, - dest_img, - PIXMAN_a8, - 200, 200, - 35, 5, - ARRAY_LENGTH (tris), tris); - show_image (dest_img); - - pixman_image_unref (src_img); - pixman_image_unref (dest_img); - free (bits); - - return 0; -} +#include +#include +#include +#include "../test/utils.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 200 +#define HEIGHT 200 + +#define POINT(x,y) \ + { pixman_double_to_fixed ((x)), pixman_double_to_fixed ((y)) } + + pixman_image_t *src_img, *dest_img; + pixman_triangle_t tris[4] = + { + { POINT (100, 100), POINT (10, 50), POINT (110, 10) }, + { POINT (100, 100), POINT (150, 10), POINT (200, 50) }, + { POINT (100, 100), POINT (10, 170), POINT (90, 175) }, + { POINT (100, 100), POINT (170, 150), POINT (120, 190) }, + }; + pixman_color_t color = { 0x4444, 0x4444, 0xffff, 0xffff }; + uint32_t *bits = malloc (WIDTH * HEIGHT * 4); + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + bits[i] = (i / HEIGHT) * 0x01010000; + + src_img = pixman_image_create_solid_fill (&color); + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4); + + pixman_composite_triangles (PIXMAN_OP_ATOP_REVERSE, + src_img, + dest_img, + PIXMAN_a8, + 200, 200, + -5, 5, + ARRAY_LENGTH (tris), tris); + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (dest_img); + free (bits); + + return 0; +} diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am index d016e9f25..be0826680 100644 --- a/pixman/pixman/Makefile.am +++ b/pixman/pixman/Makefile.am @@ -115,6 +115,7 @@ libpixman_arm_neon_la_SOURCES = \ pixman-arm-neon.c \ pixman-arm-common.h \ pixman-arm-neon-asm.S \ + pixman-arm-neon-asm-bilinear.S \ pixman-arm-neon-asm.h libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS) libpixman_arm_neon_la_LIBADD = $(DEP_LIBS) diff --git a/pixman/pixman/pixman-arm-common.h b/pixman/pixman/pixman-arm-common.h index 2c435041a..6cd8be506 100644 --- a/pixman/pixman/pixman-arm-common.h +++ b/pixman/pixman/pixman-arm-common.h @@ -1,409 +1,454 @@ -/* - * Copyright © 2010 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -#ifndef PIXMAN_ARM_COMMON_H -#define PIXMAN_ARM_COMMON_H - -#include "pixman-fast-path.h" - -/* Define some macros which can expand into proxy functions between - * ARM assembly optimized functions and the rest of pixman fast path API. - * - * All the low level ARM assembly functions have to use ARM EABI - * calling convention and take up to 8 arguments: - * width, height, dst, dst_stride, src, src_stride, mask, mask_stride - * - * The arguments are ordered with the most important coming first (the - * first 4 arguments are passed to function in registers, the rest are - * on stack). The last arguments are optional, for example if the - * function is not using mask, then 'mask' and 'mask_stride' can be - * omitted when doing a function call. - * - * Arguments 'src' and 'mask' contain either a pointer to the top left - * pixel of the composited rectangle or a pixel color value depending - * on the function type. In the case of just a color value (solid source - * or mask), the corresponding stride argument is unused. - */ - -#define SKIP_ZERO_SRC 1 -#define SKIP_ZERO_MASK 2 - -#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \ - src_type, src_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - src_type *src, \ - int32_t src_stride); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type *dst_line; \ - src_type *src_line; \ - int32_t dst_stride, src_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src_line, src_stride); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - uint32_t src); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type *dst_line; \ - int32_t dst_stride; \ - uint32_t src; \ - \ - src = _pixman_image_get_solid ( \ - imp, src_image, dst_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_SRC) && src == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \ - mask_type, mask_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - uint32_t src, \ - int32_t unused, \ - mask_type *mask, \ - int32_t mask_stride); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type *dst_line; \ - mask_type *mask_line; \ - int32_t dst_stride, mask_stride; \ - uint32_t src; \ - \ - src = _pixman_image_get_solid ( \ - imp, src_image, dst_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_SRC) && src == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ - mask_stride, mask_line, mask_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src, 0, \ - mask_line, mask_stride); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \ - src_type, src_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - src_type *src, \ - int32_t src_stride, \ - uint32_t mask); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type *dst_line; \ - src_type *src_line; \ - int32_t dst_stride, src_stride; \ - uint32_t mask; \ - \ - mask = _pixman_image_get_solid ( \ - imp, mask_image, dst_image->bits.format); \ - \ - if ((flags & SKIP_ZERO_MASK) && mask == 0) \ - return; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src_line, src_stride, \ - mask); \ -} - -#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \ - src_type, src_cnt, \ - mask_type, mask_cnt, \ - dst_type, dst_cnt) \ -void \ -pixman_composite_##name##_asm_##cputype (int32_t w, \ - int32_t h, \ - dst_type *dst, \ - int32_t dst_stride, \ - src_type *src, \ - int32_t src_stride, \ - mask_type *mask, \ - int32_t mask_stride); \ - \ -static void \ -cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type *dst_line; \ - src_type *src_line; \ - mask_type *mask_line; \ - int32_t dst_stride, src_stride, mask_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ - dst_stride, dst_line, dst_cnt); \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ - src_stride, src_line, src_cnt); \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ - mask_stride, mask_line, mask_cnt); \ - \ - pixman_composite_##name##_asm_##cputype (width, height, \ - dst_line, dst_stride, \ - src_line, src_stride, \ - mask_line, mask_stride); \ -} - -#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ - int32_t w, \ - dst_type * dst, \ - const src_type * src, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x); \ - \ -static force_inline void \ -scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \ - const src_type * ps, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ - vx, unit_x);\ -} \ - \ -FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op, \ - src_type, dst_type, COVER) \ -FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op, \ - src_type, dst_type, NONE) \ -FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op, \ - src_type, dst_type, PAD) - -/* Provide entries for the fast path table */ -#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) - -#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ - int32_t w, \ - dst_type * dst, \ - const src_type * src, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - const uint8_t * mask); \ - \ -static force_inline void \ -scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \ - dst_type * pd, \ - const src_type * ps, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - if ((flags & SKIP_ZERO_SRC) && zero_src) \ - return; \ - pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ - vx, unit_x, \ - mask); \ -} \ - \ -FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op,\ - src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ -FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op,\ - src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ -FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ - scaled_nearest_scanline_##cputype##_##name##_##op,\ - src_type, uint8_t, dst_type, PAD, TRUE, FALSE) - -/* Provide entries for the fast path table */ -#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) - -/*****************************************************************************/ - -#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \ - src_type, dst_type) \ -void \ -pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ - dst_type * dst, \ - const src_type * top, \ - const src_type * bottom, \ - int wt, \ - int wb, \ - pixman_fixed_t x, \ - pixman_fixed_t ux, \ - int width); \ - \ -static force_inline void \ -scaled_bilinear_scanline_##cputype##_##name##_##op ( \ - dst_type * dst, \ - const uint32_t * mask, \ - const src_type * src_top, \ - const src_type * src_bottom, \ - int32_t w, \ - int wt, \ - int wb, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t zero_src) \ -{ \ - if ((flags & SKIP_ZERO_SRC) && zero_src) \ - return; \ - pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ - dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \ -} \ - \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \ -FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ - scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint32_t, dst_type, PAD, FALSE, FALSE) - -#endif +/* + * Copyright © 2010 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +#ifndef PIXMAN_ARM_COMMON_H +#define PIXMAN_ARM_COMMON_H + +#include "pixman-fast-path.h" + +/* Define some macros which can expand into proxy functions between + * ARM assembly optimized functions and the rest of pixman fast path API. + * + * All the low level ARM assembly functions have to use ARM EABI + * calling convention and take up to 8 arguments: + * width, height, dst, dst_stride, src, src_stride, mask, mask_stride + * + * The arguments are ordered with the most important coming first (the + * first 4 arguments are passed to function in registers, the rest are + * on stack). The last arguments are optional, for example if the + * function is not using mask, then 'mask' and 'mask_stride' can be + * omitted when doing a function call. + * + * Arguments 'src' and 'mask' contain either a pointer to the top left + * pixel of the composited rectangle or a pixel color value depending + * on the function type. In the case of just a color value (solid source + * or mask), the corresponding stride argument is unused. + */ + +#define SKIP_ZERO_SRC 1 +#define SKIP_ZERO_MASK 2 + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + src_type *src_line; \ + int32_t dst_stride, src_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + uint32_t src); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + int32_t dst_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dst_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + uint32_t src, \ + int32_t unused, \ + mask_type *mask, \ + int32_t mask_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + mask_type *mask_line; \ + int32_t dst_stride, mask_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dst_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src, 0, \ + mask_line, mask_stride); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride, \ + uint32_t mask); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + src_type *src_line; \ + int32_t dst_stride, src_stride; \ + uint32_t mask; \ + \ + mask = _pixman_image_get_solid ( \ + imp, mask_image, dst_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_MASK) && mask == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride, \ + mask); \ +} + +#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \ + src_type, src_cnt, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_##cputype (int32_t w, \ + int32_t h, \ + dst_type *dst, \ + int32_t dst_stride, \ + src_type *src, \ + int32_t src_stride, \ + mask_type *mask, \ + int32_t mask_stride); \ + \ +static void \ +cputype##_composite_##name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type *dst_line; \ + src_type *src_line; \ + mask_type *mask_line; \ + int32_t dst_stride, src_stride, mask_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + pixman_composite_##name##_asm_##cputype (width, height, \ + dst_line, dst_stride, \ + src_line, src_stride, \ + mask_line, mask_stride); \ +} + +#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ + \ +static force_inline void \ +scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ + vx, unit_x);\ +} \ + \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, COVER) \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, NONE) \ +FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op, \ + src_type, dst_type, PAD) + +/* Provide entries for the fast path table */ +#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) + +#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + const uint8_t * mask); \ + \ +static force_inline void \ +scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \ + dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ + vx, unit_x, \ + mask); \ +} \ + \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, PAD, TRUE, FALSE) + +/* Provide entries for the fast path table */ +#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + +/*****************************************************************************/ + +#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst_type * dst, \ + const src_type * top, \ + const src_type * bottom, \ + int wt, \ + int wb, \ + pixman_fixed_t x, \ + pixman_fixed_t ux, \ + int width); \ + \ +static force_inline void \ +scaled_bilinear_scanline_##cputype##_##name##_##op ( \ + dst_type * dst, \ + const uint32_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, PAD, FALSE, FALSE) + + +#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst_type * dst, \ + const uint8_t * mask, \ + const src_type * top, \ + const src_type * bottom, \ + int wt, \ + int wb, \ + pixman_fixed_t x, \ + pixman_fixed_t ux, \ + int width); \ + \ +static force_inline void \ +scaled_bilinear_scanline_##cputype##_##name##_##op ( \ + dst_type * dst, \ + const uint8_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ + dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, COVER, TRUE, FALSE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, PAD, TRUE, FALSE) + +#endif diff --git a/pixman/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman/pixman-arm-neon-asm-bilinear.S new file mode 100644 index 000000000..9a4a1ffba --- /dev/null +++ b/pixman/pixman/pixman-arm-neon-asm-bilinear.S @@ -0,0 +1,768 @@ +/* + * Copyright © 2011 SCore Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + * Author: Taekyun Kim (tkq.kim@samsung.com) + */ + +/* + * This file contains scaled bilinear scanline functions implemented + * using older siarhei's bilinear macro template. + * + * << General scanline function procedures >> + * 1. bilinear interpolate source pixels + * 2. load mask pixels + * 3. load destination pixels + * 4. duplicate mask to fill whole register + * 5. interleave source & destination pixels + * 6. apply mask to source pixels + * 7. combine source & destination pixels + * 8, Deinterleave final result + * 9. store destination pixels + * + * All registers with single number (i.e. src0, tmp0) are 64-bits registers. + * Registers with double numbers(src01, dst01) are 128-bits registers. + * All temp registers can be used freely outside the code block. + * Assume that symbol(register .req) OUT and MASK are defined at caller of these macro blocks. + * + * TODOs + * Support 0565 pixel format + * Optimization for two and last pixel cases + * + * Remarks + * There can be lots of pipeline stalls inside code block and between code blocks. + * Further optimizations will be done by new macro templates using head/tail_head/tail scheme. + */ + +/* Prevent the stack from becoming executable for no reason... */ +#if defined(__linux__) && defined (__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + +.text +.fpu neon +.arch armv7a +.object_arch armv4 +.eabi_attribute 10, 0 +.eabi_attribute 12, 0 +.arm +.altmacro + +#include "pixman-arm-neon-asm.h" + +/* + * Bilinear macros from pixman-arm-neon-asm.S + */ + +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname + .global fname +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: +.endm + +/* + * Bilinear scaling support code which tries to provide pixel fetching, color + * format conversion, and interpolation as separate macros which can be used + * as the basic building blocks for constructing bilinear scanline functions. + */ + +.macro bilinear_load_8888 reg1, reg2, tmp + mov TMP2, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #2 + add TMP2, BOTTOM, TMP2, asl #2 + vld1.32 {reg1}, [TMP1] + vld1.32 {reg2}, [TMP2] +.endm + +.macro bilinear_load_0565 reg1, reg2, tmp + mov TMP2, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + vld1.32 {reg2[0]}, [TMP1] + vld1.32 {reg2[1]}, [TMP2] + convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp +.endm + +.macro bilinear_load_and_vertical_interpolate_two_8888 \ + acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 + + bilinear_load_8888 reg1, reg2, tmp1 + vmull.u8 acc1, reg1, d28 + vmlal.u8 acc1, reg2, d29 + bilinear_load_8888 reg3, reg4, tmp2 + vmull.u8 acc2, reg3, d28 + vmlal.u8 acc2, reg4, d29 +.endm + +.macro bilinear_load_and_vertical_interpolate_four_8888 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + + bilinear_load_and_vertical_interpolate_two_8888 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi + bilinear_load_and_vertical_interpolate_two_8888 \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi +.endm + +.macro bilinear_load_and_vertical_interpolate_two_0565 \ + acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi + + mov TMP2, X, asr #16 + add X, X, UX + mov TMP4, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + add TMP3, TOP, TMP4, asl #1 + add TMP4, BOTTOM, TMP4, asl #1 + vld1.32 {acc2lo[0]}, [TMP1] + vld1.32 {acc2hi[0]}, [TMP3] + vld1.32 {acc2lo[1]}, [TMP2] + vld1.32 {acc2hi[1]}, [TMP4] + convert_0565_to_x888 acc2, reg3, reg2, reg1 + vzip.u8 reg1, reg3 + vzip.u8 reg2, reg4 + vzip.u8 reg3, reg4 + vzip.u8 reg1, reg2 + vmull.u8 acc1, reg1, d28 + vmlal.u8 acc1, reg2, d29 + vmull.u8 acc2, reg3, d28 + vmlal.u8 acc2, reg4, d29 +.endm + +.macro bilinear_load_and_vertical_interpolate_four_0565 \ + xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ + yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + + mov TMP2, X, asr #16 + add X, X, UX + mov TMP4, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + add TMP3, TOP, TMP4, asl #1 + add TMP4, BOTTOM, TMP4, asl #1 + vld1.32 {xacc2lo[0]}, [TMP1] + vld1.32 {xacc2hi[0]}, [TMP3] + vld1.32 {xacc2lo[1]}, [TMP2] + vld1.32 {xacc2hi[1]}, [TMP4] + convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 + mov TMP2, X, asr #16 + add X, X, UX + mov TMP4, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP2, asl #1 + add TMP2, BOTTOM, TMP2, asl #1 + add TMP3, TOP, TMP4, asl #1 + add TMP4, BOTTOM, TMP4, asl #1 + vld1.32 {yacc2lo[0]}, [TMP1] + vzip.u8 xreg1, xreg3 + vld1.32 {yacc2hi[0]}, [TMP3] + vzip.u8 xreg2, xreg4 + vld1.32 {yacc2lo[1]}, [TMP2] + vzip.u8 xreg3, xreg4 + vld1.32 {yacc2hi[1]}, [TMP4] + vzip.u8 xreg1, xreg2 + convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 + vmull.u8 xacc1, xreg1, d28 + vzip.u8 yreg1, yreg3 + vmlal.u8 xacc1, xreg2, d29 + vzip.u8 yreg2, yreg4 + vmull.u8 xacc2, xreg3, d28 + vzip.u8 yreg3, yreg4 + vmlal.u8 xacc2, xreg4, d29 + vzip.u8 yreg1, yreg2 + vmull.u8 yacc1, yreg1, d28 + vmlal.u8 yacc1, yreg2, d29 + vmull.u8 yacc2, yreg3, d28 + vmlal.u8 yacc2, yreg4, d29 +.endm + +.macro bilinear_store_8888 numpix, tmp1, tmp2 +.if numpix == 4 + vst1.32 {d0, d1}, [OUT]! +.elseif numpix == 2 + vst1.32 {d0}, [OUT]! +.elseif numpix == 1 + vst1.32 {d0[0]}, [OUT, :32]! +.else + .error bilinear_store_8888 numpix is unsupported +.endif +.endm + +.macro bilinear_store_0565 numpix, tmp1, tmp2 + vuzp.u8 d0, d1 + vuzp.u8 d2, d3 + vuzp.u8 d1, d3 + vuzp.u8 d0, d2 + convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2 +.if numpix == 4 + vst1.16 {d2}, [OUT]! +.elseif numpix == 2 + vst1.32 {d2[0]}, [OUT]! +.elseif numpix == 1 + vst1.16 {d2[0]}, [OUT]! +.else + .error bilinear_store_0565 numpix is unsupported +.endif +.endm + + +/* + * Macros for loading mask pixels into register 'mask'. + * vdup must be done in somewhere else. + */ +.macro bilinear_load_mask_x numpix, mask +.endm + +.macro bilinear_load_mask_8 numpix, mask +.if numpix == 4 + vld1.32 {mask[0]}, [MASK]! +.elseif numpix == 2 + vld1.16 {mask[0]}, [MASK]! +.elseif numpix == 1 + vld1.8 {mask[0]}, [MASK]! +.else + .error bilinear_load_mask_8 numpix is unsupported +.endif +.endm + +.macro bilinear_load_mask mask_fmt, numpix, mask + bilinear_load_mask_&mask_fmt numpix, mask +.endm + + +/* + * Macros for loading destination pixels into register 'dst0' and 'dst1'. + * Interleave should be done somewhere else. + */ +.macro bilinear_load_dst_0565_src numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst_8888_src numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01 +.if numpix == 4 + vld1.32 {dst0, dst1}, [OUT] +.elseif numpix == 2 + vld1.32 {dst0}, [OUT] +.elseif numpix == 1 + vld1.32 {dst0[0]}, [OUT] +.else + .error bilinear_load_dst_8888 numpix is unsupported +.endif +.endm + +.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01 + bilinear_load_dst_8888 numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01 + bilinear_load_dst_8888 numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01 + bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01 +.endm + +/* + * Macros for duplicating partially loaded mask to fill entire register. + * We will apply mask to interleaved source pixels, that is + * (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3) + * (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3) + * So, we need to duplicate loaded mask into whole register. + * + * For two pixel case + * (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) + * (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) + * We can do some optimizations for this including one pixel cases. + */ +.macro bilinear_duplicate_mask_x numpix, mask +.endm + +.macro bilinear_duplicate_mask_8 numpix, mask +.if numpix == 4 + vdup.32 mask, mask[0] +.elseif numpix == 2 + vdup.16 mask, mask[0] +.elseif numpix == 1 + vdup.8 mask, mask[0] +.else + .error bilinear_duplicate_mask_8 is unsupported +.endif +.endm + +.macro bilinear_duplicate_mask mask_fmt, numpix, mask + bilinear_duplicate_mask_&mask_fmt numpix, mask +.endm + +/* + * Macros for interleaving src and dst pixels to rrrr gggg bbbb aaaa form. + * Interleave should be done when maks is enabled or operator is 'over'. + */ +.macro bilinear_interleave src0, src1, dst0, dst1 + vuzp.8 src0, src1 + vuzp.8 dst0, dst1 + vuzp.8 src0, src1 + vuzp.8 dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_x_src \ + numpix, src0, src1, src01, dst0, dst1, dst01 +.endm + +.macro bilinear_interleave_src_dst_x_over \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_x_add \ + numpix, src0, src1, src01, dst0, dst1, dst01 +.endm + +.macro bilinear_interleave_src_dst_8_src \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_8_over \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst_8_add \ + numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave src0, src1, dst0, dst1 +.endm + +.macro bilinear_interleave_src_dst \ + mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01 + + bilinear_interleave_src_dst_&mask_fmt&_&op \ + numpix, src0, src1, src01, dst0, dst1, dst01 +.endm + + +/* + * Macros for applying masks to src pixels. (see combine_mask_u() function) + * src, dst should be in interleaved form. + * mask register should be in form (m0, m1, m2, m3). + */ +.macro bilinear_apply_mask_to_src_x \ + numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 +.endm + +.macro bilinear_apply_mask_to_src_8 \ + numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 + + vmull.u8 tmp01, src0, mask + vmull.u8 tmp23, src1, mask + /* bubbles */ + vrshr.u16 tmp45, tmp01, #8 + vrshr.u16 tmp67, tmp23, #8 + /* bubbles */ + vraddhn.u16 src0, tmp45, tmp01 + vraddhn.u16 src1, tmp67, tmp23 +.endm + +.macro bilinear_apply_mask_to_src \ + mask_fmt, numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 + + bilinear_apply_mask_to_src_&mask_fmt \ + numpix, src0, src1, src01, mask, \ + tmp01, tmp23, tmp45, tmp67 +.endm + + +/* + * Macros for combining src and destination pixels. + * Interleave or not is depending on operator 'op'. + */ +.macro bilinear_combine_src \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 +.endm + +.macro bilinear_combine_over \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 + + vdup.32 tmp8, src1[1] + /* bubbles */ + vmvn.8 tmp8, tmp8 + /* bubbles */ + vmull.u8 tmp01, dst0, tmp8 + /* bubbles */ + vmull.u8 tmp23, dst1, tmp8 + /* bubbles */ + vrshr.u16 tmp45, tmp01, #8 + vrshr.u16 tmp67, tmp23, #8 + /* bubbles */ + vraddhn.u16 dst0, tmp45, tmp01 + vraddhn.u16 dst1, tmp67, tmp23 + /* bubbles */ + vqadd.u8 src01, dst01, src01 +.endm + +.macro bilinear_combine_add \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 + + vqadd.u8 src01, dst01, src01 +.endm + +.macro bilinear_combine \ + op, numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 + + bilinear_combine_&op \ + numpix, src0, src1, src01, dst0, dst1, dst01, \ + tmp01, tmp23, tmp45, tmp67, tmp8 +.endm + +/* + * Macros for final deinterleaving of destination pixels if needed. + */ +.macro bilinear_deinterleave numpix, dst0, dst1, dst01 + vuzp.8 dst0, dst1 + /* bubbles */ + vuzp.8 dst0, dst1 +.endm + +.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01 + bilinear_deinterleave numpix, dst0, dst1, dst01 +.endm + +.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01 + bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01 +.endm + + +.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op + bilinear_load_&src_fmt d0, d1, d2 + bilinear_load_mask mask_fmt, 1, d4 + bilinear_load_dst dst_fmt, op, 1, d18, d19, q9 + vmull.u8 q1, d0, d28 + vmlal.u8 q1, d1, d29 + vshr.u16 d30, d24, #8 + /* 4 cycles bubble */ + vshll.u16 q0, d2, #8 + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + /* 5 cycles bubble */ + bilinear_duplicate_mask mask_fmt, 1, d4 + vshrn.u32 d0, q0, #16 + /* 3 cycles bubble */ + vmovn.u16 d0, q0 + /* 1 cycle bubble */ + bilinear_interleave_src_dst \ + mask_fmt, op, 1, d0, d1, q0, d18, d19, q9 + bilinear_apply_mask_to_src \ + mask_fmt, 1, d0, d1, q0, d4, \ + q3, q8, q10, q11 + bilinear_combine \ + op, 1, d0, d1, q0, d18, d19, q9, \ + q3, q8, q10, q11, d5 + bilinear_deinterleave_dst mask_fmt, op, 1, d0, d1, q0 + bilinear_store_&dst_fmt 1, q2, q3 +.endm + +.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op + bilinear_load_and_vertical_interpolate_two_&src_fmt \ + q1, q11, d0, d1, d20, d21, d22, d23 + bilinear_load_mask mask_fmt, 2, d4 + bilinear_load_dst dst_fmt, op, 2, d18, d19, q9 + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vshll.u16 q0, d2, #8 + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + vshll.u16 q10, d22, #8 + vmlsl.u16 q10, d22, d31 + vmlal.u16 q10, d23, d31 + vshrn.u32 d30, q0, #16 + vshrn.u32 d31, q10, #16 + bilinear_duplicate_mask mask_fmt, 2, d4 + vmovn.u16 d0, q15 + bilinear_interleave_src_dst \ + mask_fmt, op, 2, d0, d1, q0, d18, d19, q9 + bilinear_apply_mask_to_src \ + mask_fmt, 2, d0, d1, q0, d4, \ + q3, q8, q10, q11 + bilinear_combine \ + op, 2, d0, d1, q0, d18, d19, q9, \ + q3, q8, q10, q11, d5 + bilinear_deinterleave_dst mask_fmt, op, 2, d0, d1, q0 + bilinear_store_&dst_fmt 2, q2, q3 +.endm + +.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op + bilinear_load_and_vertical_interpolate_four_&src_fmt \ + q1, q11, d0, d1, d20, d21, d22, d23 \ + q3, q9, d4, d5, d16, d17, d18, d19 + pld [TMP1, PF_OFFS] + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vshll.u16 q0, d2, #8 + vmlsl.u16 q0, d2, d30 + vmlal.u16 q0, d3, d30 + vshll.u16 q10, d22, #8 + vmlsl.u16 q10, d22, d31 + vmlal.u16 q10, d23, d31 + vshr.u16 q15, q12, #8 + vshll.u16 q2, d6, #8 + vmlsl.u16 q2, d6, d30 + vmlal.u16 q2, d7, d30 + vshll.u16 q8, d18, #8 + bilinear_load_mask mask_fmt, 4, d30 + bilinear_load_dst dst_fmt, op, 4, d2, d3, q1 + pld [TMP2, PF_OFFS] + vmlsl.u16 q8, d18, d31 + vmlal.u16 q8, d19, d31 + vadd.u16 q12, q12, q13 + vshrn.u32 d0, q0, #16 + vshrn.u32 d1, q10, #16 + vshrn.u32 d4, q2, #16 + vshrn.u32 d5, q8, #16 + bilinear_duplicate_mask mask_fmt, 4, d30 + vmovn.u16 d0, q0 + vmovn.u16 d1, q2 + bilinear_interleave_src_dst \ + mask_fmt, op, 4, d0, d1, q0, d2, d3, q1 + bilinear_apply_mask_to_src \ + mask_fmt, 4, d0, d1, q0, d30, \ + q3, q8, q9, q10 + bilinear_combine \ + op, 4, d0, d1, q0, d2, d3, q1, \ + q3, q8, q9, q10, d22 + bilinear_deinterleave_dst mask_fmt, op, 4, d0, d1, q0 + bilinear_store_&dst_fmt 4, q2, q3 +.endm + +.macro generate_bilinear_scanline_func_src_dst \ + fname, src_fmt, dst_fmt, op, \ + bpp_shift, prefetch_distance + +pixman_asm_function fname + OUT .req r0 + TOP .req r1 + BOTTOM .req r2 + WT .req r3 + WB .req r4 + X .req r5 + UX .req r6 + WIDTH .req ip + TMP1 .req r3 + TMP2 .req r4 + PF_OFFS .req r7 + TMP3 .req r8 + TMP4 .req r9 + + mov ip, sp + push {r4, r5, r6, r7, r8, r9} + mov PF_OFFS, #prefetch_distance + ldmia ip, {WB, X, UX, WIDTH} + mul PF_OFFS, PF_OFFS, UX + + cmp WIDTH, #0 + ble 3f + + vdup.u16 q12, X + vdup.u16 q13, UX + vdup.u8 d28, WT + vdup.u8 d29, WB + vadd.u16 d25, d25, d26 + vadd.u16 q13, q13, q13 + + subs WIDTH, WIDTH, #4 + blt 1f + mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) +0: + bilinear_interpolate_four_pixels src_fmt, x, dst_fmt, op + subs WIDTH, WIDTH, #4 + bge 0b +1: + tst WIDTH, #2 + beq 2f + bilinear_interpolate_two_pixels src_fmt, x, dst_fmt, op +2: + tst WIDTH, #1 + beq 3f + bilinear_interpolate_last_pixel src_fmt, x, dst_fmt, op +3: + pop {r4, r5, r6, r7, r8, r9} + bx lr + + .unreq OUT + .unreq TOP + .unreq BOTTOM + .unreq WT + .unreq WB + .unreq X + .unreq UX + .unreq WIDTH + .unreq TMP1 + .unreq TMP2 + .unreq PF_OFFS + .unreq TMP3 + .unreq TMP4 +.endfunc + +.endm + +.macro generate_bilinear_scanline_func_src_a8_dst \ + fname, src_fmt, dst_fmt, op, \ + bpp_shift, prefetch_distance + +pixman_asm_function fname + OUT .req r0 + MASK .req r1 + TOP .req r2 + BOTTOM .req r3 + WT .req r4 + WB .req r5 + X .req r6 + UX .req r7 + WIDTH .req ip + TMP1 .req r4 + TMP2 .req r5 + PF_OFFS .req r8 + TMP3 .req r9 + TMP4 .req r10 + + mov ip, sp + push {r4, r5, r6, r7, r8, r9, r10, ip} + mov PF_OFFS, #prefetch_distance + ldmia ip, {WT, WB, X, UX, WIDTH} + mul PF_OFFS, PF_OFFS, UX + + cmp WIDTH, #0 + ble 3f + + vdup.u16 q12, X + vdup.u16 q13, UX + vdup.u8 d28, WT + vdup.u8 d29, WB + vadd.u16 d25, d25, d26 + vadd.u16 q13, q13, q13 + + subs WIDTH, WIDTH, #4 + blt 1f + mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) +0: + bilinear_interpolate_four_pixels src_fmt, 8, dst_fmt, op + subs WIDTH, WIDTH, #4 + bge 0b +1: + tst WIDTH, #2 + beq 2f + bilinear_interpolate_two_pixels src_fmt, 8, dst_fmt, op +2: + tst WIDTH, #1 + beq 3f + bilinear_interpolate_last_pixel src_fmt, 8, dst_fmt, op +3: + pop {r4, r5, r6, r7, r8, r9, r10, ip} + bx lr + + .unreq OUT + .unreq TOP + .unreq BOTTOM + .unreq WT + .unreq WB + .unreq X + .unreq UX + .unreq WIDTH + .unreq MASK + .unreq TMP1 + .unreq TMP2 + .unreq PF_OFFS + .unreq TMP3 + .unreq TMP4 +.endfunc + +.endm + +generate_bilinear_scanline_func_src_dst \ + pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \ + 8888, 8888, over, 2, 28 + +generate_bilinear_scanline_func_src_dst \ + pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \ + 8888, 8888, add, 2, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \ + 8888, 8888, src, 2, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \ + 8888, 0565, src, 2, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \ + 0565, 8888, src, 1, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \ + 0565, 0565, src, 1, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \ + 8888, 8888, over, 2, 28 + +generate_bilinear_scanline_func_src_a8_dst \ + pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \ + 8888, 8888, add, 2, 28 diff --git a/pixman/pixman/pixman-arm-neon-asm.S b/pixman/pixman/pixman-arm-neon-asm.S index 5e9fda34e..833f18c2e 100644 --- a/pixman/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman/pixman-arm-neon-asm.S @@ -1426,6 +1426,175 @@ generate_composite_function \ /******************************************************************************/ +.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_head + /* + * 'combine_mask_ca' replacement + * + * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A] + * mask in {d24, d25, d26} [B, G, R] + * output: updated src in {d0, d1, d2 } [B, G, R] + * updated mask in {d24, d25, d26} [B, G, R] + */ + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d9 + vmull.u8 q6, d26, d10 + vmull.u8 q9, d11, d25 + vmull.u8 q12, d11, d24 + vmull.u8 q13, d11, d26 + vrshr.u16 q8, q0, #8 + vrshr.u16 q10, q1, #8 + vrshr.u16 q11, q6, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q10 + vraddhn.u16 d2, q6, q11 + vrshr.u16 q11, q12, #8 + vrshr.u16 q8, q9, #8 + vrshr.u16 q6, q13, #8 + vraddhn.u16 d24, q12, q11 + vraddhn.u16 d25, q9, q8 + /* + * convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + * and put data into d16 - blue, d17 - green, d18 - red + */ + vshrn.u16 d17, q2, #3 + vshrn.u16 d18, q2, #8 + vraddhn.u16 d26, q13, q6 + vsli.u16 q2, q2, #5 + vsri.u8 d18, d18, #5 + vsri.u8 d17, d17, #6 + /* + * 'combine_over_ca' replacement + * + * output: updated dest in d16 - blue, d17 - green, d18 - red + */ + vmvn.8 q12, q12 + vshrn.u16 d16, q2, #2 + vmvn.8 d26, d26 + vmull.u8 q6, d16, d24 + vmull.u8 q7, d17, d25 + vmull.u8 q11, d18, d26 +.endm + +.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail + /* ... continue 'combine_over_ca' replacement */ + vrshr.u16 q10, q6, #8 + vrshr.u16 q14, q7, #8 + vrshr.u16 q15, q11, #8 + vraddhn.u16 d16, q10, q6 + vraddhn.u16 d17, q14, q7 + vraddhn.u16 d18, q15, q11 + vqadd.u8 q8, q0, q8 + vqadd.u8 d18, d2, d18 + /* + * convert the results in d16, d17, d18 to r5g6b5 and store + * them into {d28, d29} + */ + vshll.u8 q14, d18, #8 + vshll.u8 q10, d17, #8 + vshll.u8 q15, d16, #8 + vsri.u16 q14, q10, #5 + vsri.u16 q14, q15, #11 +.endm + +.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head + fetch_mask_pixblock + vrshr.u16 q10, q6, #8 + vrshr.u16 q14, q7, #8 + vld1.16 {d4, d5}, [DST_R, :128]! + vrshr.u16 q15, q11, #8 + vraddhn.u16 d16, q10, q6 + vraddhn.u16 d17, q14, q7 + vraddhn.u16 d22, q15, q11 + /* process_pixblock_head */ + /* + * 'combine_mask_ca' replacement + * + * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A] + * mask in {d24, d25, d26} [B, G, R] + * output: updated src in {d0, d1, d2 } [B, G, R] + * updated mask in {d24, d25, d26} [B, G, R] + */ + vmull.u8 q1, d25, d9 + vqadd.u8 q8, q0, q8 + vmull.u8 q0, d24, d8 + vqadd.u8 d22, d2, d22 + vmull.u8 q6, d26, d10 + /* + * convert the result in d16, d17, d22 to r5g6b5 and store + * it into {d28, d29} + */ + vshll.u8 q14, d22, #8 + vshll.u8 q10, d17, #8 + vshll.u8 q15, d16, #8 + vmull.u8 q9, d11, d25 + vsri.u16 q14, q10, #5 + vmull.u8 q12, d11, d24 + vmull.u8 q13, d11, d26 + vsri.u16 q14, q15, #11 + cache_preload 8, 8 + vrshr.u16 q8, q0, #8 + vrshr.u16 q10, q1, #8 + vrshr.u16 q11, q6, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q10 + vraddhn.u16 d2, q6, q11 + vrshr.u16 q11, q12, #8 + vrshr.u16 q8, q9, #8 + vrshr.u16 q6, q13, #8 + vraddhn.u16 d25, q9, q8 + /* + * convert 8 r5g6b5 pixel data from {d4, d5} to planar + * 8-bit format and put data into d16 - blue, d17 - green, + * d18 - red + */ + vshrn.u16 d17, q2, #3 + vshrn.u16 d18, q2, #8 + vraddhn.u16 d24, q12, q11 + vraddhn.u16 d26, q13, q6 + vsli.u16 q2, q2, #5 + vsri.u8 d18, d18, #5 + vsri.u8 d17, d17, #6 + /* + * 'combine_over_ca' replacement + * + * output: updated dest in d16 - blue, d17 - green, d18 - red + */ + vmvn.8 q12, q12 + vshrn.u16 d16, q2, #2 + vmvn.8 d26, d26 + vmull.u8 q7, d17, d25 + vmull.u8 q6, d16, d24 + vmull.u8 q11, d18, d26 + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_n_8888_0565_ca_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8888_0565_ca_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_0565_ca_asm_neon, 0, 32, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_0565_ca_init, \ + pixman_composite_over_n_8888_0565_ca_cleanup, \ + pixman_composite_over_n_8888_0565_ca_process_pixblock_head, \ + pixman_composite_over_n_8888_0565_ca_process_pixblock_tail, \ + pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head + +/******************************************************************************/ + .macro pixman_composite_in_n_8_process_pixblock_head /* expecting source data in {d0, d1, d2, d3} */ /* and destination data in {d4, d5, d6, d7} */ diff --git a/pixman/pixman/pixman-arm-neon.c b/pixman/pixman/pixman-arm-neon.c index 5213a2007..e5127a65f 100644 --- a/pixman/pixman/pixman-arm-neon.c +++ b/pixman/pixman/pixman-arm-neon.c @@ -1,460 +1,503 @@ -/* - * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of ARM Ltd not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. ARM Ltd makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Ian Rickards (ian.rickards@arm.com) - * Author: Jonathan Morton (jonathan.morton@movial.com) - * Author: Markku Vire (markku.vire@movial.com) - * - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include "pixman-private.h" -#include "pixman-arm-common.h" - -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565, - uint16_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888, - uint8_t, 3, uint8_t, 3) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888, - uint16_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev, - uint8_t, 3, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev, - uint8_t, 3, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, - uint8_t, 1, uint16_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, - uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, - uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, - uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, - uint8_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, - uint8_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, - uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, - uint8_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, - uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, - uint32_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, - uint16_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, - uint8_t, 1, uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, - uint16_t, 1, uint8_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, - uint32_t, 1, uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, - uint32_t, 1, uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, - uint32_t, 1, uint8_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888, - uint32_t, 1, uint32_t, 1, uint32_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565, - uint32_t, 1, uint8_t, 1, uint16_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565, - uint16_t, 1, uint8_t, 1, uint16_t, 1) - -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER, - uint32_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER, - uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC, - uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, - uint16_t, uint32_t) - -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, - OVER, uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, - OVER, uint16_t, uint16_t) - -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC, - uint32_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC, - uint32_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC, - uint16_t, uint32_t) -PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC, - uint16_t, uint16_t) - -void -pixman_composite_src_n_8_asm_neon (int32_t w, - int32_t h, - uint8_t *dst, - int32_t dst_stride, - uint8_t src); - -void -pixman_composite_src_n_0565_asm_neon (int32_t w, - int32_t h, - uint16_t *dst, - int32_t dst_stride, - uint16_t src); - -void -pixman_composite_src_n_8888_asm_neon (int32_t w, - int32_t h, - uint32_t *dst, - int32_t dst_stride, - uint32_t src); - -static pixman_bool_t -pixman_fill_neon (uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t _xor) -{ - /* stride is always multiple of 32bit units in pixman */ - uint32_t byte_stride = stride * sizeof(uint32_t); - - switch (bpp) - { - case 8: - pixman_composite_src_n_8_asm_neon ( - width, - height, - (uint8_t *)(((char *) bits) + y * byte_stride + x), - byte_stride, - _xor & 0xff); - return TRUE; - case 16: - pixman_composite_src_n_0565_asm_neon ( - width, - height, - (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), - byte_stride / 2, - _xor & 0xffff); - return TRUE; - case 32: - pixman_composite_src_n_8888_asm_neon ( - width, - height, - (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), - byte_stride / 4, - _xor); - return TRUE; - default: - return FALSE; - } -} - -static pixman_bool_t -pixman_blt_neon (uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height) -{ - if (src_bpp != dst_bpp) - return FALSE; - - switch (src_bpp) - { - case 16: - pixman_composite_src_0565_0565_asm_neon ( - width, height, - (uint16_t *)(((char *) dst_bits) + - dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, - (uint16_t *)(((char *) src_bits) + - src_y * src_stride * 4 + src_x * 2), src_stride * 2); - return TRUE; - case 32: - pixman_composite_src_8888_8888_asm_neon ( - width, height, - (uint32_t *)(((char *) dst_bits) + - dst_y * dst_stride * 4 + dst_x * 4), dst_stride, - (uint32_t *)(((char *) src_bits) + - src_y * src_stride * 4 + src_x * 4), src_stride); - return TRUE; - default: - return FALSE; - } -} - -static const pixman_fast_path_t arm_neon_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), - PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888), - PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), - PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), - PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565), - PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565), - PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), - PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), - PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), - /* Note: NONE repeat is not supported yet */ - SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), - SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), - SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), - - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565), - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565), - - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), - PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), - - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888), - - SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), - SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), - - SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888), - SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565), - - { PIXMAN_OP_NONE }, -}; - -static pixman_bool_t -arm_neon_blt (pixman_implementation_t *imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height) -{ - if (!pixman_blt_neon ( - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height)) - - { - return _pixman_implementation_blt ( - imp->delegate, - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height); - } - - return TRUE; -} - -static pixman_bool_t -arm_neon_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor)) - return TRUE; - - return _pixman_implementation_fill ( - imp->delegate, bits, stride, bpp, x, y, width, height, xor); -} - -#define BIND_COMBINE_U(name) \ -void \ -pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \ - const uint32_t *dst, \ - const uint32_t *src, \ - const uint32_t *mask); \ - \ -void \ -pixman_composite_scanline_##name##_asm_neon (int32_t w, \ - const uint32_t *dst, \ - const uint32_t *src); \ - \ -static void \ -neon_combine_##name##_u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint32_t * dest, \ - const uint32_t * src, \ - const uint32_t * mask, \ - int width) \ -{ \ - if (mask) \ - pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \ - src, mask); \ - else \ - pixman_composite_scanline_##name##_asm_neon (width, dest, src); \ -} - -BIND_COMBINE_U (over) -BIND_COMBINE_U (add) -BIND_COMBINE_U (out_reverse) - -pixman_implementation_t * -_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = - _pixman_implementation_create (fallback, arm_neon_fast_paths); - - imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u; - imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; - imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; - - imp->blt = arm_neon_blt; - imp->fill = arm_neon_fill; - - return imp; -} +/* + * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of ARM Ltd not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. ARM Ltd makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Ian Rickards (ian.rickards@arm.com) + * Author: Jonathan Morton (jonathan.morton@movial.com) + * Author: Markku Vire (markku.vire@movial.com) + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include "pixman-private.h" +#include "pixman-arm-common.h" + +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888, + uint8_t, 3, uint8_t, 3) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888, + uint16_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev, + uint8_t, 3, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev, + uint8_t, 3, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, + uint8_t, 1, uint16_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, + uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, + uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, + uint8_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, + uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, + uint32_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, + uint8_t, 1, uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, + uint32_t, 1, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, + uint32_t, 1, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888, + uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565, + uint32_t, 1, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565, + uint16_t, 1, uint8_t, 1, uint16_t, 1) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, + uint16_t, uint32_t) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, + OVER, uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, + OVER, uint16_t, uint16_t) + +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC, + uint16_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC, + uint16_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD, + uint32_t, uint32_t) + +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC, + uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC, + uint16_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC, + uint16_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER, + uint32_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD, + uint32_t, uint32_t) + +void +pixman_composite_src_n_8_asm_neon (int32_t w, + int32_t h, + uint8_t *dst, + int32_t dst_stride, + uint8_t src); + +void +pixman_composite_src_n_0565_asm_neon (int32_t w, + int32_t h, + uint16_t *dst, + int32_t dst_stride, + uint16_t src); + +void +pixman_composite_src_n_8888_asm_neon (int32_t w, + int32_t h, + uint32_t *dst, + int32_t dst_stride, + uint32_t src); + +static pixman_bool_t +pixman_fill_neon (uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor) +{ + /* stride is always multiple of 32bit units in pixman */ + uint32_t byte_stride = stride * sizeof(uint32_t); + + switch (bpp) + { + case 8: + pixman_composite_src_n_8_asm_neon ( + width, + height, + (uint8_t *)(((char *) bits) + y * byte_stride + x), + byte_stride, + _xor & 0xff); + return TRUE; + case 16: + pixman_composite_src_n_0565_asm_neon ( + width, + height, + (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), + byte_stride / 2, + _xor & 0xffff); + return TRUE; + case 32: + pixman_composite_src_n_8888_asm_neon ( + width, + height, + (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), + byte_stride / 4, + _xor); + return TRUE; + default: + return FALSE; + } +} + +static pixman_bool_t +pixman_blt_neon (uint32_t *src_bits, + uint32_t *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (src_bpp != dst_bpp) + return FALSE; + + switch (src_bpp) + { + case 16: + pixman_composite_src_0565_0565_asm_neon ( + width, height, + (uint16_t *)(((char *) dst_bits) + + dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, + (uint16_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 2), src_stride * 2); + return TRUE; + case 32: + pixman_composite_src_8888_8888_asm_neon ( + width, height, + (uint32_t *)(((char *) dst_bits) + + dst_y * dst_stride * 4 + dst_x * 4), dst_stride, + (uint32_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 4), src_stride); + return TRUE; + default: + return FALSE; + } +} + +static const pixman_fast_path_t arm_neon_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, neon_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, neon_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565), + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888), + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), + + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), + + { PIXMAN_OP_NONE }, +}; + +static pixman_bool_t +arm_neon_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (!pixman_blt_neon ( + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height)) + + { + return _pixman_implementation_blt ( + imp->delegate, + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height); + } + + return TRUE; +} + +static pixman_bool_t +arm_neon_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor)) + return TRUE; + + return _pixman_implementation_fill ( + imp->delegate, bits, stride, bpp, x, y, width, height, xor); +} + +#define BIND_COMBINE_U(name) \ +void \ +pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \ + const uint32_t *dst, \ + const uint32_t *src, \ + const uint32_t *mask); \ + \ +void \ +pixman_composite_scanline_##name##_asm_neon (int32_t w, \ + const uint32_t *dst, \ + const uint32_t *src); \ + \ +static void \ +neon_combine_##name##_u (pixman_implementation_t *imp, \ + pixman_op_t op, \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ + int width) \ +{ \ + if (mask) \ + pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \ + src, mask); \ + else \ + pixman_composite_scanline_##name##_asm_neon (width, dest, src); \ +} + +BIND_COMBINE_U (over) +BIND_COMBINE_U (add) +BIND_COMBINE_U (out_reverse) + +pixman_implementation_t * +_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, arm_neon_fast_paths); + + imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u; + imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; + + imp->blt = arm_neon_blt; + imp->fill = arm_neon_fill; + + return imp; +} diff --git a/pixman/pixman/pixman-trap.c b/pixman/pixman/pixman-trap.c index 6e85acd49..c99f03ecc 100644 --- a/pixman/pixman/pixman-trap.c +++ b/pixman/pixman/pixman-trap.c @@ -1,657 +1,668 @@ -/* - * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc. - * Copyright © 2004 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include "pixman-private.h" - -/* - * Compute the smallest value greater than or equal to y which is on a - * grid row. - */ - -PIXMAN_EXPORT pixman_fixed_t -pixman_sample_ceil_y (pixman_fixed_t y, int n) -{ - pixman_fixed_t f = pixman_fixed_frac (y); - pixman_fixed_t i = pixman_fixed_floor (y); - - f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + - Y_FRAC_FIRST (n); - - if (f > Y_FRAC_LAST (n)) - { - if (pixman_fixed_to_int (i) == 0x7fff) - { - f = 0xffff; /* saturate */ - } - else - { - f = Y_FRAC_FIRST (n); - i += pixman_fixed_1; - } - } - return (i | f); -} - -/* - * Compute the largest value strictly less than y which is on a - * grid row. - */ -PIXMAN_EXPORT pixman_fixed_t -pixman_sample_floor_y (pixman_fixed_t y, - int n) -{ - pixman_fixed_t f = pixman_fixed_frac (y); - pixman_fixed_t i = pixman_fixed_floor (y); - - f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + - Y_FRAC_FIRST (n); - - if (f < Y_FRAC_FIRST (n)) - { - if (pixman_fixed_to_int (i) == 0x8000) - { - f = 0; /* saturate */ - } - else - { - f = Y_FRAC_LAST (n); - i -= pixman_fixed_1; - } - } - return (i | f); -} - -/* - * Step an edge by any amount (including negative values) - */ -PIXMAN_EXPORT void -pixman_edge_step (pixman_edge_t *e, - int n) -{ - pixman_fixed_48_16_t ne; - - e->x += n * e->stepx; - - ne = e->e + n * (pixman_fixed_48_16_t) e->dx; - - if (n >= 0) - { - if (ne > 0) - { - int nx = (ne + e->dy - 1) / e->dy; - e->e = ne - nx * (pixman_fixed_48_16_t) e->dy; - e->x += nx * e->signdx; - } - } - else - { - if (ne <= -e->dy) - { - int nx = (-ne) / e->dy; - e->e = ne + nx * (pixman_fixed_48_16_t) e->dy; - e->x -= nx * e->signdx; - } - } -} - -/* - * A private routine to initialize the multi-step - * elements of an edge structure - */ -static void -_pixman_edge_multi_init (pixman_edge_t * e, - int n, - pixman_fixed_t *stepx_p, - pixman_fixed_t *dx_p) -{ - pixman_fixed_t stepx; - pixman_fixed_48_16_t ne; - - ne = n * (pixman_fixed_48_16_t) e->dx; - stepx = n * e->stepx; - - if (ne > 0) - { - int nx = ne / e->dy; - ne -= nx * e->dy; - stepx += nx * e->signdx; - } - - *dx_p = ne; - *stepx_p = stepx; -} - -/* - * Initialize one edge structure given the line endpoints and a - * starting y value - */ -PIXMAN_EXPORT void -pixman_edge_init (pixman_edge_t *e, - int n, - pixman_fixed_t y_start, - pixman_fixed_t x_top, - pixman_fixed_t y_top, - pixman_fixed_t x_bot, - pixman_fixed_t y_bot) -{ - pixman_fixed_t dx, dy; - - e->x = x_top; - e->e = 0; - dx = x_bot - x_top; - dy = y_bot - y_top; - e->dy = dy; - e->dx = 0; - - if (dy) - { - if (dx >= 0) - { - e->signdx = 1; - e->stepx = dx / dy; - e->dx = dx % dy; - e->e = -dy; - } - else - { - e->signdx = -1; - e->stepx = -(-dx / dy); - e->dx = -dx % dy; - e->e = 0; - } - - _pixman_edge_multi_init (e, STEP_Y_SMALL (n), - &e->stepx_small, &e->dx_small); - - _pixman_edge_multi_init (e, STEP_Y_BIG (n), - &e->stepx_big, &e->dx_big); - } - pixman_edge_step (e, y_start - y_top); -} - -/* - * Initialize one edge structure given a line, starting y value - * and a pixel offset for the line - */ -PIXMAN_EXPORT void -pixman_line_fixed_edge_init (pixman_edge_t * e, - int n, - pixman_fixed_t y, - const pixman_line_fixed_t *line, - int x_off, - int y_off) -{ - pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off); - pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off); - const pixman_point_fixed_t *top, *bot; - - if (line->p1.y <= line->p2.y) - { - top = &line->p1; - bot = &line->p2; - } - else - { - top = &line->p2; - bot = &line->p1; - } - - pixman_edge_init (e, n, y, - top->x + x_off_fixed, - top->y + y_off_fixed, - bot->x + x_off_fixed, - bot->y + y_off_fixed); -} - -PIXMAN_EXPORT void -pixman_add_traps (pixman_image_t * image, - int16_t x_off, - int16_t y_off, - int ntrap, - pixman_trap_t * traps) -{ - int bpp; - int height; - - pixman_fixed_t x_off_fixed; - pixman_fixed_t y_off_fixed; - pixman_edge_t l, r; - pixman_fixed_t t, b; - - _pixman_image_validate (image); - - height = image->bits.height; - bpp = PIXMAN_FORMAT_BPP (image->bits.format); - - x_off_fixed = pixman_int_to_fixed (x_off); - y_off_fixed = pixman_int_to_fixed (y_off); - - while (ntrap--) - { - t = traps->top.y + y_off_fixed; - if (t < 0) - t = 0; - t = pixman_sample_ceil_y (t, bpp); - - b = traps->bot.y + y_off_fixed; - if (pixman_fixed_to_int (b) >= height) - b = pixman_int_to_fixed (height) - 1; - b = pixman_sample_floor_y (b, bpp); - - if (b >= t) - { - /* initialize edge walkers */ - pixman_edge_init (&l, bpp, t, - traps->top.l + x_off_fixed, - traps->top.y + y_off_fixed, - traps->bot.l + x_off_fixed, - traps->bot.y + y_off_fixed); - - pixman_edge_init (&r, bpp, t, - traps->top.r + x_off_fixed, - traps->top.y + y_off_fixed, - traps->bot.r + x_off_fixed, - traps->bot.y + y_off_fixed); - - pixman_rasterize_edges (image, &l, &r, t, b); - } - - traps++; - } -} - -#if 0 -static void -dump_image (pixman_image_t *image, - const char * title) -{ - int i, j; - - if (!image->type == BITS) - printf ("%s is not a regular image\n", title); - - if (!image->bits.format == PIXMAN_a8) - printf ("%s is not an alpha mask\n", title); - - printf ("\n\n\n%s: \n", title); - - for (i = 0; i < image->bits.height; ++i) - { - uint8_t *line = - (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]); - - for (j = 0; j < image->bits.width; ++j) - printf ("%c", line[j] ? '#' : ' '); - - printf ("\n"); - } -} -#endif - -PIXMAN_EXPORT void -pixman_add_trapezoids (pixman_image_t * image, - int16_t x_off, - int y_off, - int ntraps, - const pixman_trapezoid_t *traps) -{ - int i; - -#if 0 - dump_image (image, "before"); -#endif - - for (i = 0; i < ntraps; ++i) - { - const pixman_trapezoid_t *trap = &(traps[i]); - - if (!pixman_trapezoid_valid (trap)) - continue; - - pixman_rasterize_trapezoid (image, trap, x_off, y_off); - } - -#if 0 - dump_image (image, "after"); -#endif -} - -PIXMAN_EXPORT void -pixman_rasterize_trapezoid (pixman_image_t * image, - const pixman_trapezoid_t *trap, - int x_off, - int y_off) -{ - int bpp; - int height; - - pixman_fixed_t y_off_fixed; - pixman_edge_t l, r; - pixman_fixed_t t, b; - - return_if_fail (image->type == BITS); - - _pixman_image_validate (image); - - if (!pixman_trapezoid_valid (trap)) - return; - - height = image->bits.height; - bpp = PIXMAN_FORMAT_BPP (image->bits.format); - - y_off_fixed = pixman_int_to_fixed (y_off); - - t = trap->top + y_off_fixed; - if (t < 0) - t = 0; - t = pixman_sample_ceil_y (t, bpp); - - b = trap->bottom + y_off_fixed; - if (pixman_fixed_to_int (b) >= height) - b = pixman_int_to_fixed (height) - 1; - b = pixman_sample_floor_y (b, bpp); - - if (b >= t) - { - /* initialize edge walkers */ - pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off); - pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off); - - pixman_rasterize_edges (image, &l, &r, t, b); - } -} - -PIXMAN_EXPORT void -pixman_composite_trapezoids (pixman_op_t op, - pixman_image_t * src, - pixman_image_t * dst, - pixman_format_code_t mask_format, - int x_src, - int y_src, - int x_dst, - int y_dst, - int n_traps, - const pixman_trapezoid_t * traps) -{ - int i; - - if (n_traps <= 0) - return; - - _pixman_image_validate (src); - _pixman_image_validate (dst); - - if (op == PIXMAN_OP_ADD && - (src->common.flags & FAST_PATH_IS_OPAQUE) && - (mask_format == dst->common.extended_format_code) && - !(dst->common.have_clip_region)) - { - for (i = 0; i < n_traps; ++i) - { - const pixman_trapezoid_t *trap = &(traps[i]); - - if (!pixman_trapezoid_valid (trap)) - continue; - - pixman_rasterize_trapezoid (dst, trap, 0, 0); - } - } - else - { - pixman_image_t *tmp; - pixman_box32_t box; - int x_rel, y_rel; - - box.x1 = INT32_MAX; - box.y1 = INT32_MAX; - box.x2 = INT32_MIN; - box.y2 = INT32_MIN; - - for (i = 0; i < n_traps; ++i) - { - const pixman_trapezoid_t *trap = &(traps[i]); - int y1, y2; - - if (!pixman_trapezoid_valid (trap)) - continue; - - y1 = pixman_fixed_to_int (trap->top); - if (y1 < box.y1) - box.y1 = y1; - - y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom)); - if (y2 > box.y2) - box.y2 = y2; - -#define EXTEND_MIN(x) \ - if (pixman_fixed_to_int ((x)) < box.x1) \ - box.x1 = pixman_fixed_to_int ((x)); -#define EXTEND_MAX(x) \ - if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box.x2) \ - box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x))); - -#define EXTEND(x) \ - EXTEND_MIN(x); \ - EXTEND_MAX(x); - - EXTEND(trap->left.p1.x); - EXTEND(trap->left.p2.x); - EXTEND(trap->right.p1.x); - EXTEND(trap->right.p2.x); - } - - if (box.x1 >= box.x2 || box.y1 >= box.y2) - return; - - tmp = pixman_image_create_bits ( - mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1); - - for (i = 0; i < n_traps; ++i) - { - const pixman_trapezoid_t *trap = &(traps[i]); - - if (!pixman_trapezoid_valid (trap)) - continue; - - pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1); - } - - x_rel = box.x1 + x_src - x_dst; - y_rel = box.y1 + y_src - y_dst; - - pixman_image_composite (op, src, tmp, dst, - x_rel, y_rel, 0, 0, box.x1, box.y1, - box.x2 - box.x1, box.y2 - box.y1); - - pixman_image_unref (tmp); - } -} - -static int -greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b) -{ - if (a->y == b->y) - return a->x > b->x; - return a->y > b->y; -} - -/* - * Note that the definition of this function is a bit odd because - * of the X coordinate space (y increasing downwards). - */ -static int -clockwise (const pixman_point_fixed_t *ref, - const pixman_point_fixed_t *a, - const pixman_point_fixed_t *b) -{ - pixman_point_fixed_t ad, bd; - - ad.x = a->x - ref->x; - ad.y = a->y - ref->y; - bd.x = b->x - ref->x; - bd.y = b->y - ref->y; - - return ((pixman_fixed_32_32_t) bd.y * ad.x - - (pixman_fixed_32_32_t) ad.y * bd.x) < 0; -} - -static void -triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps) -{ - const pixman_point_fixed_t *top, *left, *right, *tmp; - - top = &tri->p1; - left = &tri->p2; - right = &tri->p3; - - if (greater_y (top, left)) - { - tmp = left; - left = top; - top = tmp; - } - - if (greater_y (top, right)) - { - tmp = right; - right = top; - top = tmp; - } - - if (clockwise (top, right, left)) - { - tmp = right; - right = left; - left = tmp; - } - - /* - * Two cases: - * - * + + - * / \ / \ - * / \ / \ - * / + + \ - * / -- -- \ - * / -- -- \ - * / --- --- \ - * +-- --+ - */ - - traps->top = top->y; - traps->left.p1 = *top; - traps->left.p2 = *left; - traps->right.p1 = *top; - traps->right.p2 = *right; - - if (right->y < left->y) - traps->bottom = right->y; - else - traps->bottom = left->y; - - traps++; - - *traps = *(traps - 1); - - if (right->y < left->y) - { - traps->top = right->y; - traps->bottom = left->y; - traps->right.p1 = *right; - traps->right.p2 = *left; - } - else - { - traps->top = left->y; - traps->bottom = right->y; - traps->left.p1 = *left; - traps->left.p2 = *right; - } -} - -static pixman_trapezoid_t * -convert_triangles (int n_tris, const pixman_triangle_t *tris) -{ - pixman_trapezoid_t *traps; - int i; - - if (n_tris <= 0) - return NULL; - - traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t)); - if (!traps) - return NULL; - - for (i = 0; i < n_tris; ++i) - triangle_to_trapezoids (&(tris[i]), traps + 2 * i); - - return traps; -} - -PIXMAN_EXPORT void -pixman_composite_triangles (pixman_op_t op, - pixman_image_t * src, - pixman_image_t * dst, - pixman_format_code_t mask_format, - int x_src, - int y_src, - int x_dst, - int y_dst, - int n_tris, - const pixman_triangle_t * tris) -{ - pixman_trapezoid_t *traps; - - if ((traps = convert_triangles (n_tris, tris))) - { - pixman_composite_trapezoids (op, src, dst, mask_format, - x_src, y_src, x_dst, y_dst, - n_tris * 2, traps); - - free (traps); - } -} - -PIXMAN_EXPORT void -pixman_add_triangles (pixman_image_t *image, - int32_t x_off, - int32_t y_off, - int n_tris, - const pixman_triangle_t *tris) -{ - pixman_trapezoid_t *traps; - - if ((traps = convert_triangles (n_tris, tris))) - { - pixman_add_trapezoids (image, x_off, y_off, - n_tris * 2, traps); - - free (traps); - } -} +/* + * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc. + * Copyright © 2004 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include "pixman-private.h" + +/* + * Compute the smallest value greater than or equal to y which is on a + * grid row. + */ + +PIXMAN_EXPORT pixman_fixed_t +pixman_sample_ceil_y (pixman_fixed_t y, int n) +{ + pixman_fixed_t f = pixman_fixed_frac (y); + pixman_fixed_t i = pixman_fixed_floor (y); + + f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + + Y_FRAC_FIRST (n); + + if (f > Y_FRAC_LAST (n)) + { + if (pixman_fixed_to_int (i) == 0x7fff) + { + f = 0xffff; /* saturate */ + } + else + { + f = Y_FRAC_FIRST (n); + i += pixman_fixed_1; + } + } + return (i | f); +} + +/* + * Compute the largest value strictly less than y which is on a + * grid row. + */ +PIXMAN_EXPORT pixman_fixed_t +pixman_sample_floor_y (pixman_fixed_t y, + int n) +{ + pixman_fixed_t f = pixman_fixed_frac (y); + pixman_fixed_t i = pixman_fixed_floor (y); + + f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + + Y_FRAC_FIRST (n); + + if (f < Y_FRAC_FIRST (n)) + { + if (pixman_fixed_to_int (i) == 0x8000) + { + f = 0; /* saturate */ + } + else + { + f = Y_FRAC_LAST (n); + i -= pixman_fixed_1; + } + } + return (i | f); +} + +/* + * Step an edge by any amount (including negative values) + */ +PIXMAN_EXPORT void +pixman_edge_step (pixman_edge_t *e, + int n) +{ + pixman_fixed_48_16_t ne; + + e->x += n * e->stepx; + + ne = e->e + n * (pixman_fixed_48_16_t) e->dx; + + if (n >= 0) + { + if (ne > 0) + { + int nx = (ne + e->dy - 1) / e->dy; + e->e = ne - nx * (pixman_fixed_48_16_t) e->dy; + e->x += nx * e->signdx; + } + } + else + { + if (ne <= -e->dy) + { + int nx = (-ne) / e->dy; + e->e = ne + nx * (pixman_fixed_48_16_t) e->dy; + e->x -= nx * e->signdx; + } + } +} + +/* + * A private routine to initialize the multi-step + * elements of an edge structure + */ +static void +_pixman_edge_multi_init (pixman_edge_t * e, + int n, + pixman_fixed_t *stepx_p, + pixman_fixed_t *dx_p) +{ + pixman_fixed_t stepx; + pixman_fixed_48_16_t ne; + + ne = n * (pixman_fixed_48_16_t) e->dx; + stepx = n * e->stepx; + + if (ne > 0) + { + int nx = ne / e->dy; + ne -= nx * e->dy; + stepx += nx * e->signdx; + } + + *dx_p = ne; + *stepx_p = stepx; +} + +/* + * Initialize one edge structure given the line endpoints and a + * starting y value + */ +PIXMAN_EXPORT void +pixman_edge_init (pixman_edge_t *e, + int n, + pixman_fixed_t y_start, + pixman_fixed_t x_top, + pixman_fixed_t y_top, + pixman_fixed_t x_bot, + pixman_fixed_t y_bot) +{ + pixman_fixed_t dx, dy; + + e->x = x_top; + e->e = 0; + dx = x_bot - x_top; + dy = y_bot - y_top; + e->dy = dy; + e->dx = 0; + + if (dy) + { + if (dx >= 0) + { + e->signdx = 1; + e->stepx = dx / dy; + e->dx = dx % dy; + e->e = -dy; + } + else + { + e->signdx = -1; + e->stepx = -(-dx / dy); + e->dx = -dx % dy; + e->e = 0; + } + + _pixman_edge_multi_init (e, STEP_Y_SMALL (n), + &e->stepx_small, &e->dx_small); + + _pixman_edge_multi_init (e, STEP_Y_BIG (n), + &e->stepx_big, &e->dx_big); + } + pixman_edge_step (e, y_start - y_top); +} + +/* + * Initialize one edge structure given a line, starting y value + * and a pixel offset for the line + */ +PIXMAN_EXPORT void +pixman_line_fixed_edge_init (pixman_edge_t * e, + int n, + pixman_fixed_t y, + const pixman_line_fixed_t *line, + int x_off, + int y_off) +{ + pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off); + pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off); + const pixman_point_fixed_t *top, *bot; + + if (line->p1.y <= line->p2.y) + { + top = &line->p1; + bot = &line->p2; + } + else + { + top = &line->p2; + bot = &line->p1; + } + + pixman_edge_init (e, n, y, + top->x + x_off_fixed, + top->y + y_off_fixed, + bot->x + x_off_fixed, + bot->y + y_off_fixed); +} + +PIXMAN_EXPORT void +pixman_add_traps (pixman_image_t * image, + int16_t x_off, + int16_t y_off, + int ntrap, + pixman_trap_t * traps) +{ + int bpp; + int height; + + pixman_fixed_t x_off_fixed; + pixman_fixed_t y_off_fixed; + pixman_edge_t l, r; + pixman_fixed_t t, b; + + _pixman_image_validate (image); + + height = image->bits.height; + bpp = PIXMAN_FORMAT_BPP (image->bits.format); + + x_off_fixed = pixman_int_to_fixed (x_off); + y_off_fixed = pixman_int_to_fixed (y_off); + + while (ntrap--) + { + t = traps->top.y + y_off_fixed; + if (t < 0) + t = 0; + t = pixman_sample_ceil_y (t, bpp); + + b = traps->bot.y + y_off_fixed; + if (pixman_fixed_to_int (b) >= height) + b = pixman_int_to_fixed (height) - 1; + b = pixman_sample_floor_y (b, bpp); + + if (b >= t) + { + /* initialize edge walkers */ + pixman_edge_init (&l, bpp, t, + traps->top.l + x_off_fixed, + traps->top.y + y_off_fixed, + traps->bot.l + x_off_fixed, + traps->bot.y + y_off_fixed); + + pixman_edge_init (&r, bpp, t, + traps->top.r + x_off_fixed, + traps->top.y + y_off_fixed, + traps->bot.r + x_off_fixed, + traps->bot.y + y_off_fixed); + + pixman_rasterize_edges (image, &l, &r, t, b); + } + + traps++; + } +} + +#if 0 +static void +dump_image (pixman_image_t *image, + const char * title) +{ + int i, j; + + if (!image->type == BITS) + printf ("%s is not a regular image\n", title); + + if (!image->bits.format == PIXMAN_a8) + printf ("%s is not an alpha mask\n", title); + + printf ("\n\n\n%s: \n", title); + + for (i = 0; i < image->bits.height; ++i) + { + uint8_t *line = + (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]); + + for (j = 0; j < image->bits.width; ++j) + printf ("%c", line[j] ? '#' : ' '); + + printf ("\n"); + } +} +#endif + +PIXMAN_EXPORT void +pixman_add_trapezoids (pixman_image_t * image, + int16_t x_off, + int y_off, + int ntraps, + const pixman_trapezoid_t *traps) +{ + int i; + +#if 0 + dump_image (image, "before"); +#endif + + for (i = 0; i < ntraps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (image, trap, x_off, y_off); + } + +#if 0 + dump_image (image, "after"); +#endif +} + +PIXMAN_EXPORT void +pixman_rasterize_trapezoid (pixman_image_t * image, + const pixman_trapezoid_t *trap, + int x_off, + int y_off) +{ + int bpp; + int height; + + pixman_fixed_t y_off_fixed; + pixman_edge_t l, r; + pixman_fixed_t t, b; + + return_if_fail (image->type == BITS); + + _pixman_image_validate (image); + + if (!pixman_trapezoid_valid (trap)) + return; + + height = image->bits.height; + bpp = PIXMAN_FORMAT_BPP (image->bits.format); + + y_off_fixed = pixman_int_to_fixed (y_off); + + t = trap->top + y_off_fixed; + if (t < 0) + t = 0; + t = pixman_sample_ceil_y (t, bpp); + + b = trap->bottom + y_off_fixed; + if (pixman_fixed_to_int (b) >= height) + b = pixman_int_to_fixed (height) - 1; + b = pixman_sample_floor_y (b, bpp); + + if (b >= t) + { + /* initialize edge walkers */ + pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off); + pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off); + + pixman_rasterize_edges (image, &l, &r, t, b); + } +} + +/* + * pixman_composite_trapezoids() + * + * All the trapezoids are conceptually rendered to an infinitely big image. + * The (0, 0) coordinates of this image are then aligned with the (x, y) + * coordinates of the source image, and then both images are aligned with + * the (x, y) coordinates of the destination. Then, in principle, compositing + * of these three images takes place across the entire destination. + * + * FIXME: However, there is currently a bug, where we restrict this compositing + * to the bounding box of the trapezoids. This is incorrect for operators such + * as SRC and IN where blank source pixels do have an effect on the destination. + */ +PIXMAN_EXPORT void +pixman_composite_trapezoids (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_traps, + const pixman_trapezoid_t * traps) +{ + int i; + + if (n_traps <= 0) + return; + + _pixman_image_validate (src); + _pixman_image_validate (dst); + + if (op == PIXMAN_OP_ADD && + (src->common.flags & FAST_PATH_IS_OPAQUE) && + (mask_format == dst->common.extended_format_code) && + !(dst->common.have_clip_region)) + { + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst); + } + } + else + { + pixman_image_t *tmp; + pixman_box32_t box; + + box.x1 = INT32_MAX; + box.y1 = INT32_MAX; + box.x2 = INT32_MIN; + box.y2 = INT32_MIN; + + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + int y1, y2; + + if (!pixman_trapezoid_valid (trap)) + continue; + + y1 = pixman_fixed_to_int (trap->top); + if (y1 < box.y1) + box.y1 = y1; + + y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom)); + if (y2 > box.y2) + box.y2 = y2; + +#define EXTEND_MIN(x) \ + if (pixman_fixed_to_int ((x)) < box.x1) \ + box.x1 = pixman_fixed_to_int ((x)); +#define EXTEND_MAX(x) \ + if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box.x2) \ + box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x))); + +#define EXTEND(x) \ + EXTEND_MIN(x); \ + EXTEND_MAX(x); + + EXTEND(trap->left.p1.x); + EXTEND(trap->left.p2.x); + EXTEND(trap->right.p1.x); + EXTEND(trap->right.p2.x); + } + + if (box.x1 >= box.x2 || box.y1 >= box.y2) + return; + + tmp = pixman_image_create_bits ( + mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1); + + for (i = 0; i < n_traps; ++i) + { + const pixman_trapezoid_t *trap = &(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1); + } + + pixman_image_composite (op, src, tmp, dst, + x_src + box.x1, y_src + box.y1, + 0, 0, + x_dst + box.x1, y_dst + box.y1, + box.x2 - box.x1, box.y2 - box.y1); + + pixman_image_unref (tmp); + } +} + +static int +greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b) +{ + if (a->y == b->y) + return a->x > b->x; + return a->y > b->y; +} + +/* + * Note that the definition of this function is a bit odd because + * of the X coordinate space (y increasing downwards). + */ +static int +clockwise (const pixman_point_fixed_t *ref, + const pixman_point_fixed_t *a, + const pixman_point_fixed_t *b) +{ + pixman_point_fixed_t ad, bd; + + ad.x = a->x - ref->x; + ad.y = a->y - ref->y; + bd.x = b->x - ref->x; + bd.y = b->y - ref->y; + + return ((pixman_fixed_32_32_t) bd.y * ad.x - + (pixman_fixed_32_32_t) ad.y * bd.x) < 0; +} + +static void +triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps) +{ + const pixman_point_fixed_t *top, *left, *right, *tmp; + + top = &tri->p1; + left = &tri->p2; + right = &tri->p3; + + if (greater_y (top, left)) + { + tmp = left; + left = top; + top = tmp; + } + + if (greater_y (top, right)) + { + tmp = right; + right = top; + top = tmp; + } + + if (clockwise (top, right, left)) + { + tmp = right; + right = left; + left = tmp; + } + + /* + * Two cases: + * + * + + + * / \ / \ + * / \ / \ + * / + + \ + * / -- -- \ + * / -- -- \ + * / --- --- \ + * +-- --+ + */ + + traps->top = top->y; + traps->left.p1 = *top; + traps->left.p2 = *left; + traps->right.p1 = *top; + traps->right.p2 = *right; + + if (right->y < left->y) + traps->bottom = right->y; + else + traps->bottom = left->y; + + traps++; + + *traps = *(traps - 1); + + if (right->y < left->y) + { + traps->top = right->y; + traps->bottom = left->y; + traps->right.p1 = *right; + traps->right.p2 = *left; + } + else + { + traps->top = left->y; + traps->bottom = right->y; + traps->left.p1 = *left; + traps->left.p2 = *right; + } +} + +static pixman_trapezoid_t * +convert_triangles (int n_tris, const pixman_triangle_t *tris) +{ + pixman_trapezoid_t *traps; + int i; + + if (n_tris <= 0) + return NULL; + + traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t)); + if (!traps) + return NULL; + + for (i = 0; i < n_tris; ++i) + triangle_to_trapezoids (&(tris[i]), traps + 2 * i); + + return traps; +} + +PIXMAN_EXPORT void +pixman_composite_triangles (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_tris, + const pixman_triangle_t * tris) +{ + pixman_trapezoid_t *traps; + + if ((traps = convert_triangles (n_tris, tris))) + { + pixman_composite_trapezoids (op, src, dst, mask_format, + x_src, y_src, x_dst, y_dst, + n_tris * 2, traps); + + free (traps); + } +} + +PIXMAN_EXPORT void +pixman_add_triangles (pixman_image_t *image, + int32_t x_off, + int32_t y_off, + int n_tris, + const pixman_triangle_t *tris) +{ + pixman_trapezoid_t *traps; + + if ((traps = convert_triangles (n_tris, tris))) + { + pixman_add_trapezoids (image, x_off, y_off, + n_tris * 2, traps); + + free (traps); + } +} diff --git a/pixman/test/composite-traps-test.c b/pixman/test/composite-traps-test.c index 9ea7293ce..fa6d8a988 100644 --- a/pixman/test/composite-traps-test.c +++ b/pixman/test/composite-traps-test.c @@ -1,257 +1,257 @@ -/* Based loosely on scaling-test */ - -#include -#include -#include -#include "utils.h" - -#define MAX_SRC_WIDTH 48 -#define MAX_SRC_HEIGHT 48 -#define MAX_DST_WIDTH 48 -#define MAX_DST_HEIGHT 48 -#define MAX_STRIDE 4 - -static pixman_format_code_t formats[] = -{ - PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_r5g6b5, PIXMAN_a1, PIXMAN_a4 -}; - -static pixman_format_code_t mask_formats[] = -{ - PIXMAN_a1, PIXMAN_a4, PIXMAN_a8, -}; - -static pixman_op_t operators[] = -{ - PIXMAN_OP_OVER, PIXMAN_OP_ADD, PIXMAN_OP_SRC, PIXMAN_OP_IN -}; - -#define RANDOM_ELT(array) \ - ((array)[lcg_rand_n(ARRAY_LENGTH((array)))]) - -static void -destroy_bits (pixman_image_t *image, void *data) -{ - fence_free (data); -} - -static pixman_fixed_t -random_fixed (int n) -{ - return lcg_rand_N (n << 16); -} - -/* - * Composite operation with pseudorandom images - */ -uint32_t -test_composite (int testnum, - int verbose) -{ - int i; - pixman_image_t * src_img; - pixman_image_t * dst_img; - pixman_region16_t clip; - int dst_width, dst_height; - int dst_stride; - int dst_x, dst_y; - int dst_bpp; - pixman_op_t op; - uint32_t * dst_bits; - uint32_t crc32; - pixman_format_code_t mask_format, dst_format; - pixman_trapezoid_t *traps; - int src_x, src_y; - int n_traps; - - static pixman_color_t colors[] = - { - { 0xffff, 0xffff, 0xffff, 0xffff }, - { 0x0000, 0x0000, 0x0000, 0x0000 }, - { 0xabcd, 0xabcd, 0x0000, 0xabcd }, - { 0x0000, 0x0000, 0x0000, 0xffff }, - { 0x0101, 0x0101, 0x0101, 0x0101 }, - { 0x7777, 0x6666, 0x5555, 0x9999 }, - }; - - FLOAT_REGS_CORRUPTION_DETECTOR_START (); - - lcg_srand (testnum); - - op = RANDOM_ELT (operators); - mask_format = RANDOM_ELT (mask_formats); - - /* Create source image */ - - if (lcg_rand_n (4) == 0) - { - src_img = pixman_image_create_solid_fill ( - &(colors[lcg_rand_n (ARRAY_LENGTH (colors))])); - - src_x = 10; - src_y = 234; - } - else - { - pixman_format_code_t src_format = RANDOM_ELT(formats); - int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8; - int src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; - int src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; - int src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; - uint32_t *bits; - - src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); - src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); - - src_stride = (src_stride + 3) & ~3; - - bits = (uint32_t *)make_random_bytes (src_stride * src_height); - - src_img = pixman_image_create_bits ( - src_format, src_width, src_height, bits, src_stride); - - pixman_image_set_destroy_function (src_img, destroy_bits, bits); - - if (lcg_rand_n (8) == 0) - { - pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; - - for (i = 0; i < n; i++) - { - clip_boxes[i].x1 = lcg_rand_n (src_width); - clip_boxes[i].y1 = lcg_rand_n (src_height); - clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1); - clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1); - - if (verbose) - { - printf ("source clip box: [%d,%d-%d,%d]\n", - clip_boxes[i].x1, clip_boxes[i].y1, - clip_boxes[i].x2, clip_boxes[i].y2); - } - } - - pixman_region_init_rects (&clip, clip_boxes, n); - pixman_image_set_clip_region (src_img, &clip); - pixman_image_set_source_clipping (src_img, 1); - pixman_region_fini (&clip); - } - - image_endian_swap (src_img); - } - - /* Create destination image */ - { - dst_format = RANDOM_ELT(formats); - dst_bpp = (PIXMAN_FORMAT_BPP (dst_format) + 7) / 8; - dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1; - dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1; - dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp; - dst_stride = (dst_stride + 3) & ~3; - - dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height); - - dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2); - dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2); - - dst_img = pixman_image_create_bits ( - dst_format, dst_width, dst_height, dst_bits, dst_stride); - - image_endian_swap (dst_img); - } - - /* Create traps */ - { - int i; - - n_traps = lcg_rand_n (25); - traps = fence_malloc (n_traps * sizeof (pixman_trapezoid_t)); - - for (i = 0; i < n_traps; ++i) - { - pixman_trapezoid_t *t = &(traps[i]); - - t->top = random_fixed (MAX_DST_HEIGHT) - MAX_DST_HEIGHT / 2; - t->bottom = t->top + random_fixed (MAX_DST_HEIGHT); - t->left.p1.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2; - t->left.p1.y = t->top - random_fixed (50); - t->left.p2.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2; - t->left.p2.y = t->bottom + random_fixed (50); - t->right.p1.x = t->left.p1.x + random_fixed (MAX_DST_WIDTH); - t->right.p1.y = t->top - random_fixed (50); - t->right.p2.x = t->left.p2.x + random_fixed (MAX_DST_WIDTH); - t->right.p2.y = t->bottom - random_fixed (50); - } - } - - if (lcg_rand_n (8) == 0) - { - pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; - for (i = 0; i < n; i++) - { - clip_boxes[i].x1 = lcg_rand_n (dst_width); - clip_boxes[i].y1 = lcg_rand_n (dst_height); - clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1); - clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1); - - if (verbose) - { - printf ("destination clip box: [%d,%d-%d,%d]\n", - clip_boxes[i].x1, clip_boxes[i].y1, - clip_boxes[i].x2, clip_boxes[i].y2); - } - } - pixman_region_init_rects (&clip, clip_boxes, n); - pixman_image_set_clip_region (dst_img, &clip); - pixman_region_fini (&clip); - } - - pixman_composite_trapezoids (op, src_img, dst_img, mask_format, - src_x, src_y, dst_x, dst_y, n_traps, traps); - - if (dst_format == PIXMAN_x8r8g8b8) - { - /* ignore unused part */ - for (i = 0; i < dst_stride * dst_height / 4; i++) - dst_bits[i] &= 0xFFFFFF; - } - - image_endian_swap (dst_img); - - if (verbose) - { - int j; - - for (i = 0; i < dst_height; i++) - { - for (j = 0; j < dst_stride; j++) - printf ("%02X ", *((uint8_t *)dst_bits + i * dst_stride + j)); - - printf ("\n"); - } - } - - crc32 = compute_crc32 (0, dst_bits, dst_stride * dst_height); - - fence_free (dst_bits); - - pixman_image_unref (src_img); - pixman_image_unref (dst_img); - fence_free (traps); - - FLOAT_REGS_CORRUPTION_DETECTOR_FINISH (); - return crc32; -} - -int -main (int argc, const char *argv[]) -{ - return fuzzer_test_main("composite traps", 40000, 0xA34F95C7, - test_composite, argc, argv); -} +/* Based loosely on scaling-test */ + +#include +#include +#include +#include "utils.h" + +#define MAX_SRC_WIDTH 48 +#define MAX_SRC_HEIGHT 48 +#define MAX_DST_WIDTH 48 +#define MAX_DST_HEIGHT 48 +#define MAX_STRIDE 4 + +static pixman_format_code_t formats[] = +{ + PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_r5g6b5, PIXMAN_a1, PIXMAN_a4 +}; + +static pixman_format_code_t mask_formats[] = +{ + PIXMAN_a1, PIXMAN_a4, PIXMAN_a8, +}; + +static pixman_op_t operators[] = +{ + PIXMAN_OP_OVER, PIXMAN_OP_ADD, PIXMAN_OP_SRC, PIXMAN_OP_IN +}; + +#define RANDOM_ELT(array) \ + ((array)[lcg_rand_n(ARRAY_LENGTH((array)))]) + +static void +destroy_bits (pixman_image_t *image, void *data) +{ + fence_free (data); +} + +static pixman_fixed_t +random_fixed (int n) +{ + return lcg_rand_N (n << 16); +} + +/* + * Composite operation with pseudorandom images + */ +uint32_t +test_composite (int testnum, + int verbose) +{ + int i; + pixman_image_t * src_img; + pixman_image_t * dst_img; + pixman_region16_t clip; + int dst_width, dst_height; + int dst_stride; + int dst_x, dst_y; + int dst_bpp; + pixman_op_t op; + uint32_t * dst_bits; + uint32_t crc32; + pixman_format_code_t mask_format, dst_format; + pixman_trapezoid_t *traps; + int src_x, src_y; + int n_traps; + + static pixman_color_t colors[] = + { + { 0xffff, 0xffff, 0xffff, 0xffff }, + { 0x0000, 0x0000, 0x0000, 0x0000 }, + { 0xabcd, 0xabcd, 0x0000, 0xabcd }, + { 0x0000, 0x0000, 0x0000, 0xffff }, + { 0x0101, 0x0101, 0x0101, 0x0101 }, + { 0x7777, 0x6666, 0x5555, 0x9999 }, + }; + + FLOAT_REGS_CORRUPTION_DETECTOR_START (); + + lcg_srand (testnum); + + op = RANDOM_ELT (operators); + mask_format = RANDOM_ELT (mask_formats); + + /* Create source image */ + + if (lcg_rand_n (4) == 0) + { + src_img = pixman_image_create_solid_fill ( + &(colors[lcg_rand_n (ARRAY_LENGTH (colors))])); + + src_x = 10; + src_y = 234; + } + else + { + pixman_format_code_t src_format = RANDOM_ELT(formats); + int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8; + int src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; + int src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; + int src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; + uint32_t *bits; + + src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); + + src_stride = (src_stride + 3) & ~3; + + bits = (uint32_t *)make_random_bytes (src_stride * src_height); + + src_img = pixman_image_create_bits ( + src_format, src_width, src_height, bits, src_stride); + + pixman_image_set_destroy_function (src_img, destroy_bits, bits); + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (src_width); + clip_boxes[i].y1 = lcg_rand_n (src_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("source clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (src_img, &clip); + pixman_image_set_source_clipping (src_img, 1); + pixman_region_fini (&clip); + } + + image_endian_swap (src_img); + } + + /* Create destination image */ + { + dst_format = RANDOM_ELT(formats); + dst_bpp = (PIXMAN_FORMAT_BPP (dst_format) + 7) / 8; + dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1; + dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1; + dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp; + dst_stride = (dst_stride + 3) & ~3; + + dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height); + + dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2); + + dst_img = pixman_image_create_bits ( + dst_format, dst_width, dst_height, dst_bits, dst_stride); + + image_endian_swap (dst_img); + } + + /* Create traps */ + { + int i; + + n_traps = lcg_rand_n (25); + traps = fence_malloc (n_traps * sizeof (pixman_trapezoid_t)); + + for (i = 0; i < n_traps; ++i) + { + pixman_trapezoid_t *t = &(traps[i]); + + t->top = random_fixed (MAX_DST_HEIGHT) - MAX_DST_HEIGHT / 2; + t->bottom = t->top + random_fixed (MAX_DST_HEIGHT); + t->left.p1.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2; + t->left.p1.y = t->top - random_fixed (50); + t->left.p2.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2; + t->left.p2.y = t->bottom + random_fixed (50); + t->right.p1.x = t->left.p1.x + random_fixed (MAX_DST_WIDTH); + t->right.p1.y = t->top - random_fixed (50); + t->right.p2.x = t->left.p2.x + random_fixed (MAX_DST_WIDTH); + t->right.p2.y = t->bottom - random_fixed (50); + } + } + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (dst_width); + clip_boxes[i].y1 = lcg_rand_n (dst_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("destination clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (dst_img, &clip); + pixman_region_fini (&clip); + } + + pixman_composite_trapezoids (op, src_img, dst_img, mask_format, + src_x, src_y, dst_x, dst_y, n_traps, traps); + + if (dst_format == PIXMAN_x8r8g8b8) + { + /* ignore unused part */ + for (i = 0; i < dst_stride * dst_height / 4; i++) + dst_bits[i] &= 0xFFFFFF; + } + + image_endian_swap (dst_img); + + if (verbose) + { + int j; + + for (i = 0; i < dst_height; i++) + { + for (j = 0; j < dst_stride; j++) + printf ("%02X ", *((uint8_t *)dst_bits + i * dst_stride + j)); + + printf ("\n"); + } + } + + crc32 = compute_crc32 (0, dst_bits, dst_stride * dst_height); + + fence_free (dst_bits); + + pixman_image_unref (src_img); + pixman_image_unref (dst_img); + fence_free (traps); + + FLOAT_REGS_CORRUPTION_DETECTOR_FINISH (); + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main("composite traps", 40000, 0xE3112106, + test_composite, argc, argv); +} -- cgit v1.2.3