aboutsummaryrefslogtreecommitdiff
path: root/pixman
diff options
context:
space:
mode:
Diffstat (limited to 'pixman')
-rw-r--r--pixman/configure.ac51
-rw-r--r--pixman/demos/tri-test.c96
-rw-r--r--pixman/pixman/Makefile.am1
-rw-r--r--pixman/pixman/pixman-arm-common.h863
-rw-r--r--pixman/pixman/pixman-arm-neon-asm-bilinear.S768
-rw-r--r--pixman/pixman/pixman-arm-neon-asm.S169
-rw-r--r--pixman/pixman/pixman-arm-neon.c963
-rw-r--r--pixman/pixman/pixman-trap.c1325
-rw-r--r--pixman/test/composite-traps-test.c514
9 files changed, 2897 insertions, 1853 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac
index db9a883b6..ef8162fc3 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
m4_define([pixman_major], 0)
m4_define([pixman_minor], 21)
-m4_define([pixman_micro], 7)
+m4_define([pixman_micro], 9)
m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
@@ -119,7 +119,7 @@ for w in -Werror -errwarn; do
[CFLAGS=$w],
[int main(int c, char **v) { (void)c; (void)v; return 0; }],
[WERROR=$w; yesno=yes], [yesno=no])
- AC_MSG_RESULT($_yesno)
+ AC_MSG_RESULT($yesno)
fi
done
@@ -192,35 +192,43 @@ dnl =========================================================================
dnl OpenMP for the test suite?
dnl
-# Check for OpenMP support (only supported by autoconf >=2.62)
+# Check for OpenMP support only when autoconf supports it (requires autoconf >=2.62)
OPENMP_CFLAGS=
m4_ifdef([AC_OPENMP], [AC_OPENMP])
-m4_define([openmp_test_program],[dnl
-#include <stdio.h>
+if test "x$enable_openmp" = "xyes" && test "x$ac_cv_prog_c_openmp" = "xunsupported" ; then
+ AC_MSG_WARN([OpenMP support requested but found unsupported])
+fi
-extern unsigned int lcg_seed;
-#pragma omp threadprivate(lcg_seed)
-unsigned int lcg_seed;
+dnl The link test may not fail unless -Wall -Werror is added,
+dnl so try to link only when OpenMP is supported.
+dnl Note: ac_cv_prog_c_openmp is not defined when --disable-openmp is used.
+if test "x$ac_cv_prog_c_openmp" != "xunsupported" && test "x$ac_cv_prog_c_openmp" != "x"; then
+ m4_define([openmp_test_program],[dnl
+ #include <stdio.h>
-unsigned function(unsigned a, unsigned b)
-{
+ extern unsigned int lcg_seed;
+ #pragma omp threadprivate(lcg_seed)
+ unsigned int lcg_seed;
+
+ unsigned function(unsigned a, unsigned b)
+ {
lcg_seed ^= b;
return ((a + b) ^ a ) + lcg_seed;
-}
+ }
-int main(int argc, char **argv)
-{
+ int main(int argc, char **argv)
+ {
int i;
int n1 = 0, n2 = argc;
unsigned checksum = 0;
int verbose = argv != NULL;
unsigned (*test_function)(unsigned, unsigned);
test_function = function;
- #pragma omp parallel for reduction(+:checksum) default(none) \
+ #pragma omp parallel for reduction(+:checksum) default(none) \
shared(n1, n2, test_function, verbose)
for (i = n1; i < n2; i++)
- {
+ {
unsigned crc = test_function (i, 0);
if (verbose)
printf ("%d: %08X\n", i, crc);
@@ -228,18 +236,17 @@ int main(int argc, char **argv)
}
printf("%u\n", checksum);
return 0;
-}
-])
+ }
+ ])
-PIXMAN_LINK_WITH_ENV(
+ PIXMAN_LINK_WITH_ENV(
[CFLAGS="$OPENMP_CFLAGS" LDFLAGS="$OPENMP_CFLAGS"],
[openmp_test_program],
[have_openmp=yes],
[have_openmp=no])
-if test "x$have_openmp" = "xyes"; then
- AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite])
-else
- OPENMP_CFLAGS=""
+ if test "x$have_openmp" = "xyes" ; then
+ AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite])
+ fi
fi
AC_SUBST(OPENMP_CFLAGS)
diff --git a/pixman/demos/tri-test.c b/pixman/demos/tri-test.c
index 23ea18cb3..a71869a6a 100644
--- a/pixman/demos/tri-test.c
+++ b/pixman/demos/tri-test.c
@@ -1,48 +1,48 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "../test/utils.h"
-#include "gtk-utils.h"
-
-int
-main (int argc, char **argv)
-{
-#define WIDTH 200
-#define HEIGHT 200
-
-#define POINT(x,y) \
- { pixman_double_to_fixed ((x)), pixman_double_to_fixed ((y)) }
-
- pixman_image_t *src_img, *dest_img;
- pixman_triangle_t tris[4] =
- {
- { POINT (100, 100), POINT (10, 50), POINT (110, 10) },
- { POINT (100, 100), POINT (150, 10), POINT (200, 50) },
- { POINT (100, 100), POINT (10, 170), POINT (90, 175) },
- { POINT (100, 100), POINT (170, 150), POINT (120, 190) },
- };
- pixman_color_t color = { 0x4444, 0x4444, 0xffff, 0xffff };
- uint32_t *bits = malloc (WIDTH * HEIGHT * 4);
- int i;
-
- for (i = 0; i < WIDTH * HEIGHT; ++i)
- bits[i] = (i / HEIGHT) * 0x01010000;
-
- src_img = pixman_image_create_solid_fill (&color);
- dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4);
-
- pixman_composite_triangles (PIXMAN_OP_ATOP_REVERSE,
- src_img,
- dest_img,
- PIXMAN_a8,
- 200, 200,
- 35, 5,
- ARRAY_LENGTH (tris), tris);
- show_image (dest_img);
-
- pixman_image_unref (src_img);
- pixman_image_unref (dest_img);
- free (bits);
-
- return 0;
-}
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "../test/utils.h"
+#include "gtk-utils.h"
+
+int
+main (int argc, char **argv)
+{
+#define WIDTH 200
+#define HEIGHT 200
+
+#define POINT(x,y) \
+ { pixman_double_to_fixed ((x)), pixman_double_to_fixed ((y)) }
+
+ pixman_image_t *src_img, *dest_img;
+ pixman_triangle_t tris[4] =
+ {
+ { POINT (100, 100), POINT (10, 50), POINT (110, 10) },
+ { POINT (100, 100), POINT (150, 10), POINT (200, 50) },
+ { POINT (100, 100), POINT (10, 170), POINT (90, 175) },
+ { POINT (100, 100), POINT (170, 150), POINT (120, 190) },
+ };
+ pixman_color_t color = { 0x4444, 0x4444, 0xffff, 0xffff };
+ uint32_t *bits = malloc (WIDTH * HEIGHT * 4);
+ int i;
+
+ for (i = 0; i < WIDTH * HEIGHT; ++i)
+ bits[i] = (i / HEIGHT) * 0x01010000;
+
+ src_img = pixman_image_create_solid_fill (&color);
+ dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4);
+
+ pixman_composite_triangles (PIXMAN_OP_ATOP_REVERSE,
+ src_img,
+ dest_img,
+ PIXMAN_a8,
+ 200, 200,
+ -5, 5,
+ ARRAY_LENGTH (tris), tris);
+ show_image (dest_img);
+
+ pixman_image_unref (src_img);
+ pixman_image_unref (dest_img);
+ free (bits);
+
+ return 0;
+}
diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am
index d016e9f25..be0826680 100644
--- a/pixman/pixman/Makefile.am
+++ b/pixman/pixman/Makefile.am
@@ -115,6 +115,7 @@ libpixman_arm_neon_la_SOURCES = \
pixman-arm-neon.c \
pixman-arm-common.h \
pixman-arm-neon-asm.S \
+ pixman-arm-neon-asm-bilinear.S \
pixman-arm-neon-asm.h
libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS)
libpixman_arm_neon_la_LIBADD = $(DEP_LIBS)
diff --git a/pixman/pixman/pixman-arm-common.h b/pixman/pixman/pixman-arm-common.h
index 2c435041a..6cd8be506 100644
--- a/pixman/pixman/pixman-arm-common.h
+++ b/pixman/pixman/pixman-arm-common.h
@@ -1,409 +1,454 @@
-/*
- * Copyright © 2010 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-#ifndef PIXMAN_ARM_COMMON_H
-#define PIXMAN_ARM_COMMON_H
-
-#include "pixman-fast-path.h"
-
-/* Define some macros which can expand into proxy functions between
- * ARM assembly optimized functions and the rest of pixman fast path API.
- *
- * All the low level ARM assembly functions have to use ARM EABI
- * calling convention and take up to 8 arguments:
- * width, height, dst, dst_stride, src, src_stride, mask, mask_stride
- *
- * The arguments are ordered with the most important coming first (the
- * first 4 arguments are passed to function in registers, the rest are
- * on stack). The last arguments are optional, for example if the
- * function is not using mask, then 'mask' and 'mask_stride' can be
- * omitted when doing a function call.
- *
- * Arguments 'src' and 'mask' contain either a pointer to the top left
- * pixel of the composited rectangle or a pixel color value depending
- * on the function type. In the case of just a color value (solid source
- * or mask), the corresponding stride argument is unused.
- */
-
-#define SKIP_ZERO_SRC 1
-#define SKIP_ZERO_MASK 2
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \
- src_type, src_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_##cputype (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride); \
- \
-static void \
-cputype##_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- int32_t dst_stride, src_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- \
- pixman_composite_##name##_asm_##cputype (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride); \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_##cputype (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- uint32_t src); \
- \
-static void \
-cputype##_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- int32_t dst_stride; \
- uint32_t src; \
- \
- src = _pixman_image_get_solid ( \
- imp, src_image, dst_image->bits.format); \
- \
- if ((flags & SKIP_ZERO_SRC) && src == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- \
- pixman_composite_##name##_asm_##cputype (width, height, \
- dst_line, dst_stride, \
- src); \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \
- mask_type, mask_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_##cputype (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- uint32_t src, \
- int32_t unused, \
- mask_type *mask, \
- int32_t mask_stride); \
- \
-static void \
-cputype##_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- mask_type *mask_line; \
- int32_t dst_stride, mask_stride; \
- uint32_t src; \
- \
- src = _pixman_image_get_solid ( \
- imp, src_image, dst_image->bits.format); \
- \
- if ((flags & SKIP_ZERO_SRC) && src == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
- mask_stride, mask_line, mask_cnt); \
- \
- pixman_composite_##name##_asm_##cputype (width, height, \
- dst_line, dst_stride, \
- src, 0, \
- mask_line, mask_stride); \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \
- src_type, src_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_##cputype (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride, \
- uint32_t mask); \
- \
-static void \
-cputype##_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- int32_t dst_stride, src_stride; \
- uint32_t mask; \
- \
- mask = _pixman_image_get_solid ( \
- imp, mask_image, dst_image->bits.format); \
- \
- if ((flags & SKIP_ZERO_MASK) && mask == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- \
- pixman_composite_##name##_asm_##cputype (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride, \
- mask); \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \
- src_type, src_cnt, \
- mask_type, mask_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_##cputype (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride, \
- mask_type *mask, \
- int32_t mask_stride); \
- \
-static void \
-cputype##_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- mask_type *mask_line; \
- int32_t dst_stride, src_stride, mask_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
- mask_stride, mask_line, mask_cnt); \
- \
- pixman_composite_##name##_asm_##cputype (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride, \
- mask_line, mask_stride); \
-}
-
-#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op, \
- src_type, dst_type) \
-void \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \
- int32_t w, \
- dst_type * dst, \
- const src_type * src, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x); \
- \
-static force_inline void \
-scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \
- const src_type * ps, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t zero_src) \
-{ \
- pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \
- vx, unit_x);\
-} \
- \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op, \
- src_type, dst_type, COVER) \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op, \
- src_type, dst_type, NONE) \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op, \
- src_type, dst_type, PAD)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
-
-#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \
- src_type, dst_type) \
-void \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \
- int32_t w, \
- dst_type * dst, \
- const src_type * src, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- const uint8_t * mask); \
- \
-static force_inline void \
-scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \
- dst_type * pd, \
- const src_type * ps, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t zero_src) \
-{ \
- if ((flags & SKIP_ZERO_SRC) && zero_src) \
- return; \
- pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \
- vx, unit_x, \
- mask); \
-} \
- \
-FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op,\
- src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
-FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op,\
- src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
-FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
- scaled_nearest_scanline_##cputype##_##name##_##op,\
- src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
-/*****************************************************************************/
-
-#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \
- src_type, dst_type) \
-void \
-pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
- dst_type * dst, \
- const src_type * top, \
- const src_type * bottom, \
- int wt, \
- int wb, \
- pixman_fixed_t x, \
- pixman_fixed_t ux, \
- int width); \
- \
-static force_inline void \
-scaled_bilinear_scanline_##cputype##_##name##_##op ( \
- dst_type * dst, \
- const uint32_t * mask, \
- const src_type * src_top, \
- const src_type * src_bottom, \
- int32_t w, \
- int wt, \
- int wb, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t zero_src) \
-{ \
- if ((flags & SKIP_ZERO_SRC) && zero_src) \
- return; \
- pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
- dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
-} \
- \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
- scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
- scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \
-FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
- scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint32_t, dst_type, PAD, FALSE, FALSE)
-
-#endif
+/*
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ */
+
+#ifndef PIXMAN_ARM_COMMON_H
+#define PIXMAN_ARM_COMMON_H
+
+#include "pixman-fast-path.h"
+
+/* Define some macros which can expand into proxy functions between
+ * ARM assembly optimized functions and the rest of pixman fast path API.
+ *
+ * All the low level ARM assembly functions have to use ARM EABI
+ * calling convention and take up to 8 arguments:
+ * width, height, dst, dst_stride, src, src_stride, mask, mask_stride
+ *
+ * The arguments are ordered with the most important coming first (the
+ * first 4 arguments are passed to function in registers, the rest are
+ * on stack). The last arguments are optional, for example if the
+ * function is not using mask, then 'mask' and 'mask_stride' can be
+ * omitted when doing a function call.
+ *
+ * Arguments 'src' and 'mask' contain either a pointer to the top left
+ * pixel of the composited rectangle or a pixel color value depending
+ * on the function type. In the case of just a color value (solid source
+ * or mask), the corresponding stride argument is unused.
+ */
+
+#define SKIP_ZERO_SRC 1
+#define SKIP_ZERO_MASK 2
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \
+ src_type, src_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ int32_t dst_stride, src_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ uint32_t src); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ int32_t dst_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid ( \
+ imp, src_image, dst_image->bits.format); \
+ \
+ if ((flags & SKIP_ZERO_SRC) && src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ uint32_t src, \
+ int32_t unused, \
+ mask_type *mask, \
+ int32_t mask_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ mask_type *mask_line; \
+ int32_t dst_stride, mask_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid ( \
+ imp, src_image, dst_image->bits.format); \
+ \
+ if ((flags & SKIP_ZERO_SRC) && src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src, 0, \
+ mask_line, mask_stride); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name, \
+ src_type, src_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride, \
+ uint32_t mask); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ int32_t dst_stride, src_stride; \
+ uint32_t mask; \
+ \
+ mask = _pixman_image_get_solid ( \
+ imp, mask_image, dst_image->bits.format); \
+ \
+ if ((flags & SKIP_ZERO_MASK) && mask == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride, \
+ mask); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \
+ src_type, src_cnt, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride, \
+ mask_type *mask, \
+ int32_t mask_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ mask_type *mask_line; \
+ int32_t dst_stride, src_stride, mask_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride, \
+ mask_line, mask_stride); \
+}
+
+#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op, \
+ src_type, dst_type) \
+void \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \
+ int32_t w, \
+ dst_type * dst, \
+ const src_type * src, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x); \
+ \
+static force_inline void \
+scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \
+ const src_type * ps, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \
+ vx, unit_x);\
+} \
+ \
+FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op, \
+ src_type, dst_type, COVER) \
+FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op, \
+ src_type, dst_type, NONE) \
+FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op, \
+ src_type, dst_type, PAD)
+
+/* Provide entries for the fast path table */
+#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
+
+#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \
+ src_type, dst_type) \
+void \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \
+ int32_t w, \
+ dst_type * dst, \
+ const src_type * src, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ const uint8_t * mask); \
+ \
+static force_inline void \
+scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \
+ dst_type * pd, \
+ const src_type * ps, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ if ((flags & SKIP_ZERO_SRC) && zero_src) \
+ return; \
+ pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \
+ vx, unit_x, \
+ mask); \
+} \
+ \
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op,\
+ src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op,\
+ src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op,\
+ src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+
+/* Provide entries for the fast path table */
+#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+/*****************************************************************************/
+
+#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \
+ src_type, dst_type) \
+void \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
+ dst_type * dst, \
+ const src_type * top, \
+ const src_type * bottom, \
+ int wt, \
+ int wb, \
+ pixman_fixed_t x, \
+ pixman_fixed_t ux, \
+ int width); \
+ \
+static force_inline void \
+scaled_bilinear_scanline_##cputype##_##name##_##op ( \
+ dst_type * dst, \
+ const uint32_t * mask, \
+ const src_type * src_top, \
+ const src_type * src_bottom, \
+ int32_t w, \
+ int wt, \
+ int wb, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ if ((flags & SKIP_ZERO_SRC) && zero_src) \
+ return; \
+ pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
+ dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
+} \
+ \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
+ src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
+ src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
+ src_type, uint32_t, dst_type, PAD, FALSE, FALSE)
+
+
+#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, cputype, name, op, \
+ src_type, dst_type) \
+void \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
+ dst_type * dst, \
+ const uint8_t * mask, \
+ const src_type * top, \
+ const src_type * bottom, \
+ int wt, \
+ int wb, \
+ pixman_fixed_t x, \
+ pixman_fixed_t ux, \
+ int width); \
+ \
+static force_inline void \
+scaled_bilinear_scanline_##cputype##_##name##_##op ( \
+ dst_type * dst, \
+ const uint8_t * mask, \
+ const src_type * src_top, \
+ const src_type * src_bottom, \
+ int32_t w, \
+ int wt, \
+ int wb, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ if ((flags & SKIP_ZERO_SRC) && zero_src) \
+ return; \
+ pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
+ dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \
+} \
+ \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
+ src_type, uint8_t, dst_type, COVER, TRUE, FALSE) \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
+ src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
+ src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+
+#endif
diff --git a/pixman/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman/pixman-arm-neon-asm-bilinear.S
new file mode 100644
index 000000000..9a4a1ffba
--- /dev/null
+++ b/pixman/pixman/pixman-arm-neon-asm-bilinear.S
@@ -0,0 +1,768 @@
+/*
+ * Copyright © 2011 SCore Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ * Author: Taekyun Kim (tkq.kim@samsung.com)
+ */
+
+/*
+ * This file contains scaled bilinear scanline functions implemented
+ * using Siarhei's older bilinear macro template.
+ *
+ * << General scanline function procedures >>
+ * 1. bilinear interpolate source pixels
+ * 2. load mask pixels
+ * 3. load destination pixels
+ * 4. duplicate mask to fill whole register
+ * 5. interleave source & destination pixels
+ * 6. apply mask to source pixels
+ * 7. combine source & destination pixels
+ * 8. deinterleave final result
+ * 9. store destination pixels
+ *
+ * All registers with a single number (e.g. src0, tmp0) are 64-bit registers.
+ * Registers with double numbers (e.g. src01, dst01) are 128-bit registers.
+ * All temp registers can be used freely outside the code block.
+ * Assumes that the symbols (register .req aliases) OUT and MASK are defined by the caller of these macro blocks.
+ *
+ * TODOs
+ * Support 0565 pixel format
+ * Optimization for two and last pixel cases
+ *
+ * Remarks
+ * There can be lots of pipeline stalls inside code block and between code blocks.
+ * Further optimizations will be done by new macro templates using head/tail_head/tail scheme.
+ */
+
+/* Prevent the stack from becoming executable for no reason... */
+#if defined(__linux__) && defined (__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+.text
+.fpu neon
+.arch armv7a
+.object_arch armv4
+.eabi_attribute 10, 0
+.eabi_attribute 12, 0
+.arm
+.altmacro
+
+#include "pixman-arm-neon-asm.h"
+
+/*
+ * Bilinear macros from pixman-arm-neon-asm.S
+ */
+
+/* Supplementary macro for setting function attributes */
+.macro pixman_asm_function fname
+ .func fname
+ .global fname
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+.endm
+
+/*
+ * Bilinear scaling support code which tries to provide pixel fetching, color
+ * format conversion, and interpolation as separate macros which can be used
+ * as the basic building blocks for constructing bilinear scanline functions.
+ */
+
+.macro bilinear_load_8888 reg1, reg2, tmp
+ mov TMP2, X, asr #16
+ add X, X, UX
+ add TMP1, TOP, TMP2, asl #2
+ add TMP2, BOTTOM, TMP2, asl #2
+ vld1.32 {reg1}, [TMP1]
+ vld1.32 {reg2}, [TMP2]
+.endm
+
+.macro bilinear_load_0565 reg1, reg2, tmp
+ mov TMP2, X, asr #16
+ add X, X, UX
+ add TMP1, TOP, TMP2, asl #1
+ add TMP2, BOTTOM, TMP2, asl #1
+ vld1.32 {reg2[0]}, [TMP1]
+ vld1.32 {reg2[1]}, [TMP2]
+ convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
+.endm
+
+.macro bilinear_load_and_vertical_interpolate_two_8888 \
+ acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
+
+ bilinear_load_8888 reg1, reg2, tmp1
+ vmull.u8 acc1, reg1, d28
+ vmlal.u8 acc1, reg2, d29
+ bilinear_load_8888 reg3, reg4, tmp2
+ vmull.u8 acc2, reg3, d28
+ vmlal.u8 acc2, reg4, d29
+.endm
+
+.macro bilinear_load_and_vertical_interpolate_four_8888 \
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+
+ bilinear_load_and_vertical_interpolate_two_8888 \
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
+ bilinear_load_and_vertical_interpolate_two_8888 \
+ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+.endm
+
+.macro bilinear_load_and_vertical_interpolate_two_0565 \
+ acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
+
+ mov TMP2, X, asr #16
+ add X, X, UX
+ mov TMP4, X, asr #16
+ add X, X, UX
+ add TMP1, TOP, TMP2, asl #1
+ add TMP2, BOTTOM, TMP2, asl #1
+ add TMP3, TOP, TMP4, asl #1
+ add TMP4, BOTTOM, TMP4, asl #1
+ vld1.32 {acc2lo[0]}, [TMP1]
+ vld1.32 {acc2hi[0]}, [TMP3]
+ vld1.32 {acc2lo[1]}, [TMP2]
+ vld1.32 {acc2hi[1]}, [TMP4]
+ convert_0565_to_x888 acc2, reg3, reg2, reg1
+ vzip.u8 reg1, reg3
+ vzip.u8 reg2, reg4
+ vzip.u8 reg3, reg4
+ vzip.u8 reg1, reg2
+ vmull.u8 acc1, reg1, d28
+ vmlal.u8 acc1, reg2, d29
+ vmull.u8 acc2, reg3, d28
+ vmlal.u8 acc2, reg4, d29
+.endm
+
+.macro bilinear_load_and_vertical_interpolate_four_0565 \
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+
+ mov TMP2, X, asr #16
+ add X, X, UX
+ mov TMP4, X, asr #16
+ add X, X, UX
+ add TMP1, TOP, TMP2, asl #1
+ add TMP2, BOTTOM, TMP2, asl #1
+ add TMP3, TOP, TMP4, asl #1
+ add TMP4, BOTTOM, TMP4, asl #1
+ vld1.32 {xacc2lo[0]}, [TMP1]
+ vld1.32 {xacc2hi[0]}, [TMP3]
+ vld1.32 {xacc2lo[1]}, [TMP2]
+ vld1.32 {xacc2hi[1]}, [TMP4]
+ convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
+ mov TMP2, X, asr #16
+ add X, X, UX
+ mov TMP4, X, asr #16
+ add X, X, UX
+ add TMP1, TOP, TMP2, asl #1
+ add TMP2, BOTTOM, TMP2, asl #1
+ add TMP3, TOP, TMP4, asl #1
+ add TMP4, BOTTOM, TMP4, asl #1
+ vld1.32 {yacc2lo[0]}, [TMP1]
+ vzip.u8 xreg1, xreg3
+ vld1.32 {yacc2hi[0]}, [TMP3]
+ vzip.u8 xreg2, xreg4
+ vld1.32 {yacc2lo[1]}, [TMP2]
+ vzip.u8 xreg3, xreg4
+ vld1.32 {yacc2hi[1]}, [TMP4]
+ vzip.u8 xreg1, xreg2
+ convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
+ vmull.u8 xacc1, xreg1, d28
+ vzip.u8 yreg1, yreg3
+ vmlal.u8 xacc1, xreg2, d29
+ vzip.u8 yreg2, yreg4
+ vmull.u8 xacc2, xreg3, d28
+ vzip.u8 yreg3, yreg4
+ vmlal.u8 xacc2, xreg4, d29
+ vzip.u8 yreg1, yreg2
+ vmull.u8 yacc1, yreg1, d28
+ vmlal.u8 yacc1, yreg2, d29
+ vmull.u8 yacc2, yreg3, d28
+ vmlal.u8 yacc2, yreg4, d29
+.endm
+
+.macro bilinear_store_8888 numpix, tmp1, tmp2
+.if numpix == 4
+ vst1.32 {d0, d1}, [OUT]!
+.elseif numpix == 2
+ vst1.32 {d0}, [OUT]!
+.elseif numpix == 1
+ vst1.32 {d0[0]}, [OUT, :32]!
+.else
+ .error bilinear_store_8888 numpix is unsupported
+.endif
+.endm
+
+.macro bilinear_store_0565 numpix, tmp1, tmp2
+ vuzp.u8 d0, d1
+ vuzp.u8 d2, d3
+ vuzp.u8 d1, d3
+ vuzp.u8 d0, d2
+ convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
+.if numpix == 4
+ vst1.16 {d2}, [OUT]!
+.elseif numpix == 2
+ vst1.32 {d2[0]}, [OUT]!
+.elseif numpix == 1
+ vst1.16 {d2[0]}, [OUT]!
+.else
+ .error bilinear_store_0565 numpix is unsupported
+.endif
+.endm
+
+
+/*
+ * Macros for loading mask pixels into register 'mask'.
+ * vdup must be done somewhere else.
+ */
+.macro bilinear_load_mask_x numpix, mask
+.endm
+
+.macro bilinear_load_mask_8 numpix, mask
+.if numpix == 4
+ vld1.32 {mask[0]}, [MASK]!
+.elseif numpix == 2
+ vld1.16 {mask[0]}, [MASK]!
+.elseif numpix == 1
+ vld1.8 {mask[0]}, [MASK]!
+.else
+ .error bilinear_load_mask_8 numpix is unsupported
+.endif
+.endm
+
+.macro bilinear_load_mask mask_fmt, numpix, mask
+ bilinear_load_mask_&mask_fmt numpix, mask
+.endm
+
+
+/*
+ * Macros for loading destination pixels into register 'dst0' and 'dst1'.
+ * Interleave should be done somewhere else.
+ */
+.macro bilinear_load_dst_0565_src numpix, dst0, dst1, dst01
+.endm
+
+.macro bilinear_load_dst_8888_src numpix, dst0, dst1, dst01
+.endm
+
+.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01
+.if numpix == 4
+ vld1.32 {dst0, dst1}, [OUT]
+.elseif numpix == 2
+ vld1.32 {dst0}, [OUT]
+.elseif numpix == 1
+ vld1.32 {dst0[0]}, [OUT]
+.else
+ .error bilinear_load_dst_8888 numpix is unsupported
+.endif
+.endm
+
+.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01
+ bilinear_load_dst_8888 numpix, dst0, dst1, dst01
+.endm
+
+.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01
+ bilinear_load_dst_8888 numpix, dst0, dst1, dst01
+.endm
+
+.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01
+ bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01
+.endm
+
+/*
+ * Macros for duplicating partially loaded mask to fill entire register.
+ * We will apply mask to interleaved source pixels, that is
+ * (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3)
+ * (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3)
+ * So, we need to duplicate loaded mask into whole register.
+ *
+ * For two pixel case
+ * (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1)
+ * (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1)
+ * We can do some optimizations for this including one pixel cases.
+ */
+.macro bilinear_duplicate_mask_x numpix, mask
+.endm
+
+.macro bilinear_duplicate_mask_8 numpix, mask
+.if numpix == 4
+ vdup.32 mask, mask[0]
+.elseif numpix == 2
+ vdup.16 mask, mask[0]
+.elseif numpix == 1
+ vdup.8 mask, mask[0]
+.else
+ .error bilinear_duplicate_mask_8 is unsupported
+.endif
+.endm
+
+.macro bilinear_duplicate_mask mask_fmt, numpix, mask
+ bilinear_duplicate_mask_&mask_fmt numpix, mask
+.endm
+
+/*
+ * Macros for interleaving src and dst pixels to rrrr gggg bbbb aaaa form.
+ * Interleave should be done when mask is enabled or operator is 'over'.
+ */
+.macro bilinear_interleave src0, src1, dst0, dst1
+ vuzp.8 src0, src1
+ vuzp.8 dst0, dst1
+ vuzp.8 src0, src1
+ vuzp.8 dst0, dst1
+.endm
+
+.macro bilinear_interleave_src_dst_x_src \
+ numpix, src0, src1, src01, dst0, dst1, dst01
+.endm
+
+.macro bilinear_interleave_src_dst_x_over \
+ numpix, src0, src1, src01, dst0, dst1, dst01
+
+ bilinear_interleave src0, src1, dst0, dst1
+.endm
+
+.macro bilinear_interleave_src_dst_x_add \
+ numpix, src0, src1, src01, dst0, dst1, dst01
+.endm
+
+.macro bilinear_interleave_src_dst_8_src \
+ numpix, src0, src1, src01, dst0, dst1, dst01
+
+ bilinear_interleave src0, src1, dst0, dst1
+.endm
+
+.macro bilinear_interleave_src_dst_8_over \
+ numpix, src0, src1, src01, dst0, dst1, dst01
+
+ bilinear_interleave src0, src1, dst0, dst1
+.endm
+
+.macro bilinear_interleave_src_dst_8_add \
+ numpix, src0, src1, src01, dst0, dst1, dst01
+
+ bilinear_interleave src0, src1, dst0, dst1
+.endm
+
+.macro bilinear_interleave_src_dst \
+ mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01
+
+ bilinear_interleave_src_dst_&mask_fmt&_&op \
+ numpix, src0, src1, src01, dst0, dst1, dst01
+.endm
+
+
+/*
+ * Macros for applying masks to src pixels. (see combine_mask_u() function)
+ * src, dst should be in interleaved form.
+ * mask register should be in form (m0, m1, m2, m3).
+ */
+.macro bilinear_apply_mask_to_src_x \
+ numpix, src0, src1, src01, mask, \
+ tmp01, tmp23, tmp45, tmp67
+.endm
+
+.macro bilinear_apply_mask_to_src_8 \
+ numpix, src0, src1, src01, mask, \
+ tmp01, tmp23, tmp45, tmp67
+
+ vmull.u8 tmp01, src0, mask
+ vmull.u8 tmp23, src1, mask
+ /* bubbles */
+ vrshr.u16 tmp45, tmp01, #8
+ vrshr.u16 tmp67, tmp23, #8
+ /* bubbles */
+ vraddhn.u16 src0, tmp45, tmp01
+ vraddhn.u16 src1, tmp67, tmp23
+.endm
+
+.macro bilinear_apply_mask_to_src \
+ mask_fmt, numpix, src0, src1, src01, mask, \
+ tmp01, tmp23, tmp45, tmp67
+
+ bilinear_apply_mask_to_src_&mask_fmt \
+ numpix, src0, src1, src01, mask, \
+ tmp01, tmp23, tmp45, tmp67
+.endm
+
+
+/*
+ * Macros for combining src and destination pixels.
+ * Interleave or not is depending on operator 'op'.
+ */
+.macro bilinear_combine_src \
+ numpix, src0, src1, src01, dst0, dst1, dst01, \
+ tmp01, tmp23, tmp45, tmp67, tmp8
+.endm
+
+.macro bilinear_combine_over \
+ numpix, src0, src1, src01, dst0, dst1, dst01, \
+ tmp01, tmp23, tmp45, tmp67, tmp8
+
+ vdup.32 tmp8, src1[1]
+ /* bubbles */
+ vmvn.8 tmp8, tmp8
+ /* bubbles */
+ vmull.u8 tmp01, dst0, tmp8
+ /* bubbles */
+ vmull.u8 tmp23, dst1, tmp8
+ /* bubbles */
+ vrshr.u16 tmp45, tmp01, #8
+ vrshr.u16 tmp67, tmp23, #8
+ /* bubbles */
+ vraddhn.u16 dst0, tmp45, tmp01
+ vraddhn.u16 dst1, tmp67, tmp23
+ /* bubbles */
+ vqadd.u8 src01, dst01, src01
+.endm
+
+.macro bilinear_combine_add \
+ numpix, src0, src1, src01, dst0, dst1, dst01, \
+ tmp01, tmp23, tmp45, tmp67, tmp8
+
+ vqadd.u8 src01, dst01, src01
+.endm
+
+.macro bilinear_combine \
+ op, numpix, src0, src1, src01, dst0, dst1, dst01, \
+ tmp01, tmp23, tmp45, tmp67, tmp8
+
+ bilinear_combine_&op \
+ numpix, src0, src1, src01, dst0, dst1, dst01, \
+ tmp01, tmp23, tmp45, tmp67, tmp8
+.endm
+
+/*
+ * Macros for final deinterleaving of destination pixels if needed.
+ */
+.macro bilinear_deinterleave numpix, dst0, dst1, dst01
+ vuzp.8 dst0, dst1
+ /* bubbles */
+ vuzp.8 dst0, dst1
+.endm
+
+.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01
+.endm
+
+.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01
+ bilinear_deinterleave numpix, dst0, dst1, dst01
+.endm
+
+.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01
+.endm
+
+.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01
+ bilinear_deinterleave numpix, dst0, dst1, dst01
+.endm
+
+.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01
+ bilinear_deinterleave numpix, dst0, dst1, dst01
+.endm
+
+.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01
+ bilinear_deinterleave numpix, dst0, dst1, dst01
+.endm
+
+.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01
+ bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01
+.endm
+
+
+.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op
+ bilinear_load_&src_fmt d0, d1, d2
+ bilinear_load_mask mask_fmt, 1, d4
+ bilinear_load_dst dst_fmt, op, 1, d18, d19, q9
+ vmull.u8 q1, d0, d28
+ vmlal.u8 q1, d1, d29
+ vshr.u16 d30, d24, #8
+ /* 4 cycles bubble */
+ vshll.u16 q0, d2, #8
+ vmlsl.u16 q0, d2, d30
+ vmlal.u16 q0, d3, d30
+ /* 5 cycles bubble */
+ bilinear_duplicate_mask mask_fmt, 1, d4
+ vshrn.u32 d0, q0, #16
+ /* 3 cycles bubble */
+ vmovn.u16 d0, q0
+ /* 1 cycle bubble */
+ bilinear_interleave_src_dst \
+ mask_fmt, op, 1, d0, d1, q0, d18, d19, q9
+ bilinear_apply_mask_to_src \
+ mask_fmt, 1, d0, d1, q0, d4, \
+ q3, q8, q10, q11
+ bilinear_combine \
+ op, 1, d0, d1, q0, d18, d19, q9, \
+ q3, q8, q10, q11, d5
+ bilinear_deinterleave_dst mask_fmt, op, 1, d0, d1, q0
+ bilinear_store_&dst_fmt 1, q2, q3
+.endm
+
+.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op
+ bilinear_load_and_vertical_interpolate_two_&src_fmt \
+ q1, q11, d0, d1, d20, d21, d22, d23
+ bilinear_load_mask mask_fmt, 2, d4
+ bilinear_load_dst dst_fmt, op, 2, d18, d19, q9
+ vshr.u16 q15, q12, #8
+ vadd.u16 q12, q12, q13
+ vshll.u16 q0, d2, #8
+ vmlsl.u16 q0, d2, d30
+ vmlal.u16 q0, d3, d30
+ vshll.u16 q10, d22, #8
+ vmlsl.u16 q10, d22, d31
+ vmlal.u16 q10, d23, d31
+ vshrn.u32 d30, q0, #16
+ vshrn.u32 d31, q10, #16
+ bilinear_duplicate_mask mask_fmt, 2, d4
+ vmovn.u16 d0, q15
+ bilinear_interleave_src_dst \
+ mask_fmt, op, 2, d0, d1, q0, d18, d19, q9
+ bilinear_apply_mask_to_src \
+ mask_fmt, 2, d0, d1, q0, d4, \
+ q3, q8, q10, q11
+ bilinear_combine \
+ op, 2, d0, d1, q0, d18, d19, q9, \
+ q3, q8, q10, q11, d5
+ bilinear_deinterleave_dst mask_fmt, op, 2, d0, d1, q0
+ bilinear_store_&dst_fmt 2, q2, q3
+.endm
+
+.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op
+ bilinear_load_and_vertical_interpolate_four_&src_fmt \
+ q1, q11, d0, d1, d20, d21, d22, d23 \
+ q3, q9, d4, d5, d16, d17, d18, d19
+ pld [TMP1, PF_OFFS]
+ vshr.u16 q15, q12, #8
+ vadd.u16 q12, q12, q13
+ vshll.u16 q0, d2, #8
+ vmlsl.u16 q0, d2, d30
+ vmlal.u16 q0, d3, d30
+ vshll.u16 q10, d22, #8
+ vmlsl.u16 q10, d22, d31
+ vmlal.u16 q10, d23, d31
+ vshr.u16 q15, q12, #8
+ vshll.u16 q2, d6, #8
+ vmlsl.u16 q2, d6, d30
+ vmlal.u16 q2, d7, d30
+ vshll.u16 q8, d18, #8
+ bilinear_load_mask mask_fmt, 4, d30
+ bilinear_load_dst dst_fmt, op, 4, d2, d3, q1
+ pld [TMP2, PF_OFFS]
+ vmlsl.u16 q8, d18, d31
+ vmlal.u16 q8, d19, d31
+ vadd.u16 q12, q12, q13
+ vshrn.u32 d0, q0, #16
+ vshrn.u32 d1, q10, #16
+ vshrn.u32 d4, q2, #16
+ vshrn.u32 d5, q8, #16
+ bilinear_duplicate_mask mask_fmt, 4, d30
+ vmovn.u16 d0, q0
+ vmovn.u16 d1, q2
+ bilinear_interleave_src_dst \
+ mask_fmt, op, 4, d0, d1, q0, d2, d3, q1
+ bilinear_apply_mask_to_src \
+ mask_fmt, 4, d0, d1, q0, d30, \
+ q3, q8, q9, q10
+ bilinear_combine \
+ op, 4, d0, d1, q0, d2, d3, q1, \
+ q3, q8, q9, q10, d22
+ bilinear_deinterleave_dst mask_fmt, op, 4, d0, d1, q0
+ bilinear_store_&dst_fmt 4, q2, q3
+.endm
+
+.macro generate_bilinear_scanline_func_src_dst \
+ fname, src_fmt, dst_fmt, op, \
+ bpp_shift, prefetch_distance
+
+pixman_asm_function fname
+ OUT .req r0
+ TOP .req r1
+ BOTTOM .req r2
+ WT .req r3
+ WB .req r4
+ X .req r5
+ UX .req r6
+ WIDTH .req ip
+ TMP1 .req r3
+ TMP2 .req r4
+ PF_OFFS .req r7
+ TMP3 .req r8
+ TMP4 .req r9
+
+ mov ip, sp
+ push {r4, r5, r6, r7, r8, r9}
+ mov PF_OFFS, #prefetch_distance
+ ldmia ip, {WB, X, UX, WIDTH}
+ mul PF_OFFS, PF_OFFS, UX
+
+ cmp WIDTH, #0
+ ble 3f
+
+ vdup.u16 q12, X
+ vdup.u16 q13, UX
+ vdup.u8 d28, WT
+ vdup.u8 d29, WB
+ vadd.u16 d25, d25, d26
+ vadd.u16 q13, q13, q13
+
+ subs WIDTH, WIDTH, #4
+ blt 1f
+ mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
+0:
+ bilinear_interpolate_four_pixels src_fmt, x, dst_fmt, op
+ subs WIDTH, WIDTH, #4
+ bge 0b
+1:
+ tst WIDTH, #2
+ beq 2f
+ bilinear_interpolate_two_pixels src_fmt, x, dst_fmt, op
+2:
+ tst WIDTH, #1
+ beq 3f
+ bilinear_interpolate_last_pixel src_fmt, x, dst_fmt, op
+3:
+ pop {r4, r5, r6, r7, r8, r9}
+ bx lr
+
+ .unreq OUT
+ .unreq TOP
+ .unreq BOTTOM
+ .unreq WT
+ .unreq WB
+ .unreq X
+ .unreq UX
+ .unreq WIDTH
+ .unreq TMP1
+ .unreq TMP2
+ .unreq PF_OFFS
+ .unreq TMP3
+ .unreq TMP4
+.endfunc
+
+.endm
+
+.macro generate_bilinear_scanline_func_src_a8_dst \
+ fname, src_fmt, dst_fmt, op, \
+ bpp_shift, prefetch_distance
+
+pixman_asm_function fname
+ OUT .req r0
+ MASK .req r1
+ TOP .req r2
+ BOTTOM .req r3
+ WT .req r4
+ WB .req r5
+ X .req r6
+ UX .req r7
+ WIDTH .req ip
+ TMP1 .req r4
+ TMP2 .req r5
+ PF_OFFS .req r8
+ TMP3 .req r9
+ TMP4 .req r10
+
+ mov ip, sp
+ push {r4, r5, r6, r7, r8, r9, r10, ip}
+ mov PF_OFFS, #prefetch_distance
+ ldmia ip, {WT, WB, X, UX, WIDTH}
+ mul PF_OFFS, PF_OFFS, UX
+
+ cmp WIDTH, #0
+ ble 3f
+
+ vdup.u16 q12, X
+ vdup.u16 q13, UX
+ vdup.u8 d28, WT
+ vdup.u8 d29, WB
+ vadd.u16 d25, d25, d26
+ vadd.u16 q13, q13, q13
+
+ subs WIDTH, WIDTH, #4
+ blt 1f
+ mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
+0:
+ bilinear_interpolate_four_pixels src_fmt, 8, dst_fmt, op
+ subs WIDTH, WIDTH, #4
+ bge 0b
+1:
+ tst WIDTH, #2
+ beq 2f
+ bilinear_interpolate_two_pixels src_fmt, 8, dst_fmt, op
+2:
+ tst WIDTH, #1
+ beq 3f
+ bilinear_interpolate_last_pixel src_fmt, 8, dst_fmt, op
+3:
+ pop {r4, r5, r6, r7, r8, r9, r10, ip}
+ bx lr
+
+ .unreq OUT
+ .unreq TOP
+ .unreq BOTTOM
+ .unreq WT
+ .unreq WB
+ .unreq X
+ .unreq UX
+ .unreq WIDTH
+ .unreq MASK
+ .unreq TMP1
+ .unreq TMP2
+ .unreq PF_OFFS
+ .unreq TMP3
+ .unreq TMP4
+.endfunc
+
+.endm
+
+generate_bilinear_scanline_func_src_dst \
+ pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \
+ 8888, 8888, over, 2, 28
+
+generate_bilinear_scanline_func_src_dst \
+ pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \
+ 8888, 8888, add, 2, 28
+
+generate_bilinear_scanline_func_src_a8_dst \
+ pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \
+ 8888, 8888, src, 2, 28
+
+generate_bilinear_scanline_func_src_a8_dst \
+ pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \
+ 8888, 0565, src, 2, 28
+
+generate_bilinear_scanline_func_src_a8_dst \
+ pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \
+ 0565, 8888, src, 1, 28
+
+generate_bilinear_scanline_func_src_a8_dst \
+ pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \
+ 0565, 0565, src, 1, 28
+
+generate_bilinear_scanline_func_src_a8_dst \
+ pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \
+ 8888, 8888, over, 2, 28
+
+generate_bilinear_scanline_func_src_a8_dst \
+ pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \
+ 8888, 8888, add, 2, 28
diff --git a/pixman/pixman/pixman-arm-neon-asm.S b/pixman/pixman/pixman-arm-neon-asm.S
index 5e9fda34e..833f18c2e 100644
--- a/pixman/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman/pixman-arm-neon-asm.S
@@ -1426,6 +1426,175 @@ generate_composite_function \
/******************************************************************************/
+.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_head
+ /*
+ * 'combine_mask_ca' replacement
+ *
+ * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A]
+ * mask in {d24, d25, d26} [B, G, R]
+ * output: updated src in {d0, d1, d2 } [B, G, R]
+ * updated mask in {d24, d25, d26} [B, G, R]
+ */
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d25, d9
+ vmull.u8 q6, d26, d10
+ vmull.u8 q9, d11, d25
+ vmull.u8 q12, d11, d24
+ vmull.u8 q13, d11, d26
+ vrshr.u16 q8, q0, #8
+ vrshr.u16 q10, q1, #8
+ vrshr.u16 q11, q6, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q10
+ vraddhn.u16 d2, q6, q11
+ vrshr.u16 q11, q12, #8
+ vrshr.u16 q8, q9, #8
+ vrshr.u16 q6, q13, #8
+ vraddhn.u16 d24, q12, q11
+ vraddhn.u16 d25, q9, q8
+ /*
+ * convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+ * and put data into d16 - blue, d17 - green, d18 - red
+ */
+ vshrn.u16 d17, q2, #3
+ vshrn.u16 d18, q2, #8
+ vraddhn.u16 d26, q13, q6
+ vsli.u16 q2, q2, #5
+ vsri.u8 d18, d18, #5
+ vsri.u8 d17, d17, #6
+ /*
+ * 'combine_over_ca' replacement
+ *
+ * output: updated dest in d16 - blue, d17 - green, d18 - red
+ */
+ vmvn.8 q12, q12
+ vshrn.u16 d16, q2, #2
+ vmvn.8 d26, d26
+ vmull.u8 q6, d16, d24
+ vmull.u8 q7, d17, d25
+ vmull.u8 q11, d18, d26
+.endm
+
+.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail
+ /* ... continue 'combine_over_ca' replacement */
+ vrshr.u16 q10, q6, #8
+ vrshr.u16 q14, q7, #8
+ vrshr.u16 q15, q11, #8
+ vraddhn.u16 d16, q10, q6
+ vraddhn.u16 d17, q14, q7
+ vraddhn.u16 d18, q15, q11
+ vqadd.u8 q8, q0, q8
+ vqadd.u8 d18, d2, d18
+ /*
+ * convert the results in d16, d17, d18 to r5g6b5 and store
+ * them into {d28, d29}
+ */
+ vshll.u8 q14, d18, #8
+ vshll.u8 q10, d17, #8
+ vshll.u8 q15, d16, #8
+ vsri.u16 q14, q10, #5
+ vsri.u16 q14, q15, #11
+.endm
+
+.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head
+ fetch_mask_pixblock
+ vrshr.u16 q10, q6, #8
+ vrshr.u16 q14, q7, #8
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vrshr.u16 q15, q11, #8
+ vraddhn.u16 d16, q10, q6
+ vraddhn.u16 d17, q14, q7
+ vraddhn.u16 d22, q15, q11
+ /* process_pixblock_head, interleaved with the previous block's tail: d2 is read (vqadd) before q1 is rewritten, and q12 is read before d25 is rewritten */
+ /*
+ * 'combine_mask_ca' replacement
+ *
+ * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A]
+ * mask in {d24, d25, d26} [B, G, R]
+ * output: updated src in {d0, d1, d2 } [B, G, R]
+ * updated mask in {d24, d25, d26} [B, G, R]
+ */
+ vmull.u8 q6, d26, d10
+ vqadd.u8 q8, q0, q8
+ vmull.u8 q0, d24, d8
+ vqadd.u8 d22, d2, d22
+ vmull.u8 q1, d25, d9
+ /*
+ * convert the result in d16, d17, d22 to r5g6b5 and store
+ * it into {d28, d29}
+ */
+ vshll.u8 q14, d22, #8
+ vshll.u8 q10, d17, #8
+ vshll.u8 q15, d16, #8
+ vmull.u8 q9, d11, d25
+ vsri.u16 q14, q10, #5
+ vmull.u8 q12, d11, d24
+ vmull.u8 q13, d11, d26
+ vsri.u16 q14, q15, #11
+ cache_preload 8, 8
+ vrshr.u16 q8, q0, #8
+ vrshr.u16 q10, q1, #8
+ vrshr.u16 q11, q6, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q10
+ vraddhn.u16 d2, q6, q11
+ vrshr.u16 q11, q12, #8
+ vrshr.u16 q8, q9, #8
+ vrshr.u16 q6, q13, #8
+ vraddhn.u16 d24, q12, q11
+ vraddhn.u16 d25, q9, q8
+ /*
+ * convert 8 r5g6b5 pixel data from {d4, d5} to planar
+ * 8-bit format and put data into d16 - blue, d17 - green,
+ * d18 - red
+ */
+ vshrn.u16 d17, q2, #3
+ vshrn.u16 d18, q2, #8
+ vraddhn.u16 d26, q13, q6
+ vsli.u16 q2, q2, #5
+ vsri.u8 d18, d18, #5
+ vsri.u8 d17, d17, #6
+ /*
+ * 'combine_over_ca' replacement
+ *
+ * output: updated dest in d16 - blue, d17 - green, d18 - red
+ */
+ vmvn.8 q12, q12
+ vshrn.u16 d16, q2, #2
+ vmvn.8 d26, d26
+ vmull.u8 q7, d17, d25
+ vmull.u8 q6, d16, d24
+ vmull.u8 q11, d18, d26
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_n_8888_0565_ca_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8888_0565_ca_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_0565_ca_asm_neon, 0, 32, 16, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8888_0565_ca_init, \
+ pixman_composite_over_n_8888_0565_ca_cleanup, \
+ pixman_composite_over_n_8888_0565_ca_process_pixblock_head, \
+ pixman_composite_over_n_8888_0565_ca_process_pixblock_tail, \
+ pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head
+
+/******************************************************************************/
+
.macro pixman_composite_in_n_8_process_pixblock_head
/* expecting source data in {d0, d1, d2, d3} */
/* and destination data in {d4, d5, d6, d7} */
diff --git a/pixman/pixman/pixman-arm-neon.c b/pixman/pixman/pixman-arm-neon.c
index 5213a2007..e5127a65f 100644
--- a/pixman/pixman/pixman-arm-neon.c
+++ b/pixman/pixman/pixman-arm-neon.c
@@ -1,460 +1,503 @@
-/*
- * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of ARM Ltd not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission. ARM Ltd makes no
- * representations about the suitability of this software for any purpose. It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author: Ian Rickards (ian.rickards@arm.com)
- * Author: Jonathan Morton (jonathan.morton@movial.com)
- * Author: Markku Vire (markku.vire@movial.com)
- *
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <string.h>
-#include "pixman-private.h"
-#include "pixman-arm-common.h"
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
- uint16_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
- uint8_t, 3, uint8_t, 3)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
- uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
- uint16_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
- uint8_t, 3, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
- uint8_t, 3, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
- uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
- uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
- uint8_t, 1, uint16_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
- uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
- uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
- uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
- uint8_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
- uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
- uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
- uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
- uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
- uint8_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
- uint32_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
- uint16_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
- uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
- uint8_t, 1, uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
- uint16_t, 1, uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
- uint32_t, 1, uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
- uint32_t, 1, uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
- uint32_t, 1, uint8_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
- uint32_t, 1, uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
- uint32_t, 1, uint8_t, 1, uint16_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
- uint16_t, 1, uint8_t, 1, uint16_t, 1)
-
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
- uint32_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
- uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
- uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
- uint16_t, uint32_t)
-
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
- OVER, uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
- OVER, uint16_t, uint16_t)
-
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
- uint32_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
- uint32_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
- uint16_t, uint32_t)
-PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
- uint16_t, uint16_t)
-
-void
-pixman_composite_src_n_8_asm_neon (int32_t w,
- int32_t h,
- uint8_t *dst,
- int32_t dst_stride,
- uint8_t src);
-
-void
-pixman_composite_src_n_0565_asm_neon (int32_t w,
- int32_t h,
- uint16_t *dst,
- int32_t dst_stride,
- uint16_t src);
-
-void
-pixman_composite_src_n_8888_asm_neon (int32_t w,
- int32_t h,
- uint32_t *dst,
- int32_t dst_stride,
- uint32_t src);
-
-static pixman_bool_t
-pixman_fill_neon (uint32_t *bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t _xor)
-{
- /* stride is always multiple of 32bit units in pixman */
- uint32_t byte_stride = stride * sizeof(uint32_t);
-
- switch (bpp)
- {
- case 8:
- pixman_composite_src_n_8_asm_neon (
- width,
- height,
- (uint8_t *)(((char *) bits) + y * byte_stride + x),
- byte_stride,
- _xor & 0xff);
- return TRUE;
- case 16:
- pixman_composite_src_n_0565_asm_neon (
- width,
- height,
- (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
- byte_stride / 2,
- _xor & 0xffff);
- return TRUE;
- case 32:
- pixman_composite_src_n_8888_asm_neon (
- width,
- height,
- (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
- byte_stride / 4,
- _xor);
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-static pixman_bool_t
-pixman_blt_neon (uint32_t *src_bits,
- uint32_t *dst_bits,
- int src_stride,
- int dst_stride,
- int src_bpp,
- int dst_bpp,
- int src_x,
- int src_y,
- int dst_x,
- int dst_y,
- int width,
- int height)
-{
- if (src_bpp != dst_bpp)
- return FALSE;
-
- switch (src_bpp)
- {
- case 16:
- pixman_composite_src_0565_0565_asm_neon (
- width, height,
- (uint16_t *)(((char *) dst_bits) +
- dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
- (uint16_t *)(((char *) src_bits) +
- src_y * src_stride * 4 + src_x * 2), src_stride * 2);
- return TRUE;
- case 32:
- pixman_composite_src_8888_8888_asm_neon (
- width, height,
- (uint32_t *)(((char *) dst_bits) +
- dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
- (uint32_t *)(((char *) src_bits) +
- src_y * src_stride * 4 + src_x * 4), src_stride);
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-static const pixman_fast_path_t arm_neon_fast_paths[] =
-{
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev),
- PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888),
- PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888),
- PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888),
- PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
- PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565),
- PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8),
- PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
- PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
- PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565),
- PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
- PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
- /* Note: NONE repeat is not supported yet */
- SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
- SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
-
- PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
-
- PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
- PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
-
- SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
- SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
- SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
-
- SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
- SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
-
- SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
- SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
-
- { PIXMAN_OP_NONE },
-};
-
-static pixman_bool_t
-arm_neon_blt (pixman_implementation_t *imp,
- uint32_t * src_bits,
- uint32_t * dst_bits,
- int src_stride,
- int dst_stride,
- int src_bpp,
- int dst_bpp,
- int src_x,
- int src_y,
- int dst_x,
- int dst_y,
- int width,
- int height)
-{
- if (!pixman_blt_neon (
- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height))
-
- {
- return _pixman_implementation_blt (
- imp->delegate,
- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height);
- }
-
- return TRUE;
-}
-
-static pixman_bool_t
-arm_neon_fill (pixman_implementation_t *imp,
- uint32_t * bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
- return TRUE;
-
- return _pixman_implementation_fill (
- imp->delegate, bits, stride, bpp, x, y, width, height, xor);
-}
-
-#define BIND_COMBINE_U(name) \
-void \
-pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \
- const uint32_t *dst, \
- const uint32_t *src, \
- const uint32_t *mask); \
- \
-void \
-pixman_composite_scanline_##name##_asm_neon (int32_t w, \
- const uint32_t *dst, \
- const uint32_t *src); \
- \
-static void \
-neon_combine_##name##_u (pixman_implementation_t *imp, \
- pixman_op_t op, \
- uint32_t * dest, \
- const uint32_t * src, \
- const uint32_t * mask, \
- int width) \
-{ \
- if (mask) \
- pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \
- src, mask); \
- else \
- pixman_composite_scanline_##name##_asm_neon (width, dest, src); \
-}
-
-BIND_COMBINE_U (over)
-BIND_COMBINE_U (add)
-BIND_COMBINE_U (out_reverse)
-
-pixman_implementation_t *
-_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback)
-{
- pixman_implementation_t *imp =
- _pixman_implementation_create (fallback, arm_neon_fast_paths);
-
- imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
- imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
- imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
-
- imp->blt = arm_neon_blt;
- imp->fill = arm_neon_fill;
-
- return imp;
-}
+/*
+ * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of ARM Ltd not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. ARM Ltd makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Ian Rickards (ian.rickards@arm.com)
+ * Author: Jonathan Morton (jonathan.morton@movial.com)
+ * Author: Markku Vire (markku.vire@movial.com)
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include "pixman-private.h"
+#include "pixman-arm-common.h"
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
+ uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
+ uint8_t, 3, uint8_t, 3)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
+ uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
+ uint16_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
+ uint8_t, 3, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
+ uint8_t, 3, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
+ uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
+ uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
+ uint8_t, 1, uint16_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
+ uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
+ uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
+ uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
+ uint8_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
+ uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
+ uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca,
+ uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
+ uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
+ uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
+ uint8_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
+ uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
+ uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+ uint8_t, 1, uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
+ uint16_t, 1, uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
+ uint32_t, 1, uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
+ uint32_t, 1, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
+ uint32_t, 1, uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
+ uint32_t, 1, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
+ uint32_t, 1, uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
+ uint16_t, 1, uint8_t, 1, uint16_t, 1)
+
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
+ uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
+ uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
+ uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
+ uint16_t, uint32_t)
+
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
+ OVER, uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+ OVER, uint16_t, uint16_t)
+
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
+ uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
+ uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
+ uint16_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
+ uint16_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER,
+ uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD,
+ uint32_t, uint32_t)
+
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC,
+ uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC,
+ uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC,
+ uint16_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC,
+ uint16_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER,
+ uint32_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD,
+ uint32_t, uint32_t)
+
+void
+pixman_composite_src_n_8_asm_neon (int32_t w,
+ int32_t h,
+ uint8_t *dst,
+ int32_t dst_stride,
+ uint8_t src);
+
+void
+pixman_composite_src_n_0565_asm_neon (int32_t w,
+ int32_t h,
+ uint16_t *dst,
+ int32_t dst_stride,
+ uint16_t src);
+
+void
+pixman_composite_src_n_8888_asm_neon (int32_t w,
+ int32_t h,
+ uint32_t *dst,
+ int32_t dst_stride,
+ uint32_t src);
+
+static pixman_bool_t
+pixman_fill_neon (uint32_t *bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t _xor)
+{
+ /* stride is always multiple of 32bit units in pixman */
+ uint32_t byte_stride = stride * sizeof(uint32_t);
+
+ switch (bpp)
+ {
+ case 8:
+ pixman_composite_src_n_8_asm_neon (
+ width,
+ height,
+ (uint8_t *)(((char *) bits) + y * byte_stride + x),
+ byte_stride,
+ _xor & 0xff);
+ return TRUE;
+ case 16:
+ pixman_composite_src_n_0565_asm_neon (
+ width,
+ height,
+ (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
+ byte_stride / 2,
+ _xor & 0xffff);
+ return TRUE;
+ case 32:
+ pixman_composite_src_n_8888_asm_neon (
+ width,
+ height,
+ (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
+ byte_stride / 4,
+ _xor);
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static pixman_bool_t
+pixman_blt_neon (uint32_t *src_bits,
+ uint32_t *dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height)
+{
+ if (src_bpp != dst_bpp)
+ return FALSE;
+
+ switch (src_bpp)
+ {
+ case 16:
+ pixman_composite_src_0565_0565_asm_neon (
+ width, height,
+ (uint16_t *)(((char *) dst_bits) +
+ dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
+ (uint16_t *)(((char *) src_bits) +
+ src_y * src_stride * 4 + src_x * 2), src_stride * 2);
+ return TRUE;
+ case 32:
+ pixman_composite_src_8888_8888_asm_neon (
+ width, height,
+ (uint32_t *)(((char *) dst_bits) +
+ dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
+ (uint32_t *)(((char *) src_bits) +
+ src_y * src_stride * 4 + src_x * 4), src_stride);
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static const pixman_fast_path_t arm_neon_fast_paths[] =
+{
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888),
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev),
+ PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, neon_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, neon_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565),
+ PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565),
+ PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565),
+ PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
+ PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565),
+ PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565),
+
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
+
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
+
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
+
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
+ /* Note: NONE repeat is not supported yet */
+ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
+
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
+
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
+
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
+
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+
+ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
+
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+
+ SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565),
+
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565),
+
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
+
+ { PIXMAN_OP_NONE },
+};
+
+/*
+ * blt hook of the NEON implementation.
+ *
+ * The request is first handed to pixman_blt_neon().  When that routine
+ * reports success TRUE is returned; otherwise the very same arguments are
+ * forwarded to the delegate implementation and its result is returned.
+ */
+static pixman_bool_t
+arm_neon_blt (pixman_implementation_t *imp,
+              uint32_t *               src_bits,
+              uint32_t *               dst_bits,
+              int                      src_stride,
+              int                      dst_stride,
+              int                      src_bpp,
+              int                      dst_bpp,
+              int                      src_x,
+              int                      src_y,
+              int                      dst_x,
+              int                      dst_y,
+              int                      width,
+              int                      height)
+{
+    if (pixman_blt_neon (src_bits, dst_bits,
+                         src_stride, dst_stride,
+                         src_bpp, dst_bpp,
+                         src_x, src_y,
+                         dst_x, dst_y,
+                         width, height))
+    {
+        return TRUE;
+    }
+
+    /* The NEON routine declined the request: let the delegate handle it. */
+    return _pixman_implementation_blt (imp->delegate,
+                                       src_bits, dst_bits,
+                                       src_stride, dst_stride,
+                                       src_bpp, dst_bpp,
+                                       src_x, src_y,
+                                       dst_x, dst_y,
+                                       width, height);
+}
+
+/*
+ * fill hook of the NEON implementation.
+ *
+ * pixman_fill_neon() is tried first; if it reports failure the identical
+ * request is passed on to the delegate implementation and the delegate's
+ * result is returned.
+ */
+static pixman_bool_t
+arm_neon_fill (pixman_implementation_t *imp,
+               uint32_t *               bits,
+               int                      stride,
+               int                      bpp,
+               int                      x,
+               int                      y,
+               int                      width,
+               int                      height,
+               uint32_t                 xor)
+{
+    if (!pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
+    {
+        return _pixman_implementation_fill (imp->delegate,
+                                            bits, stride, bpp,
+                                            x, y, width, height, xor);
+    }
+
+    return TRUE;
+}
+
+/*
+ * BIND_COMBINE_U (name)
+ *
+ * Declares the two external scanline routines for operator `name'
+ * (pixman_composite_scanline_<name>_mask_asm_neon and
+ * pixman_composite_scanline_<name>_asm_neon) and defines a static wrapper
+ * neon_combine_<name>_u().  The wrapper calls the "_mask" routine when a
+ * mask pointer is supplied and the unmasked routine otherwise; its `imp'
+ * and `op' arguments are not used.
+ */
+#define BIND_COMBINE_U(name) \
+void \
+pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \
+ const uint32_t *dst, \
+ const uint32_t *src, \
+ const uint32_t *mask); \
+ \
+void \
+pixman_composite_scanline_##name##_asm_neon (int32_t w, \
+ const uint32_t *dst, \
+ const uint32_t *src); \
+ \
+static void \
+neon_combine_##name##_u (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ uint32_t * dest, \
+ const uint32_t * src, \
+ const uint32_t * mask, \
+ int width) \
+{ \
+ if (mask) \
+ pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \
+ src, mask); \
+ else \
+ pixman_composite_scanline_##name##_asm_neon (width, dest, src); \
+}
+
+/* Instantiate the wrappers for the OVER, ADD and OUT_REVERSE scanline routines. */
+BIND_COMBINE_U (over)
+BIND_COMBINE_U (add)
+BIND_COMBINE_U (out_reverse)
+
+/*
+ * Build the NEON implementation on top of `fallback'.
+ *
+ * The new implementation is created with the arm_neon_fast_paths table, then
+ * its blt/fill hooks and the 32 bit combiners for OVER, ADD and OUT_REVERSE
+ * are pointed at the NEON versions defined in this file.
+ */
+pixman_implementation_t *
+_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback)
+{
+    pixman_implementation_t *neon;
+
+    neon = _pixman_implementation_create (fallback, arm_neon_fast_paths);
+
+    /* whole-rectangle helpers */
+    neon->blt = arm_neon_blt;
+    neon->fill = arm_neon_fill;
+
+    /* per-scanline combiners */
+    neon->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
+    neon->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
+    neon->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
+
+    return neon;
+}
diff --git a/pixman/pixman/pixman-trap.c b/pixman/pixman/pixman-trap.c
index 6e85acd49..c99f03ecc 100644
--- a/pixman/pixman/pixman-trap.c
+++ b/pixman/pixman/pixman-trap.c
@@ -1,657 +1,668 @@
-/*
- * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
- * Copyright © 2004 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission. Keith Packard makes no
- * representations about the suitability of this software for any purpose. It
- * is provided "as is" without express or implied warranty.
- *
- * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "pixman-private.h"
-
-/*
- * Compute the smallest value greater than or equal to y which is on a
- * grid row.
- */
-
-PIXMAN_EXPORT pixman_fixed_t
-pixman_sample_ceil_y (pixman_fixed_t y, int n)
-{
- pixman_fixed_t f = pixman_fixed_frac (y);
- pixman_fixed_t i = pixman_fixed_floor (y);
-
- f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
- Y_FRAC_FIRST (n);
-
- if (f > Y_FRAC_LAST (n))
- {
- if (pixman_fixed_to_int (i) == 0x7fff)
- {
- f = 0xffff; /* saturate */
- }
- else
- {
- f = Y_FRAC_FIRST (n);
- i += pixman_fixed_1;
- }
- }
- return (i | f);
-}
-
-/*
- * Compute the largest value strictly less than y which is on a
- * grid row.
- */
-PIXMAN_EXPORT pixman_fixed_t
-pixman_sample_floor_y (pixman_fixed_t y,
- int n)
-{
- pixman_fixed_t f = pixman_fixed_frac (y);
- pixman_fixed_t i = pixman_fixed_floor (y);
-
- f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
- Y_FRAC_FIRST (n);
-
- if (f < Y_FRAC_FIRST (n))
- {
- if (pixman_fixed_to_int (i) == 0x8000)
- {
- f = 0; /* saturate */
- }
- else
- {
- f = Y_FRAC_LAST (n);
- i -= pixman_fixed_1;
- }
- }
- return (i | f);
-}
-
-/*
- * Step an edge by any amount (including negative values)
- */
-PIXMAN_EXPORT void
-pixman_edge_step (pixman_edge_t *e,
- int n)
-{
- pixman_fixed_48_16_t ne;
-
- e->x += n * e->stepx;
-
- ne = e->e + n * (pixman_fixed_48_16_t) e->dx;
-
- if (n >= 0)
- {
- if (ne > 0)
- {
- int nx = (ne + e->dy - 1) / e->dy;
- e->e = ne - nx * (pixman_fixed_48_16_t) e->dy;
- e->x += nx * e->signdx;
- }
- }
- else
- {
- if (ne <= -e->dy)
- {
- int nx = (-ne) / e->dy;
- e->e = ne + nx * (pixman_fixed_48_16_t) e->dy;
- e->x -= nx * e->signdx;
- }
- }
-}
-
-/*
- * A private routine to initialize the multi-step
- * elements of an edge structure
- */
-static void
-_pixman_edge_multi_init (pixman_edge_t * e,
- int n,
- pixman_fixed_t *stepx_p,
- pixman_fixed_t *dx_p)
-{
- pixman_fixed_t stepx;
- pixman_fixed_48_16_t ne;
-
- ne = n * (pixman_fixed_48_16_t) e->dx;
- stepx = n * e->stepx;
-
- if (ne > 0)
- {
- int nx = ne / e->dy;
- ne -= nx * e->dy;
- stepx += nx * e->signdx;
- }
-
- *dx_p = ne;
- *stepx_p = stepx;
-}
-
-/*
- * Initialize one edge structure given the line endpoints and a
- * starting y value
- */
-PIXMAN_EXPORT void
-pixman_edge_init (pixman_edge_t *e,
- int n,
- pixman_fixed_t y_start,
- pixman_fixed_t x_top,
- pixman_fixed_t y_top,
- pixman_fixed_t x_bot,
- pixman_fixed_t y_bot)
-{
- pixman_fixed_t dx, dy;
-
- e->x = x_top;
- e->e = 0;
- dx = x_bot - x_top;
- dy = y_bot - y_top;
- e->dy = dy;
- e->dx = 0;
-
- if (dy)
- {
- if (dx >= 0)
- {
- e->signdx = 1;
- e->stepx = dx / dy;
- e->dx = dx % dy;
- e->e = -dy;
- }
- else
- {
- e->signdx = -1;
- e->stepx = -(-dx / dy);
- e->dx = -dx % dy;
- e->e = 0;
- }
-
- _pixman_edge_multi_init (e, STEP_Y_SMALL (n),
- &e->stepx_small, &e->dx_small);
-
- _pixman_edge_multi_init (e, STEP_Y_BIG (n),
- &e->stepx_big, &e->dx_big);
- }
- pixman_edge_step (e, y_start - y_top);
-}
-
-/*
- * Initialize one edge structure given a line, starting y value
- * and a pixel offset for the line
- */
-PIXMAN_EXPORT void
-pixman_line_fixed_edge_init (pixman_edge_t * e,
- int n,
- pixman_fixed_t y,
- const pixman_line_fixed_t *line,
- int x_off,
- int y_off)
-{
- pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off);
- pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off);
- const pixman_point_fixed_t *top, *bot;
-
- if (line->p1.y <= line->p2.y)
- {
- top = &line->p1;
- bot = &line->p2;
- }
- else
- {
- top = &line->p2;
- bot = &line->p1;
- }
-
- pixman_edge_init (e, n, y,
- top->x + x_off_fixed,
- top->y + y_off_fixed,
- bot->x + x_off_fixed,
- bot->y + y_off_fixed);
-}
-
-PIXMAN_EXPORT void
-pixman_add_traps (pixman_image_t * image,
- int16_t x_off,
- int16_t y_off,
- int ntrap,
- pixman_trap_t * traps)
-{
- int bpp;
- int height;
-
- pixman_fixed_t x_off_fixed;
- pixman_fixed_t y_off_fixed;
- pixman_edge_t l, r;
- pixman_fixed_t t, b;
-
- _pixman_image_validate (image);
-
- height = image->bits.height;
- bpp = PIXMAN_FORMAT_BPP (image->bits.format);
-
- x_off_fixed = pixman_int_to_fixed (x_off);
- y_off_fixed = pixman_int_to_fixed (y_off);
-
- while (ntrap--)
- {
- t = traps->top.y + y_off_fixed;
- if (t < 0)
- t = 0;
- t = pixman_sample_ceil_y (t, bpp);
-
- b = traps->bot.y + y_off_fixed;
- if (pixman_fixed_to_int (b) >= height)
- b = pixman_int_to_fixed (height) - 1;
- b = pixman_sample_floor_y (b, bpp);
-
- if (b >= t)
- {
- /* initialize edge walkers */
- pixman_edge_init (&l, bpp, t,
- traps->top.l + x_off_fixed,
- traps->top.y + y_off_fixed,
- traps->bot.l + x_off_fixed,
- traps->bot.y + y_off_fixed);
-
- pixman_edge_init (&r, bpp, t,
- traps->top.r + x_off_fixed,
- traps->top.y + y_off_fixed,
- traps->bot.r + x_off_fixed,
- traps->bot.y + y_off_fixed);
-
- pixman_rasterize_edges (image, &l, &r, t, b);
- }
-
- traps++;
- }
-}
-
-#if 0
-static void
-dump_image (pixman_image_t *image,
- const char * title)
-{
- int i, j;
-
- if (!image->type == BITS)
- printf ("%s is not a regular image\n", title);
-
- if (!image->bits.format == PIXMAN_a8)
- printf ("%s is not an alpha mask\n", title);
-
- printf ("\n\n\n%s: \n", title);
-
- for (i = 0; i < image->bits.height; ++i)
- {
- uint8_t *line =
- (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]);
-
- for (j = 0; j < image->bits.width; ++j)
- printf ("%c", line[j] ? '#' : ' ');
-
- printf ("\n");
- }
-}
-#endif
-
-PIXMAN_EXPORT void
-pixman_add_trapezoids (pixman_image_t * image,
- int16_t x_off,
- int y_off,
- int ntraps,
- const pixman_trapezoid_t *traps)
-{
- int i;
-
-#if 0
- dump_image (image, "before");
-#endif
-
- for (i = 0; i < ntraps; ++i)
- {
- const pixman_trapezoid_t *trap = &(traps[i]);
-
- if (!pixman_trapezoid_valid (trap))
- continue;
-
- pixman_rasterize_trapezoid (image, trap, x_off, y_off);
- }
-
-#if 0
- dump_image (image, "after");
-#endif
-}
-
-PIXMAN_EXPORT void
-pixman_rasterize_trapezoid (pixman_image_t * image,
- const pixman_trapezoid_t *trap,
- int x_off,
- int y_off)
-{
- int bpp;
- int height;
-
- pixman_fixed_t y_off_fixed;
- pixman_edge_t l, r;
- pixman_fixed_t t, b;
-
- return_if_fail (image->type == BITS);
-
- _pixman_image_validate (image);
-
- if (!pixman_trapezoid_valid (trap))
- return;
-
- height = image->bits.height;
- bpp = PIXMAN_FORMAT_BPP (image->bits.format);
-
- y_off_fixed = pixman_int_to_fixed (y_off);
-
- t = trap->top + y_off_fixed;
- if (t < 0)
- t = 0;
- t = pixman_sample_ceil_y (t, bpp);
-
- b = trap->bottom + y_off_fixed;
- if (pixman_fixed_to_int (b) >= height)
- b = pixman_int_to_fixed (height) - 1;
- b = pixman_sample_floor_y (b, bpp);
-
- if (b >= t)
- {
- /* initialize edge walkers */
- pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off);
- pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off);
-
- pixman_rasterize_edges (image, &l, &r, t, b);
- }
-}
-
-PIXMAN_EXPORT void
-pixman_composite_trapezoids (pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * dst,
- pixman_format_code_t mask_format,
- int x_src,
- int y_src,
- int x_dst,
- int y_dst,
- int n_traps,
- const pixman_trapezoid_t * traps)
-{
- int i;
-
- if (n_traps <= 0)
- return;
-
- _pixman_image_validate (src);
- _pixman_image_validate (dst);
-
- if (op == PIXMAN_OP_ADD &&
- (src->common.flags & FAST_PATH_IS_OPAQUE) &&
- (mask_format == dst->common.extended_format_code) &&
- !(dst->common.have_clip_region))
- {
- for (i = 0; i < n_traps; ++i)
- {
- const pixman_trapezoid_t *trap = &(traps[i]);
-
- if (!pixman_trapezoid_valid (trap))
- continue;
-
- pixman_rasterize_trapezoid (dst, trap, 0, 0);
- }
- }
- else
- {
- pixman_image_t *tmp;
- pixman_box32_t box;
- int x_rel, y_rel;
-
- box.x1 = INT32_MAX;
- box.y1 = INT32_MAX;
- box.x2 = INT32_MIN;
- box.y2 = INT32_MIN;
-
- for (i = 0; i < n_traps; ++i)
- {
- const pixman_trapezoid_t *trap = &(traps[i]);
- int y1, y2;
-
- if (!pixman_trapezoid_valid (trap))
- continue;
-
- y1 = pixman_fixed_to_int (trap->top);
- if (y1 < box.y1)
- box.y1 = y1;
-
- y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom));
- if (y2 > box.y2)
- box.y2 = y2;
-
-#define EXTEND_MIN(x) \
- if (pixman_fixed_to_int ((x)) < box.x1) \
- box.x1 = pixman_fixed_to_int ((x));
-#define EXTEND_MAX(x) \
- if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box.x2) \
- box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x)));
-
-#define EXTEND(x) \
- EXTEND_MIN(x); \
- EXTEND_MAX(x);
-
- EXTEND(trap->left.p1.x);
- EXTEND(trap->left.p2.x);
- EXTEND(trap->right.p1.x);
- EXTEND(trap->right.p2.x);
- }
-
- if (box.x1 >= box.x2 || box.y1 >= box.y2)
- return;
-
- tmp = pixman_image_create_bits (
- mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1);
-
- for (i = 0; i < n_traps; ++i)
- {
- const pixman_trapezoid_t *trap = &(traps[i]);
-
- if (!pixman_trapezoid_valid (trap))
- continue;
-
- pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1);
- }
-
- x_rel = box.x1 + x_src - x_dst;
- y_rel = box.y1 + y_src - y_dst;
-
- pixman_image_composite (op, src, tmp, dst,
- x_rel, y_rel, 0, 0, box.x1, box.y1,
- box.x2 - box.x1, box.y2 - box.y1);
-
- pixman_image_unref (tmp);
- }
-}
-
-static int
-greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b)
-{
- if (a->y == b->y)
- return a->x > b->x;
- return a->y > b->y;
-}
-
-/*
- * Note that the definition of this function is a bit odd because
- * of the X coordinate space (y increasing downwards).
- */
-static int
-clockwise (const pixman_point_fixed_t *ref,
- const pixman_point_fixed_t *a,
- const pixman_point_fixed_t *b)
-{
- pixman_point_fixed_t ad, bd;
-
- ad.x = a->x - ref->x;
- ad.y = a->y - ref->y;
- bd.x = b->x - ref->x;
- bd.y = b->y - ref->y;
-
- return ((pixman_fixed_32_32_t) bd.y * ad.x -
- (pixman_fixed_32_32_t) ad.y * bd.x) < 0;
-}
-
-static void
-triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps)
-{
- const pixman_point_fixed_t *top, *left, *right, *tmp;
-
- top = &tri->p1;
- left = &tri->p2;
- right = &tri->p3;
-
- if (greater_y (top, left))
- {
- tmp = left;
- left = top;
- top = tmp;
- }
-
- if (greater_y (top, right))
- {
- tmp = right;
- right = top;
- top = tmp;
- }
-
- if (clockwise (top, right, left))
- {
- tmp = right;
- right = left;
- left = tmp;
- }
-
- /*
- * Two cases:
- *
- * + +
- * / \ / \
- * / \ / \
- * / + + \
- * / -- -- \
- * / -- -- \
- * / --- --- \
- * +-- --+
- */
-
- traps->top = top->y;
- traps->left.p1 = *top;
- traps->left.p2 = *left;
- traps->right.p1 = *top;
- traps->right.p2 = *right;
-
- if (right->y < left->y)
- traps->bottom = right->y;
- else
- traps->bottom = left->y;
-
- traps++;
-
- *traps = *(traps - 1);
-
- if (right->y < left->y)
- {
- traps->top = right->y;
- traps->bottom = left->y;
- traps->right.p1 = *right;
- traps->right.p2 = *left;
- }
- else
- {
- traps->top = left->y;
- traps->bottom = right->y;
- traps->left.p1 = *left;
- traps->left.p2 = *right;
- }
-}
-
-static pixman_trapezoid_t *
-convert_triangles (int n_tris, const pixman_triangle_t *tris)
-{
- pixman_trapezoid_t *traps;
- int i;
-
- if (n_tris <= 0)
- return NULL;
-
- traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t));
- if (!traps)
- return NULL;
-
- for (i = 0; i < n_tris; ++i)
- triangle_to_trapezoids (&(tris[i]), traps + 2 * i);
-
- return traps;
-}
-
-PIXMAN_EXPORT void
-pixman_composite_triangles (pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * dst,
- pixman_format_code_t mask_format,
- int x_src,
- int y_src,
- int x_dst,
- int y_dst,
- int n_tris,
- const pixman_triangle_t * tris)
-{
- pixman_trapezoid_t *traps;
-
- if ((traps = convert_triangles (n_tris, tris)))
- {
- pixman_composite_trapezoids (op, src, dst, mask_format,
- x_src, y_src, x_dst, y_dst,
- n_tris * 2, traps);
-
- free (traps);
- }
-}
-
-PIXMAN_EXPORT void
-pixman_add_triangles (pixman_image_t *image,
- int32_t x_off,
- int32_t y_off,
- int n_tris,
- const pixman_triangle_t *tris)
-{
- pixman_trapezoid_t *traps;
-
- if ((traps = convert_triangles (n_tris, tris)))
- {
- pixman_add_trapezoids (image, x_off, y_off,
- n_tris * 2, traps);
-
- free (traps);
- }
-}
+/*
+ * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
+ * Copyright © 2004 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Keith Packard makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "pixman-private.h"
+
+/*
+ * Compute the smallest value greater than or equal to y which is on a
+ * grid row.
+ *
+ * y is a pixman_fixed_t coordinate.  n selects the sample grid through the
+ * Y_FRAC_FIRST, Y_FRAC_LAST and STEP_Y_SMALL macros; the callers in this
+ * file pass the image's bits-per-pixel.  The result is the (possibly
+ * incremented) integer part OR-ed with the adjusted fraction.
+ */
+
+PIXMAN_EXPORT pixman_fixed_t
+pixman_sample_ceil_y (pixman_fixed_t y, int n)
+{
+ pixman_fixed_t f = pixman_fixed_frac (y);
+ pixman_fixed_t i = pixman_fixed_floor (y);
+
+ /* Round the fraction up to "first row + k * step".  DIV is defined
+  * elsewhere; presumably a flooring division -- TODO confirm. */
+ f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
+ Y_FRAC_FIRST (n);
+
+ /* Rounded past the last row of this pixel: continue on the first row of
+  * the next pixel, unless the integer part is already 0x7fff. */
+ if (f > Y_FRAC_LAST (n))
+ {
+ if (pixman_fixed_to_int (i) == 0x7fff)
+ {
+ f = 0xffff; /* saturate */
+ }
+ else
+ {
+ f = Y_FRAC_FIRST (n);
+ i += pixman_fixed_1;
+ }
+ }
+ return (i | f);
+}
+
+/*
+ * Compute the largest value strictly less than y which is on a
+ * grid row.
+ *
+ * y is a pixman_fixed_t coordinate.  n selects the sample grid through the
+ * Y_FRAC_FIRST, Y_FRAC_LAST and STEP_Y_SMALL macros; the callers in this
+ * file pass the image's bits-per-pixel.  The result is the (possibly
+ * decremented) integer part OR-ed with the adjusted fraction.
+ */
+PIXMAN_EXPORT pixman_fixed_t
+pixman_sample_floor_y (pixman_fixed_t y,
+ int n)
+{
+ pixman_fixed_t f = pixman_fixed_frac (y);
+ pixman_fixed_t i = pixman_fixed_floor (y);
+
+ /* Subtracting pixman_fixed_e first makes the result strictly less than y.
+  * DIV is defined elsewhere; presumably a flooring division -- TODO confirm. */
+ f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
+ Y_FRAC_FIRST (n);
+
+ /* Rounded below the first row of this pixel: continue on the last row of
+  * the previous pixel, unless the saturation test matches. */
+ if (f < Y_FRAC_FIRST (n))
+ {
+ /* NOTE(review): if pixman_fixed_to_int() yields a signed int, the most
+  * negative integer part is -0x8000 and this comparison with 0x8000 may
+  * never be true -- confirm against the macro definition. */
+ if (pixman_fixed_to_int (i) == 0x8000)
+ {
+ f = 0; /* saturate */
+ }
+ else
+ {
+ f = Y_FRAC_LAST (n);
+ i -= pixman_fixed_1;
+ }
+ }
+ return (i | f);
+}
+
+/*
+ * Step an edge by any amount (including negative values)
+ *
+ * e->x is advanced by n * e->stepx, and the error term that n steps of
+ * e->dx would produce is computed in 48.16 arithmetic.  When that error
+ * leaves the range handled below, whole multiples of e->dy are folded back
+ * into e->x (in the direction of e->signdx) and the remainder is stored in
+ * e->e.
+ */
+PIXMAN_EXPORT void
+pixman_edge_step (pixman_edge_t *e,
+ int n)
+{
+ pixman_fixed_48_16_t ne;
+
+ e->x += n * e->stepx;
+
+ /* widen before multiplying so n * dx is evaluated in the 48.16 type */
+ ne = e->e + n * (pixman_fixed_48_16_t) e->dx;
+
+ /* NOTE(review): e->e is only written inside the two branches below; when
+  * neither condition holds it keeps its previous value rather than ne --
+  * confirm this is intended. */
+ if (n >= 0)
+ {
+ if (ne > 0)
+ {
+ /* nx = ceil (ne / dy): number of extra x steps to take */
+ int nx = (ne + e->dy - 1) / e->dy;
+ e->e = ne - nx * (pixman_fixed_48_16_t) e->dy;
+ e->x += nx * e->signdx;
+ }
+ }
+ else
+ {
+ /* NOTE(review): with e->dy == 0 and ne == 0 this test is true and the
+  * division below divides by zero -- confirm callers never step a
+  * zero-height edge backwards. */
+ if (ne <= -e->dy)
+ {
+ int nx = (-ne) / e->dy;
+ e->e = ne + nx * (pixman_fixed_48_16_t) e->dy;
+ e->x -= nx * e->signdx;
+ }
+ }
+}
+
+/*
+ * A private routine to initialize the multi-step
+ * elements of an edge structure
+ *
+ * For a step of n rows it computes the combined x step and the remaining
+ * error increment from e->stepx, e->dx, e->dy and e->signdx, and stores
+ * them through stepx_p and dx_p.  The edge itself is only read.
+ */
+static void
+_pixman_edge_multi_init (pixman_edge_t * e,
+ int n,
+ pixman_fixed_t *stepx_p,
+ pixman_fixed_t *dx_p)
+{
+ pixman_fixed_t stepx;
+ pixman_fixed_48_16_t ne;
+
+ /* error and x movement accumulated over n single-row steps */
+ ne = n * (pixman_fixed_48_16_t) e->dx;
+ stepx = n * e->stepx;
+
+ if (ne > 0)
+ {
+ /* move whole multiples of dy out of the error and into the x step */
+ int nx = ne / e->dy;
+ ne -= nx * e->dy;
+ stepx += nx * e->signdx;
+ }
+
+ *dx_p = ne;
+ *stepx_p = stepx;
+}
+
+/*
+ * Initialize one edge structure given the line endpoints and a
+ * starting y value
+ *
+ * e       edge to fill in
+ * n       grid selector handed to STEP_Y_SMALL / STEP_Y_BIG
+ * y_start row at which walking starts; the edge is stepped from y_top to
+ *         y_start before returning
+ * x_top, y_top, x_bot, y_bot
+ *         the two endpoints of the line
+ */
+PIXMAN_EXPORT void
+pixman_edge_init (pixman_edge_t *e,
+ int n,
+ pixman_fixed_t y_start,
+ pixman_fixed_t x_top,
+ pixman_fixed_t y_top,
+ pixman_fixed_t x_bot,
+ pixman_fixed_t y_bot)
+{
+ pixman_fixed_t dx, dy;
+
+ e->x = x_top;
+ e->e = 0;
+ dx = x_bot - x_top;
+ dy = y_bot - y_top;
+ e->dy = dy;
+ e->dx = 0;
+
+ /* NOTE(review): when dy == 0 the block below is skipped, so stepx, signdx
+  * and the *_small / *_big fields are not assigned here, yet
+  * pixman_edge_step() at the end reads e->stepx -- confirm callers never
+  * pass a zero-height edge or pre-initialise *e. */
+ if (dy)
+ {
+ /* Split dx / dy into a whole step per row (stepx) and a remainder (dx);
+  * signdx records the direction so the remainder is kept non-negative. */
+ if (dx >= 0)
+ {
+ e->signdx = 1;
+ e->stepx = dx / dy;
+ e->dx = dx % dy;
+ e->e = -dy;
+ }
+ else
+ {
+ e->signdx = -1;
+ e->stepx = -(-dx / dy);
+ e->dx = -dx % dy;
+ e->e = 0;
+ }
+
+ /* precompute the steps for the small and big row increments */
+ _pixman_edge_multi_init (e, STEP_Y_SMALL (n),
+ &e->stepx_small, &e->dx_small);
+
+ _pixman_edge_multi_init (e, STEP_Y_BIG (n),
+ &e->stepx_big, &e->dx_big);
+ }
+ /* advance from the top endpoint to the requested starting row */
+ pixman_edge_step (e, y_start - y_top);
+}
+
+/*
+ * Set up edge walker `e' for `line', translated by the whole-pixel offset
+ * (x_off, y_off) and starting at row y.
+ *
+ * The endpoint with the smaller y becomes the top of the edge; when both
+ * endpoints have the same y, p1 is used as the top.
+ */
+PIXMAN_EXPORT void
+pixman_line_fixed_edge_init (pixman_edge_t *            e,
+                             int                        n,
+                             pixman_fixed_t             y,
+                             const pixman_line_fixed_t *line,
+                             int                        x_off,
+                             int                        y_off)
+{
+    const pixman_point_fixed_t *upper = &line->p1;
+    const pixman_point_fixed_t *lower = &line->p2;
+    pixman_fixed_t shift_x, shift_y;
+
+    if (line->p1.y > line->p2.y)
+    {
+        upper = &line->p2;
+        lower = &line->p1;
+    }
+
+    shift_x = pixman_int_to_fixed (x_off);
+    shift_y = pixman_int_to_fixed (y_off);
+
+    pixman_edge_init (e, n, y,
+                      upper->x + shift_x, upper->y + shift_y,
+                      lower->x + shift_x, lower->y + shift_y);
+}
+
+/*
+ * Rasterize `ntrap' pixman_trap_t entries from `traps' into `image', each
+ * shifted by the whole-pixel offset (x_off, y_off).
+ *
+ * For every trap the top is clamped to 0 and rounded up to a sample row, the
+ * bottom is clamped to just below the image height and rounded down to a
+ * sample row; traps that end up empty (bottom above top) are skipped.
+ *
+ * NOTE(review): image->bits is read without checking image->type here,
+ * unlike pixman_rasterize_trapezoid() -- confirm callers only pass BITS
+ * images.
+ */
+PIXMAN_EXPORT void
+pixman_add_traps (pixman_image_t * image,
+ int16_t x_off,
+ int16_t y_off,
+ int ntrap,
+ pixman_trap_t * traps)
+{
+ int bpp;
+ int height;
+
+ pixman_fixed_t x_off_fixed;
+ pixman_fixed_t y_off_fixed;
+ pixman_edge_t l, r;
+ pixman_fixed_t t, b;
+
+ _pixman_image_validate (image);
+
+ height = image->bits.height;
+ bpp = PIXMAN_FORMAT_BPP (image->bits.format);
+
+ x_off_fixed = pixman_int_to_fixed (x_off);
+ y_off_fixed = pixman_int_to_fixed (y_off);
+
+ while (ntrap--)
+ {
+ /* first sample row at or below the (clamped) top */
+ t = traps->top.y + y_off_fixed;
+ if (t < 0)
+ t = 0;
+ t = pixman_sample_ceil_y (t, bpp);
+
+ /* last sample row above the bottom, kept inside the image: the clamp
+  * value is one fixed-point unit below `height' */
+ b = traps->bot.y + y_off_fixed;
+ if (pixman_fixed_to_int (b) >= height)
+ b = pixman_int_to_fixed (height) - 1;
+ b = pixman_sample_floor_y (b, bpp);
+
+ if (b >= t)
+ {
+ /* initialize edge walkers */
+ pixman_edge_init (&l, bpp, t,
+ traps->top.l + x_off_fixed,
+ traps->top.y + y_off_fixed,
+ traps->bot.l + x_off_fixed,
+ traps->bot.y + y_off_fixed);
+
+ pixman_edge_init (&r, bpp, t,
+ traps->top.r + x_off_fixed,
+ traps->top.y + y_off_fixed,
+ traps->bot.r + x_off_fixed,
+ traps->bot.y + y_off_fixed);
+
+ pixman_rasterize_edges (image, &l, &r, t, b);
+ }
+
+ traps++;
+ }
+}
+
+#if 0
+/*
+ * Debugging aid (compiled out): prints `image' to stdout under the heading
+ * `title', one text line per image row and one character per byte of the
+ * row ('#' for a non-zero byte, ' ' for zero).  A warning is printed first
+ * if the image is not a BITS image or its format is not PIXMAN_a8.
+ */
+static void
+dump_image (pixman_image_t *image,
+            const char *    title)
+{
+    int i, j;
+
+    /* "!a == b" parses as "(!a) == b", so spell the inequality out. */
+    if (image->type != BITS)
+        printf ("%s is not a regular image\n", title);
+
+    if (image->bits.format != PIXMAN_a8)
+        printf ("%s is not an alpha mask\n", title);
+
+    printf ("\n\n\n%s: \n", title);
+
+    for (i = 0; i < image->bits.height; ++i)
+    {
+        uint8_t *line =
+            (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]);
+
+        for (j = 0; j < image->bits.width; ++j)
+            printf ("%c", line[j] ? '#' : ' ');
+
+        printf ("\n");
+    }
+}
+#endif
+
+/*
+ * Rasterize traps[0 .. ntraps-1] into `image', shifted by (x_off, y_off).
+ * Entries rejected by pixman_trapezoid_valid() are skipped.
+ */
+PIXMAN_EXPORT void
+pixman_add_trapezoids (pixman_image_t *          image,
+                       int16_t                   x_off,
+                       int                       y_off,
+                       int                       ntraps,
+                       const pixman_trapezoid_t *traps)
+{
+    int idx;
+
+#if 0
+    dump_image (image, "before");
+#endif
+
+    for (idx = 0; idx < ntraps; ++idx)
+    {
+        const pixman_trapezoid_t *cur = &(traps[idx]);
+
+        if (pixman_trapezoid_valid (cur))
+            pixman_rasterize_trapezoid (image, cur, x_off, y_off);
+    }
+
+#if 0
+    dump_image (image, "after");
+#endif
+}
+
+/*
+ * Rasterize a single trapezoid into `image', shifted by the whole-pixel
+ * offset (x_off, y_off).
+ *
+ * Nothing is drawn when `image' is not a BITS image, when the trapezoid
+ * fails pixman_trapezoid_valid(), or when no sample row lies between the
+ * clamped top and bottom.
+ */
+PIXMAN_EXPORT void
+pixman_rasterize_trapezoid (pixman_image_t * image,
+ const pixman_trapezoid_t *trap,
+ int x_off,
+ int y_off)
+{
+ int bpp;
+ int height;
+
+ pixman_fixed_t y_off_fixed;
+ pixman_edge_t l, r;
+ pixman_fixed_t t, b;
+
+ return_if_fail (image->type == BITS);
+
+ _pixman_image_validate (image);
+
+ if (!pixman_trapezoid_valid (trap))
+ return;
+
+ height = image->bits.height;
+ bpp = PIXMAN_FORMAT_BPP (image->bits.format);
+
+ y_off_fixed = pixman_int_to_fixed (y_off);
+
+ /* first sample row at or below the top, never above row 0 */
+ t = trap->top + y_off_fixed;
+ if (t < 0)
+ t = 0;
+ t = pixman_sample_ceil_y (t, bpp);
+
+ /* last sample row above the bottom, kept inside the image: the clamp
+  * value is one fixed-point unit below `height' */
+ b = trap->bottom + y_off_fixed;
+ if (pixman_fixed_to_int (b) >= height)
+ b = pixman_int_to_fixed (height) - 1;
+ b = pixman_sample_floor_y (b, bpp);
+
+ if (b >= t)
+ {
+ /* initialize edge walkers */
+ pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off);
+ pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off);
+
+ pixman_rasterize_edges (image, &l, &r, t, b);
+ }
+}
+
+/*
+ * pixman_composite_trapezoids()
+ *
+ * All the trapezoids are conceptually rendered to an infinitely big image.
+ * The (0, 0) coordinates of this image are then aligned with the (x, y)
+ * coordinates of the source image, and then both images are aligned with
+ * the (x, y) coordinates of the destination. Then, in principle, compositing
+ * of these three images takes place across the entire destination.
+ *
+ * FIXME: However, there is currently a bug, where we restrict this compositing
+ * to the bounding box of the trapezoids. This is incorrect for operators such
+ * as SRC and IN where blank source pixels do have an effect on the destination.
+ *
+ * Implementation notes:
+ *  - Nothing happens when n_traps <= 0.
+ *  - For PIXMAN_OP_ADD with an opaque source, a mask format equal to the
+ *    destination's extended format code and no clip region on the
+ *    destination, the trapezoids are rasterized straight into `dst'.
+ *  - Otherwise the valid trapezoids are rasterized into a temporary image
+ *    of format `mask_format' covering their bounding box, which is then
+ *    used as the mask of a regular composite.  If that temporary image
+ *    cannot be created the function returns without drawing.
+ */
+PIXMAN_EXPORT void
+pixman_composite_trapezoids (pixman_op_t               op,
+                             pixman_image_t *          src,
+                             pixman_image_t *          dst,
+                             pixman_format_code_t      mask_format,
+                             int                       x_src,
+                             int                       y_src,
+                             int                       x_dst,
+                             int                       y_dst,
+                             int                       n_traps,
+                             const pixman_trapezoid_t *traps)
+{
+    int i;
+
+    if (n_traps <= 0)
+        return;
+
+    _pixman_image_validate (src);
+    _pixman_image_validate (dst);
+
+    if (op == PIXMAN_OP_ADD &&
+        (src->common.flags & FAST_PATH_IS_OPAQUE) &&
+        (mask_format == dst->common.extended_format_code) &&
+        !(dst->common.have_clip_region))
+    {
+        /* Direct path: add the coverage straight into the destination. */
+        for (i = 0; i < n_traps; ++i)
+        {
+            const pixman_trapezoid_t *trap = &(traps[i]);
+
+            if (!pixman_trapezoid_valid (trap))
+                continue;
+
+            pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst);
+        }
+    }
+    else
+    {
+        pixman_image_t *tmp;
+        pixman_box32_t box;
+
+        /* Start with an inverted box so the first valid trapezoid sets it. */
+        box.x1 = INT32_MAX;
+        box.y1 = INT32_MAX;
+        box.x2 = INT32_MIN;
+        box.y2 = INT32_MIN;
+
+        /* Integer bounding box of all valid trapezoids. */
+        for (i = 0; i < n_traps; ++i)
+        {
+            const pixman_trapezoid_t *trap = &(traps[i]);
+            int y1, y2;
+
+            if (!pixman_trapezoid_valid (trap))
+                continue;
+
+            y1 = pixman_fixed_to_int (trap->top);
+            if (y1 < box.y1)
+                box.y1 = y1;
+
+            y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom));
+            if (y2 > box.y2)
+                box.y2 = y2;
+
+#define EXTEND_MIN(x)                                                   \
+            if (pixman_fixed_to_int ((x)) < box.x1)                     \
+                box.x1 = pixman_fixed_to_int ((x));
+#define EXTEND_MAX(x)                                                   \
+            if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box.x2) \
+                box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x)));
+
+#define EXTEND(x)                                                       \
+            EXTEND_MIN(x);                                              \
+            EXTEND_MAX(x);
+
+            EXTEND(trap->left.p1.x);
+            EXTEND(trap->left.p2.x);
+            EXTEND(trap->right.p1.x);
+            EXTEND(trap->right.p2.x);
+        }
+
+        /* Empty box: no valid trapezoid, or nothing with any area. */
+        if (box.x1 >= box.x2 || box.y1 >= box.y2)
+            return;
+
+        tmp = pixman_image_create_bits (
+            mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1);
+
+        /* Creation can fail (e.g. out of memory); rasterizing into a NULL
+         * image would dereference it, so give up instead. */
+        if (!tmp)
+            return;
+
+        /* Rasterize relative to the box origin so the mask starts at (0, 0). */
+        for (i = 0; i < n_traps; ++i)
+        {
+            const pixman_trapezoid_t *trap = &(traps[i]);
+
+            if (!pixman_trapezoid_valid (trap))
+                continue;
+
+            pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1);
+        }
+
+        pixman_image_composite (op, src, tmp, dst,
+                                x_src + box.x1, y_src + box.y1,
+                                0, 0,
+                                x_dst + box.x1, y_dst + box.y1,
+                                box.x2 - box.x1, box.y2 - box.y1);
+
+        pixman_image_unref (tmp);
+    }
+}
+
+/*
+ * Ordering predicate on points: returns nonzero when a sorts after b,
+ * comparing the y coordinates first and falling back to the x
+ * coordinates when the y coordinates are equal.
+ */
+static int
+greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b)
+{
+    if (a->y != b->y)
+        return a->y > b->y;
+
+    return a->x > b->x;
+}
+
+/*
+ * Note that the definition of this function is a bit odd because
+ * of the X coordinate space (y increasing downwards).
+ *
+ * Both a and b are taken relative to ref; the result is nonzero when
+ * the cross term (b.y * a.x - a.y * b.x) of the two relative vectors
+ * is negative. The products are formed in pixman_fixed_32_32_t.
+ */
+static int
+clockwise (const pixman_point_fixed_t *ref,
+           const pixman_point_fixed_t *a,
+           const pixman_point_fixed_t *b)
+{
+    pixman_point_fixed_t rel_a, rel_b;
+    pixman_fixed_32_32_t lhs, rhs;
+
+    rel_a.x = a->x - ref->x;
+    rel_a.y = a->y - ref->y;
+    rel_b.x = b->x - ref->x;
+    rel_b.y = b->y - ref->y;
+
+    lhs = (pixman_fixed_32_32_t) rel_b.y * rel_a.x;
+    rhs = (pixman_fixed_32_32_t) rel_a.y * rel_b.x;
+
+    return (lhs - rhs) < 0;
+}
+
+/*
+ * Splits one triangle into two trapezoids, written to traps[0] and
+ * traps[1]. The vertex that sorts first under greater_y() becomes the
+ * shared top; the remaining two are assigned to left/right using
+ * clockwise().
+ */
+static void
+triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps)
+{
+    const pixman_point_fixed_t *top = &tri->p1;
+    const pixman_point_fixed_t *left = &tri->p2;
+    const pixman_point_fixed_t *right = &tri->p3;
+    const pixman_point_fixed_t *swap;
+    pixman_trapezoid_t *upper = &traps[0];
+    pixman_trapezoid_t *lower = &traps[1];
+    int right_ends_first;
+
+    /* Make 'top' the smallest vertex under greater_y() */
+    if (greater_y (top, left))
+    {
+        swap = top;
+        top = left;
+        left = swap;
+    }
+
+    if (greater_y (top, right))
+    {
+        swap = top;
+        top = right;
+        right = swap;
+    }
+
+    /* Exchange the two remaining vertices if they are the wrong way round */
+    if (clockwise (top, right, left))
+    {
+        swap = left;
+        left = right;
+        right = swap;
+    }
+
+    /*
+     * Two cases:
+     *
+     *          +               +
+     *         / \             / \
+     *        /   \           /   \
+     *       /     +         +     \
+     *      /    --           --    \
+     *     /   --               --   \
+     *    / ---                   --- \
+     *   +--                         --+
+     */
+
+    right_ends_first = right->y < left->y;
+
+    /* Upper trapezoid: from the top vertex down to the nearer of the
+     * two other vertices, bounded by the two edges leaving the top.
+     */
+    upper->top = top->y;
+    upper->bottom = right_ends_first ? right->y : left->y;
+    upper->left.p1 = *top;
+    upper->left.p2 = *left;
+    upper->right.p1 = *top;
+    upper->right.p2 = *right;
+
+    /* Lower trapezoid: starts as a copy of the upper one, then the edge
+     * that ended is replaced by the edge joining left and right.
+     */
+    *lower = *upper;
+
+    if (right_ends_first)
+    {
+        lower->top = right->y;
+        lower->bottom = left->y;
+        lower->right.p1 = *right;
+        lower->right.p2 = *left;
+    }
+    else
+    {
+        lower->top = left->y;
+        lower->bottom = right->y;
+        lower->left.p1 = *left;
+        lower->left.p2 = *right;
+    }
+}
+
+/*
+ * Converts n_tris triangles into 2 * n_tris trapezoids (two consecutive
+ * entries per triangle).
+ *
+ * Returns a newly allocated array, or NULL when n_tris <= 0 or the
+ * allocation fails. The callers in this file release the array with
+ * free().
+ */
+static pixman_trapezoid_t *
+convert_triangles (int n_tris, const pixman_triangle_t *tris)
+{
+    pixman_trapezoid_t *result;
+    pixman_trapezoid_t *out;
+    const pixman_triangle_t *tri;
+
+    if (n_tris <= 0)
+        return NULL;
+
+    result = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t));
+    if (result == NULL)
+        return NULL;
+
+    /* Each triangle fills the next pair of output slots */
+    for (tri = tris, out = result; tri != tris + n_tris; ++tri, out += 2)
+        triangle_to_trapezoids (tri, out);
+
+    return result;
+}
+
+/*
+ * Composites triangles by converting each one into two trapezoids and
+ * passing the result to pixman_composite_trapezoids() with the same
+ * operator, images, mask format and offsets.
+ *
+ * Does nothing when convert_triangles() returns NULL.
+ */
+PIXMAN_EXPORT void
+pixman_composite_triangles (pixman_op_t              op,
+                            pixman_image_t *         src,
+                            pixman_image_t *         dst,
+                            pixman_format_code_t     mask_format,
+                            int                      x_src,
+                            int                      y_src,
+                            int                      x_dst,
+                            int                      y_dst,
+                            int                      n_tris,
+                            const pixman_triangle_t *tris)
+{
+    pixman_trapezoid_t *traps = convert_triangles (n_tris, tris);
+
+    if (traps == NULL)
+        return;
+
+    pixman_composite_trapezoids (op, src, dst, mask_format,
+                                 x_src, y_src, x_dst, y_dst,
+                                 2 * n_tris, traps);
+
+    free (traps);
+}
+
+/*
+ * Adds triangles to image by converting each one into two trapezoids
+ * and passing the result to pixman_add_trapezoids() with the same
+ * offsets.
+ *
+ * Does nothing when convert_triangles() returns NULL.
+ */
+PIXMAN_EXPORT void
+pixman_add_triangles (pixman_image_t          *image,
+                      int32_t                  x_off,
+                      int32_t                  y_off,
+                      int                      n_tris,
+                      const pixman_triangle_t *tris)
+{
+    pixman_trapezoid_t *traps = convert_triangles (n_tris, tris);
+
+    if (traps == NULL)
+        return;
+
+    pixman_add_trapezoids (image, x_off, y_off, 2 * n_tris, traps);
+
+    free (traps);
+}
diff --git a/pixman/test/composite-traps-test.c b/pixman/test/composite-traps-test.c
index 9ea7293ce..fa6d8a988 100644
--- a/pixman/test/composite-traps-test.c
+++ b/pixman/test/composite-traps-test.c
@@ -1,257 +1,257 @@
-/* Based loosely on scaling-test */
-
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "utils.h"
-
-#define MAX_SRC_WIDTH 48
-#define MAX_SRC_HEIGHT 48
-#define MAX_DST_WIDTH 48
-#define MAX_DST_HEIGHT 48
-#define MAX_STRIDE 4
-
-static pixman_format_code_t formats[] =
-{
- PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_r5g6b5, PIXMAN_a1, PIXMAN_a4
-};
-
-static pixman_format_code_t mask_formats[] =
-{
- PIXMAN_a1, PIXMAN_a4, PIXMAN_a8,
-};
-
-static pixman_op_t operators[] =
-{
- PIXMAN_OP_OVER, PIXMAN_OP_ADD, PIXMAN_OP_SRC, PIXMAN_OP_IN
-};
-
-#define RANDOM_ELT(array) \
- ((array)[lcg_rand_n(ARRAY_LENGTH((array)))])
-
-static void
-destroy_bits (pixman_image_t *image, void *data)
-{
- fence_free (data);
-}
-
-static pixman_fixed_t
-random_fixed (int n)
-{
- return lcg_rand_N (n << 16);
-}
-
-/*
- * Composite operation with pseudorandom images
- */
-uint32_t
-test_composite (int testnum,
- int verbose)
-{
- int i;
- pixman_image_t * src_img;
- pixman_image_t * dst_img;
- pixman_region16_t clip;
- int dst_width, dst_height;
- int dst_stride;
- int dst_x, dst_y;
- int dst_bpp;
- pixman_op_t op;
- uint32_t * dst_bits;
- uint32_t crc32;
- pixman_format_code_t mask_format, dst_format;
- pixman_trapezoid_t *traps;
- int src_x, src_y;
- int n_traps;
-
- static pixman_color_t colors[] =
- {
- { 0xffff, 0xffff, 0xffff, 0xffff },
- { 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0xabcd, 0xabcd, 0x0000, 0xabcd },
- { 0x0000, 0x0000, 0x0000, 0xffff },
- { 0x0101, 0x0101, 0x0101, 0x0101 },
- { 0x7777, 0x6666, 0x5555, 0x9999 },
- };
-
- FLOAT_REGS_CORRUPTION_DETECTOR_START ();
-
- lcg_srand (testnum);
-
- op = RANDOM_ELT (operators);
- mask_format = RANDOM_ELT (mask_formats);
-
- /* Create source image */
-
- if (lcg_rand_n (4) == 0)
- {
- src_img = pixman_image_create_solid_fill (
- &(colors[lcg_rand_n (ARRAY_LENGTH (colors))]));
-
- src_x = 10;
- src_y = 234;
- }
- else
- {
- pixman_format_code_t src_format = RANDOM_ELT(formats);
- int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8;
- int src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
- int src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
- int src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
- uint32_t *bits;
-
- src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
- src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
-
- src_stride = (src_stride + 3) & ~3;
-
- bits = (uint32_t *)make_random_bytes (src_stride * src_height);
-
- src_img = pixman_image_create_bits (
- src_format, src_width, src_height, bits, src_stride);
-
- pixman_image_set_destroy_function (src_img, destroy_bits, bits);
-
- if (lcg_rand_n (8) == 0)
- {
- pixman_box16_t clip_boxes[2];
- int n = lcg_rand_n (2) + 1;
-
- for (i = 0; i < n; i++)
- {
- clip_boxes[i].x1 = lcg_rand_n (src_width);
- clip_boxes[i].y1 = lcg_rand_n (src_height);
- clip_boxes[i].x2 =
- clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
- clip_boxes[i].y2 =
- clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
-
- if (verbose)
- {
- printf ("source clip box: [%d,%d-%d,%d]\n",
- clip_boxes[i].x1, clip_boxes[i].y1,
- clip_boxes[i].x2, clip_boxes[i].y2);
- }
- }
-
- pixman_region_init_rects (&clip, clip_boxes, n);
- pixman_image_set_clip_region (src_img, &clip);
- pixman_image_set_source_clipping (src_img, 1);
- pixman_region_fini (&clip);
- }
-
- image_endian_swap (src_img);
- }
-
- /* Create destination image */
- {
- dst_format = RANDOM_ELT(formats);
- dst_bpp = (PIXMAN_FORMAT_BPP (dst_format) + 7) / 8;
- dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
- dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
- dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
- dst_stride = (dst_stride + 3) & ~3;
-
- dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height);
-
- dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
- dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
-
- dst_img = pixman_image_create_bits (
- dst_format, dst_width, dst_height, dst_bits, dst_stride);
-
- image_endian_swap (dst_img);
- }
-
- /* Create traps */
- {
- int i;
-
- n_traps = lcg_rand_n (25);
- traps = fence_malloc (n_traps * sizeof (pixman_trapezoid_t));
-
- for (i = 0; i < n_traps; ++i)
- {
- pixman_trapezoid_t *t = &(traps[i]);
-
- t->top = random_fixed (MAX_DST_HEIGHT) - MAX_DST_HEIGHT / 2;
- t->bottom = t->top + random_fixed (MAX_DST_HEIGHT);
- t->left.p1.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2;
- t->left.p1.y = t->top - random_fixed (50);
- t->left.p2.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2;
- t->left.p2.y = t->bottom + random_fixed (50);
- t->right.p1.x = t->left.p1.x + random_fixed (MAX_DST_WIDTH);
- t->right.p1.y = t->top - random_fixed (50);
- t->right.p2.x = t->left.p2.x + random_fixed (MAX_DST_WIDTH);
- t->right.p2.y = t->bottom - random_fixed (50);
- }
- }
-
- if (lcg_rand_n (8) == 0)
- {
- pixman_box16_t clip_boxes[2];
- int n = lcg_rand_n (2) + 1;
- for (i = 0; i < n; i++)
- {
- clip_boxes[i].x1 = lcg_rand_n (dst_width);
- clip_boxes[i].y1 = lcg_rand_n (dst_height);
- clip_boxes[i].x2 =
- clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
- clip_boxes[i].y2 =
- clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
-
- if (verbose)
- {
- printf ("destination clip box: [%d,%d-%d,%d]\n",
- clip_boxes[i].x1, clip_boxes[i].y1,
- clip_boxes[i].x2, clip_boxes[i].y2);
- }
- }
- pixman_region_init_rects (&clip, clip_boxes, n);
- pixman_image_set_clip_region (dst_img, &clip);
- pixman_region_fini (&clip);
- }
-
- pixman_composite_trapezoids (op, src_img, dst_img, mask_format,
- src_x, src_y, dst_x, dst_y, n_traps, traps);
-
- if (dst_format == PIXMAN_x8r8g8b8)
- {
- /* ignore unused part */
- for (i = 0; i < dst_stride * dst_height / 4; i++)
- dst_bits[i] &= 0xFFFFFF;
- }
-
- image_endian_swap (dst_img);
-
- if (verbose)
- {
- int j;
-
- for (i = 0; i < dst_height; i++)
- {
- for (j = 0; j < dst_stride; j++)
- printf ("%02X ", *((uint8_t *)dst_bits + i * dst_stride + j));
-
- printf ("\n");
- }
- }
-
- crc32 = compute_crc32 (0, dst_bits, dst_stride * dst_height);
-
- fence_free (dst_bits);
-
- pixman_image_unref (src_img);
- pixman_image_unref (dst_img);
- fence_free (traps);
-
- FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
- return crc32;
-}
-
-int
-main (int argc, const char *argv[])
-{
- return fuzzer_test_main("composite traps", 40000, 0xA34F95C7,
- test_composite, argc, argv);
-}
+/* Based loosely on scaling-test */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define MAX_SRC_WIDTH 48
+#define MAX_SRC_HEIGHT 48
+#define MAX_DST_WIDTH 48
+#define MAX_DST_HEIGHT 48
+#define MAX_STRIDE 4
+
+/* Pixel formats from which test_composite() picks the source and
+ * destination image formats.
+ */
+static pixman_format_code_t formats[] =
+{
+ PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_r5g6b5, PIXMAN_a1, PIXMAN_a4
+};
+
+/* Formats from which test_composite() picks the mask_format argument
+ * of pixman_composite_trapezoids().
+ */
+static pixman_format_code_t mask_formats[] =
+{
+ PIXMAN_a1, PIXMAN_a4, PIXMAN_a8,
+};
+
+/* Operators from which test_composite() picks the compositing operator. */
+static pixman_op_t operators[] =
+{
+ PIXMAN_OP_OVER, PIXMAN_OP_ADD, PIXMAN_OP_SRC, PIXMAN_OP_IN
+};
+
+/* Picks an element of a true array (not a pointer) at the index returned
+ * by lcg_rand_n (ARRAY_LENGTH (array)). The argument appears twice in
+ * the expansion, so it must not have side effects.
+ */
+#define RANDOM_ELT(array) \
+ ((array)[lcg_rand_n(ARRAY_LENGTH((array)))])
+
+/*
+ * Destroy callback installed on the source image by test_composite():
+ * releases the pixel buffer passed as 'data' with fence_free(). The
+ * image argument itself is not used.
+ */
+static void
+destroy_bits (pixman_image_t *image, void *data)
+{
+    (void) image;
+
+    fence_free (data);
+}
+
+/*
+ * Returns lcg_rand_N() applied to n shifted left by 16 bits, as a
+ * pixman_fixed_t.
+ */
+static pixman_fixed_t
+random_fixed (int n)
+{
+    int limit = n << 16;
+
+    return lcg_rand_N (limit);
+}
+
+/*
+ * Composite operation with pseudorandom images
+ *
+ * Seeds the generator with testnum, builds a random source image,
+ * destination image and list of trapezoids, runs
+ * pixman_composite_trapezoids() on them and returns the CRC32 of the
+ * destination buffer. When verbose is nonzero, the chosen clip boxes
+ * and a hex dump of the destination are printed.
+ *
+ * NOTE(review): the generated case depends on the exact order of the
+ * lcg_rand_*() calls below; presumably the checksum passed to
+ * fuzzer_test_main() in main() has to be updated if that order changes.
+ */
+uint32_t
+test_composite (int testnum,
+ int verbose)
+{
+ int i;
+ pixman_image_t * src_img;
+ pixman_image_t * dst_img;
+ pixman_region16_t clip;
+ int dst_width, dst_height;
+ int dst_stride;
+ int dst_x, dst_y;
+ int dst_bpp;
+ pixman_op_t op;
+ uint32_t * dst_bits;
+ uint32_t crc32;
+ pixman_format_code_t mask_format, dst_format;
+ pixman_trapezoid_t *traps;
+ int src_x, src_y;
+ int n_traps;
+
+ /* Colors available for a solid-fill source */
+ static pixman_color_t colors[] =
+ {
+ { 0xffff, 0xffff, 0xffff, 0xffff },
+ { 0x0000, 0x0000, 0x0000, 0x0000 },
+ { 0xabcd, 0xabcd, 0x0000, 0xabcd },
+ { 0x0000, 0x0000, 0x0000, 0xffff },
+ { 0x0101, 0x0101, 0x0101, 0x0101 },
+ { 0x7777, 0x6666, 0x5555, 0x9999 },
+ };
+
+ FLOAT_REGS_CORRUPTION_DETECTOR_START ();
+
+ /* Seed the generator from the test number */
+ lcg_srand (testnum);
+
+ op = RANDOM_ELT (operators);
+ mask_format = RANDOM_ELT (mask_formats);
+
+ /* Create source image */
+
+ /* Solid fill when lcg_rand_n (4) returns 0, a bits image otherwise */
+ if (lcg_rand_n (4) == 0)
+ {
+ src_img = pixman_image_create_solid_fill (
+ &(colors[lcg_rand_n (ARRAY_LENGTH (colors))]));
+
+ /* Fixed source offset for the solid-fill case */
+ src_x = 10;
+ src_y = 234;
+ }
+ else
+ {
+ pixman_format_code_t src_format = RANDOM_ELT(formats);
+ /* Despite the name, this is bytes per pixel, rounded up */
+ int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8;
+ int src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
+ int src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+ int src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
+ uint32_t *bits;
+
+ src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
+ src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
+
+ /* Round the stride up to a multiple of 4 bytes */
+ src_stride = (src_stride + 3) & ~3;
+
+ bits = (uint32_t *)make_random_bytes (src_stride * src_height);
+
+ src_img = pixman_image_create_bits (
+ src_format, src_width, src_height, bits, src_stride);
+
+ /* destroy_bits() frees 'bits' through fence_free() */
+ pixman_image_set_destroy_function (src_img, destroy_bits, bits);
+
+ /* Set a source clip of one or two random boxes when
+ * lcg_rand_n (8) returns 0
+ */
+ if (lcg_rand_n (8) == 0)
+ {
+ pixman_box16_t clip_boxes[2];
+ int n = lcg_rand_n (2) + 1;
+
+ for (i = 0; i < n; i++)
+ {
+ clip_boxes[i].x1 = lcg_rand_n (src_width);
+ clip_boxes[i].y1 = lcg_rand_n (src_height);
+ clip_boxes[i].x2 =
+ clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
+ clip_boxes[i].y2 =
+ clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
+
+ if (verbose)
+ {
+ printf ("source clip box: [%d,%d-%d,%d]\n",
+ clip_boxes[i].x1, clip_boxes[i].y1,
+ clip_boxes[i].x2, clip_boxes[i].y2);
+ }
+ }
+
+ pixman_region_init_rects (&clip, clip_boxes, n);
+ pixman_image_set_clip_region (src_img, &clip);
+ pixman_image_set_source_clipping (src_img, 1);
+ pixman_region_fini (&clip);
+ }
+
+ image_endian_swap (src_img);
+ }
+
+ /* Create destination image */
+ {
+ dst_format = RANDOM_ELT(formats);
+ /* Despite the name, this is bytes per pixel, rounded up */
+ dst_bpp = (PIXMAN_FORMAT_BPP (dst_format) + 7) / 8;
+ dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
+ dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
+ dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
+ /* Round the stride up to a multiple of 4 bytes */
+ dst_stride = (dst_stride + 3) & ~3;
+
+ dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height);
+
+ dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
+ dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
+
+ dst_img = pixman_image_create_bits (
+ dst_format, dst_width, dst_height, dst_bits, dst_stride);
+
+ image_endian_swap (dst_img);
+ }
+
+ /* Create traps */
+ {
+ /* Shadows the function-level 'i' for the duration of this block */
+ int i;
+
+ /* n_traps may be 0 */
+ n_traps = lcg_rand_n (25);
+ traps = fence_malloc (n_traps * sizeof (pixman_trapezoid_t));
+
+ for (i = 0; i < n_traps; ++i)
+ {
+ pixman_trapezoid_t *t = &(traps[i]);
+
+ /* NOTE(review): MAX_DST_HEIGHT / 2 and MAX_DST_WIDTH / 2 are
+ * subtracted below as plain integers from values produced by
+ * random_fixed(), not as fixed-point quantities. Confirm
+ * whether that is intended; changing it would alter the
+ * generated cases.
+ */
+ t->top = random_fixed (MAX_DST_HEIGHT) - MAX_DST_HEIGHT / 2;
+ t->bottom = t->top + random_fixed (MAX_DST_HEIGHT);
+ t->left.p1.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2;
+ t->left.p1.y = t->top - random_fixed (50);
+ t->left.p2.x = random_fixed (MAX_DST_WIDTH) - MAX_DST_WIDTH / 2;
+ t->left.p2.y = t->bottom + random_fixed (50);
+ t->right.p1.x = t->left.p1.x + random_fixed (MAX_DST_WIDTH);
+ t->right.p1.y = t->top - random_fixed (50);
+ t->right.p2.x = t->left.p2.x + random_fixed (MAX_DST_WIDTH);
+ t->right.p2.y = t->bottom - random_fixed (50);
+ }
+ }
+
+ /* Set a destination clip of one or two random boxes when
+ * lcg_rand_n (8) returns 0
+ */
+ if (lcg_rand_n (8) == 0)
+ {
+ pixman_box16_t clip_boxes[2];
+ int n = lcg_rand_n (2) + 1;
+ for (i = 0; i < n; i++)
+ {
+ clip_boxes[i].x1 = lcg_rand_n (dst_width);
+ clip_boxes[i].y1 = lcg_rand_n (dst_height);
+ clip_boxes[i].x2 =
+ clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
+ clip_boxes[i].y2 =
+ clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
+
+ if (verbose)
+ {
+ printf ("destination clip box: [%d,%d-%d,%d]\n",
+ clip_boxes[i].x1, clip_boxes[i].y1,
+ clip_boxes[i].x2, clip_boxes[i].y2);
+ }
+ }
+ pixman_region_init_rects (&clip, clip_boxes, n);
+ pixman_image_set_clip_region (dst_img, &clip);
+ pixman_region_fini (&clip);
+ }
+
+ /* The operation under test */
+ pixman_composite_trapezoids (op, src_img, dst_img, mask_format,
+ src_x, src_y, dst_x, dst_y, n_traps, traps);
+
+ /* NOTE(review): formats[] in this file does not list PIXMAN_x8r8g8b8,
+ * so this branch looks unreachable as the test stands.
+ */
+ if (dst_format == PIXMAN_x8r8g8b8)
+ {
+ /* ignore unused part */
+ for (i = 0; i < dst_stride * dst_height / 4; i++)
+ dst_bits[i] &= 0xFFFFFF;
+ }
+
+ image_endian_swap (dst_img);
+
+ if (verbose)
+ {
+ int j;
+
+ /* Hex dump of the destination buffer, one row per line, including
+ * the stride padding bytes
+ */
+ for (i = 0; i < dst_height; i++)
+ {
+ for (j = 0; j < dst_stride; j++)
+ printf ("%02X ", *((uint8_t *)dst_bits + i * dst_stride + j));
+
+ printf ("\n");
+ }
+ }
+
+ /* The checksum covers the whole buffer, stride padding included */
+ crc32 = compute_crc32 (0, dst_bits, dst_stride * dst_height);
+
+ /* No destroy function was set on dst_img, so its buffer is freed here */
+ fence_free (dst_bits);
+
+ pixman_image_unref (src_img);
+ pixman_image_unref (dst_img);
+ fence_free (traps);
+
+ FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
+ return crc32;
+}
+
+/*
+ * Entry point: hands test_composite() to fuzzer_test_main() together
+ * with the test name, the count 40000 and the checksum 0xE3112106.
+ */
+int
+main (int argc, const char *argv[])
+{
+    const int      n_tests = 40000;
+    const uint32_t expected_checksum = 0xE3112106;
+
+    return fuzzer_test_main ("composite traps", n_tests, expected_checksum,
+                             test_composite, argc, argv);
+}