From 990bc3f015a4f8fce2eb918375defcd44980a845 Mon Sep 17 00:00:00 2001 From: marha Date: Fri, 8 Jun 2012 09:33:13 +0200 Subject: Used synchronise script to update files --- pixman/.gitignore | 81 ++ pixman/COPYING | 84 +- pixman/INSTALL | 468 +++--- pixman/TODO | 542 +++---- pixman/pixman-1-uninstalled.pc.in | 10 +- pixman/pixman-1.pc.in | 22 +- pixman/pixman/make-combine.pl | 172 +-- pixman/pixman/pixman-access-accessors.c | 6 +- pixman/pixman/pixman-arm-neon-asm.h | 2354 +++++++++++++++---------------- pixman/pixman/pixman-edge-accessors.c | 8 +- pixman/pixman/pixman-edge-imp.h | 364 ++--- pixman/pixman/pixman-edge.c | 768 +++++----- pixman/pixman/pixman-matrix.c | 1532 ++++++++++---------- pixman/pixman/pixman-timer.c | 132 +- pixman/pixman/pixman-version.h.in | 100 +- pixman/test/fuzzer-find-diff.pl | 136 +- pixman/test/region-test.c | 246 ++-- 17 files changed, 3553 insertions(+), 3472 deletions(-) create mode 100644 pixman/.gitignore (limited to 'pixman') diff --git a/pixman/.gitignore b/pixman/.gitignore new file mode 100644 index 000000000..98612c91f --- /dev/null +++ b/pixman/.gitignore @@ -0,0 +1,81 @@ +Makefile +Makefile.in +.deps +.libs +.msg +*.pc +*.lo +*.la +*.a +*.o +*~ +aclocal.m4 +autom4te.cache +compile +config.guess +config.log +config.status +config.sub +configure +depcomp +install-sh +libtool +ltmain.sh +missing +stamp-h? 
+config.h +config.h.in +.*.swp +demos/alpha-test +demos/checkerboard +demos/clip-in +demos/clip-test +demos/composite-test +demos/convolution-test +demos/gradient-test +demos/quad2quad +demos/radial-test +demos/screen-test +demos/trap-test +demos/tri-test +pixman/pixman-combine32.c +pixman/pixman-combine32.h +pixman/pixman-combine64.c +pixman/pixman-combine64.h +pixman/pixman-version.h +test/a1-trap-test +test/affine-test +test/alpha-loop +test/alphamap +test/alpha-test +test/blitters-test +test/clip-in +test/clip-test +test/composite +test/composite-test +test/composite-traps-test +test/convolution-test +test/fetch-test +test/gradient-crash-test +test/gradient-test +test/lowlevel-blt-bench +test/oob-test +test/pdf-op-test +test/region-contains-test +test/region-test +test/region-translate +test/region-translate-test +test/scaling-crash-test +test/scaling-helpers-test +test/scaling-test +test/screen-test +test/stress-test +test/trap-crasher +test/trap-test +test/window-test +*.pdb +*.dll +*.lib +*.ilk +*.obj +*.exe diff --git a/pixman/COPYING b/pixman/COPYING index 11b022bc2..6168dea56 100644 --- a/pixman/COPYING +++ b/pixman/COPYING @@ -1,42 +1,42 @@ -The following is the MIT license, agreed upon by most contributors. -Copyright holders of new code should use this license statement where -possible. They may also add themselves to the list below. - -/* - * Copyright 1987, 1988, 1989, 1998 The Open Group - * Copyright 1987, 1988, 1989 Digital Equipment Corporation - * Copyright 1999, 2004, 2008 Keith Packard - * Copyright 2000 SuSE, Inc. - * Copyright 2000 Keith Packard, member of The XFree86 Project, Inc. - * Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc. 
- * Copyright 2004 Nicholas Miell - * Copyright 2005 Lars Knoll & Zack Rusin, Trolltech - * Copyright 2005 Trolltech AS - * Copyright 2007 Luca Barbato - * Copyright 2008 Aaron Plattner, NVIDIA Corporation - * Copyright 2008 Rodrigo Kumpera - * Copyright 2008 André Tupinambá - * Copyright 2008 Mozilla Corporation - * Copyright 2008 Frederic Plourde - * Copyright 2009, Oracle and/or its affiliates. All rights reserved. - * Copyright 2009, 2010 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ +The following is the MIT license, agreed upon by most contributors. +Copyright holders of new code should use this license statement where +possible. They may also add themselves to the list below. + +/* + * Copyright 1987, 1988, 1989, 1998 The Open Group + * Copyright 1987, 1988, 1989 Digital Equipment Corporation + * Copyright 1999, 2004, 2008 Keith Packard + * Copyright 2000 SuSE, Inc. 
+ * Copyright 2000 Keith Packard, member of The XFree86 Project, Inc. + * Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc. + * Copyright 2004 Nicholas Miell + * Copyright 2005 Lars Knoll & Zack Rusin, Trolltech + * Copyright 2005 Trolltech AS + * Copyright 2007 Luca Barbato + * Copyright 2008 Aaron Plattner, NVIDIA Corporation + * Copyright 2008 Rodrigo Kumpera + * Copyright 2008 André Tupinambá + * Copyright 2008 Mozilla Corporation + * Copyright 2008 Frederic Plourde + * Copyright 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright 2009, 2010 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ diff --git a/pixman/INSTALL b/pixman/INSTALL index 5458714e1..cf1202b66 100644 --- a/pixman/INSTALL +++ b/pixman/INSTALL @@ -1,234 +1,234 @@ -Installation Instructions -************************* - -Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, -2006 Free Software Foundation, Inc. - -This file is free documentation; the Free Software Foundation gives -unlimited permission to copy, distribute and modify it. - -Basic Installation -================== - -Briefly, the shell commands `./configure; make; make install' should -configure, build, and install this package. The following -more-detailed instructions are generic; see the `README' file for -instructions specific to this package. - - The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, and a -file `config.log' containing compiler output (useful mainly for -debugging `configure'). - - It can also use an optional file (typically called `config.cache' -and enabled with `--cache-file=config.cache' or simply `-C') that saves -the results of its tests to speed up reconfiguring. Caching is -disabled by default to prevent problems with accidental use of stale -cache files. - - If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can -be considered for the next release. If you are using the cache, and at -some point `config.cache' contains results you don't want to keep, you -may remove or edit it. 
- - The file `configure.ac' (or `configure.in') is used to create -`configure' by a program called `autoconf'. You need `configure.ac' if -you want to change it or regenerate `configure' using a newer version -of `autoconf'. - -The simplest way to compile this package is: - - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. - - Running `configure' might take a while. While running, it prints - some messages telling which features it is checking for. - - 2. Type `make' to compile the package. - - 3. Optionally, type `make check' to run any self-tests that come with - the package. - - 4. Type `make install' to install the programs and any data files and - documentation. - - 5. You can remove the program binaries and object files from the - source code directory by typing `make clean'. To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. There is - also a `make maintainer-clean' target, but that is intended mainly - for the package's developers. If you use it, you may have to get - all sorts of other programs in order to regenerate files that came - with the distribution. - -Compilers and Options -===================== - -Some systems require unusual options for compilation or linking that the -`configure' script does not know about. Run `./configure --help' for -details on some of the pertinent environment variables. - - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. Here -is an example: - - ./configure CC=c99 CFLAGS=-g LIBS=-lposix - - *Note Defining Variables::, for more details. 
- -Compiling For Multiple Architectures -==================================== - -You can compile the package for more than one kind of computer at the -same time, by placing the object files for each architecture in their -own directory. To do this, you can use GNU `make'. `cd' to the -directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. - - With a non-GNU `make', it is safer to compile the package for one -architecture at a time in the source code directory. After you have -installed the package for one architecture, use `make distclean' before -reconfiguring for another architecture. - -Installation Names -================== - -By default, `make install' installs the package's commands under -`/usr/local/bin', include files under `/usr/local/include', etc. You -can specify an installation prefix other than `/usr/local' by giving -`configure' the option `--prefix=PREFIX'. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -pass the option `--exec-prefix=PREFIX' to `configure', the package uses -PREFIX as the prefix for installing programs and libraries. -Documentation and other data files still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like `--bindir=DIR' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. 
- -Optional Features -================= - -Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. -They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the -package recognizes. - - For packages that use the X Window System, `configure' can usually -find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. - -Specifying the System Type -========================== - -There may be some features `configure' cannot figure out automatically, -but needs to determine by the type of machine the package will run on. -Usually, assuming the package is built to be run on the _same_ -architectures, `configure' can figure that out, but if it prints a -message saying it cannot guess the machine type, give it the -`--build=TYPE' option. TYPE can either be a short name for the system -type, such as `sun4', or a canonical name which has the form: - - CPU-COMPANY-SYSTEM - -where SYSTEM can have one of these forms: - - OS KERNEL-OS - - See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't -need to know the machine type. - - If you are _building_ compiler tools for cross-compiling, you should -use the option `--target=TYPE' to select the type of system they will -produce code for. - - If you want to _use_ a cross compiler, that generates code for a -platform different from the build platform, you should specify the -"host" platform (i.e., that on which the generated programs will -eventually be run) with `--host=TYPE'. 
- -Sharing Defaults -================ - -If you want to set default values for `configure' scripts to share, you -can create a site shell script called `config.site' that gives default -values for variables like `CC', `cache_file', and `prefix'. -`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. - -Defining Variables -================== - -Variables not defined in a site shell script can be set in the -environment passed to `configure'. However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the `configure' command line, using `VAR=value'. For example: - - ./configure CC=/usr/local2/bin/gcc - -causes the specified `gcc' to be used as the C compiler (unless it is -overridden in the site shell script). - -Unfortunately, this technique does not work for `CONFIG_SHELL' due to -an Autoconf bug. Until the bug is fixed you can use this workaround: - - CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash - -`configure' Invocation -====================== - -`configure' recognizes the following options to control how it operates. - -`--help' -`-h' - Print a summary of the options to `configure', and exit. - -`--version' -`-V' - Print the version of Autoconf used to generate the `configure' - script, and exit. - -`--cache-file=FILE' - Enable the cache: use and save the results of the tests in FILE, - traditionally `config.cache'. FILE defaults to `/dev/null' to - disable caching. - -`--config-cache' -`-C' - Alias for `--cache-file=config.cache'. - -`--quiet' -`--silent' -`-q' - Do not print messages saying which checks are being made. 
To - suppress all normal output, redirect it to `/dev/null' (any error - messages will still be shown). - -`--srcdir=DIR' - Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. - -`configure' also accepts some other, not widely useful, options. Run -`configure --help' for more details. - +Installation Instructions +************************* + +Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, +2006 Free Software Foundation, Inc. + +This file is free documentation; the Free Software Foundation gives +unlimited permission to copy, distribute and modify it. + +Basic Installation +================== + +Briefly, the shell commands `./configure; make; make install' should +configure, build, and install this package. The following +more-detailed instructions are generic; see the `README' file for +instructions specific to this package. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. Caching is +disabled by default to prevent problems with accidental use of stale +cache files. + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. 
If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You need `configure.ac' if +you want to change it or regenerate `configure' using a newer version +of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. + + Running `configure' might take a while. While running, it prints + some messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + +Compilers and Options +===================== + +Some systems require unusual options for compilation or linking that the +`configure' script does not know about. Run `./configure --help' for +details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details. 
+ +Compiling For Multiple Architectures +==================================== + +You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + With a non-GNU `make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use `make distclean' before +reconfiguring for another architecture. + +Installation Names +================== + +By default, `make install' installs the package's commands under +`/usr/local/bin', include files under `/usr/local/include', etc. You +can specify an installation prefix other than `/usr/local' by giving +`configure' the option `--prefix=PREFIX'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +pass the option `--exec-prefix=PREFIX' to `configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=DIR' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. 
+ +Optional Features +================= + +Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + +There may be some features `configure' cannot figure out automatically, +but needs to determine by the type of machine the package will run on. +Usually, assuming the package is built to be run on the _same_ +architectures, `configure' can figure that out, but if it prints a +message saying it cannot guess the machine type, give it the +`--build=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS KERNEL-OS + + See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option `--target=TYPE' to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with `--host=TYPE'. 
+ +Sharing Defaults +================ + +If you want to set default values for `configure' scripts to share, you +can create a site shell script called `config.site' that gives default +values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Defining Variables +================== + +Variables not defined in a site shell script can be set in the +environment passed to `configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the `configure' command line, using `VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified `gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for `CONFIG_SHELL' due to +an Autoconf bug. Until the bug is fixed you can use this workaround: + + CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash + +`configure' Invocation +====================== + +`configure' recognizes the following options to control how it operates. + +`--help' +`-h' + Print a summary of the options to `configure', and exit. + +`--version' +`-V' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally `config.cache'. FILE defaults to `/dev/null' to + disable caching. + +`--config-cache' +`-C' + Alias for `--cache-file=config.cache'. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. 
To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`configure' also accepts some other, not widely useful, options. Run +`configure --help' for more details. + diff --git a/pixman/TODO b/pixman/TODO index 4434ec7cb..465abe7b5 100644 --- a/pixman/TODO +++ b/pixman/TODO @@ -1,271 +1,271 @@ - - Testing - - Test implementations against each other - - Test both with and without the operator strength reduction. - They shold be identical. - - - SSE 2 issues: - - - Use MM_HINT_NTA instead of MM_HINT_T0 - - - Use of fbCompositeOver_x888x8x8888sse2() - - - Update the RLEASING file - - - Things to keep in mind if breaking ABI: - - - There should be a guard #ifndef I_AM_EITHER_CAIRO_OR_THE_X_SERVER - - - X server will require 16.16 essentially forever. Can we get - the required precision by simply adding offset_x/y to the - relevant rendering API? - - - Get rid of workaround for X server bug. - - - pixman_image_set_indexed() should copy its argument, and X - should be ported over to use a pixman_image as the - representation of a Picture, rather than creating one on each - operation. - - - We should get rid of pixman_set_static_pointers() - - - We should get rid of the various trapezoid helper functions(). - (They only exist because they are theoretically available to - drivers). - - - 16 bit regions should be deleted - - - There should only be one trap rasterization API. - - - The PIXMAN_g8/c8/etc formats should use the A channel - to indicate the actual depth. That way PIXMAN_x4c4 and PIXMAN_c8 - won't collide. 
- - - Maybe bite the bullet and make configure.ac generate a pixman-types.h - file that can be included from pixman.h to avoid the #ifdef magic - in pixman.h - - - Make pixman_region_point_in() survive a NULL box, then fix up - pixman-compose.c - - - Possibly look into inlining the fetch functions - - - There is a bug with source clipping demonstrated by clip-test in the - test directory. If we interprete source clipping as given in - destination coordinates, which is probably the only sane choice, - then the result should have two red bars down the sides. - - - Test suite - - - Add a general way of dealing with architecture specific - fast-paths. The current idea is to have each operation that can - be optimized is called through a function pointer that is - initially set to an initialization function that is responsible for - setting the function pointer to the appropriate fast-path. - - - Go through things marked FIXME - - - Add calls to prepare and finish access where necessary. grep for - ACCESS_MEM, and make sure they are correctly wrapped in prepare - and finish. - - - restore READ/WRITE in the fbcompose combiners since they sometimes - store directly to destination drawables. - - - It probably makes sense to move the more strange X region API - into pixman as well, but guarded with PIXMAN_XORG_COMPATIBILITY - - - Reinstate the FbBits typedef? At the moment we don't - even have the FbBits type; we just use uint32_t everywhere. - - Keith says in bug 2335: - - The 64-bit code in fb (pixman) is probably broken; it hasn't been - used in quite some time as PCI (and AGP) is 32-bits wide, so - doing things 64-bits at a time is a net loss. To quickly fix - this, I suggest just using 32-bit datatypes by setting - IC_SHIFT to 5 for all machines. - - - Consider optimizing the 8/16 bit solid fills in pixman-util.c by - storing more than one value at a time. - - - Add an image cache to prevent excessive malloc/free. 
Note that pixman - needs to be thread safe when used from cairo. - - - Moving to 24.8 coordinates. This is tricky because X is still - defined as 16.16 and will be basically forever. It's possible we - could do this by adding extra offset_x/y parameters to the - trapezoid calls. The X server could then just call the API with - (0, 0). Cairo would have to make sure that the delta *within* a - batch of trapezoids does not exceed 16 bit. - - - Consider adding actual backends. Brain dump: - - A backend is something that knows how to - - - Create images - - Composite three images - - Rasterize trapezoids - - Do solid fills and blits - - These operations are provided by a vtable that the backend will - create when it is initialized. Initial backends: - - - VMX - - SSE2 - - MMX - - Plain Old C - - When the SIMD backends are initialized, they will be passed a - pointer to the Plain Old C backend that they can use for fallback - purposes. - - Images would gain a vtable as well that would contain things like - - - Read scanline - - Write scanline - - (Or even read_patch/write_patch as suggested by Keith a while - back). - - This could simplify the compositing code considerably. - - - Review the pixman_format_code_t enum to make sure it will support - future formats. Some formats we will probably need: - - ARGB/ABGR with 16/32/64 bit integer/floating channels - YUV2, - YV12 - - Also we may need the ability to distinguish between PICT_c8 and - PICT_x4c4. (This could be done by interpreting the A channel as - the depth for TYPE_COLOR and TYPE_GRAY formats). 
- - A possibility may be to reserve the two top bits and make them - encode "number of places to shift the channel widths given" Since - these bits are 00 at the moment everything will continue to work, - but these additional widths will be allowed: - - All even widths between 18-32 - All multiples of four widths between 33 and 64 - All multiples of eight between 64 and 128 - - This means things like r21g22b21 won't work - is that worth - worrying about? I don't think so. And of course the bpp field - can't handle a depth of over 256, so > 64 bit channels arent' - really all that useful. - - We could reserve one extra bit to indicate floating point, but - we may also just add - - PIXMAN_TYPE_ARGB_FLOAT - PIXMAN_TYPE_BGRA_FLOAT - PIXMAN_TYPE_A_FLOAT - - image types. With five bits we can support up to 32 different - format types, which should be enough for everybody, even if we - decide to support all the various video formats here: - - http://www.fourcc.org/yuv.php - - It may make sense to have a PIXMAN_TYPE_YUV, and then use the - channel bits to specify the exact subtype. - - Another possibility is to add - - PIXMAN_TYPE_ARGB_W - PIXMAN_TYPE_ARGB_WW - - where the channel widths would get 16 and 32 added to them, - respectively. - - What about color spaces such a linear vs. srGB etc.? - - -done: - -- Use pixmanFillsse2 and pixmanBltsse2 - -- Be consistent about calling sse2 sse2 - -- Rename "SSE" to "MMX_EXTENSIONS". (Deleted mmx extensions). - -- Commented-out uses of fbCompositeCopyAreasse2() - -- Consider whether calling regions region16 is really such a great - idea. Vlad wants 32 bit regions for Cairo. This will break X server - ABI, but should otherwise be mostly harmless, though a - pixman_region_get_boxes16() may be useful. - -- Altivec signal issue (Company has fix, there is also a patch by - dwmw2 in rawhide). - -- Behdad's MMX issue - see list - -- SSE2 issues: - - Crashes in Mozilla because of unaligned stack. 
Possible fixes - - Make use of gcc 4.2 feature to align the stack - - Write some sort of trampoline that aligns the stack - before calling SSE functions. - -- Get rid of the switch-of-doom; replace it with a big table - describing the various fast paths. - -- Make source clipping optional. - - done: source clipping happens through an indirection. - still needs to make the indirection settable. (And call it - from X) - -- Run cairo test suite; fix bugs - - one bug in source-scale-clip - - - Remove the warning suppression in the ACCESS_MEM macro and fix the - warnings that are real - - irrelevant now. - -- make the wrapper functions global instead of image specific - - this won't work since pixman is linked to both fb and wfb - -- Add non-mmx solid fill - -- Make sure the endian-ness macros are defined correctly. - -- The rectangles in a region probably shouldn't be returned const as - the X server will be changing them. - -- Right now we _always_ have a clip region, which is empty by default. - Why does this work at all? It probably doesn't. The server - distinguishes two cases, one where nothing is clipped (CT_NONE), and - one where there is a clip region (CT_REGION). - -- Default clip region should be the full image - - - Test if pseudo color still works. It does, but it also shows that - copying a pixman_indexed_t on every composite operation is not - going to fly. So, for now set_indexed() does not copy the - indexed table. - - Also just the malloc() to allocate a pixman image shows up pretty - high. - - Options include - - - Make all the setters not copy their arguments - - - Possibly combined with going back to the stack allocated - approach that we already use for regions. - - - Keep a cached pixman_image_t around for every picture. It would - have to be kept uptodate every time something changes about the - picture. - - - Break the X server ABI and simply have the relevant parameter - stored in the pixman image. 
This would have the additional benefits - that: - - - We can get rid of the annoying repeat field which is duplicated - elsewhere. - - - We can use pixman_color_t and pixman_gradient_stop_t - etc. instead of the types that are defined in - renderproto.h - + - Testing + - Test implementations against each other + - Test both with and without the operator strength reduction. + They shold be identical. + + - SSE 2 issues: + + - Use MM_HINT_NTA instead of MM_HINT_T0 + + - Use of fbCompositeOver_x888x8x8888sse2() + + - Update the RLEASING file + + - Things to keep in mind if breaking ABI: + + - There should be a guard #ifndef I_AM_EITHER_CAIRO_OR_THE_X_SERVER + + - X server will require 16.16 essentially forever. Can we get + the required precision by simply adding offset_x/y to the + relevant rendering API? + + - Get rid of workaround for X server bug. + + - pixman_image_set_indexed() should copy its argument, and X + should be ported over to use a pixman_image as the + representation of a Picture, rather than creating one on each + operation. + + - We should get rid of pixman_set_static_pointers() + + - We should get rid of the various trapezoid helper functions(). + (They only exist because they are theoretically available to + drivers). + + - 16 bit regions should be deleted + + - There should only be one trap rasterization API. + + - The PIXMAN_g8/c8/etc formats should use the A channel + to indicate the actual depth. That way PIXMAN_x4c4 and PIXMAN_c8 + won't collide. + + - Maybe bite the bullet and make configure.ac generate a pixman-types.h + file that can be included from pixman.h to avoid the #ifdef magic + in pixman.h + + - Make pixman_region_point_in() survive a NULL box, then fix up + pixman-compose.c + + - Possibly look into inlining the fetch functions + + - There is a bug with source clipping demonstrated by clip-test in the + test directory. 
If we interprete source clipping as given in + destination coordinates, which is probably the only sane choice, + then the result should have two red bars down the sides. + + - Test suite + + - Add a general way of dealing with architecture specific + fast-paths. The current idea is to have each operation that can + be optimized is called through a function pointer that is + initially set to an initialization function that is responsible for + setting the function pointer to the appropriate fast-path. + + - Go through things marked FIXME + + - Add calls to prepare and finish access where necessary. grep for + ACCESS_MEM, and make sure they are correctly wrapped in prepare + and finish. + + - restore READ/WRITE in the fbcompose combiners since they sometimes + store directly to destination drawables. + + - It probably makes sense to move the more strange X region API + into pixman as well, but guarded with PIXMAN_XORG_COMPATIBILITY + + - Reinstate the FbBits typedef? At the moment we don't + even have the FbBits type; we just use uint32_t everywhere. + + Keith says in bug 2335: + + The 64-bit code in fb (pixman) is probably broken; it hasn't been + used in quite some time as PCI (and AGP) is 32-bits wide, so + doing things 64-bits at a time is a net loss. To quickly fix + this, I suggest just using 32-bit datatypes by setting + IC_SHIFT to 5 for all machines. + + - Consider optimizing the 8/16 bit solid fills in pixman-util.c by + storing more than one value at a time. + + - Add an image cache to prevent excessive malloc/free. Note that pixman + needs to be thread safe when used from cairo. + + - Moving to 24.8 coordinates. This is tricky because X is still + defined as 16.16 and will be basically forever. It's possible we + could do this by adding extra offset_x/y parameters to the + trapezoid calls. The X server could then just call the API with + (0, 0). Cairo would have to make sure that the delta *within* a + batch of trapezoids does not exceed 16 bit. 
+ + - Consider adding actual backends. Brain dump: + + A backend is something that knows how to + + - Create images + - Composite three images + - Rasterize trapezoids + - Do solid fills and blits + + These operations are provided by a vtable that the backend will + create when it is initialized. Initial backends: + + - VMX + - SSE2 + - MMX + - Plain Old C + + When the SIMD backends are initialized, they will be passed a + pointer to the Plain Old C backend that they can use for fallback + purposes. + + Images would gain a vtable as well that would contain things like + + - Read scanline + - Write scanline + + (Or even read_patch/write_patch as suggested by Keith a while + back). + + This could simplify the compositing code considerably. + + - Review the pixman_format_code_t enum to make sure it will support + future formats. Some formats we will probably need: + + ARGB/ABGR with 16/32/64 bit integer/floating channels + YUV2, + YV12 + + Also we may need the ability to distinguish between PICT_c8 and + PICT_x4c4. (This could be done by interpreting the A channel as + the depth for TYPE_COLOR and TYPE_GRAY formats). + + A possibility may be to reserve the two top bits and make them + encode "number of places to shift the channel widths given" Since + these bits are 00 at the moment everything will continue to work, + but these additional widths will be allowed: + + All even widths between 18-32 + All multiples of four widths between 33 and 64 + All multiples of eight between 64 and 128 + + This means things like r21g22b21 won't work - is that worth + worrying about? I don't think so. And of course the bpp field + can't handle a depth of over 256, so > 64 bit channels arent' + really all that useful. + + We could reserve one extra bit to indicate floating point, but + we may also just add + + PIXMAN_TYPE_ARGB_FLOAT + PIXMAN_TYPE_BGRA_FLOAT + PIXMAN_TYPE_A_FLOAT + + image types. 
With five bits we can support up to 32 different + format types, which should be enough for everybody, even if we + decide to support all the various video formats here: + + http://www.fourcc.org/yuv.php + + It may make sense to have a PIXMAN_TYPE_YUV, and then use the + channel bits to specify the exact subtype. + + Another possibility is to add + + PIXMAN_TYPE_ARGB_W + PIXMAN_TYPE_ARGB_WW + + where the channel widths would get 16 and 32 added to them, + respectively. + + What about color spaces such a linear vs. srGB etc.? + + +done: + +- Use pixmanFillsse2 and pixmanBltsse2 + +- Be consistent about calling sse2 sse2 + +- Rename "SSE" to "MMX_EXTENSIONS". (Deleted mmx extensions). + +- Commented-out uses of fbCompositeCopyAreasse2() + +- Consider whether calling regions region16 is really such a great + idea. Vlad wants 32 bit regions for Cairo. This will break X server + ABI, but should otherwise be mostly harmless, though a + pixman_region_get_boxes16() may be useful. + +- Altivec signal issue (Company has fix, there is also a patch by + dwmw2 in rawhide). + +- Behdad's MMX issue - see list + +- SSE2 issues: + - Crashes in Mozilla because of unaligned stack. Possible fixes + - Make use of gcc 4.2 feature to align the stack + - Write some sort of trampoline that aligns the stack + before calling SSE functions. + +- Get rid of the switch-of-doom; replace it with a big table + describing the various fast paths. + +- Make source clipping optional. + - done: source clipping happens through an indirection. + still needs to make the indirection settable. (And call it + from X) + +- Run cairo test suite; fix bugs + - one bug in source-scale-clip + + - Remove the warning suppression in the ACCESS_MEM macro and fix the + warnings that are real + - irrelevant now. 
+ +- make the wrapper functions global instead of image specific + - this won't work since pixman is linked to both fb and wfb + +- Add non-mmx solid fill + +- Make sure the endian-ness macros are defined correctly. + +- The rectangles in a region probably shouldn't be returned const as + the X server will be changing them. + +- Right now we _always_ have a clip region, which is empty by default. + Why does this work at all? It probably doesn't. The server + distinguishes two cases, one where nothing is clipped (CT_NONE), and + one where there is a clip region (CT_REGION). + +- Default clip region should be the full image + + - Test if pseudo color still works. It does, but it also shows that + copying a pixman_indexed_t on every composite operation is not + going to fly. So, for now set_indexed() does not copy the + indexed table. + + Also just the malloc() to allocate a pixman image shows up pretty + high. + + Options include + + - Make all the setters not copy their arguments + + - Possibly combined with going back to the stack allocated + approach that we already use for regions. + + - Keep a cached pixman_image_t around for every picture. It would + have to be kept uptodate every time something changes about the + picture. + + - Break the X server ABI and simply have the relevant parameter + stored in the pixman image. This would have the additional benefits + that: + + - We can get rid of the annoying repeat field which is duplicated + elsewhere. + + - We can use pixman_color_t and pixman_gradient_stop_t + etc. 
instead of the types that are defined in + renderproto.h + diff --git a/pixman/pixman-1-uninstalled.pc.in b/pixman/pixman-1-uninstalled.pc.in index e0347d010..c15e86547 100644 --- a/pixman/pixman-1-uninstalled.pc.in +++ b/pixman/pixman-1-uninstalled.pc.in @@ -1,5 +1,5 @@ -Name: Pixman -Description: The pixman library (version 1) -Version: @PACKAGE_VERSION@ -Cflags: -I${pc_top_builddir}/${pcfiledir}/pixman -Libs: ${pc_top_builddir}/${pcfiledir}/pixman/libpixman-1.la +Name: Pixman +Description: The pixman library (version 1) +Version: @PACKAGE_VERSION@ +Cflags: -I${pc_top_builddir}/${pcfiledir}/pixman +Libs: ${pc_top_builddir}/${pcfiledir}/pixman/libpixman-1.la diff --git a/pixman/pixman-1.pc.in b/pixman/pixman-1.pc.in index 936d95db0..e44361749 100644 --- a/pixman/pixman-1.pc.in +++ b/pixman/pixman-1.pc.in @@ -1,11 +1,11 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: Pixman -Description: The pixman library (version 1) -Version: @PACKAGE_VERSION@ -Cflags: -I${includedir}/pixman-1 @DEP_CFLAGS@ -Libs: -L${libdir} -lpixman-1 @DEP_LIBS@ - +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: Pixman +Description: The pixman library (version 1) +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir}/pixman-1 @DEP_CFLAGS@ +Libs: -L${libdir} -lpixman-1 @DEP_LIBS@ + diff --git a/pixman/pixman/make-combine.pl b/pixman/pixman/make-combine.pl index 210a5da12..417bdf085 100644 --- a/pixman/pixman/make-combine.pl +++ b/pixman/pixman/make-combine.pl @@ -1,86 +1,86 @@ -$usage = "Usage: combine.pl { 8 | 16 } < pixman-combine.c.template"; - -$#ARGV == 0 or die $usage; - -# Get the component size. -$size = int($ARGV[0]); -$size == 8 or $size == 16 or die $usage; - -$pixel_size = $size * 4; -$half_pixel_size = $size * 2; - -sub mask { - my $str = shift; - my $suffix; - $suffix = "ULL" if $size > 8; - - return "0x" . $str . $suffix; -} - -# Generate mask strings. 
-$nibbles = $size / 4; -$mask = "f" x $nibbles; -$zero_mask = "0" x $nibbles; -$one_half = "8" . "0" x ($nibbles - 1); - -print "/* WARNING: This file is generated by combine.pl from combine.inc.\n"; -print " Please edit one of those files rather than this one. */\n"; -print "\n"; - -print "#line 1 \"pixman-combine.c.template\"\n"; - -$mask_ = mask($mask); -$one_half_ = mask($one_half); -$g_mask = mask($mask . $zero_mask); -$b_mask = mask($mask . $zero_mask x 2); -$a_mask = mask($mask . $zero_mask x 3); -$rb_mask = mask($mask . $zero_mask . $mask); -$ag_mask = mask($mask . $zero_mask . $mask . $zero_mask); -$rb_one_half = mask($one_half . $zero_mask . $one_half); -$rb_mask_plus_one = mask("1" . $zero_mask x 2 . "1" . $zero_mask); - -while (<STDIN>) { - # Mask and 1/2 value for a single component. - s/#define COMPONENT_SIZE\b/$& $size/; - s/#define MASK\b/$& $mask_/; - s/#define ONE_HALF\b/$& $one_half_/; - - # Shifts and masks for green, blue, and alpha. - s/#define G_SHIFT\b/$& $size/; - s/#define R_SHIFT\b/$& $size * 2/; - s/#define A_SHIFT\b/$& $size * 3/; - s/#define G_MASK\b/$& $g_mask/; - s/#define R_MASK\b/$& $b_mask/; - s/#define A_MASK\b/$& $a_mask/; - - # Special values for dealing with red + blue at the same time. - s/#define RB_MASK\b/$& $rb_mask/; - s/#define AG_MASK\b/$& $ag_mask/; - s/#define RB_ONE_HALF\b/$& $rb_one_half/; - s/#define RB_MASK_PLUS_ONE\b/$& $rb_mask_plus_one/; - - # Add 32/64 suffix to combining function types. - s/\bCombineFunc\b/CombineFunc$pixel_size/; - s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/; - s/combine_width/combine_$pixel_size/; - s/_pixman_setup_combiner_functions_width/_pixman_setup_combiner_functions_$pixel_size/; - s/UNc/UN$size/g; - s/ALPHA_c/ALPHA_$size/g; - s/RED_c/RED_$size/g; - s/GREEN_c/GREEN_$size/g; - s/BLUE_c/BLUE_$size/g; - - # Convert comp*_t values into the appropriate real types.
- s/comp1_t/uint${size}_t/g; - s/comp2_t/uint${half_pixel_size}_t/g; - s/comp4_t/uint${pixel_size}_t/g; - - # Change the function table name for the 64-bit version. - s/pixman_composeFunctions/pixman_composeFunctions64/ if $size == 16; - - # Change the header for the 64-bit version - s/pixman-combine.h/pixman-combine64.h/ if $size == 16; - s/pixman-combine.h/pixman-combine32.h/ if $size == 8; - - print; -} +$usage = "Usage: combine.pl { 8 | 16 } < pixman-combine.c.template"; + +$#ARGV == 0 or die $usage; + +# Get the component size. +$size = int($ARGV[0]); +$size == 8 or $size == 16 or die $usage; + +$pixel_size = $size * 4; +$half_pixel_size = $size * 2; + +sub mask { + my $str = shift; + my $suffix; + $suffix = "ULL" if $size > 8; + + return "0x" . $str . $suffix; +} + +# Generate mask strings. +$nibbles = $size / 4; +$mask = "f" x $nibbles; +$zero_mask = "0" x $nibbles; +$one_half = "8" . "0" x ($nibbles - 1); + +print "/* WARNING: This file is generated by combine.pl from combine.inc.\n"; +print " Please edit one of those files rather than this one. */\n"; +print "\n"; + +print "#line 1 \"pixman-combine.c.template\"\n"; + +$mask_ = mask($mask); +$one_half_ = mask($one_half); +$g_mask = mask($mask . $zero_mask); +$b_mask = mask($mask . $zero_mask x 2); +$a_mask = mask($mask . $zero_mask x 3); +$rb_mask = mask($mask . $zero_mask . $mask); +$ag_mask = mask($mask . $zero_mask . $mask . $zero_mask); +$rb_one_half = mask($one_half . $zero_mask . $one_half); +$rb_mask_plus_one = mask("1" . $zero_mask x 2 . "1" . $zero_mask); + +while (<STDIN>) { + # Mask and 1/2 value for a single component. + s/#define COMPONENT_SIZE\b/$& $size/; + s/#define MASK\b/$& $mask_/; + s/#define ONE_HALF\b/$& $one_half_/; + + # Shifts and masks for green, blue, and alpha.
+ s/#define G_SHIFT\b/$& $size/; + s/#define R_SHIFT\b/$& $size * 2/; + s/#define A_SHIFT\b/$& $size * 3/; + s/#define G_MASK\b/$& $g_mask/; + s/#define R_MASK\b/$& $b_mask/; + s/#define A_MASK\b/$& $a_mask/; + + # Special values for dealing with red + blue at the same time. + s/#define RB_MASK\b/$& $rb_mask/; + s/#define AG_MASK\b/$& $ag_mask/; + s/#define RB_ONE_HALF\b/$& $rb_one_half/; + s/#define RB_MASK_PLUS_ONE\b/$& $rb_mask_plus_one/; + + # Add 32/64 suffix to combining function types. + s/\bCombineFunc\b/CombineFunc$pixel_size/; + s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/; + s/combine_width/combine_$pixel_size/; + s/_pixman_setup_combiner_functions_width/_pixman_setup_combiner_functions_$pixel_size/; + s/UNc/UN$size/g; + s/ALPHA_c/ALPHA_$size/g; + s/RED_c/RED_$size/g; + s/GREEN_c/GREEN_$size/g; + s/BLUE_c/BLUE_$size/g; + + # Convert comp*_t values into the appropriate real types. + s/comp1_t/uint${size}_t/g; + s/comp2_t/uint${half_pixel_size}_t/g; + s/comp4_t/uint${pixel_size}_t/g; + + # Change the function table name for the 64-bit version. 
+ s/pixman_composeFunctions/pixman_composeFunctions64/ if $size == 16; + + # Change the header for the 64-bit version + s/pixman-combine.h/pixman-combine64.h/ if $size == 16; + s/pixman-combine.h/pixman-combine32.h/ if $size == 8; + + print; +} diff --git a/pixman/pixman/pixman-access-accessors.c b/pixman/pixman/pixman-access-accessors.c index 3263582f1..bde67a70e 100644 --- a/pixman/pixman/pixman-access-accessors.c +++ b/pixman/pixman/pixman-access-accessors.c @@ -1,3 +1,3 @@ -#define PIXMAN_FB_ACCESSORS - -#include "pixman-access.c" +#define PIXMAN_FB_ACCESSORS + +#include "pixman-access.c" diff --git a/pixman/pixman/pixman-arm-neon-asm.h b/pixman/pixman/pixman-arm-neon-asm.h index 0ba67d05f..97adc6a87 100644 --- a/pixman/pixman/pixman-arm-neon-asm.h +++ b/pixman/pixman/pixman-arm-neon-asm.h @@ -1,1177 +1,1177 @@ -/* - * Copyright © 2009 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -/* - * This file contains a macro ('generate_composite_function') which can - * construct 2D image processing functions, based on a common template. - * Any combinations of source, destination and mask images with 8bpp, - * 16bpp, 24bpp, 32bpp color formats are supported. - * - * This macro takes care of: - * - handling of leading and trailing unaligned pixels - * - doing most of the work related to L2 cache preload - * - encourages the use of software pipelining for better instructions - * scheduling - * - * The user of this macro has to provide some configuration parameters - * (bit depths for the images, prefetch distance, etc.) and a set of - * macros, which should implement basic code chunks responsible for - * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage - * examples. - * - * TODO: - * - try overlapped pixel method (from Ian Rickards) when processing - * exactly two blocks of pixels - * - maybe add an option to do reverse scanline processing - */ - -/* - * Bit flags for 'generate_composite_function' macro which are used - * to tune generated functions behavior. - */ -.set FLAG_DST_WRITEONLY, 0 -.set FLAG_DST_READWRITE, 1 -.set FLAG_DEINTERLEAVE_32BPP, 2 - -/* - * Offset in stack where mask and source pointer/stride can be accessed - * from 'init' macro. This is useful for doing special handling for solid mask. - */ -.set ARGS_STACK_OFFSET, 40 - -/* - * Constants for selecting preferable prefetch type. - */ -.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */ -.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */ -.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */ - -/* - * Definitions of supplementary pixld/pixst macros (for partial load/store of - * pixel data). - */ - -.macro pixldst1 op, elem_size, reg1, mem_operand, abits -.if abits > 0 - op&.&elem_size {d&reg1}, [&mem_operand&, :&abits&]!
-.else - op&.&elem_size {d&reg1}, [&mem_operand&]! -.endif -.endm - -.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits -.if abits > 0 - op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&, :&abits&]! -.else - op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&]! -.endif -.endm - -.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits -.if abits > 0 - op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&, :&abits&]! -.else - op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&]! -.endif -.endm - -.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits - op&.&elem_size {d&reg1[idx]}, [&mem_operand&]! -.endm - -.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand - op&.&elem_size {d&reg1, d&reg2, d&reg3}, [&mem_operand&]! -.endm - -.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand - op&.&elem_size {d&reg1[idx], d&reg2[idx], d&reg3[idx]}, [&mem_operand&]! -.endm - -.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits -.if numbytes == 32 - pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif numbytes == 16 - pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits -.elseif numbytes == 8 - pixldst1 op, elem_size, %(basereg+1), mem_operand, abits -.elseif numbytes == 4 - .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32) - pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits - .elseif elem_size == 16 - pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits - pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits - .else - pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits - .endif -.elseif numbytes == 2 - .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16) - pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits - .else - pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits - pixldst0 op, 8, %(basereg+0), 3,
mem_operand, abits - .endif -.elseif numbytes == 1 - pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits -.else - .error "unsupported size: numbytes" -.endif -.endm - -.macro pixld numpix, bpp, basereg, mem_operand, abits=0 -.if bpp > 0 -.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) - pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif (bpp == 24) && (numpix == 8) - pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand -.elseif (bpp == 24) && (numpix == 4) - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand -.elseif (bpp == 24) && (numpix == 2) - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand -.elseif (bpp == 24) && (numpix == 1) - pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand -.else - pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits -.endif -.endif -.endm - -.macro pixst numpix, bpp, basereg, mem_operand, abits=0 -.if bpp > 0 -.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) - pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \ - %(basereg+6), %(basereg+7), mem_operand, abits -.elseif (bpp == 24) && (numpix == 8) - pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand -.elseif (bpp == 24) && (numpix == 4) - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand -.elseif (bpp 
== 24) && (numpix == 2) - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand -.elseif (bpp == 24) && (numpix == 1) - pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand -.else - pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits -.endif -.endif -.endm - -.macro pixld_a numpix, bpp, basereg, mem_operand -.if (bpp * numpix) <= 128 - pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix) -.else - pixld numpix, bpp, basereg, mem_operand, 128 -.endif -.endm - -.macro pixst_a numpix, bpp, basereg, mem_operand -.if (bpp * numpix) <= 128 - pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix) -.else - pixst numpix, bpp, basereg, mem_operand, 128 -.endif -.endm - -/* - * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register - * aliases to be defined) - */ -.macro pixld1_s elem_size, reg1, mem_operand -.if elem_size == 16 - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X - add TMP1, mem_operand, TMP1, asl #1 - mov TMP2, VX, asr #16 - add VX, VX, UNIT_X - add TMP2, mem_operand, TMP2, asl #1 - vld1.16 {d&reg1&[0]}, [TMP1, :16] - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X - add TMP1, mem_operand, TMP1, asl #1 - vld1.16 {d&reg1&[1]}, [TMP2, :16] - mov TMP2, VX, asr #16 - add VX, VX, UNIT_X - add TMP2, mem_operand, TMP2, asl #1 - vld1.16 {d&reg1&[2]}, [TMP1, :16] - vld1.16 {d&reg1&[3]}, [TMP2, :16] -.elseif elem_size == 32 - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X - add TMP1, mem_operand, TMP1, asl #2 - mov TMP2, VX, asr #16 - add VX, VX, UNIT_X - add TMP2, mem_operand, TMP2, asl #2 - vld1.32 {d&reg1&[0]}, [TMP1, :32] - vld1.32 {d&reg1&[1]}, [TMP2, :32] -.else - .error "unsupported" -.endif -.endm - -.macro pixld2_s elem_size, reg1, reg2, mem_operand -.if elem_size == 32 - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X, asl #1 - add TMP1, mem_operand, TMP1, asl #2 - mov TMP2, VX, asr #16 - sub VX, VX, UNIT_X - add TMP2, mem_operand,
TMP2, asl #2 - vld1.32 {d&reg1&[0]}, [TMP1, :32] - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X, asl #1 - add TMP1, mem_operand, TMP1, asl #2 - vld1.32 {d&reg2&[0]}, [TMP2, :32] - mov TMP2, VX, asr #16 - add VX, VX, UNIT_X - add TMP2, mem_operand, TMP2, asl #2 - vld1.32 {d&reg1&[1]}, [TMP1, :32] - vld1.32 {d&reg2&[1]}, [TMP2, :32] -.else - pixld1_s elem_size, reg1, mem_operand - pixld1_s elem_size, reg2, mem_operand -.endif -.endm - -.macro pixld0_s elem_size, reg1, idx, mem_operand -.if elem_size == 16 - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X - add TMP1, mem_operand, TMP1, asl #1 - vld1.16 {d&reg1&[idx]}, [TMP1, :16] -.elseif elem_size == 32 - mov TMP1, VX, asr #16 - add VX, VX, UNIT_X - add TMP1, mem_operand, TMP1, asl #2 - vld1.32 {d&reg1&[idx]}, [TMP1, :32] -.endif -.endm - -.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand -.if numbytes == 32 - pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand - pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand - pixdeinterleave elem_size, %(basereg+4) -.elseif numbytes == 16 - pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand -.elseif numbytes == 8 - pixld1_s elem_size, %(basereg+1), mem_operand -.elseif numbytes == 4 - .if elem_size == 32 - pixld0_s elem_size, %(basereg+0), 1, mem_operand - .elseif elem_size == 16 - pixld0_s elem_size, %(basereg+0), 2, mem_operand - pixld0_s elem_size, %(basereg+0), 3, mem_operand - .else - pixld0_s elem_size, %(basereg+0), 4, mem_operand - pixld0_s elem_size, %(basereg+0), 5, mem_operand - pixld0_s elem_size, %(basereg+0), 6, mem_operand - pixld0_s elem_size, %(basereg+0), 7, mem_operand - .endif -.elseif numbytes == 2 - .if elem_size == 16 - pixld0_s elem_size, %(basereg+0), 1, mem_operand - .else - pixld0_s elem_size, %(basereg+0), 2, mem_operand - pixld0_s elem_size, %(basereg+0), 3, mem_operand - .endif -.elseif numbytes == 1 - pixld0_s elem_size, %(basereg+0), 1, mem_operand -.else - .error "unsupported size: numbytes" -.endif -.endm - -.macro pixld_s
numpix, bpp, basereg, mem_operand -.if bpp > 0 - pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand -.endif -.endm - -.macro vuzp8 reg1, reg2 - vuzp.8 d&reg1, d&reg2 -.endm - -.macro vzip8 reg1, reg2 - vzip.8 d&reg1, d&reg2 -.endm - -/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ -.macro pixdeinterleave bpp, basereg -.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) - vuzp8 %(basereg+0), %(basereg+1) - vuzp8 %(basereg+2), %(basereg+3) - vuzp8 %(basereg+1), %(basereg+3) - vuzp8 %(basereg+0), %(basereg+2) -.endif -.endm - -/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ -.macro pixinterleave bpp, basereg -.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) - vzip8 %(basereg+0), %(basereg+2) - vzip8 %(basereg+1), %(basereg+3) - vzip8 %(basereg+2), %(basereg+3) - vzip8 %(basereg+0), %(basereg+1) -.endif -.endm - -/* - * This is a macro for implementing cache preload. The main idea is that - * cache preload logic is mostly independent from the rest of pixels - * processing code. It starts at the top left pixel and moves forward - * across pixels and can jump across scanlines. Prefetch distance is - * handled in an 'incremental' way: it starts from 0 and advances to the - * optimal distance over time. After reaching optimal prefetch distance, - * it is kept constant. There are some checks which prevent prefetching - * unneeded pixel lines below the image (but it still can prefetch a bit - * more data on the right side of the image - not a big issue and may - * be actually helpful when rendering text glyphs). Additional trick is - * the use of LDR instruction for prefetch instead of PLD when moving to - * the next line, the point is that we have a high chance of getting TLB - * miss in this case, and PLD would be useless. - * - * This sounds like it may introduce a noticeable overhead (when working with - * fully cached data).
But in reality, due to having a separate pipeline and - * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can - * execute simultaneously with NEON and be completely shadowed by it. Thus - * we get no performance overhead at all (*). This looks like a very nice - * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher, - * but still can implement some rather advanced prefetch logic in sofware - * for almost zero cost! - * - * (*) The overhead of the prefetcher is visible when running some trivial - * pixels processing like simple copy. Anyway, having prefetch is a must - * when working with the graphics data. - */ -.macro PF a, x:vararg -.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED) - a x -.endif -.endm - -.macro cache_preload std_increment, boost_increment -.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0) -.if regs_shortage - PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */ -.endif -.if std_increment != 0 - PF add PF_X, PF_X, #std_increment -.endif - PF tst PF_CTL, #0xF - PF addne PF_X, PF_X, #boost_increment - PF subne PF_CTL, PF_CTL, #1 - PF cmp PF_X, ORIG_W -.if src_bpp_shift >= 0 - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] -.endif -.if dst_r_bpp != 0 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] -.endif -.if mask_bpp_shift >= 0 - PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] -.endif - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 -.if src_bpp_shift >= 0 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! -.endif -.if dst_r_bpp != 0 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! -.endif -.if mask_bpp_shift >= 0 - PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! 
-.endif -.endif -.endm - -.macro cache_preload_simple -.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE) -.if src_bpp > 0 - pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)] -.endif -.if dst_r_bpp > 0 - pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)] -.endif -.if mask_bpp > 0 - pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)] -.endif -.endif -.endm - -.macro fetch_mask_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK -.endm - -/* - * Macro which is used to process leading pixels until destination - * pointer is properly aligned (at 16 bytes boundary). When destination - * buffer uses 16bpp format, this is unnecessary, or even pointless. - */ -.macro ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head -.if dst_w_bpp != 24 - tst DST_R, #0xF - beq 2f - -.irp lowbit, 1, 2, 4, 8, 16 -local skip1 -.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) -.if lowbit < 16 /* we don't need more than 16-byte alignment */ - tst DST_R, #lowbit - beq 1f -.endif - pixld_src (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC - pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK -.if dst_r_bpp > 0 - pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R -.else - add DST_R, DST_R, #lowbit -.endif - PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp) - sub W, W, #(lowbit * 8 / dst_w_bpp) -1: -.endif -.endr - pixdeinterleave src_bpp, src_basereg - pixdeinterleave mask_bpp, mask_basereg - pixdeinterleave dst_r_bpp, dst_r_basereg - - process_pixblock_head - cache_preload 0, pixblock_size - cache_preload_simple - process_pixblock_tail - - pixinterleave dst_w_bpp, dst_w_basereg -.irp lowbit, 1, 2, 4, 8, 16 -.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) -.if lowbit < 16 /* we don't need more than 16-byte alignment */ - tst DST_W, #lowbit - beq 1f -.endif - pixst_a (lowbit * 8 / dst_w_bpp), 
dst_w_bpp, dst_w_basereg, DST_W -1: -.endif -.endr -.endif -2: -.endm - -/* - * Special code for processing up to (pixblock_size - 1) remaining - * trailing pixels. As SIMD processing performs operation on - * pixblock_size pixels, anything smaller than this has to be loaded - * and stored in a special way. Loading and storing of pixel data is - * performed in such a way that we fill some 'slots' in the NEON - * registers (some slots naturally are unused), then perform compositing - * operation as usual. In the end, the data is taken from these 'slots' - * and saved to memory. - * - * cache_preload_flag - allows to suppress prefetch if - * set to 0 - * dst_aligned_flag - selects whether destination buffer - * is aligned - */ -.macro process_trailing_pixels cache_preload_flag, \ - dst_aligned_flag, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - tst W, #(pixblock_size - 1) - beq 2f -.irp chunk_size, 16, 8, 4, 2, 1 -.if pixblock_size > chunk_size - tst W, #chunk_size - beq 1f - pixld_src chunk_size, src_bpp, src_basereg, SRC - pixld chunk_size, mask_bpp, mask_basereg, MASK -.if dst_aligned_flag != 0 - pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R -.else - pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R -.endif -.if cache_preload_flag != 0 - PF add PF_X, PF_X, #chunk_size -.endif -1: -.endif -.endr - pixdeinterleave src_bpp, src_basereg - pixdeinterleave mask_bpp, mask_basereg - pixdeinterleave dst_r_bpp, dst_r_basereg - - process_pixblock_head -.if cache_preload_flag != 0 - cache_preload 0, pixblock_size - cache_preload_simple -.endif - process_pixblock_tail - pixinterleave dst_w_bpp, dst_w_basereg -.irp chunk_size, 16, 8, 4, 2, 1 -.if pixblock_size > chunk_size - tst W, #chunk_size - beq 1f -.if dst_aligned_flag != 0 - pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W -.else - pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W -.endif -1: -.endif -.endr -2: -.endm - -/* - * Macro, which performs all the needed 
operations to switch to the next - * scanline and start the next loop iteration unless all the scanlines - * are already processed. - */ -.macro advance_to_next_scanline start_of_loop_label -.if regs_shortage - ldrd W, [sp] /* load W and H (width and height) from stack */ -.else - mov W, ORIG_W -.endif - add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift -.if src_bpp != 0 - add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift -.endif -.if mask_bpp != 0 - add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift -.endif -.if (dst_w_bpp != 24) - sub DST_W, DST_W, W, lsl #dst_bpp_shift -.endif -.if (src_bpp != 24) && (src_bpp != 0) - sub SRC, SRC, W, lsl #src_bpp_shift -.endif -.if (mask_bpp != 24) && (mask_bpp != 0) - sub MASK, MASK, W, lsl #mask_bpp_shift -.endif - subs H, H, #1 - mov DST_R, DST_W -.if regs_shortage - str H, [sp, #4] /* save updated height to stack */ -.endif - bge start_of_loop_label -.endm - -/* - * Registers are allocated in the following way by default: - * d0, d1, d2, d3 - reserved for loading source pixel data - * d4, d5, d6, d7 - reserved for loading destination pixel data - * d24, d25, d26, d27 - reserved for loading mask pixel data - * d28, d29, d30, d31 - final destination pixel data for writeback to memory - */ -.macro generate_composite_function fname, \ - src_bpp_, \ - mask_bpp_, \ - dst_w_bpp_, \ - flags, \ - pixblock_size_, \ - prefetch_distance, \ - init, \ - cleanup, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head, \ - dst_w_basereg_ = 28, \ - dst_r_basereg_ = 4, \ - src_basereg_ = 0, \ - mask_basereg_ = 24 - - .func fname - .global fname - /* For ELF format also set function visibility to hidden */ -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: - push {r4-r12, lr} /* save all registers */ - -/* - * Select prefetch type for this function. If prefetch distance is - * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch - * has to be used instead of ADVANCED. 
- */ - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT -.if prefetch_distance == 0 - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE -.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \ - ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24)) - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE -.endif - -/* - * Make some macro arguments globally visible and accessible - * from other macros - */ - .set src_bpp, src_bpp_ - .set mask_bpp, mask_bpp_ - .set dst_w_bpp, dst_w_bpp_ - .set pixblock_size, pixblock_size_ - .set dst_w_basereg, dst_w_basereg_ - .set dst_r_basereg, dst_r_basereg_ - .set src_basereg, src_basereg_ - .set mask_basereg, mask_basereg_ - - .macro pixld_src x:vararg - pixld x - .endm - .macro fetch_src_pixblock - pixld_src pixblock_size, src_bpp, \ - (src_basereg - pixblock_size * src_bpp / 64), SRC - .endm -/* - * Assign symbolic names to registers - */ - W .req r0 /* width (is updated during processing) */ - H .req r1 /* height (is updated during processing) */ - DST_W .req r2 /* destination buffer pointer for writes */ - DST_STRIDE .req r3 /* destination image stride */ - SRC .req r4 /* source buffer pointer */ - SRC_STRIDE .req r5 /* source image stride */ - DST_R .req r6 /* destination buffer pointer for reads */ - - MASK .req r7 /* mask pointer */ - MASK_STRIDE .req r8 /* mask stride */ - - PF_CTL .req r9 /* combined lines counter and prefetch */ - /* distance increment counter */ - PF_X .req r10 /* pixel index in a scanline for current */ - /* pretetch position */ - PF_SRC .req r11 /* pointer to source scanline start */ - /* for prefetch purposes */ - PF_DST .req r12 /* pointer to destination scanline start */ - /* for prefetch purposes */ - PF_MASK .req r14 /* pointer to mask scanline start */ - /* for prefetch purposes */ -/* - * Check whether we have enough registers for all the local variables. 
- * If we don't have enough registers, original width and height are - * kept on top of stack (and 'regs_shortage' variable is set to indicate - * this for the rest of code). Even if there are enough registers, the - * allocation scheme may be a bit different depending on whether source - * or mask is not used. - */ -.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED) - ORIG_W .req r10 /* saved original width */ - DUMMY .req r12 /* temporary register */ - .set regs_shortage, 0 -.elseif mask_bpp == 0 - ORIG_W .req r7 /* saved original width */ - DUMMY .req r8 /* temporary register */ - .set regs_shortage, 0 -.elseif src_bpp == 0 - ORIG_W .req r4 /* saved original width */ - DUMMY .req r5 /* temporary register */ - .set regs_shortage, 0 -.else - ORIG_W .req r1 /* saved original width */ - DUMMY .req r1 /* temporary register */ - .set regs_shortage, 1 -.endif - - .set mask_bpp_shift, -1 -.if src_bpp == 32 - .set src_bpp_shift, 2 -.elseif src_bpp == 24 - .set src_bpp_shift, 0 -.elseif src_bpp == 16 - .set src_bpp_shift, 1 -.elseif src_bpp == 8 - .set src_bpp_shift, 0 -.elseif src_bpp == 0 - .set src_bpp_shift, -1 -.else - .error "requested src bpp (src_bpp) is not supported" -.endif -.if mask_bpp == 32 - .set mask_bpp_shift, 2 -.elseif mask_bpp == 24 - .set mask_bpp_shift, 0 -.elseif mask_bpp == 8 - .set mask_bpp_shift, 0 -.elseif mask_bpp == 0 - .set mask_bpp_shift, -1 -.else - .error "requested mask bpp (mask_bpp) is not supported" -.endif -.if dst_w_bpp == 32 - .set dst_bpp_shift, 2 -.elseif dst_w_bpp == 24 - .set dst_bpp_shift, 0 -.elseif dst_w_bpp == 16 - .set dst_bpp_shift, 1 -.elseif dst_w_bpp == 8 - .set dst_bpp_shift, 0 -.else - .error "requested dst bpp (dst_w_bpp) is not supported" -.endif - -.if (((flags) & FLAG_DST_READWRITE) != 0) - .set dst_r_bpp, dst_w_bpp -.else - .set dst_r_bpp, 0 -.endif -.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) - .set DEINTERLEAVE_32BPP_ENABLED, 1 -.else - .set DEINTERLEAVE_32BPP_ENABLED, 0 -.endif - -.if prefetch_distance < 
0 || prefetch_distance > 15 - .error "invalid prefetch distance (prefetch_distance)" -.endif - -.if src_bpp > 0 - ldr SRC, [sp, #40] -.endif -.if mask_bpp > 0 - ldr MASK, [sp, #48] -.endif - PF mov PF_X, #0 -.if src_bpp > 0 - ldr SRC_STRIDE, [sp, #44] -.endif -.if mask_bpp > 0 - ldr MASK_STRIDE, [sp, #52] -.endif - mov DST_R, DST_W - -.if src_bpp == 24 - sub SRC_STRIDE, SRC_STRIDE, W - sub SRC_STRIDE, SRC_STRIDE, W, lsl #1 -.endif -.if mask_bpp == 24 - sub MASK_STRIDE, MASK_STRIDE, W - sub MASK_STRIDE, MASK_STRIDE, W, lsl #1 -.endif -.if dst_w_bpp == 24 - sub DST_STRIDE, DST_STRIDE, W - sub DST_STRIDE, DST_STRIDE, W, lsl #1 -.endif - -/* - * Setup advanced prefetcher initial state - */ - PF mov PF_SRC, SRC - PF mov PF_DST, DST_R - PF mov PF_MASK, MASK - /* PF_CTL = prefetch_distance | ((h - 1) << 4) */ - PF mov PF_CTL, H, lsl #4 - PF add PF_CTL, #(prefetch_distance - 0x10) - - init -.if regs_shortage - push {r0, r1} -.endif - subs H, H, #1 -.if regs_shortage - str H, [sp, #4] /* save updated height to stack */ -.else - mov ORIG_W, W -.endif - blt 9f - cmp W, #(pixblock_size * 2) - blt 8f -/* - * This is the start of the pipelined loop, which if optimized for - * long scanlines - */ -0: - ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - /* Implement "head (tail_head) ... 
(tail_head) tail" loop pattern */ - pixld_a pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - PF add PF_X, PF_X, #pixblock_size - process_pixblock_head - cache_preload 0, pixblock_size - cache_preload_simple - subs W, W, #(pixblock_size * 2) - blt 2f -1: - process_pixblock_tail_head - cache_preload_simple - subs W, W, #pixblock_size - bge 1b -2: - process_pixblock_tail - pixst_a pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W - - /* Process the remaining trailing pixels in the scanline */ - process_trailing_pixels 1, 1, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - advance_to_next_scanline 0b - -.if regs_shortage - pop {r0, r1} -.endif - cleanup - pop {r4-r12, pc} /* exit */ -/* - * This is the start of the loop, designed to process images with small width - * (less than pixblock_size * 2 pixels). In this case neither pipelining - * nor prefetch are used. 
- */ -8: - /* Process exactly pixblock_size pixels if needed */ - tst W, #pixblock_size - beq 1f - pixld pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - process_pixblock_head - process_pixblock_tail - pixst pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W -1: - /* Process the remaining trailing pixels in the scanline */ - process_trailing_pixels 0, 0, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - advance_to_next_scanline 8b -9: -.if regs_shortage - pop {r0, r1} -.endif - cleanup - pop {r4-r12, pc} /* exit */ - - .purgem fetch_src_pixblock - .purgem pixld_src - - .unreq SRC - .unreq MASK - .unreq DST_R - .unreq DST_W - .unreq ORIG_W - .unreq W - .unreq H - .unreq SRC_STRIDE - .unreq DST_STRIDE - .unreq MASK_STRIDE - .unreq PF_CTL - .unreq PF_X - .unreq PF_SRC - .unreq PF_DST - .unreq PF_MASK - .unreq DUMMY - .endfunc -.endm - -/* - * A simplified variant of function generation template for a single - * scanline processing (for implementing pixman combine functions) - */ -.macro generate_composite_function_scanline use_nearest_scaling, \ - fname, \ - src_bpp_, \ - mask_bpp_, \ - dst_w_bpp_, \ - flags, \ - pixblock_size_, \ - init, \ - cleanup, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head, \ - dst_w_basereg_ = 28, \ - dst_r_basereg_ = 4, \ - src_basereg_ = 0, \ - mask_basereg_ = 24 - - .func fname - .global fname - /* For ELF format also set function visibility to hidden */ -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: - .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE -/* - * Make some macro arguments globally visible and accessible - * from other macros - */ - .set src_bpp, src_bpp_ - .set mask_bpp, mask_bpp_ - .set dst_w_bpp, dst_w_bpp_ - .set pixblock_size, pixblock_size_ - 
.set dst_w_basereg, dst_w_basereg_ - .set dst_r_basereg, dst_r_basereg_ - .set src_basereg, src_basereg_ - .set mask_basereg, mask_basereg_ - -.if use_nearest_scaling != 0 - /* - * Assign symbolic names to registers for nearest scaling - */ - W .req r0 - DST_W .req r1 - SRC .req r2 - VX .req r3 - UNIT_X .req ip - MASK .req lr - TMP1 .req r4 - TMP2 .req r5 - DST_R .req r6 - - .macro pixld_src x:vararg - pixld_s x - .endm - - ldr UNIT_X, [sp] - push {r4-r6, lr} - .if mask_bpp != 0 - ldr MASK, [sp, #(16 + 4)] - .endif -.else - /* - * Assign symbolic names to registers - */ - W .req r0 /* width (is updated during processing) */ - DST_W .req r1 /* destination buffer pointer for writes */ - SRC .req r2 /* source buffer pointer */ - DST_R .req ip /* destination buffer pointer for reads */ - MASK .req r3 /* mask pointer */ - - .macro pixld_src x:vararg - pixld x - .endm -.endif - -.if (((flags) & FLAG_DST_READWRITE) != 0) - .set dst_r_bpp, dst_w_bpp -.else - .set dst_r_bpp, 0 -.endif -.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) - .set DEINTERLEAVE_32BPP_ENABLED, 1 -.else - .set DEINTERLEAVE_32BPP_ENABLED, 0 -.endif - - .macro fetch_src_pixblock - pixld_src pixblock_size, src_bpp, \ - (src_basereg - pixblock_size * src_bpp / 64), SRC - .endm - - init - mov DST_R, DST_W - - cmp W, #pixblock_size - blt 8f - - ensure_destination_ptr_alignment process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - subs W, W, #pixblock_size - blt 7f - - /* Implement "head (tail_head) ... 
(tail_head) tail" loop pattern */ - pixld_a pixblock_size, dst_r_bpp, \ - (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R - fetch_src_pixblock - pixld pixblock_size, mask_bpp, \ - (mask_basereg - pixblock_size * mask_bpp / 64), MASK - process_pixblock_head - subs W, W, #pixblock_size - blt 2f -1: - process_pixblock_tail_head - subs W, W, #pixblock_size - bge 1b -2: - process_pixblock_tail - pixst_a pixblock_size, dst_w_bpp, \ - (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W -7: - /* Process the remaining trailing pixels in the scanline (dst aligned) */ - process_trailing_pixels 0, 1, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - cleanup -.if use_nearest_scaling != 0 - pop {r4-r6, pc} /* exit */ -.else - bx lr /* exit */ -.endif -8: - /* Process the remaining trailing pixels in the scanline (dst unaligned) */ - process_trailing_pixels 0, 0, \ - process_pixblock_head, \ - process_pixblock_tail, \ - process_pixblock_tail_head - - cleanup - -.if use_nearest_scaling != 0 - pop {r4-r6, pc} /* exit */ - - .unreq DST_R - .unreq SRC - .unreq W - .unreq VX - .unreq UNIT_X - .unreq TMP1 - .unreq TMP2 - .unreq DST_W - .unreq MASK - -.else - bx lr /* exit */ - - .unreq SRC - .unreq MASK - .unreq DST_R - .unreq DST_W - .unreq W -.endif - - .purgem fetch_src_pixblock - .purgem pixld_src - - .endfunc -.endm - -.macro generate_composite_function_single_scanline x:vararg - generate_composite_function_scanline 0, x -.endm - -.macro generate_composite_function_nearest_scanline x:vararg - generate_composite_function_scanline 1, x -.endm - -/* Default prologue/epilogue, nothing special needs to be done */ - -.macro default_init -.endm - -.macro default_cleanup -.endm - -/* - * Prologue/epilogue variant which additionally saves/restores d8-d15 - * registers (they need to be saved/restored by callee according to ABI). - * This is required if the code needs to use all the NEON registers. 
- */ - -.macro default_init_need_all_regs - vpush {d8-d15} -.endm - -.macro default_cleanup_need_all_regs - vpop {d8-d15} -.endm - -/******************************************************************************/ - -/* - * Conversion of 8 r5g6b6 pixels packed in 128-bit register (in) - * into a planar a8r8g8b8 format (with a, r, g, b color components - * stored into 64-bit registers out_a, out_r, out_g, out_b respectively). - * - * Warning: the conversion is destructive and the original - * value (in) is lost. - */ -.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b - vshrn.u16 out_r, in, #8 - vshrn.u16 out_g, in, #3 - vsli.u16 in, in, #5 - vmov.u8 out_a, #255 - vsri.u8 out_r, out_r, #5 - vsri.u8 out_g, out_g, #6 - vshrn.u16 out_b, in, #2 -.endm - -.macro convert_0565_to_x888 in, out_r, out_g, out_b - vshrn.u16 out_r, in, #8 - vshrn.u16 out_g, in, #3 - vsli.u16 in, in, #5 - vsri.u8 out_r, out_r, #5 - vsri.u8 out_g, out_g, #6 - vshrn.u16 out_b, in, #2 -.endm - -/* - * Conversion from planar a8r8g8b8 format (with a, r, g, b color components - * in 64-bit registers in_a, in_r, in_g, in_b respectively) into 8 r5g6b6 - * pixels packed in 128-bit register (out). Requires two temporary 128-bit - * registers (tmp1, tmp2) - */ -.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2 - vshll.u8 tmp1, in_g, #8 - vshll.u8 out, in_r, #8 - vshll.u8 tmp2, in_b, #8 - vsri.u16 out, tmp1, #5 - vsri.u16 out, tmp2, #11 -.endm - -/* - * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels - * returned in (out0, out1) registers pair. Requires one temporary - * 64-bit register (tmp). 
'out1' and 'in' may overlap, the original - * value from 'in' is lost - */ -.macro convert_four_0565_to_x888_packed in, out0, out1, tmp - vshl.u16 out0, in, #5 /* G top 6 bits */ - vshl.u16 tmp, in, #11 /* B top 5 bits */ - vsri.u16 in, in, #5 /* R is ready in top bits */ - vsri.u16 out0, out0, #6 /* G is ready in top bits */ - vsri.u16 tmp, tmp, #5 /* B is ready in top bits */ - vshr.u16 out1, in, #8 /* R is in place */ - vsri.u16 out0, tmp, #8 /* G & B is in place */ - vzip.u16 out0, out1 /* everything is in place */ -.endm +/* + * Copyright © 2009 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +/* + * This file contains a macro ('generate_composite_function') which can + * construct 2D image processing functions, based on a common template. 
+ * Any combinations of source, destination and mask images with 8bpp, + * 16bpp, 24bpp, 32bpp color formats are supported. + * + * This macro takes care of: + * - handling of leading and trailing unaligned pixels + * - doing most of the work related to L2 cache preload + * - encourages the use of software pipelining for better instructions + * scheduling + * + * The user of this macro has to provide some configuration parameters + * (bit depths for the images, prefetch distance, etc.) and a set of + * macros, which should implement basic code chunks responsible for + * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage + * examples. + * + * TODO: + * - try overlapped pixel method (from Ian Rickards) when processing + * exactly two blocks of pixels + * - maybe add an option to do reverse scanline processing + */ + +/* + * Bit flags for 'generate_composite_function' macro which are used + * to tune generated functions behavior. + */ +.set FLAG_DST_WRITEONLY, 0 +.set FLAG_DST_READWRITE, 1 +.set FLAG_DEINTERLEAVE_32BPP, 2 + +/* + * Offset in stack where mask and source pointer/stride can be accessed + * from 'init' macro. This is useful for doing special handling for solid mask. + */ +.set ARGS_STACK_OFFSET, 40 + +/* + * Constants for selecting preferable prefetch type. + */ +.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */ +.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */ +.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */ + +/* + * Definitions of supplementary pixld/pixst macros (for partial load/store of + * pixel data). + */ + +.macro pixldst1 op, elem_size, reg1, mem_operand, abits +.if abits > 0 + op&.&elem_size {d&reg1}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d&reg1}, [&mem_operand&]! +.endif +.endm + +.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits +.if abits > 0 + op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&]!
+.endif +.endm + +.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits +.if abits > 0 + op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&]! +.endif +.endm + +.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits + op&.&elem_size {d&reg1[idx]}, [&mem_operand&]! +.endm + +.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand + op&.&elem_size {d&reg1, d&reg2, d&reg3}, [&mem_operand&]! +.endm + +.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand + op&.&elem_size {d&reg1[idx], d&reg2[idx], d&reg3[idx]}, [&mem_operand&]! +.endm + +.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits +.if numbytes == 32 + pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif numbytes == 16 + pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits +.elseif numbytes == 8 + pixldst1 op, elem_size, %(basereg+1), mem_operand, abits +.elseif numbytes == 4 + .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32) + pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits + .elseif elem_size == 16 + pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits + pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits + .else + pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits + .endif +.elseif numbytes == 2 + .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16) + pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits + .else + pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits + .endif +.elseif numbytes == 1 + pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits +.else + .error "unsupported size: numbytes" +.endif +.endm + +.macro pixld numpix, bpp, basereg, mem_operand, abits=0 +.if bpp > 0 +.if (bpp == 32) &&
(numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) + pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif (bpp == 24) && (numpix == 8) + pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand +.elseif (bpp == 24) && (numpix == 4) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand +.elseif (bpp == 24) && (numpix == 2) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand +.elseif (bpp == 24) && (numpix == 1) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand +.else + pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits +.endif +.endif +.endm + +.macro pixst numpix, bpp, basereg, mem_operand, abits=0 +.if bpp > 0 +.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) + pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif (bpp == 24) && (numpix == 8) + pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand +.elseif (bpp == 24) && (numpix == 4) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand +.elseif (bpp == 24) && (numpix == 2) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand +.elseif (bpp == 24) && (numpix == 1) + pixldst30 vst3, 8, %(basereg+0), 
%(basereg+1), %(basereg+2), 1, mem_operand +.else + pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits +.endif +.endif +.endm + +.macro pixld_a numpix, bpp, basereg, mem_operand +.if (bpp * numpix) <= 128 + pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix) +.else + pixld numpix, bpp, basereg, mem_operand, 128 +.endif +.endm + +.macro pixst_a numpix, bpp, basereg, mem_operand +.if (bpp * numpix) <= 128 + pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix) +.else + pixst numpix, bpp, basereg, mem_operand, 128 +.endif +.endm + +/* + * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register + * aliases to be defined) + */ +.macro pixld1_s elem_size, reg1, mem_operand +.if elem_size == 16 + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X + add TMP1, mem_operand, TMP1, asl #1 + mov TMP2, VX, asr #16 + add VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #1 + vld1.16 {d&reg1&[0]}, [TMP1, :16] + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X + add TMP1, mem_operand, TMP1, asl #1 + vld1.16 {d&reg1&[1]}, [TMP2, :16] + mov TMP2, VX, asr #16 + add VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #1 + vld1.16 {d&reg1&[2]}, [TMP1, :16] + vld1.16 {d&reg1&[3]}, [TMP2, :16] +.elseif elem_size == 32 + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X + add TMP1, mem_operand, TMP1, asl #2 + mov TMP2, VX, asr #16 + add VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d&reg1&[0]}, [TMP1, :32] + vld1.32 {d&reg1&[1]}, [TMP2, :32] +.else + .error "unsupported" +.endif +.endm + +.macro pixld2_s elem_size, reg1, reg2, mem_operand +.if elem_size == 32 + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X, asl #1 + add TMP1, mem_operand, TMP1, asl #2 + mov TMP2, VX, asr #16 + sub VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d&reg1&[0]}, [TMP1, :32] + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X, asl #1 + add TMP1, mem_operand, TMP1, asl #2 + vld1.32 {d&reg2&[0]}, [TMP2, :32] + mov TMP2, VX, asr #16 + add VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #2 +
vld1.32 {d&reg1&[1]}, [TMP1, :32] + vld1.32 {d&reg2&[1]}, [TMP2, :32] +.else + pixld1_s elem_size, reg1, mem_operand + pixld1_s elem_size, reg2, mem_operand +.endif +.endm + +.macro pixld0_s elem_size, reg1, idx, mem_operand +.if elem_size == 16 + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X + add TMP1, mem_operand, TMP1, asl #1 + vld1.16 {d&reg1&[idx]}, [TMP1, :16] +.elseif elem_size == 32 + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X + add TMP1, mem_operand, TMP1, asl #2 + vld1.32 {d&reg1&[idx]}, [TMP1, :32] +.endif +.endm + +.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand +.if numbytes == 32 + pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand + pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand + pixdeinterleave elem_size, %(basereg+4) +.elseif numbytes == 16 + pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand +.elseif numbytes == 8 + pixld1_s elem_size, %(basereg+1), mem_operand +.elseif numbytes == 4 + .if elem_size == 32 + pixld0_s elem_size, %(basereg+0), 1, mem_operand + .elseif elem_size == 16 + pixld0_s elem_size, %(basereg+0), 2, mem_operand + pixld0_s elem_size, %(basereg+0), 3, mem_operand + .else + pixld0_s elem_size, %(basereg+0), 4, mem_operand + pixld0_s elem_size, %(basereg+0), 5, mem_operand + pixld0_s elem_size, %(basereg+0), 6, mem_operand + pixld0_s elem_size, %(basereg+0), 7, mem_operand + .endif +.elseif numbytes == 2 + .if elem_size == 16 + pixld0_s elem_size, %(basereg+0), 1, mem_operand + .else + pixld0_s elem_size, %(basereg+0), 2, mem_operand + pixld0_s elem_size, %(basereg+0), 3, mem_operand + .endif +.elseif numbytes == 1 + pixld0_s elem_size, %(basereg+0), 1, mem_operand +.else + .error "unsupported size: numbytes" +.endif +.endm + +.macro pixld_s numpix, bpp, basereg, mem_operand +.if bpp > 0 + pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand +.endif +.endm + +.macro vuzp8 reg1, reg2 + vuzp.8 d&reg1, d&reg2 +.endm + +.macro vzip8 reg1, reg2 + vzip.8 d&reg1, d&reg2 +.endm + +/* deinterleave B, G,
R, A channels for eight 32bpp pixels in 4 registers */ +.macro pixdeinterleave bpp, basereg +.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) + vuzp8 %(basereg+0), %(basereg+1) + vuzp8 %(basereg+2), %(basereg+3) + vuzp8 %(basereg+1), %(basereg+3) + vuzp8 %(basereg+0), %(basereg+2) +.endif +.endm + +/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ +.macro pixinterleave bpp, basereg +.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) + vzip8 %(basereg+0), %(basereg+2) + vzip8 %(basereg+1), %(basereg+3) + vzip8 %(basereg+2), %(basereg+3) + vzip8 %(basereg+0), %(basereg+1) +.endif +.endm + +/* + * This is a macro for implementing cache preload. The main idea is that + * cache preload logic is mostly independent from the rest of pixels + * processing code. It starts at the top left pixel and moves forward + * across pixels and can jump across scanlines. Prefetch distance is + * handled in an 'incremental' way: it starts from 0 and advances to the + * optimal distance over time. After reaching optimal prefetch distance, + * it is kept constant. There are some checks which prevent prefetching + * unneeded pixel lines below the image (but it still can prefetch a bit + * more data on the right side of the image - not a big issue and may + * be actually helpful when rendering text glyphs). Additional trick is + * the use of LDR instruction for prefetch instead of PLD when moving to + * the next line, the point is that we have a high chance of getting TLB + * miss in this case, and PLD would be useless. + * + * This sounds like it may introduce a noticeable overhead (when working with + * fully cached data). But in reality, due to having a separate pipeline and + * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can + * execute simultaneously with NEON and be completely shadowed by it. Thus + * we get no performance overhead at all (*). This looks like a very nice + * feature of Cortex-A8, if used wisely. 
We don't have a hardware prefetcher, + * but still can implement some rather advanced prefetch logic in software + * for almost zero cost! + * + * (*) The overhead of the prefetcher is visible when running some trivial + * pixels processing like simple copy. Anyway, having prefetch is a must + * when working with the graphics data. + */ +.macro PF a, x:vararg +.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED) + a x +.endif +.endm + +.macro cache_preload std_increment, boost_increment +.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0) +.if regs_shortage + PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */ +.endif +.if std_increment != 0 + PF add PF_X, PF_X, #std_increment +.endif + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #boost_increment + PF subne PF_CTL, PF_CTL, #1 + PF cmp PF_X, ORIG_W +.if src_bpp_shift >= 0 + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] +.endif +.if dst_r_bpp != 0 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] +.endif +.if mask_bpp_shift >= 0 + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] +.endif + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 +.if src_bpp_shift >= 0 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endif +.if dst_r_bpp != 0 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! +.endif +.if mask_bpp_shift >= 0 + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
+.endif +.endif +.endm + +.macro cache_preload_simple +.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE) +.if src_bpp > 0 + pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)] +.endif +.if dst_r_bpp > 0 + pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)] +.endif +.if mask_bpp > 0 + pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)] +.endif +.endif +.endm + +.macro fetch_mask_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK +.endm + +/* + * Macro which is used to process leading pixels until destination + * pointer is properly aligned (at 16 bytes boundary). When destination + * buffer uses 16bpp format, this is unnecessary, or even pointless. + */ +.macro ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head +.if dst_w_bpp != 24 + tst DST_R, #0xF + beq 2f + +.irp lowbit, 1, 2, 4, 8, 16 +local skip1 +.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) +.if lowbit < 16 /* we don't need more than 16-byte alignment */ + tst DST_R, #lowbit + beq 1f +.endif + pixld_src (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC + pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK +.if dst_r_bpp > 0 + pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R +.else + add DST_R, DST_R, #lowbit +.endif + PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp) + sub W, W, #(lowbit * 8 / dst_w_bpp) +1: +.endif +.endr + pixdeinterleave src_bpp, src_basereg + pixdeinterleave mask_bpp, mask_basereg + pixdeinterleave dst_r_bpp, dst_r_basereg + + process_pixblock_head + cache_preload 0, pixblock_size + cache_preload_simple + process_pixblock_tail + + pixinterleave dst_w_bpp, dst_w_basereg +.irp lowbit, 1, 2, 4, 8, 16 +.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) +.if lowbit < 16 /* we don't need more than 16-byte alignment */ + tst DST_W, #lowbit + beq 1f +.endif + pixst_a (lowbit * 8 / dst_w_bpp), 
dst_w_bpp, dst_w_basereg, DST_W +1: +.endif +.endr +.endif +2: +.endm + +/* + * Special code for processing up to (pixblock_size - 1) remaining + * trailing pixels. As SIMD processing performs operation on + * pixblock_size pixels, anything smaller than this has to be loaded + * and stored in a special way. Loading and storing of pixel data is + * performed in such a way that we fill some 'slots' in the NEON + * registers (some slots naturally are unused), then perform compositing + * operation as usual. In the end, the data is taken from these 'slots' + * and saved to memory. + * + * cache_preload_flag - allows to suppress prefetch if + * set to 0 + * dst_aligned_flag - selects whether destination buffer + * is aligned + */ +.macro process_trailing_pixels cache_preload_flag, \ + dst_aligned_flag, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + tst W, #(pixblock_size - 1) + beq 2f +.irp chunk_size, 16, 8, 4, 2, 1 +.if pixblock_size > chunk_size + tst W, #chunk_size + beq 1f + pixld_src chunk_size, src_bpp, src_basereg, SRC + pixld chunk_size, mask_bpp, mask_basereg, MASK +.if dst_aligned_flag != 0 + pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R +.else + pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R +.endif +.if cache_preload_flag != 0 + PF add PF_X, PF_X, #chunk_size +.endif +1: +.endif +.endr + pixdeinterleave src_bpp, src_basereg + pixdeinterleave mask_bpp, mask_basereg + pixdeinterleave dst_r_bpp, dst_r_basereg + + process_pixblock_head +.if cache_preload_flag != 0 + cache_preload 0, pixblock_size + cache_preload_simple +.endif + process_pixblock_tail + pixinterleave dst_w_bpp, dst_w_basereg +.irp chunk_size, 16, 8, 4, 2, 1 +.if pixblock_size > chunk_size + tst W, #chunk_size + beq 1f +.if dst_aligned_flag != 0 + pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W +.else + pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W +.endif +1: +.endif +.endr +2: +.endm + +/* + * Macro, which performs all the needed 
operations to switch to the next + * scanline and start the next loop iteration unless all the scanlines + * are already processed. + */ +.macro advance_to_next_scanline start_of_loop_label +.if regs_shortage + ldrd W, [sp] /* load W and H (width and height) from stack */ +.else + mov W, ORIG_W +.endif + add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift +.if src_bpp != 0 + add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift +.endif +.if mask_bpp != 0 + add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift +.endif +.if (dst_w_bpp != 24) + sub DST_W, DST_W, W, lsl #dst_bpp_shift +.endif +.if (src_bpp != 24) && (src_bpp != 0) + sub SRC, SRC, W, lsl #src_bpp_shift +.endif +.if (mask_bpp != 24) && (mask_bpp != 0) + sub MASK, MASK, W, lsl #mask_bpp_shift +.endif + subs H, H, #1 + mov DST_R, DST_W +.if regs_shortage + str H, [sp, #4] /* save updated height to stack */ +.endif + bge start_of_loop_label +.endm + +/* + * Registers are allocated in the following way by default: + * d0, d1, d2, d3 - reserved for loading source pixel data + * d4, d5, d6, d7 - reserved for loading destination pixel data + * d24, d25, d26, d27 - reserved for loading mask pixel data + * d28, d29, d30, d31 - final destination pixel data for writeback to memory + */ +.macro generate_composite_function fname, \ + src_bpp_, \ + mask_bpp_, \ + dst_w_bpp_, \ + flags, \ + pixblock_size_, \ + prefetch_distance, \ + init, \ + cleanup, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head, \ + dst_w_basereg_ = 28, \ + dst_r_basereg_ = 4, \ + src_basereg_ = 0, \ + mask_basereg_ = 24 + + .func fname + .global fname + /* For ELF format also set function visibility to hidden */ +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: + push {r4-r12, lr} /* save all registers */ + +/* + * Select prefetch type for this function. If prefetch distance is + * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch + * has to be used instead of ADVANCED. 
+ */ + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT +.if prefetch_distance == 0 + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE +.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \ + ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24)) + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE +.endif + +/* + * Make some macro arguments globally visible and accessible + * from other macros + */ + .set src_bpp, src_bpp_ + .set mask_bpp, mask_bpp_ + .set dst_w_bpp, dst_w_bpp_ + .set pixblock_size, pixblock_size_ + .set dst_w_basereg, dst_w_basereg_ + .set dst_r_basereg, dst_r_basereg_ + .set src_basereg, src_basereg_ + .set mask_basereg, mask_basereg_ + + .macro pixld_src x:vararg + pixld x + .endm + .macro fetch_src_pixblock + pixld_src pixblock_size, src_bpp, \ + (src_basereg - pixblock_size * src_bpp / 64), SRC + .endm +/* + * Assign symbolic names to registers + */ + W .req r0 /* width (is updated during processing) */ + H .req r1 /* height (is updated during processing) */ + DST_W .req r2 /* destination buffer pointer for writes */ + DST_STRIDE .req r3 /* destination image stride */ + SRC .req r4 /* source buffer pointer */ + SRC_STRIDE .req r5 /* source image stride */ + DST_R .req r6 /* destination buffer pointer for reads */ + + MASK .req r7 /* mask pointer */ + MASK_STRIDE .req r8 /* mask stride */ + + PF_CTL .req r9 /* combined lines counter and prefetch */ + /* distance increment counter */ + PF_X .req r10 /* pixel index in a scanline for current */ + /* prefetch position */ + PF_SRC .req r11 /* pointer to source scanline start */ + /* for prefetch purposes */ + PF_DST .req r12 /* pointer to destination scanline start */ + /* for prefetch purposes */ + PF_MASK .req r14 /* pointer to mask scanline start */ + /* for prefetch purposes */ +/* + * Check whether we have enough registers for all the local variables.
+ * If we don't have enough registers, original width and height are + * kept on top of stack (and 'regs_shortage' variable is set to indicate + * this for the rest of code). Even if there are enough registers, the + * allocation scheme may be a bit different depending on whether source + * or mask is not used. + */ +.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED) + ORIG_W .req r10 /* saved original width */ + DUMMY .req r12 /* temporary register */ + .set regs_shortage, 0 +.elseif mask_bpp == 0 + ORIG_W .req r7 /* saved original width */ + DUMMY .req r8 /* temporary register */ + .set regs_shortage, 0 +.elseif src_bpp == 0 + ORIG_W .req r4 /* saved original width */ + DUMMY .req r5 /* temporary register */ + .set regs_shortage, 0 +.else + ORIG_W .req r1 /* saved original width */ + DUMMY .req r1 /* temporary register */ + .set regs_shortage, 1 +.endif + + .set mask_bpp_shift, -1 +.if src_bpp == 32 + .set src_bpp_shift, 2 +.elseif src_bpp == 24 + .set src_bpp_shift, 0 +.elseif src_bpp == 16 + .set src_bpp_shift, 1 +.elseif src_bpp == 8 + .set src_bpp_shift, 0 +.elseif src_bpp == 0 + .set src_bpp_shift, -1 +.else + .error "requested src bpp (src_bpp) is not supported" +.endif +.if mask_bpp == 32 + .set mask_bpp_shift, 2 +.elseif mask_bpp == 24 + .set mask_bpp_shift, 0 +.elseif mask_bpp == 8 + .set mask_bpp_shift, 0 +.elseif mask_bpp == 0 + .set mask_bpp_shift, -1 +.else + .error "requested mask bpp (mask_bpp) is not supported" +.endif +.if dst_w_bpp == 32 + .set dst_bpp_shift, 2 +.elseif dst_w_bpp == 24 + .set dst_bpp_shift, 0 +.elseif dst_w_bpp == 16 + .set dst_bpp_shift, 1 +.elseif dst_w_bpp == 8 + .set dst_bpp_shift, 0 +.else + .error "requested dst bpp (dst_w_bpp) is not supported" +.endif + +.if (((flags) & FLAG_DST_READWRITE) != 0) + .set dst_r_bpp, dst_w_bpp +.else + .set dst_r_bpp, 0 +.endif +.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) + .set DEINTERLEAVE_32BPP_ENABLED, 1 +.else + .set DEINTERLEAVE_32BPP_ENABLED, 0 +.endif + +.if prefetch_distance < 
0 || prefetch_distance > 15 + .error "invalid prefetch distance (prefetch_distance)" +.endif + +.if src_bpp > 0 + ldr SRC, [sp, #40] +.endif +.if mask_bpp > 0 + ldr MASK, [sp, #48] +.endif + PF mov PF_X, #0 +.if src_bpp > 0 + ldr SRC_STRIDE, [sp, #44] +.endif +.if mask_bpp > 0 + ldr MASK_STRIDE, [sp, #52] +.endif + mov DST_R, DST_W + +.if src_bpp == 24 + sub SRC_STRIDE, SRC_STRIDE, W + sub SRC_STRIDE, SRC_STRIDE, W, lsl #1 +.endif +.if mask_bpp == 24 + sub MASK_STRIDE, MASK_STRIDE, W + sub MASK_STRIDE, MASK_STRIDE, W, lsl #1 +.endif +.if dst_w_bpp == 24 + sub DST_STRIDE, DST_STRIDE, W + sub DST_STRIDE, DST_STRIDE, W, lsl #1 +.endif + +/* + * Setup advanced prefetcher initial state + */ + PF mov PF_SRC, SRC + PF mov PF_DST, DST_R + PF mov PF_MASK, MASK + /* PF_CTL = prefetch_distance | ((h - 1) << 4) */ + PF mov PF_CTL, H, lsl #4 + PF add PF_CTL, #(prefetch_distance - 0x10) + + init +.if regs_shortage + push {r0, r1} +.endif + subs H, H, #1 +.if regs_shortage + str H, [sp, #4] /* save updated height to stack */ +.else + mov ORIG_W, W +.endif + blt 9f + cmp W, #(pixblock_size * 2) + blt 8f +/* + * This is the start of the pipelined loop, which is optimized for + * long scanlines + */ +0: + ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + /* Implement "head (tail_head) ...
(tail_head) tail" loop pattern */ + pixld_a pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + PF add PF_X, PF_X, #pixblock_size + process_pixblock_head + cache_preload 0, pixblock_size + cache_preload_simple + subs W, W, #(pixblock_size * 2) + blt 2f +1: + process_pixblock_tail_head + cache_preload_simple + subs W, W, #pixblock_size + bge 1b +2: + process_pixblock_tail + pixst_a pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W + + /* Process the remaining trailing pixels in the scanline */ + process_trailing_pixels 1, 1, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + advance_to_next_scanline 0b + +.if regs_shortage + pop {r0, r1} +.endif + cleanup + pop {r4-r12, pc} /* exit */ +/* + * This is the start of the loop, designed to process images with small width + * (less than pixblock_size * 2 pixels). In this case neither pipelining + * nor prefetch are used. 
+ */ +8: + /* Process exactly pixblock_size pixels if needed */ + tst W, #pixblock_size + beq 1f + pixld pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + process_pixblock_head + process_pixblock_tail + pixst pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W +1: + /* Process the remaining trailing pixels in the scanline */ + process_trailing_pixels 0, 0, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + advance_to_next_scanline 8b +9: +.if regs_shortage + pop {r0, r1} +.endif + cleanup + pop {r4-r12, pc} /* exit */ + + .purgem fetch_src_pixblock + .purgem pixld_src + + .unreq SRC + .unreq MASK + .unreq DST_R + .unreq DST_W + .unreq ORIG_W + .unreq W + .unreq H + .unreq SRC_STRIDE + .unreq DST_STRIDE + .unreq MASK_STRIDE + .unreq PF_CTL + .unreq PF_X + .unreq PF_SRC + .unreq PF_DST + .unreq PF_MASK + .unreq DUMMY + .endfunc +.endm + +/* + * A simplified variant of function generation template for a single + * scanline processing (for implementing pixman combine functions) + */ +.macro generate_composite_function_scanline use_nearest_scaling, \ + fname, \ + src_bpp_, \ + mask_bpp_, \ + dst_w_bpp_, \ + flags, \ + pixblock_size_, \ + init, \ + cleanup, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head, \ + dst_w_basereg_ = 28, \ + dst_r_basereg_ = 4, \ + src_basereg_ = 0, \ + mask_basereg_ = 24 + + .func fname + .global fname + /* For ELF format also set function visibility to hidden */ +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE +/* + * Make some macro arguments globally visible and accessible + * from other macros + */ + .set src_bpp, src_bpp_ + .set mask_bpp, mask_bpp_ + .set dst_w_bpp, dst_w_bpp_ + .set pixblock_size, pixblock_size_ + 
.set dst_w_basereg, dst_w_basereg_ + .set dst_r_basereg, dst_r_basereg_ + .set src_basereg, src_basereg_ + .set mask_basereg, mask_basereg_ + +.if use_nearest_scaling != 0 + /* + * Assign symbolic names to registers for nearest scaling + */ + W .req r0 + DST_W .req r1 + SRC .req r2 + VX .req r3 + UNIT_X .req ip + MASK .req lr + TMP1 .req r4 + TMP2 .req r5 + DST_R .req r6 + + .macro pixld_src x:vararg + pixld_s x + .endm + + ldr UNIT_X, [sp] + push {r4-r6, lr} + .if mask_bpp != 0 + ldr MASK, [sp, #(16 + 4)] + .endif +.else + /* + * Assign symbolic names to registers + */ + W .req r0 /* width (is updated during processing) */ + DST_W .req r1 /* destination buffer pointer for writes */ + SRC .req r2 /* source buffer pointer */ + DST_R .req ip /* destination buffer pointer for reads */ + MASK .req r3 /* mask pointer */ + + .macro pixld_src x:vararg + pixld x + .endm +.endif + +.if (((flags) & FLAG_DST_READWRITE) != 0) + .set dst_r_bpp, dst_w_bpp +.else + .set dst_r_bpp, 0 +.endif +.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) + .set DEINTERLEAVE_32BPP_ENABLED, 1 +.else + .set DEINTERLEAVE_32BPP_ENABLED, 0 +.endif + + .macro fetch_src_pixblock + pixld_src pixblock_size, src_bpp, \ + (src_basereg - pixblock_size * src_bpp / 64), SRC + .endm + + init + mov DST_R, DST_W + + cmp W, #pixblock_size + blt 8f + + ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + subs W, W, #pixblock_size + blt 7f + + /* Implement "head (tail_head) ... 
(tail_head) tail" loop pattern */ + pixld_a pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + process_pixblock_head + subs W, W, #pixblock_size + blt 2f +1: + process_pixblock_tail_head + subs W, W, #pixblock_size + bge 1b +2: + process_pixblock_tail + pixst_a pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W +7: + /* Process the remaining trailing pixels in the scanline (dst aligned) */ + process_trailing_pixels 0, 1, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + cleanup +.if use_nearest_scaling != 0 + pop {r4-r6, pc} /* exit */ +.else + bx lr /* exit */ +.endif +8: + /* Process the remaining trailing pixels in the scanline (dst unaligned) */ + process_trailing_pixels 0, 0, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + cleanup + +.if use_nearest_scaling != 0 + pop {r4-r6, pc} /* exit */ + + .unreq DST_R + .unreq SRC + .unreq W + .unreq VX + .unreq UNIT_X + .unreq TMP1 + .unreq TMP2 + .unreq DST_W + .unreq MASK + +.else + bx lr /* exit */ + + .unreq SRC + .unreq MASK + .unreq DST_R + .unreq DST_W + .unreq W +.endif + + .purgem fetch_src_pixblock + .purgem pixld_src + + .endfunc +.endm + +.macro generate_composite_function_single_scanline x:vararg + generate_composite_function_scanline 0, x +.endm + +.macro generate_composite_function_nearest_scanline x:vararg + generate_composite_function_scanline 1, x +.endm + +/* Default prologue/epilogue, nothing special needs to be done */ + +.macro default_init +.endm + +.macro default_cleanup +.endm + +/* + * Prologue/epilogue variant which additionally saves/restores d8-d15 + * registers (they need to be saved/restored by callee according to ABI). + * This is required if the code needs to use all the NEON registers. 
+ */ + +.macro default_init_need_all_regs + vpush {d8-d15} +.endm + +.macro default_cleanup_need_all_regs + vpop {d8-d15} +.endm + +/******************************************************************************/ + +/* + * Conversion of 8 r5g6b5 pixels packed in 128-bit register (in) + * into a planar a8r8g8b8 format (with a, r, g, b color components + * stored into 64-bit registers out_a, out_r, out_g, out_b respectively). + * + * Warning: the conversion is destructive and the original + * value (in) is lost. + */ +.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b + vshrn.u16 out_r, in, #8 + vshrn.u16 out_g, in, #3 + vsli.u16 in, in, #5 + vmov.u8 out_a, #255 + vsri.u8 out_r, out_r, #5 + vsri.u8 out_g, out_g, #6 + vshrn.u16 out_b, in, #2 +.endm + +.macro convert_0565_to_x888 in, out_r, out_g, out_b + vshrn.u16 out_r, in, #8 + vshrn.u16 out_g, in, #3 + vsli.u16 in, in, #5 + vsri.u8 out_r, out_r, #5 + vsri.u8 out_g, out_g, #6 + vshrn.u16 out_b, in, #2 +.endm + +/* + * Conversion from planar a8r8g8b8 format (with a, r, g, b color components + * in 64-bit registers in_a, in_r, in_g, in_b respectively) into 8 r5g6b5 + * pixels packed in 128-bit register (out). Requires two temporary 128-bit + * registers (tmp1, tmp2) + */ +.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2 + vshll.u8 tmp1, in_g, #8 + vshll.u8 out, in_r, #8 + vshll.u8 tmp2, in_b, #8 + vsri.u16 out, tmp1, #5 + vsri.u16 out, tmp2, #11 +.endm + +/* + * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels + * returned in (out0, out1) registers pair. Requires one temporary + * 64-bit register (tmp).
'out1' and 'in' may overlap, the original + * value from 'in' is lost + */ +.macro convert_four_0565_to_x888_packed in, out0, out1, tmp + vshl.u16 out0, in, #5 /* G top 6 bits */ + vshl.u16 tmp, in, #11 /* B top 5 bits */ + vsri.u16 in, in, #5 /* R is ready in top bits */ + vsri.u16 out0, out0, #6 /* G is ready in top bits */ + vsri.u16 tmp, tmp, #5 /* B is ready in top bits */ + vshr.u16 out1, in, #8 /* R is in place */ + vsri.u16 out0, tmp, #8 /* G & B is in place */ + vzip.u16 out0, out1 /* everything is in place */ +.endm diff --git a/pixman/pixman/pixman-edge-accessors.c b/pixman/pixman/pixman-edge-accessors.c index ea3a31e2f..0f2c56e74 100644 --- a/pixman/pixman/pixman-edge-accessors.c +++ b/pixman/pixman/pixman-edge-accessors.c @@ -1,4 +1,4 @@ - -#define PIXMAN_FB_ACCESSORS - -#include "pixman-edge.c" + +#define PIXMAN_FB_ACCESSORS + +#include "pixman-edge.c" diff --git a/pixman/pixman/pixman-edge-imp.h b/pixman/pixman/pixman-edge-imp.h index a4698eddb..20ffda896 100644 --- a/pixman/pixman/pixman-edge-imp.h +++ b/pixman/pixman/pixman-edge-imp.h @@ -1,182 +1,182 @@ -/* - * Copyright © 2004 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. 
- * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef rasterize_span -#endif - -static void -RASTERIZE_EDGES (pixman_image_t *image, - pixman_edge_t *l, - pixman_edge_t *r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - pixman_fixed_t y = t; - uint32_t *line; - uint32_t *buf = (image)->bits.bits; - int stride = (image)->bits.rowstride; - int width = (image)->bits.width; - - line = buf + pixman_fixed_to_int (y) * stride; - - for (;;) - { - pixman_fixed_t lx; - pixman_fixed_t rx; - int lxi; - int rxi; - - lx = l->x; - rx = r->x; -#if N_BITS == 1 - /* For the non-antialiased case, round the coordinates up, in effect - * sampling just slightly to the left of the pixel. This is so that - * when the sample point lies exactly on the line, we round towards - * north-west. - * - * (The AA case does a similar adjustment in RENDER_SAMPLES_X) - */ - lx += X_FRAC_FIRST(1) - pixman_fixed_e; - rx += X_FRAC_FIRST(1) - pixman_fixed_e; -#endif - /* clip X */ - if (lx < 0) - lx = 0; - if (pixman_fixed_to_int (rx) >= width) -#if N_BITS == 1 - rx = pixman_int_to_fixed (width); -#else - /* Use the last pixel of the scanline, covered 100%. - * We can't use the first pixel following the scanline, - * because accessing it could result in a buffer overrun. - */ - rx = pixman_int_to_fixed (width) - 1; -#endif - - /* Skip empty (or backwards) sections */ - if (rx > lx) - { - - /* Find pixel bounds for span */ - lxi = pixman_fixed_to_int (lx); - rxi = pixman_fixed_to_int (rx); - -#if N_BITS == 1 - { - -#define LEFT_MASK(x) \ - (((x) & 0x1f) ? 
\ - SCREEN_SHIFT_RIGHT (0xffffffff, (x) & 0x1f) : 0) -#define RIGHT_MASK(x) \ - (((32 - (x)) & 0x1f) ? \ - SCREEN_SHIFT_LEFT (0xffffffff, (32 - (x)) & 0x1f) : 0) - -#define MASK_BITS(x,w,l,n,r) { \ - n = (w); \ - r = RIGHT_MASK ((x) + n); \ - l = LEFT_MASK (x); \ - if (l) { \ - n -= 32 - ((x) & 0x1f); \ - if (n < 0) { \ - n = 0; \ - l &= r; \ - r = 0; \ - } \ - } \ - n >>= 5; \ - } - - uint32_t *a = line; - uint32_t startmask; - uint32_t endmask; - int nmiddle; - int width = rxi - lxi; - int x = lxi; - - a += x >> 5; - x &= 0x1f; - - MASK_BITS (x, width, startmask, nmiddle, endmask); - - if (startmask) { - WRITE(image, a, READ(image, a) | startmask); - a++; - } - while (nmiddle--) - WRITE(image, a++, 0xffffffff); - if (endmask) - WRITE(image, a, READ(image, a) | endmask); - } -#else - { - DEFINE_ALPHA(line,lxi); - int lxs; - int rxs; - - /* Sample coverage for edge pixels */ - lxs = RENDER_SAMPLES_X (lx, N_BITS); - rxs = RENDER_SAMPLES_X (rx, N_BITS); - - /* Add coverage across row */ - if (lxi == rxi) - { - ADD_ALPHA (rxs - lxs); - } - else - { - int xi; - - ADD_ALPHA (N_X_FRAC(N_BITS) - lxs); - STEP_ALPHA; - for (xi = lxi + 1; xi < rxi; xi++) - { - ADD_ALPHA (N_X_FRAC(N_BITS)); - STEP_ALPHA; - } - ADD_ALPHA (rxs); - } - } -#endif - } - - if (y == b) - break; - -#if N_BITS > 1 - if (pixman_fixed_frac (y) != Y_FRAC_LAST(N_BITS)) - { - RENDER_EDGE_STEP_SMALL (l); - RENDER_EDGE_STEP_SMALL (r); - y += STEP_Y_SMALL(N_BITS); - } - else -#endif - { - RENDER_EDGE_STEP_BIG (l); - RENDER_EDGE_STEP_BIG (r); - y += STEP_Y_BIG(N_BITS); - line += stride; - } - } -} - -#undef rasterize_span +/* + * Copyright © 2004 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard 
not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef rasterize_span +#endif + +static void +RASTERIZE_EDGES (pixman_image_t *image, + pixman_edge_t *l, + pixman_edge_t *r, + pixman_fixed_t t, + pixman_fixed_t b) +{ + pixman_fixed_t y = t; + uint32_t *line; + uint32_t *buf = (image)->bits.bits; + int stride = (image)->bits.rowstride; + int width = (image)->bits.width; + + line = buf + pixman_fixed_to_int (y) * stride; + + for (;;) + { + pixman_fixed_t lx; + pixman_fixed_t rx; + int lxi; + int rxi; + + lx = l->x; + rx = r->x; +#if N_BITS == 1 + /* For the non-antialiased case, round the coordinates up, in effect + * sampling just slightly to the left of the pixel. This is so that + * when the sample point lies exactly on the line, we round towards + * north-west. + * + * (The AA case does a similar adjustment in RENDER_SAMPLES_X) + */ + lx += X_FRAC_FIRST(1) - pixman_fixed_e; + rx += X_FRAC_FIRST(1) - pixman_fixed_e; +#endif + /* clip X */ + if (lx < 0) + lx = 0; + if (pixman_fixed_to_int (rx) >= width) +#if N_BITS == 1 + rx = pixman_int_to_fixed (width); +#else + /* Use the last pixel of the scanline, covered 100%. + * We can't use the first pixel following the scanline, + * because accessing it could result in a buffer overrun. 
+ */ + rx = pixman_int_to_fixed (width) - 1; +#endif + + /* Skip empty (or backwards) sections */ + if (rx > lx) + { + + /* Find pixel bounds for span */ + lxi = pixman_fixed_to_int (lx); + rxi = pixman_fixed_to_int (rx); + +#if N_BITS == 1 + { + +#define LEFT_MASK(x) \ + (((x) & 0x1f) ? \ + SCREEN_SHIFT_RIGHT (0xffffffff, (x) & 0x1f) : 0) +#define RIGHT_MASK(x) \ + (((32 - (x)) & 0x1f) ? \ + SCREEN_SHIFT_LEFT (0xffffffff, (32 - (x)) & 0x1f) : 0) + +#define MASK_BITS(x,w,l,n,r) { \ + n = (w); \ + r = RIGHT_MASK ((x) + n); \ + l = LEFT_MASK (x); \ + if (l) { \ + n -= 32 - ((x) & 0x1f); \ + if (n < 0) { \ + n = 0; \ + l &= r; \ + r = 0; \ + } \ + } \ + n >>= 5; \ + } + + uint32_t *a = line; + uint32_t startmask; + uint32_t endmask; + int nmiddle; + int width = rxi - lxi; + int x = lxi; + + a += x >> 5; + x &= 0x1f; + + MASK_BITS (x, width, startmask, nmiddle, endmask); + + if (startmask) { + WRITE(image, a, READ(image, a) | startmask); + a++; + } + while (nmiddle--) + WRITE(image, a++, 0xffffffff); + if (endmask) + WRITE(image, a, READ(image, a) | endmask); + } +#else + { + DEFINE_ALPHA(line,lxi); + int lxs; + int rxs; + + /* Sample coverage for edge pixels */ + lxs = RENDER_SAMPLES_X (lx, N_BITS); + rxs = RENDER_SAMPLES_X (rx, N_BITS); + + /* Add coverage across row */ + if (lxi == rxi) + { + ADD_ALPHA (rxs - lxs); + } + else + { + int xi; + + ADD_ALPHA (N_X_FRAC(N_BITS) - lxs); + STEP_ALPHA; + for (xi = lxi + 1; xi < rxi; xi++) + { + ADD_ALPHA (N_X_FRAC(N_BITS)); + STEP_ALPHA; + } + ADD_ALPHA (rxs); + } + } +#endif + } + + if (y == b) + break; + +#if N_BITS > 1 + if (pixman_fixed_frac (y) != Y_FRAC_LAST(N_BITS)) + { + RENDER_EDGE_STEP_SMALL (l); + RENDER_EDGE_STEP_SMALL (r); + y += STEP_Y_SMALL(N_BITS); + } + else +#endif + { + RENDER_EDGE_STEP_BIG (l); + RENDER_EDGE_STEP_BIG (r); + y += STEP_Y_BIG(N_BITS); + line += stride; + } + } +} + +#undef rasterize_span diff --git a/pixman/pixman/pixman-edge.c b/pixman/pixman/pixman-edge.c index 8d498ab44..22b0158ba 100644 
--- a/pixman/pixman/pixman-edge.c +++ b/pixman/pixman/pixman-edge.c @@ -1,384 +1,384 @@ -/* - * Copyright © 2004 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. 
- */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <string.h> - -#include "pixman-private.h" -#include "pixman-accessor.h" - -/* - * Step across a small sample grid gap - */ -#define RENDER_EDGE_STEP_SMALL(edge) \ - { \ - edge->x += edge->stepx_small; \ - edge->e += edge->dx_small; \ - if (edge->e > 0) \ - { \ - edge->e -= edge->dy; \ - edge->x += edge->signdx; \ - } \ - } - -/* - * Step across a large sample grid gap - */ -#define RENDER_EDGE_STEP_BIG(edge) \ - { \ - edge->x += edge->stepx_big; \ - edge->e += edge->dx_big; \ - if (edge->e > 0) \ - { \ - edge->e -= edge->dy; \ - edge->x += edge->signdx; \ - } \ - } - -#ifdef PIXMAN_FB_ACCESSORS -#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_accessors -#else -#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_no_accessors -#endif - -/* - * 4 bit alpha - */ - -#define N_BITS 4 -#define RASTERIZE_EDGES rasterize_edges_4 - -#ifndef WORDS_BIGENDIAN -#define SHIFT_4(o) ((o) << 2) -#else -#define SHIFT_4(o) ((1 - (o)) << 2) -#endif - -#define GET_4(x, o) (((x) >> SHIFT_4 (o)) & 0xf) -#define PUT_4(x, o, v) \ - (((x) & ~(0xf << SHIFT_4 (o))) | (((v) & 0xf) << SHIFT_4 (o))) - -#define DEFINE_ALPHA(line, x) \ - uint8_t *__ap = (uint8_t *) line + ((x) >> 1); \ - int __ao = (x) & 1 - -#define STEP_ALPHA ((__ap += __ao), (__ao ^= 1)) - -#define ADD_ALPHA(a) \ - { \ - uint8_t __o = READ (image, __ap); \ - uint8_t __a = (a) + GET_4 (__o, __ao); \ - WRITE (image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \ - } - -#include "pixman-edge-imp.h" - -#undef ADD_ALPHA -#undef STEP_ALPHA -#undef DEFINE_ALPHA -#undef RASTERIZE_EDGES -#undef N_BITS - - -/* - * 1 bit alpha - */ - -#define N_BITS 1 -#define RASTERIZE_EDGES rasterize_edges_1 - -#include "pixman-edge-imp.h" - -#undef RASTERIZE_EDGES -#undef N_BITS - -/* - * 8 bit alpha - */ - -static force_inline uint8_t -clip255 (int x) -{ - if (x > 255) - return 255; - - return x; -} - -#define ADD_SATURATE_8(buf, val, length) \ - do \ - { \ - int i__ = (length); \ - uint8_t
*buf__ = (buf); \ - int val__ = (val); \ - \ - while (i__--) \ - { \ - WRITE (image, (buf__), clip255 (READ (image, (buf__)) + (val__))); \ - (buf__)++; \ - } \ - } while (0) - -/* - * We want to detect the case where we add the same value to a long - * span of pixels. The triangles on the end are filled in while we - * count how many sub-pixel scanlines contribute to the middle section. - * - * +--------------------------+ - * fill_height =| \ / - * +------------------+ - * |================| - * fill_start fill_end - */ -static void -rasterize_edges_8 (pixman_image_t *image, - pixman_edge_t * l, - pixman_edge_t * r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - pixman_fixed_t y = t; - uint32_t *line; - int fill_start = -1, fill_end = -1; - int fill_size = 0; - uint32_t *buf = (image)->bits.bits; - int stride = (image)->bits.rowstride; - int width = (image)->bits.width; - - line = buf + pixman_fixed_to_int (y) * stride; - - for (;;) - { - uint8_t *ap = (uint8_t *) line; - pixman_fixed_t lx, rx; - int lxi, rxi; - - /* clip X */ - lx = l->x; - if (lx < 0) - lx = 0; - - rx = r->x; - - if (pixman_fixed_to_int (rx) >= width) - { - /* Use the last pixel of the scanline, covered 100%. - * We can't use the first pixel following the scanline, - * because accessing it could result in a buffer overrun. - */ - rx = pixman_int_to_fixed (width) - 1; - } - - /* Skip empty (or backwards) sections */ - if (rx > lx) - { - int lxs, rxs; - - /* Find pixel bounds for span. 
*/ - lxi = pixman_fixed_to_int (lx); - rxi = pixman_fixed_to_int (rx); - - /* Sample coverage for edge pixels */ - lxs = RENDER_SAMPLES_X (lx, 8); - rxs = RENDER_SAMPLES_X (rx, 8); - - /* Add coverage across row */ - if (lxi == rxi) - { - WRITE (image, ap + lxi, - clip255 (READ (image, ap + lxi) + rxs - lxs)); - } - else - { - WRITE (image, ap + lxi, - clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs)); - - /* Move forward so that lxi/rxi is the pixel span */ - lxi++; - - /* Don't bother trying to optimize the fill unless - * the span is longer than 4 pixels. */ - if (rxi - lxi > 4) - { - if (fill_start < 0) - { - fill_start = lxi; - fill_end = rxi; - fill_size++; - } - else - { - if (lxi >= fill_end || rxi < fill_start) - { - /* We're beyond what we saved, just fill it */ - ADD_SATURATE_8 (ap + fill_start, - fill_size * N_X_FRAC (8), - fill_end - fill_start); - fill_start = lxi; - fill_end = rxi; - fill_size = 1; - } - else - { - /* Update fill_start */ - if (lxi > fill_start) - { - ADD_SATURATE_8 (ap + fill_start, - fill_size * N_X_FRAC (8), - lxi - fill_start); - fill_start = lxi; - } - else if (lxi < fill_start) - { - ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), - fill_start - lxi); - } - - /* Update fill_end */ - if (rxi < fill_end) - { - ADD_SATURATE_8 (ap + rxi, - fill_size * N_X_FRAC (8), - fill_end - rxi); - fill_end = rxi; - } - else if (fill_end < rxi) - { - ADD_SATURATE_8 (ap + fill_end, - N_X_FRAC (8), - rxi - fill_end); - } - fill_size++; - } - } - } - else - { - ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi); - } - - WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs)); - } - } - - if (y == b) - { - /* We're done, make sure we clean up any remaining fill. 
*/ - if (fill_start != fill_end) - { - if (fill_size == N_Y_FRAC (8)) - { - MEMSET_WRAPPED (image, ap + fill_start, - 0xff, fill_end - fill_start); - } - else - { - ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), - fill_end - fill_start); - } - } - break; - } - - if (pixman_fixed_frac (y) != Y_FRAC_LAST (8)) - { - RENDER_EDGE_STEP_SMALL (l); - RENDER_EDGE_STEP_SMALL (r); - y += STEP_Y_SMALL (8); - } - else - { - RENDER_EDGE_STEP_BIG (l); - RENDER_EDGE_STEP_BIG (r); - y += STEP_Y_BIG (8); - if (fill_start != fill_end) - { - if (fill_size == N_Y_FRAC (8)) - { - MEMSET_WRAPPED (image, ap + fill_start, - 0xff, fill_end - fill_start); - } - else - { - ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), - fill_end - fill_start); - } - - fill_start = fill_end = -1; - fill_size = 0; - } - - line += stride; - } - } -} - -#ifndef PIXMAN_FB_ACCESSORS -static -#endif -void -PIXMAN_RASTERIZE_EDGES (pixman_image_t *image, - pixman_edge_t * l, - pixman_edge_t * r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - switch (PIXMAN_FORMAT_BPP (image->bits.format)) - { - case 1: - rasterize_edges_1 (image, l, r, t, b); - break; - - case 4: - rasterize_edges_4 (image, l, r, t, b); - break; - - case 8: - rasterize_edges_8 (image, l, r, t, b); - break; - - default: - break; - } -} - -#ifndef PIXMAN_FB_ACCESSORS - -PIXMAN_EXPORT void -pixman_rasterize_edges (pixman_image_t *image, - pixman_edge_t * l, - pixman_edge_t * r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - return_if_fail (image->type == BITS); - - if (image->bits.read_func || image->bits.write_func) - pixman_rasterize_edges_accessors (image, l, r, t, b); - else - pixman_rasterize_edges_no_accessors (image, l, r, t, b); -} - -#endif +/* + * Copyright © 2004 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright 
notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "pixman-private.h" +#include "pixman-accessor.h" + +/* + * Step across a small sample grid gap + */ +#define RENDER_EDGE_STEP_SMALL(edge) \ + { \ + edge->x += edge->stepx_small; \ + edge->e += edge->dx_small; \ + if (edge->e > 0) \ + { \ + edge->e -= edge->dy; \ + edge->x += edge->signdx; \ + } \ + } + +/* + * Step across a large sample grid gap + */ +#define RENDER_EDGE_STEP_BIG(edge) \ + { \ + edge->x += edge->stepx_big; \ + edge->e += edge->dx_big; \ + if (edge->e > 0) \ + { \ + edge->e -= edge->dy; \ + edge->x += edge->signdx; \ + } \ + } + +#ifdef PIXMAN_FB_ACCESSORS +#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_accessors +#else +#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_no_accessors +#endif + +/* + * 4 bit alpha + */ + +#define N_BITS 4 +#define RASTERIZE_EDGES rasterize_edges_4 + +#ifndef WORDS_BIGENDIAN +#define SHIFT_4(o) ((o) << 2) +#else +#define SHIFT_4(o) ((1 - (o)) << 2) +#endif + +#define GET_4(x, o) (((x) >> SHIFT_4 (o)) & 0xf) +#define PUT_4(x, o, v) \ + (((x) & ~(0xf << SHIFT_4 (o))) | 
(((v) & 0xf) << SHIFT_4 (o))) + +#define DEFINE_ALPHA(line, x) \ + uint8_t *__ap = (uint8_t *) line + ((x) >> 1); \ + int __ao = (x) & 1 + +#define STEP_ALPHA ((__ap += __ao), (__ao ^= 1)) + +#define ADD_ALPHA(a) \ + { \ + uint8_t __o = READ (image, __ap); \ + uint8_t __a = (a) + GET_4 (__o, __ao); \ + WRITE (image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \ + } + +#include "pixman-edge-imp.h" + +#undef ADD_ALPHA +#undef STEP_ALPHA +#undef DEFINE_ALPHA +#undef RASTERIZE_EDGES +#undef N_BITS + + +/* + * 1 bit alpha + */ + +#define N_BITS 1 +#define RASTERIZE_EDGES rasterize_edges_1 + +#include "pixman-edge-imp.h" + +#undef RASTERIZE_EDGES +#undef N_BITS + +/* + * 8 bit alpha + */ + +static force_inline uint8_t +clip255 (int x) +{ + if (x > 255) + return 255; + + return x; +} + +#define ADD_SATURATE_8(buf, val, length) \ + do \ + { \ + int i__ = (length); \ + uint8_t *buf__ = (buf); \ + int val__ = (val); \ + \ + while (i__--) \ + { \ + WRITE (image, (buf__), clip255 (READ (image, (buf__)) + (val__))); \ + (buf__)++; \ + } \ + } while (0) + +/* + * We want to detect the case where we add the same value to a long + * span of pixels. The triangles on the end are filled in while we + * count how many sub-pixel scanlines contribute to the middle section. 
+ * + * +--------------------------+ + * fill_height =| \ / + * +------------------+ + * |================| + * fill_start fill_end + */ +static void +rasterize_edges_8 (pixman_image_t *image, + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b) +{ + pixman_fixed_t y = t; + uint32_t *line; + int fill_start = -1, fill_end = -1; + int fill_size = 0; + uint32_t *buf = (image)->bits.bits; + int stride = (image)->bits.rowstride; + int width = (image)->bits.width; + + line = buf + pixman_fixed_to_int (y) * stride; + + for (;;) + { + uint8_t *ap = (uint8_t *) line; + pixman_fixed_t lx, rx; + int lxi, rxi; + + /* clip X */ + lx = l->x; + if (lx < 0) + lx = 0; + + rx = r->x; + + if (pixman_fixed_to_int (rx) >= width) + { + /* Use the last pixel of the scanline, covered 100%. + * We can't use the first pixel following the scanline, + * because accessing it could result in a buffer overrun. + */ + rx = pixman_int_to_fixed (width) - 1; + } + + /* Skip empty (or backwards) sections */ + if (rx > lx) + { + int lxs, rxs; + + /* Find pixel bounds for span. */ + lxi = pixman_fixed_to_int (lx); + rxi = pixman_fixed_to_int (rx); + + /* Sample coverage for edge pixels */ + lxs = RENDER_SAMPLES_X (lx, 8); + rxs = RENDER_SAMPLES_X (rx, 8); + + /* Add coverage across row */ + if (lxi == rxi) + { + WRITE (image, ap + lxi, + clip255 (READ (image, ap + lxi) + rxs - lxs)); + } + else + { + WRITE (image, ap + lxi, + clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs)); + + /* Move forward so that lxi/rxi is the pixel span */ + lxi++; + + /* Don't bother trying to optimize the fill unless + * the span is longer than 4 pixels. 
*/ + if (rxi - lxi > 4) + { + if (fill_start < 0) + { + fill_start = lxi; + fill_end = rxi; + fill_size++; + } + else + { + if (lxi >= fill_end || rxi < fill_start) + { + /* We're beyond what we saved, just fill it */ + ADD_SATURATE_8 (ap + fill_start, + fill_size * N_X_FRAC (8), + fill_end - fill_start); + fill_start = lxi; + fill_end = rxi; + fill_size = 1; + } + else + { + /* Update fill_start */ + if (lxi > fill_start) + { + ADD_SATURATE_8 (ap + fill_start, + fill_size * N_X_FRAC (8), + lxi - fill_start); + fill_start = lxi; + } + else if (lxi < fill_start) + { + ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), + fill_start - lxi); + } + + /* Update fill_end */ + if (rxi < fill_end) + { + ADD_SATURATE_8 (ap + rxi, + fill_size * N_X_FRAC (8), + fill_end - rxi); + fill_end = rxi; + } + else if (fill_end < rxi) + { + ADD_SATURATE_8 (ap + fill_end, + N_X_FRAC (8), + rxi - fill_end); + } + fill_size++; + } + } + } + else + { + ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi); + } + + WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs)); + } + } + + if (y == b) + { + /* We're done, make sure we clean up any remaining fill. 
*/ + if (fill_start != fill_end) + { + if (fill_size == N_Y_FRAC (8)) + { + MEMSET_WRAPPED (image, ap + fill_start, + 0xff, fill_end - fill_start); + } + else + { + ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), + fill_end - fill_start); + } + } + break; + } + + if (pixman_fixed_frac (y) != Y_FRAC_LAST (8)) + { + RENDER_EDGE_STEP_SMALL (l); + RENDER_EDGE_STEP_SMALL (r); + y += STEP_Y_SMALL (8); + } + else + { + RENDER_EDGE_STEP_BIG (l); + RENDER_EDGE_STEP_BIG (r); + y += STEP_Y_BIG (8); + if (fill_start != fill_end) + { + if (fill_size == N_Y_FRAC (8)) + { + MEMSET_WRAPPED (image, ap + fill_start, + 0xff, fill_end - fill_start); + } + else + { + ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), + fill_end - fill_start); + } + + fill_start = fill_end = -1; + fill_size = 0; + } + + line += stride; + } + } +} + +#ifndef PIXMAN_FB_ACCESSORS +static +#endif +void +PIXMAN_RASTERIZE_EDGES (pixman_image_t *image, + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b) +{ + switch (PIXMAN_FORMAT_BPP (image->bits.format)) + { + case 1: + rasterize_edges_1 (image, l, r, t, b); + break; + + case 4: + rasterize_edges_4 (image, l, r, t, b); + break; + + case 8: + rasterize_edges_8 (image, l, r, t, b); + break; + + default: + break; + } +} + +#ifndef PIXMAN_FB_ACCESSORS + +PIXMAN_EXPORT void +pixman_rasterize_edges (pixman_image_t *image, + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b) +{ + return_if_fail (image->type == BITS); + + if (image->bits.read_func || image->bits.write_func) + pixman_rasterize_edges_accessors (image, l, r, t, b); + else + pixman_rasterize_edges_no_accessors (image, l, r, t, b); +} + +#endif diff --git a/pixman/pixman/pixman-matrix.c b/pixman/pixman/pixman-matrix.c index 0b3ae78b3..8d0d97325 100644 --- a/pixman/pixman/pixman-matrix.c +++ b/pixman/pixman/pixman-matrix.c @@ -1,766 +1,766 @@ -/* - * Copyright © 2008 Keith Packard - * - * Permission to use, copy, modify, 
distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that copyright - * notice and this permission notice appear in supporting documentation, and - * that the name of the copyright holders not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. The copyright holders make no representations - * about the suitability of this software for any purpose. It is provided "as - * is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THIS SOFTWARE. 
- */ - -/* - * Matrix interfaces - */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include "pixman-private.h" - -#define F(x) pixman_int_to_fixed (x) - -PIXMAN_EXPORT void -pixman_transform_init_identity (struct pixman_transform *matrix) -{ - int i; - - memset (matrix, '\0', sizeof (struct pixman_transform)); - for (i = 0; i < 3; i++) - matrix->matrix[i][i] = F (1); -} - -typedef pixman_fixed_32_32_t pixman_fixed_34_30_t; - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_point_3d (const struct pixman_transform *transform, - struct pixman_vector * vector) -{ - struct pixman_vector result; - pixman_fixed_32_32_t partial; - pixman_fixed_48_16_t v; - int i, j; - - for (j = 0; j < 3; j++) - { - v = 0; - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] * - (pixman_fixed_48_16_t) vector->vector[i]); - v += partial >> 16; - } - - if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) - return FALSE; - - result.vector[j] = (pixman_fixed_t) v; - } - - *vector = result; - - if (!result.vector[2]) - return FALSE; - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_point (const struct pixman_transform *transform, - struct pixman_vector * vector) -{ - pixman_fixed_32_32_t partial; - pixman_fixed_34_30_t v[3]; - pixman_fixed_48_16_t quo; - int i, j; - - for (j = 0; j < 3; j++) - { - v[j] = 0; - - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] * - (pixman_fixed_32_32_t) vector->vector[i]); - v[j] += partial >> 2; - } - } - - if (!(v[2] >> 16)) - return FALSE; - - for (j = 0; j < 2; j++) - { - quo = v[j] / (v[2] >> 16); - if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16) - return FALSE; - vector->vector[j] = (pixman_fixed_t) quo; - } - - vector->vector[2] = pixman_fixed_1; - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_multiply (struct pixman_transform * dst, - const struct pixman_transform *l, - const struct 
pixman_transform *r) -{ - struct pixman_transform d; - int dx, dy; - int o; - - for (dy = 0; dy < 3; dy++) - { - for (dx = 0; dx < 3; dx++) - { - pixman_fixed_48_16_t v; - pixman_fixed_32_32_t partial; - - v = 0; - for (o = 0; o < 3; o++) - { - partial = - (pixman_fixed_32_32_t) l->matrix[dy][o] * - (pixman_fixed_32_32_t) r->matrix[o][dx]; - - v += partial >> 16; - } - - if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) - return FALSE; - - d.matrix[dy][dx] = (pixman_fixed_t) v; - } - } - - *dst = d; - return TRUE; -} - -PIXMAN_EXPORT void -pixman_transform_init_scale (struct pixman_transform *t, - pixman_fixed_t sx, - pixman_fixed_t sy) -{ - memset (t, '\0', sizeof (struct pixman_transform)); - - t->matrix[0][0] = sx; - t->matrix[1][1] = sy; - t->matrix[2][2] = F (1); -} - -static pixman_fixed_t -fixed_inverse (pixman_fixed_t x) -{ - return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F (1)) * F (1)) / x); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_scale (struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t sx, - pixman_fixed_t sy) -{ - struct pixman_transform t; - - if (sx == 0 || sy == 0) - return FALSE; - - if (forward) - { - pixman_transform_init_scale (&t, sx, sy); - if (!pixman_transform_multiply (forward, &t, forward)) - return FALSE; - } - - if (reverse) - { - pixman_transform_init_scale (&t, fixed_inverse (sx), - fixed_inverse (sy)); - if (!pixman_transform_multiply (reverse, reverse, &t)) - return FALSE; - } - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_transform_init_rotate (struct pixman_transform *t, - pixman_fixed_t c, - pixman_fixed_t s) -{ - memset (t, '\0', sizeof (struct pixman_transform)); - - t->matrix[0][0] = c; - t->matrix[0][1] = -s; - t->matrix[1][0] = s; - t->matrix[1][1] = c; - t->matrix[2][2] = F (1); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_rotate (struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t c, - pixman_fixed_t s) -{ - struct 
pixman_transform t; - - if (forward) - { - pixman_transform_init_rotate (&t, c, s); - if (!pixman_transform_multiply (forward, &t, forward)) - return FALSE; - } - - if (reverse) - { - pixman_transform_init_rotate (&t, c, -s); - if (!pixman_transform_multiply (reverse, reverse, &t)) - return FALSE; - } - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_transform_init_translate (struct pixman_transform *t, - pixman_fixed_t tx, - pixman_fixed_t ty) -{ - memset (t, '\0', sizeof (struct pixman_transform)); - - t->matrix[0][0] = F (1); - t->matrix[0][2] = tx; - t->matrix[1][1] = F (1); - t->matrix[1][2] = ty; - t->matrix[2][2] = F (1); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_translate (struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t tx, - pixman_fixed_t ty) -{ - struct pixman_transform t; - - if (forward) - { - pixman_transform_init_translate (&t, tx, ty); - - if (!pixman_transform_multiply (forward, &t, forward)) - return FALSE; - } - - if (reverse) - { - pixman_transform_init_translate (&t, -tx, -ty); - - if (!pixman_transform_multiply (reverse, reverse, &t)) - return FALSE; - } - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_bounds (const struct pixman_transform *matrix, - struct pixman_box16 * b) - -{ - struct pixman_vector v[4]; - int i; - int x1, y1, x2, y2; - - v[0].vector[0] = F (b->x1); - v[0].vector[1] = F (b->y1); - v[0].vector[2] = F (1); - - v[1].vector[0] = F (b->x2); - v[1].vector[1] = F (b->y1); - v[1].vector[2] = F (1); - - v[2].vector[0] = F (b->x2); - v[2].vector[1] = F (b->y2); - v[2].vector[2] = F (1); - - v[3].vector[0] = F (b->x1); - v[3].vector[1] = F (b->y2); - v[3].vector[2] = F (1); - - for (i = 0; i < 4; i++) - { - if (!pixman_transform_point (matrix, &v[i])) - return FALSE; - - x1 = pixman_fixed_to_int (v[i].vector[0]); - y1 = pixman_fixed_to_int (v[i].vector[1]); - x2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[0])); - y2 = pixman_fixed_to_int (pixman_fixed_ceil 
(v[i].vector[1])); - - if (i == 0) - { - b->x1 = x1; - b->y1 = y1; - b->x2 = x2; - b->y2 = y2; - } - else - { - if (x1 < b->x1) b->x1 = x1; - if (y1 < b->y1) b->y1 = y1; - if (x2 > b->x2) b->x2 = x2; - if (y2 > b->y2) b->y2 = y2; - } - } - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_invert (struct pixman_transform * dst, - const struct pixman_transform *src) -{ - struct pixman_f_transform m, r; - - pixman_f_transform_from_pixman_transform (&m, src); - - if (!pixman_f_transform_invert (&r, &m)) - return FALSE; - - if (!pixman_transform_from_pixman_f_transform (dst, &r)) - return FALSE; - - return TRUE; -} - -static pixman_bool_t -within_epsilon (pixman_fixed_t a, - pixman_fixed_t b, - pixman_fixed_t epsilon) -{ - pixman_fixed_t t = a - b; - - if (t < 0) - t = -t; - - return t <= epsilon; -} - -#define EPSILON (pixman_fixed_t) (2) - -#define IS_SAME(a, b) (within_epsilon (a, b, EPSILON)) -#define IS_ZERO(a) (within_epsilon (a, 0, EPSILON)) -#define IS_ONE(a) (within_epsilon (a, F (1), EPSILON)) -#define IS_UNIT(a) \ - (within_epsilon (a, F (1), EPSILON) || \ - within_epsilon (a, F (-1), EPSILON) || \ - IS_ZERO (a)) -#define IS_INT(a) (IS_ZERO (pixman_fixed_frac (a))) - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_identity (const struct pixman_transform *t) -{ - return (IS_SAME (t->matrix[0][0], t->matrix[1][1]) && - IS_SAME (t->matrix[0][0], t->matrix[2][2]) && - !IS_ZERO (t->matrix[0][0]) && - IS_ZERO (t->matrix[0][1]) && - IS_ZERO (t->matrix[0][2]) && - IS_ZERO (t->matrix[1][0]) && - IS_ZERO (t->matrix[1][2]) && - IS_ZERO (t->matrix[2][0]) && - IS_ZERO (t->matrix[2][1])); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_scale (const struct pixman_transform *t) -{ - return (!IS_ZERO (t->matrix[0][0]) && - IS_ZERO (t->matrix[0][1]) && - IS_ZERO (t->matrix[0][2]) && - - IS_ZERO (t->matrix[1][0]) && - !IS_ZERO (t->matrix[1][1]) && - IS_ZERO (t->matrix[1][2]) && - - IS_ZERO (t->matrix[2][0]) && - IS_ZERO (t->matrix[2][1]) && - !IS_ZERO 
(t->matrix[2][2])); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_int_translate (const struct pixman_transform *t) -{ - return (IS_ONE (t->matrix[0][0]) && - IS_ZERO (t->matrix[0][1]) && - IS_INT (t->matrix[0][2]) && - - IS_ZERO (t->matrix[1][0]) && - IS_ONE (t->matrix[1][1]) && - IS_INT (t->matrix[1][2]) && - - IS_ZERO (t->matrix[2][0]) && - IS_ZERO (t->matrix[2][1]) && - IS_ONE (t->matrix[2][2])); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_inverse (const struct pixman_transform *a, - const struct pixman_transform *b) -{ - struct pixman_transform t; - - if (!pixman_transform_multiply (&t, a, b)) - return FALSE; - - return pixman_transform_is_identity (&t); -} - -PIXMAN_EXPORT void -pixman_f_transform_from_pixman_transform (struct pixman_f_transform * ft, - const struct pixman_transform *t) -{ - int i, j; - - for (j = 0; j < 3; j++) - { - for (i = 0; i < 3; i++) - ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]); - } -} - -PIXMAN_EXPORT pixman_bool_t -pixman_transform_from_pixman_f_transform (struct pixman_transform * t, - const struct pixman_f_transform *ft) -{ - int i, j; - - for (j = 0; j < 3; j++) - { - for (i = 0; i < 3; i++) - { - double d = ft->m[j][i]; - if (d < -32767.0 || d > 32767.0) - return FALSE; - d = d * 65536.0 + 0.5; - t->matrix[j][i] = (pixman_fixed_t) floor (d); - } - } - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_invert (struct pixman_f_transform * dst, - const struct pixman_f_transform *src) -{ - double det; - int i, j; - static int a[3] = { 2, 2, 1 }; - static int b[3] = { 1, 0, 0 }; - - det = 0; - for (i = 0; i < 3; i++) - { - double p; - int ai = a[i]; - int bi = b[i]; - p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] - - src->m[ai][1] * src->m[bi][2]); - if (i == 1) - p = -p; - det += p; - } - - if (det == 0) - return FALSE; - - det = 1 / det; - for (j = 0; j < 3; j++) - { - for (i = 0; i < 3; i++) - { - double p; - int ai = a[i]; - int aj = a[j]; - int bi = b[i]; - int bj = b[j]; - - 
p = (src->m[ai][aj] * src->m[bi][bj] - - src->m[ai][bj] * src->m[bi][aj]); - - if (((i + j) & 1) != 0) - p = -p; - - dst->m[j][i] = det * p; - } - } - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_point (const struct pixman_f_transform *t, - struct pixman_f_vector * v) -{ - struct pixman_f_vector result; - int i, j; - double a; - - for (j = 0; j < 3; j++) - { - a = 0; - for (i = 0; i < 3; i++) - a += t->m[j][i] * v->v[i]; - result.v[j] = a; - } - - if (!result.v[2]) - return FALSE; - - for (j = 0; j < 2; j++) - v->v[j] = result.v[j] / result.v[2]; - - v->v[2] = 1; - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_f_transform_point_3d (const struct pixman_f_transform *t, - struct pixman_f_vector * v) -{ - struct pixman_f_vector result; - int i, j; - double a; - - for (j = 0; j < 3; j++) - { - a = 0; - for (i = 0; i < 3; i++) - a += t->m[j][i] * v->v[i]; - result.v[j] = a; - } - - *v = result; -} - -PIXMAN_EXPORT void -pixman_f_transform_multiply (struct pixman_f_transform * dst, - const struct pixman_f_transform *l, - const struct pixman_f_transform *r) -{ - struct pixman_f_transform d; - int dx, dy; - int o; - - for (dy = 0; dy < 3; dy++) - { - for (dx = 0; dx < 3; dx++) - { - double v = 0; - for (o = 0; o < 3; o++) - v += l->m[dy][o] * r->m[o][dx]; - d.m[dy][dx] = v; - } - } - - *dst = d; -} - -PIXMAN_EXPORT void -pixman_f_transform_init_scale (struct pixman_f_transform *t, - double sx, - double sy) -{ - t->m[0][0] = sx; - t->m[0][1] = 0; - t->m[0][2] = 0; - t->m[1][0] = 0; - t->m[1][1] = sy; - t->m[1][2] = 0; - t->m[2][0] = 0; - t->m[2][1] = 0; - t->m[2][2] = 1; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_scale (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double sx, - double sy) -{ - struct pixman_f_transform t; - - if (sx == 0 || sy == 0) - return FALSE; - - if (forward) - { - pixman_f_transform_init_scale (&t, sx, sy); - pixman_f_transform_multiply (forward, &t, forward); - } - - if (reverse) - { 
- pixman_f_transform_init_scale (&t, 1 / sx, 1 / sy); - pixman_f_transform_multiply (reverse, reverse, &t); - } - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_f_transform_init_rotate (struct pixman_f_transform *t, - double c, - double s) -{ - t->m[0][0] = c; - t->m[0][1] = -s; - t->m[0][2] = 0; - t->m[1][0] = s; - t->m[1][1] = c; - t->m[1][2] = 0; - t->m[2][0] = 0; - t->m[2][1] = 0; - t->m[2][2] = 1; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_rotate (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double c, - double s) -{ - struct pixman_f_transform t; - - if (forward) - { - pixman_f_transform_init_rotate (&t, c, s); - pixman_f_transform_multiply (forward, &t, forward); - } - - if (reverse) - { - pixman_f_transform_init_rotate (&t, c, -s); - pixman_f_transform_multiply (reverse, reverse, &t); - } - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_f_transform_init_translate (struct pixman_f_transform *t, - double tx, - double ty) -{ - t->m[0][0] = 1; - t->m[0][1] = 0; - t->m[0][2] = tx; - t->m[1][0] = 0; - t->m[1][1] = 1; - t->m[1][2] = ty; - t->m[2][0] = 0; - t->m[2][1] = 0; - t->m[2][2] = 1; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_translate (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double tx, - double ty) -{ - struct pixman_f_transform t; - - if (forward) - { - pixman_f_transform_init_translate (&t, tx, ty); - pixman_f_transform_multiply (forward, &t, forward); - } - - if (reverse) - { - pixman_f_transform_init_translate (&t, -tx, -ty); - pixman_f_transform_multiply (reverse, reverse, &t); - } - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_bounds (const struct pixman_f_transform *t, - struct pixman_box16 * b) -{ - struct pixman_f_vector v[4]; - int i; - int x1, y1, x2, y2; - - v[0].v[0] = b->x1; - v[0].v[1] = b->y1; - v[0].v[2] = 1; - v[1].v[0] = b->x2; - v[1].v[1] = b->y1; - v[1].v[2] = 1; - v[2].v[0] = b->x2; - v[2].v[1] = b->y2; - v[2].v[2] = 1; - 
v[3].v[0] = b->x1; - v[3].v[1] = b->y2; - v[3].v[2] = 1; - - for (i = 0; i < 4; i++) - { - if (!pixman_f_transform_point (t, &v[i])) - return FALSE; - - x1 = floor (v[i].v[0]); - y1 = floor (v[i].v[1]); - x2 = ceil (v[i].v[0]); - y2 = ceil (v[i].v[1]); - - if (i == 0) - { - b->x1 = x1; - b->y1 = y1; - b->x2 = x2; - b->y2 = y2; - } - else - { - if (x1 < b->x1) b->x1 = x1; - if (y1 < b->y1) b->y1 = y1; - if (x2 > b->x2) b->x2 = x2; - if (y2 > b->y2) b->y2 = y2; - } - } - - return TRUE; -} - -PIXMAN_EXPORT void -pixman_f_transform_init_identity (struct pixman_f_transform *t) -{ - int i, j; - - for (j = 0; j < 3; j++) - { - for (i = 0; i < 3; i++) - t->m[j][i] = i == j ? 1 : 0; - } -} +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
+ */ + +/* + * Matrix interfaces + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include "pixman-private.h" + +#define F(x) pixman_int_to_fixed (x) + +PIXMAN_EXPORT void +pixman_transform_init_identity (struct pixman_transform *matrix) +{ + int i; + + memset (matrix, '\0', sizeof (struct pixman_transform)); + for (i = 0; i < 3; i++) + matrix->matrix[i][i] = F (1); +} + +typedef pixman_fixed_32_32_t pixman_fixed_34_30_t; + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point_3d (const struct pixman_transform *transform, + struct pixman_vector * vector) +{ + struct pixman_vector result; + pixman_fixed_32_32_t partial; + pixman_fixed_48_16_t v; + int i, j; + + for (j = 0; j < 3; j++) + { + v = 0; + for (i = 0; i < 3; i++) + { + partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] * + (pixman_fixed_48_16_t) vector->vector[i]); + v += partial >> 16; + } + + if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) + return FALSE; + + result.vector[j] = (pixman_fixed_t) v; + } + + *vector = result; + + if (!result.vector[2]) + return FALSE; + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point (const struct pixman_transform *transform, + struct pixman_vector * vector) +{ + pixman_fixed_32_32_t partial; + pixman_fixed_34_30_t v[3]; + pixman_fixed_48_16_t quo; + int i, j; + + for (j = 0; j < 3; j++) + { + v[j] = 0; + + for (i = 0; i < 3; i++) + { + partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] * + (pixman_fixed_32_32_t) vector->vector[i]); + v[j] += partial >> 2; + } + } + + if (!(v[2] >> 16)) + return FALSE; + + for (j = 0; j < 2; j++) + { + quo = v[j] / (v[2] >> 16); + if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16) + return FALSE; + vector->vector[j] = (pixman_fixed_t) quo; + } + + vector->vector[2] = pixman_fixed_1; + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_multiply (struct pixman_transform * dst, + const struct pixman_transform *l, + const struct 
pixman_transform *r) +{ + struct pixman_transform d; + int dx, dy; + int o; + + for (dy = 0; dy < 3; dy++) + { + for (dx = 0; dx < 3; dx++) + { + pixman_fixed_48_16_t v; + pixman_fixed_32_32_t partial; + + v = 0; + for (o = 0; o < 3; o++) + { + partial = + (pixman_fixed_32_32_t) l->matrix[dy][o] * + (pixman_fixed_32_32_t) r->matrix[o][dx]; + + v += partial >> 16; + } + + if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) + return FALSE; + + d.matrix[dy][dx] = (pixman_fixed_t) v; + } + } + + *dst = d; + return TRUE; +} + +PIXMAN_EXPORT void +pixman_transform_init_scale (struct pixman_transform *t, + pixman_fixed_t sx, + pixman_fixed_t sy) +{ + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = sx; + t->matrix[1][1] = sy; + t->matrix[2][2] = F (1); +} + +static pixman_fixed_t +fixed_inverse (pixman_fixed_t x) +{ + return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F (1)) * F (1)) / x); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_scale (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t sx, + pixman_fixed_t sy) +{ + struct pixman_transform t; + + if (sx == 0 || sy == 0) + return FALSE; + + if (forward) + { + pixman_transform_init_scale (&t, sx, sy); + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_scale (&t, fixed_inverse (sx), + fixed_inverse (sy)); + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_transform_init_rotate (struct pixman_transform *t, + pixman_fixed_t c, + pixman_fixed_t s) +{ + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = c; + t->matrix[0][1] = -s; + t->matrix[1][0] = s; + t->matrix[1][1] = c; + t->matrix[2][2] = F (1); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_rotate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t c, + pixman_fixed_t s) +{ + struct 
pixman_transform t; + + if (forward) + { + pixman_transform_init_rotate (&t, c, s); + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_rotate (&t, c, -s); + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_transform_init_translate (struct pixman_transform *t, + pixman_fixed_t tx, + pixman_fixed_t ty) +{ + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = F (1); + t->matrix[0][2] = tx; + t->matrix[1][1] = F (1); + t->matrix[1][2] = ty; + t->matrix[2][2] = F (1); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_translate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t tx, + pixman_fixed_t ty) +{ + struct pixman_transform t; + + if (forward) + { + pixman_transform_init_translate (&t, tx, ty); + + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_translate (&t, -tx, -ty); + + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_bounds (const struct pixman_transform *matrix, + struct pixman_box16 * b) + +{ + struct pixman_vector v[4]; + int i; + int x1, y1, x2, y2; + + v[0].vector[0] = F (b->x1); + v[0].vector[1] = F (b->y1); + v[0].vector[2] = F (1); + + v[1].vector[0] = F (b->x2); + v[1].vector[1] = F (b->y1); + v[1].vector[2] = F (1); + + v[2].vector[0] = F (b->x2); + v[2].vector[1] = F (b->y2); + v[2].vector[2] = F (1); + + v[3].vector[0] = F (b->x1); + v[3].vector[1] = F (b->y2); + v[3].vector[2] = F (1); + + for (i = 0; i < 4; i++) + { + if (!pixman_transform_point (matrix, &v[i])) + return FALSE; + + x1 = pixman_fixed_to_int (v[i].vector[0]); + y1 = pixman_fixed_to_int (v[i].vector[1]); + x2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[0])); + y2 = pixman_fixed_to_int (pixman_fixed_ceil 
(v[i].vector[1])); + + if (i == 0) + { + b->x1 = x1; + b->y1 = y1; + b->x2 = x2; + b->y2 = y2; + } + else + { + if (x1 < b->x1) b->x1 = x1; + if (y1 < b->y1) b->y1 = y1; + if (x2 > b->x2) b->x2 = x2; + if (y2 > b->y2) b->y2 = y2; + } + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_invert (struct pixman_transform * dst, + const struct pixman_transform *src) +{ + struct pixman_f_transform m, r; + + pixman_f_transform_from_pixman_transform (&m, src); + + if (!pixman_f_transform_invert (&r, &m)) + return FALSE; + + if (!pixman_transform_from_pixman_f_transform (dst, &r)) + return FALSE; + + return TRUE; +} + +static pixman_bool_t +within_epsilon (pixman_fixed_t a, + pixman_fixed_t b, + pixman_fixed_t epsilon) +{ + pixman_fixed_t t = a - b; + + if (t < 0) + t = -t; + + return t <= epsilon; +} + +#define EPSILON (pixman_fixed_t) (2) + +#define IS_SAME(a, b) (within_epsilon (a, b, EPSILON)) +#define IS_ZERO(a) (within_epsilon (a, 0, EPSILON)) +#define IS_ONE(a) (within_epsilon (a, F (1), EPSILON)) +#define IS_UNIT(a) \ + (within_epsilon (a, F (1), EPSILON) || \ + within_epsilon (a, F (-1), EPSILON) || \ + IS_ZERO (a)) +#define IS_INT(a) (IS_ZERO (pixman_fixed_frac (a))) + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_identity (const struct pixman_transform *t) +{ + return (IS_SAME (t->matrix[0][0], t->matrix[1][1]) && + IS_SAME (t->matrix[0][0], t->matrix[2][2]) && + !IS_ZERO (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_ZERO (t->matrix[0][2]) && + IS_ZERO (t->matrix[1][0]) && + IS_ZERO (t->matrix[1][2]) && + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1])); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_scale (const struct pixman_transform *t) +{ + return (!IS_ZERO (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_ZERO (t->matrix[0][2]) && + + IS_ZERO (t->matrix[1][0]) && + !IS_ZERO (t->matrix[1][1]) && + IS_ZERO (t->matrix[1][2]) && + + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1]) && + !IS_ZERO 
(t->matrix[2][2])); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_int_translate (const struct pixman_transform *t) +{ + return (IS_ONE (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_INT (t->matrix[0][2]) && + + IS_ZERO (t->matrix[1][0]) && + IS_ONE (t->matrix[1][1]) && + IS_INT (t->matrix[1][2]) && + + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1]) && + IS_ONE (t->matrix[2][2])); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_inverse (const struct pixman_transform *a, + const struct pixman_transform *b) +{ + struct pixman_transform t; + + if (!pixman_transform_multiply (&t, a, b)) + return FALSE; + + return pixman_transform_is_identity (&t); +} + +PIXMAN_EXPORT void +pixman_f_transform_from_pixman_transform (struct pixman_f_transform * ft, + const struct pixman_transform *t) +{ + int i, j; + + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]); + } +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_from_pixman_f_transform (struct pixman_transform * t, + const struct pixman_f_transform *ft) +{ + int i, j; + + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + { + double d = ft->m[j][i]; + if (d < -32767.0 || d > 32767.0) + return FALSE; + d = d * 65536.0 + 0.5; + t->matrix[j][i] = (pixman_fixed_t) floor (d); + } + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_invert (struct pixman_f_transform * dst, + const struct pixman_f_transform *src) +{ + double det; + int i, j; + static int a[3] = { 2, 2, 1 }; + static int b[3] = { 1, 0, 0 }; + + det = 0; + for (i = 0; i < 3; i++) + { + double p; + int ai = a[i]; + int bi = b[i]; + p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] - + src->m[ai][1] * src->m[bi][2]); + if (i == 1) + p = -p; + det += p; + } + + if (det == 0) + return FALSE; + + det = 1 / det; + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + { + double p; + int ai = a[i]; + int aj = a[j]; + int bi = b[i]; + int bj = b[j]; + + 
p = (src->m[ai][aj] * src->m[bi][bj] - + src->m[ai][bj] * src->m[bi][aj]); + + if (((i + j) & 1) != 0) + p = -p; + + dst->m[j][i] = det * p; + } + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_point (const struct pixman_f_transform *t, + struct pixman_f_vector * v) +{ + struct pixman_f_vector result; + int i, j; + double a; + + for (j = 0; j < 3; j++) + { + a = 0; + for (i = 0; i < 3; i++) + a += t->m[j][i] * v->v[i]; + result.v[j] = a; + } + + if (!result.v[2]) + return FALSE; + + for (j = 0; j < 2; j++) + v->v[j] = result.v[j] / result.v[2]; + + v->v[2] = 1; + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_point_3d (const struct pixman_f_transform *t, + struct pixman_f_vector * v) +{ + struct pixman_f_vector result; + int i, j; + double a; + + for (j = 0; j < 3; j++) + { + a = 0; + for (i = 0; i < 3; i++) + a += t->m[j][i] * v->v[i]; + result.v[j] = a; + } + + *v = result; +} + +PIXMAN_EXPORT void +pixman_f_transform_multiply (struct pixman_f_transform * dst, + const struct pixman_f_transform *l, + const struct pixman_f_transform *r) +{ + struct pixman_f_transform d; + int dx, dy; + int o; + + for (dy = 0; dy < 3; dy++) + { + for (dx = 0; dx < 3; dx++) + { + double v = 0; + for (o = 0; o < 3; o++) + v += l->m[dy][o] * r->m[o][dx]; + d.m[dy][dx] = v; + } + } + + *dst = d; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_scale (struct pixman_f_transform *t, + double sx, + double sy) +{ + t->m[0][0] = sx; + t->m[0][1] = 0; + t->m[0][2] = 0; + t->m[1][0] = 0; + t->m[1][1] = sy; + t->m[1][2] = 0; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_scale (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double sx, + double sy) +{ + struct pixman_f_transform t; + + if (sx == 0 || sy == 0) + return FALSE; + + if (forward) + { + pixman_f_transform_init_scale (&t, sx, sy); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { 
+ pixman_f_transform_init_scale (&t, 1 / sx, 1 / sy); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_rotate (struct pixman_f_transform *t, + double c, + double s) +{ + t->m[0][0] = c; + t->m[0][1] = -s; + t->m[0][2] = 0; + t->m[1][0] = s; + t->m[1][1] = c; + t->m[1][2] = 0; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_rotate (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double c, + double s) +{ + struct pixman_f_transform t; + + if (forward) + { + pixman_f_transform_init_rotate (&t, c, s); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { + pixman_f_transform_init_rotate (&t, c, -s); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_translate (struct pixman_f_transform *t, + double tx, + double ty) +{ + t->m[0][0] = 1; + t->m[0][1] = 0; + t->m[0][2] = tx; + t->m[1][0] = 0; + t->m[1][1] = 1; + t->m[1][2] = ty; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_translate (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double tx, + double ty) +{ + struct pixman_f_transform t; + + if (forward) + { + pixman_f_transform_init_translate (&t, tx, ty); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { + pixman_f_transform_init_translate (&t, -tx, -ty); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_bounds (const struct pixman_f_transform *t, + struct pixman_box16 * b) +{ + struct pixman_f_vector v[4]; + int i; + int x1, y1, x2, y2; + + v[0].v[0] = b->x1; + v[0].v[1] = b->y1; + v[0].v[2] = 1; + v[1].v[0] = b->x2; + v[1].v[1] = b->y1; + v[1].v[2] = 1; + v[2].v[0] = b->x2; + v[2].v[1] = b->y2; + v[2].v[2] = 1; + 
v[3].v[0] = b->x1; + v[3].v[1] = b->y2; + v[3].v[2] = 1; + + for (i = 0; i < 4; i++) + { + if (!pixman_f_transform_point (t, &v[i])) + return FALSE; + + x1 = floor (v[i].v[0]); + y1 = floor (v[i].v[1]); + x2 = ceil (v[i].v[0]); + y2 = ceil (v[i].v[1]); + + if (i == 0) + { + b->x1 = x1; + b->y1 = y1; + b->x2 = x2; + b->y2 = y2; + } + else + { + if (x1 < b->x1) b->x1 = x1; + if (y1 < b->y1) b->y1 = y1; + if (x2 > b->x2) b->x2 = x2; + if (y2 > b->y2) b->y2 = y2; + } + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_identity (struct pixman_f_transform *t) +{ + int i, j; + + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + t->m[j][i] = i == j ? 1 : 0; + } +} diff --git a/pixman/pixman/pixman-timer.c b/pixman/pixman/pixman-timer.c index f5ae18e89..c45d7b4fa 100644 --- a/pixman/pixman/pixman-timer.c +++ b/pixman/pixman/pixman-timer.c @@ -1,66 +1,66 @@ -/* - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Red Hat not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. Red Hat makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. 
- * - * RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL RED HAT - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include "pixman-private.h" - -#ifdef PIXMAN_TIMERS - -static pixman_timer_t *timers; - -static void -dump_timers (void) -{ - pixman_timer_t *timer; - - for (timer = timers; timer != NULL; timer = timer->next) - { - printf ("%s: total: %llu n: %llu avg: %f\n", - timer->name, - timer->total, - timer->n_times, - timer->total / (double)timer->n_times); - } -} - -void -pixman_timer_register (pixman_timer_t *timer) -{ - static int initialized; - - int atexit (void (*function)(void)); - - if (!initialized) - { - atexit (dump_timers); - initialized = 1; - } - - timer->next = timers; - timers = timer; -} - -#endif +/* + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. 
+ * + * RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL RED HAT + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include "pixman-private.h" + +#ifdef PIXMAN_TIMERS + +static pixman_timer_t *timers; + +static void +dump_timers (void) +{ + pixman_timer_t *timer; + + for (timer = timers; timer != NULL; timer = timer->next) + { + printf ("%s: total: %llu n: %llu avg: %f\n", + timer->name, + timer->total, + timer->n_times, + timer->total / (double)timer->n_times); + } +} + +void +pixman_timer_register (pixman_timer_t *timer) +{ + static int initialized; + + int atexit (void (*function)(void)); + + if (!initialized) + { + atexit (dump_timers); + initialized = 1; + } + + timer->next = timers; + timers = timer; +} + +#endif diff --git a/pixman/pixman/pixman-version.h.in b/pixman/pixman/pixman-version.h.in index 256b2e6f1..022bf1a3c 100644 --- a/pixman/pixman/pixman-version.h.in +++ b/pixman/pixman/pixman-version.h.in @@ -1,50 +1,50 @@ -/* - * Copyright © 2008 Red Hat, Inc. 
- * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Author: Carl D. Worth - */ - -#ifndef PIXMAN_VERSION_H__ -#define PIXMAN_VERSION_H__ - -#ifndef PIXMAN_H__ -# error pixman-version.h should only be included by pixman.h -#endif - -#define PIXMAN_VERSION_MAJOR @PIXMAN_VERSION_MAJOR@ -#define PIXMAN_VERSION_MINOR @PIXMAN_VERSION_MINOR@ -#define PIXMAN_VERSION_MICRO @PIXMAN_VERSION_MICRO@ - -#define PIXMAN_VERSION_STRING "@PIXMAN_VERSION_MAJOR@.@PIXMAN_VERSION_MINOR@.@PIXMAN_VERSION_MICRO@" - -#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ - ((major) * 10000) \ - + ((minor) * 100) \ - + ((micro) * 1)) - -#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \ - PIXMAN_VERSION_MAJOR, \ - PIXMAN_VERSION_MINOR, \ - PIXMAN_VERSION_MICRO) - -#endif /* PIXMAN_VERSION_H__ */ +/* + * Copyright © 2008 Red Hat, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Author: Carl D. 
Worth + */ + +#ifndef PIXMAN_VERSION_H__ +#define PIXMAN_VERSION_H__ + +#ifndef PIXMAN_H__ +# error pixman-version.h should only be included by pixman.h +#endif + +#define PIXMAN_VERSION_MAJOR @PIXMAN_VERSION_MAJOR@ +#define PIXMAN_VERSION_MINOR @PIXMAN_VERSION_MINOR@ +#define PIXMAN_VERSION_MICRO @PIXMAN_VERSION_MICRO@ + +#define PIXMAN_VERSION_STRING "@PIXMAN_VERSION_MAJOR@.@PIXMAN_VERSION_MINOR@.@PIXMAN_VERSION_MICRO@" + +#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ + ((major) * 10000) \ + + ((minor) * 100) \ + + ((micro) * 1)) + +#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \ + PIXMAN_VERSION_MAJOR, \ + PIXMAN_VERSION_MINOR, \ + PIXMAN_VERSION_MICRO) + +#endif /* PIXMAN_VERSION_H__ */ diff --git a/pixman/test/fuzzer-find-diff.pl b/pixman/test/fuzzer-find-diff.pl index a43f6fb83..53d9b8de1 100644 --- a/pixman/test/fuzzer-find-diff.pl +++ b/pixman/test/fuzzer-find-diff.pl @@ -1,68 +1,68 @@ -#!/usr/bin/env perl - -$usage = "Usage: - fuzzer-find-diff.pl reference_binary new_binary [number_of_tests_to_run] - -The first two input arguments are the commands to run the test programs -based on fuzzer_test_main() function from 'util.c' (preferably they should -be statically compiled, this can be achieved via '--disable-shared' pixman -configure option). The third optional argument is the number of test rounds -to run (if not specified, then testing runs infinitely or until some problem -is detected). 
- -Usage examples: - fuzzer-find-diff.pl ./blitters-test-with-sse-disabled ./blitters-test 9000000 - fuzzer-find-diff.pl ./blitters-test \"ssh ppc64_host /path/to/blitters-test\" -"; - -$#ARGV >= 1 or die $usage; - -$batch_size = 10000; - -if ($#ARGV >= 2) { - $number_of_tests = int($ARGV[2]); -} else { - $number_of_tests = -1 -} - -sub test_range { - my $min = shift; - my $max = shift; - - if (`$ARGV[0] $min $max 2>/dev/null` eq `$ARGV[1] $min $max 2>/dev/null`) { - return; - } - - while ($max != $min + 1) { - my $avg = int(($min + $max) / 2); - my $res1 = `$ARGV[0] $min $avg 2>/dev/null`; - my $res2 = `$ARGV[1] $min $avg 2>/dev/null`; - if ($res1 ne $res2) { - $max = $avg; - } else { - $min = $avg; - } - } - return $max; -} - -$base = 1; -while ($number_of_tests <= 0 || $base <= $number_of_tests) { - printf("testing %-12d\r", $base + $batch_size - 1); - my $res = test_range($base, $base + $batch_size - 1); - if ($res) { - printf("Failure: results are different for test %d:\n", $res); - - printf("\n-- ref --\n"); - print `$ARGV[0] $res`; - printf("-- new --\n"); - print `$ARGV[1] $res`; - - printf("The problematic conditions can be reproduced by running:\n"); - printf("$ARGV[1] %d\n", $res); - - exit(1); - } - $base += $batch_size; -} -printf("Success: %d tests finished\n", $base - 1); +#!/usr/bin/env perl + +$usage = "Usage: + fuzzer-find-diff.pl reference_binary new_binary [number_of_tests_to_run] + +The first two input arguments are the commands to run the test programs +based on fuzzer_test_main() function from 'util.c' (preferably they should +be statically compiled, this can be achieved via '--disable-shared' pixman +configure option). The third optional argument is the number of test rounds +to run (if not specified, then testing runs infinitely or until some problem +is detected). 
+ +Usage examples: + fuzzer-find-diff.pl ./blitters-test-with-sse-disabled ./blitters-test 9000000 + fuzzer-find-diff.pl ./blitters-test \"ssh ppc64_host /path/to/blitters-test\" +"; + +$#ARGV >= 1 or die $usage; + +$batch_size = 10000; + +if ($#ARGV >= 2) { + $number_of_tests = int($ARGV[2]); +} else { + $number_of_tests = -1 +} + +sub test_range { + my $min = shift; + my $max = shift; + + if (`$ARGV[0] $min $max 2>/dev/null` eq `$ARGV[1] $min $max 2>/dev/null`) { + return; + } + + while ($max != $min + 1) { + my $avg = int(($min + $max) / 2); + my $res1 = `$ARGV[0] $min $avg 2>/dev/null`; + my $res2 = `$ARGV[1] $min $avg 2>/dev/null`; + if ($res1 ne $res2) { + $max = $avg; + } else { + $min = $avg; + } + } + return $max; +} + +$base = 1; +while ($number_of_tests <= 0 || $base <= $number_of_tests) { + printf("testing %-12d\r", $base + $batch_size - 1); + my $res = test_range($base, $base + $batch_size - 1); + if ($res) { + printf("Failure: results are different for test %d:\n", $res); + + printf("\n-- ref --\n"); + print `$ARGV[0] $res`; + printf("-- new --\n"); + print `$ARGV[1] $res`; + + printf("The problematic conditions can be reproduced by running:\n"); + printf("$ARGV[1] %d\n", $res); + + exit(1); + } + $base += $batch_size; +} +printf("Success: %d tests finished\n", $base - 1); diff --git a/pixman/test/region-test.c b/pixman/test/region-test.c index 9d5a41eb9..a1fc4a837 100644 --- a/pixman/test/region-test.c +++ b/pixman/test/region-test.c @@ -1,123 +1,123 @@ -#include -#include -#include -#include "utils.h" - -int -main () -{ - pixman_region32_t r1; - pixman_region32_t r2; - pixman_region32_t r3; - pixman_box32_t boxes[] = { - { 10, 10, 20, 20 }, - { 30, 30, 30, 40 }, - { 50, 45, 60, 44 }, - }; - pixman_box32_t boxes2[] = { - { 2, 6, 7, 6 }, - { 4, 1, 6, 7 }, - }; - pixman_box32_t boxes3[] = { - { 2, 6, 7, 6 }, - { 4, 1, 6, 1 }, - }; - int i, j; - pixman_box32_t *b; - pixman_image_t *image, *fill; - pixman_color_t white = { - 0xffff, - 0xffff, - 0xffff, - 
0xffff - }; - - /* This used to go into an infinite loop before pixman-region.c - * was fixed to not use explict "short" variables - */ - pixman_region32_init_rect (&r1, 0, 0, 20, 64000); - pixman_region32_init_rect (&r2, 0, 0, 20, 64000); - pixman_region32_init_rect (&r3, 0, 0, 20, 64000); - - pixman_region32_subtract (&r1, &r2, &r3); - - - /* This would produce a region containing an empty - * rectangle in it. Such regions are considered malformed, - * but using an empty rectangle for initialization should - * work. - */ - pixman_region32_init_rects (&r1, boxes, 3); - - b = pixman_region32_rectangles (&r1, &i); - - assert (i == 1); - - while (i--) - { - assert (b[i].x1 < b[i].x2); - assert (b[i].y1 < b[i].y2); - } - - /* This would produce a rectangle containing the bounding box - * of the two rectangles. The correct result is to eliminate - * the broken rectangle. - */ - pixman_region32_init_rects (&r1, boxes2, 2); - - b = pixman_region32_rectangles (&r1, &i); - - assert (i == 1); - - assert (b[0].x1 == 4); - assert (b[0].y1 == 1); - assert (b[0].x2 == 6); - assert (b[0].y2 == 7); - - /* This should produce an empty region */ - pixman_region32_init_rects (&r1, boxes3, 2); - - b = pixman_region32_rectangles (&r1, &i); - - assert (i == 0); - - fill = pixman_image_create_solid_fill (&white); - for (i = 0; i < 100; i++) - { - int image_size = 128; - - pixman_region32_init (&r1); - - /* Add some random rectangles */ - for (j = 0; j < 64; j++) - pixman_region32_union_rect (&r1, &r1, - lcg_rand_n (image_size), - lcg_rand_n (image_size), - lcg_rand_n (25), - lcg_rand_n (25)); - - /* Clip to image size */ - pixman_region32_init_rect (&r2, 0, 0, image_size, image_size); - pixman_region32_intersect (&r1, &r1, &r2); - pixman_region32_fini (&r2); - - /* render region to a1 mask */ - image = pixman_image_create_bits (PIXMAN_a1, image_size, image_size, NULL, 0); - pixman_image_set_clip_region32 (image, &r1); - pixman_image_composite32 (PIXMAN_OP_SRC, - fill, NULL, image, - 0, 
0, 0, 0, 0, 0, - image_size, image_size); - pixman_region32_init_from_image (&r2, image); - - pixman_image_unref (image); - - assert (pixman_region32_equal (&r1, &r2)); - pixman_region32_fini (&r1); - pixman_region32_fini (&r2); - - } - pixman_image_unref (fill); - - return 0; -} +#include +#include +#include +#include "utils.h" + +int +main () +{ + pixman_region32_t r1; + pixman_region32_t r2; + pixman_region32_t r3; + pixman_box32_t boxes[] = { + { 10, 10, 20, 20 }, + { 30, 30, 30, 40 }, + { 50, 45, 60, 44 }, + }; + pixman_box32_t boxes2[] = { + { 2, 6, 7, 6 }, + { 4, 1, 6, 7 }, + }; + pixman_box32_t boxes3[] = { + { 2, 6, 7, 6 }, + { 4, 1, 6, 1 }, + }; + int i, j; + pixman_box32_t *b; + pixman_image_t *image, *fill; + pixman_color_t white = { + 0xffff, + 0xffff, + 0xffff, + 0xffff + }; + + /* This used to go into an infinite loop before pixman-region.c + * was fixed to not use explict "short" variables + */ + pixman_region32_init_rect (&r1, 0, 0, 20, 64000); + pixman_region32_init_rect (&r2, 0, 0, 20, 64000); + pixman_region32_init_rect (&r3, 0, 0, 20, 64000); + + pixman_region32_subtract (&r1, &r2, &r3); + + + /* This would produce a region containing an empty + * rectangle in it. Such regions are considered malformed, + * but using an empty rectangle for initialization should + * work. + */ + pixman_region32_init_rects (&r1, boxes, 3); + + b = pixman_region32_rectangles (&r1, &i); + + assert (i == 1); + + while (i--) + { + assert (b[i].x1 < b[i].x2); + assert (b[i].y1 < b[i].y2); + } + + /* This would produce a rectangle containing the bounding box + * of the two rectangles. The correct result is to eliminate + * the broken rectangle. 
+ */ + pixman_region32_init_rects (&r1, boxes2, 2); + + b = pixman_region32_rectangles (&r1, &i); + + assert (i == 1); + + assert (b[0].x1 == 4); + assert (b[0].y1 == 1); + assert (b[0].x2 == 6); + assert (b[0].y2 == 7); + + /* This should produce an empty region */ + pixman_region32_init_rects (&r1, boxes3, 2); + + b = pixman_region32_rectangles (&r1, &i); + + assert (i == 0); + + fill = pixman_image_create_solid_fill (&white); + for (i = 0; i < 100; i++) + { + int image_size = 128; + + pixman_region32_init (&r1); + + /* Add some random rectangles */ + for (j = 0; j < 64; j++) + pixman_region32_union_rect (&r1, &r1, + lcg_rand_n (image_size), + lcg_rand_n (image_size), + lcg_rand_n (25), + lcg_rand_n (25)); + + /* Clip to image size */ + pixman_region32_init_rect (&r2, 0, 0, image_size, image_size); + pixman_region32_intersect (&r1, &r1, &r2); + pixman_region32_fini (&r2); + + /* render region to a1 mask */ + image = pixman_image_create_bits (PIXMAN_a1, image_size, image_size, NULL, 0); + pixman_image_set_clip_region32 (image, &r1); + pixman_image_composite32 (PIXMAN_OP_SRC, + fill, NULL, image, + 0, 0, 0, 0, 0, 0, + image_size, image_size); + pixman_region32_init_from_image (&r2, image); + + pixman_image_unref (image); + + assert (pixman_region32_equal (&r1, &r2)); + pixman_region32_fini (&r1); + pixman_region32_fini (&r2); + + } + pixman_image_unref (fill); + + return 0; +} -- cgit v1.2.3