diff options
author | marha <marha@users.sourceforge.net> | 2012-06-08 09:33:13 +0200 |
---|---|---|
committer | marha <marha@users.sourceforge.net> | 2012-06-08 09:33:13 +0200 |
commit | 990bc3f015a4f8fce2eb918375defcd44980a845 (patch) | |
tree | 8e8301f19482b52cc00bd95b4593522cc93267af /pixman | |
parent | 1af6fc1b5d93e54d6674de8b5870448b29f139a7 (diff) | |
download | vcxsrv-990bc3f015a4f8fce2eb918375defcd44980a845.tar.gz vcxsrv-990bc3f015a4f8fce2eb918375defcd44980a845.tar.bz2 vcxsrv-990bc3f015a4f8fce2eb918375defcd44980a845.zip |
Used synchronise script to update files
Diffstat (limited to 'pixman')
-rw-r--r-- | pixman/.gitignore | 81 | ||||
-rw-r--r-- | pixman/COPYING | 84 | ||||
-rw-r--r-- | pixman/INSTALL | 468 | ||||
-rw-r--r-- | pixman/TODO | 542 | ||||
-rw-r--r-- | pixman/pixman-1-uninstalled.pc.in | 10 | ||||
-rw-r--r-- | pixman/pixman-1.pc.in | 22 | ||||
-rw-r--r-- | pixman/pixman/make-combine.pl | 172 | ||||
-rw-r--r-- | pixman/pixman/pixman-access-accessors.c | 6 | ||||
-rw-r--r-- | pixman/pixman/pixman-arm-neon-asm.h | 2354 | ||||
-rw-r--r-- | pixman/pixman/pixman-edge-accessors.c | 8 | ||||
-rw-r--r-- | pixman/pixman/pixman-edge-imp.h | 364 | ||||
-rw-r--r-- | pixman/pixman/pixman-edge.c | 768 | ||||
-rw-r--r-- | pixman/pixman/pixman-matrix.c | 1532 | ||||
-rw-r--r-- | pixman/pixman/pixman-timer.c | 132 | ||||
-rw-r--r-- | pixman/pixman/pixman-version.h.in | 100 | ||||
-rw-r--r-- | pixman/test/fuzzer-find-diff.pl | 136 | ||||
-rw-r--r-- | pixman/test/region-test.c | 246 |
17 files changed, 3553 insertions, 3472 deletions
diff --git a/pixman/.gitignore b/pixman/.gitignore new file mode 100644 index 000000000..98612c91f --- /dev/null +++ b/pixman/.gitignore @@ -0,0 +1,81 @@ +Makefile +Makefile.in +.deps +.libs +.msg +*.pc +*.lo +*.la +*.a +*.o +*~ +aclocal.m4 +autom4te.cache +compile +config.guess +config.log +config.status +config.sub +configure +depcomp +install-sh +libtool +ltmain.sh +missing +stamp-h? +config.h +config.h.in +.*.swp +demos/alpha-test +demos/checkerboard +demos/clip-in +demos/clip-test +demos/composite-test +demos/convolution-test +demos/gradient-test +demos/quad2quad +demos/radial-test +demos/screen-test +demos/trap-test +demos/tri-test +pixman/pixman-combine32.c +pixman/pixman-combine32.h +pixman/pixman-combine64.c +pixman/pixman-combine64.h +pixman/pixman-version.h +test/a1-trap-test +test/affine-test +test/alpha-loop +test/alphamap +test/alpha-test +test/blitters-test +test/clip-in +test/clip-test +test/composite +test/composite-test +test/composite-traps-test +test/convolution-test +test/fetch-test +test/gradient-crash-test +test/gradient-test +test/lowlevel-blt-bench +test/oob-test +test/pdf-op-test +test/region-contains-test +test/region-test +test/region-translate +test/region-translate-test +test/scaling-crash-test +test/scaling-helpers-test +test/scaling-test +test/screen-test +test/stress-test +test/trap-crasher +test/trap-test +test/window-test +*.pdb +*.dll +*.lib +*.ilk +*.obj +*.exe diff --git a/pixman/COPYING b/pixman/COPYING index 11b022bc2..6168dea56 100644 --- a/pixman/COPYING +++ b/pixman/COPYING @@ -1,42 +1,42 @@ -The following is the MIT license, agreed upon by most contributors.
-Copyright holders of new code should use this license statement where
-possible. They may also add themselves to the list below.
-
-/*
- * Copyright 1987, 1988, 1989, 1998 The Open Group
- * Copyright 1987, 1988, 1989 Digital Equipment Corporation
- * Copyright 1999, 2004, 2008 Keith Packard
- * Copyright 2000 SuSE, Inc.
- * Copyright 2000 Keith Packard, member of The XFree86 Project, Inc.
- * Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc.
- * Copyright 2004 Nicholas Miell
- * Copyright 2005 Lars Knoll & Zack Rusin, Trolltech
- * Copyright 2005 Trolltech AS
- * Copyright 2007 Luca Barbato
- * Copyright 2008 Aaron Plattner, NVIDIA Corporation
- * Copyright 2008 Rodrigo Kumpera
- * Copyright 2008 André Tupinambá
- * Copyright 2008 Mozilla Corporation
- * Copyright 2008 Frederic Plourde
- * Copyright 2009, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2009, 2010 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
+The following is the MIT license, agreed upon by most contributors. +Copyright holders of new code should use this license statement where +possible. They may also add themselves to the list below. + +/* + * Copyright 1987, 1988, 1989, 1998 The Open Group + * Copyright 1987, 1988, 1989 Digital Equipment Corporation + * Copyright 1999, 2004, 2008 Keith Packard + * Copyright 2000 SuSE, Inc. + * Copyright 2000 Keith Packard, member of The XFree86 Project, Inc. + * Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc. + * Copyright 2004 Nicholas Miell + * Copyright 2005 Lars Knoll & Zack Rusin, Trolltech + * Copyright 2005 Trolltech AS + * Copyright 2007 Luca Barbato + * Copyright 2008 Aaron Plattner, NVIDIA Corporation + * Copyright 2008 Rodrigo Kumpera + * Copyright 2008 André Tupinambá + * Copyright 2008 Mozilla Corporation + * Copyright 2008 Frederic Plourde + * Copyright 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright 2009, 2010 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ diff --git a/pixman/INSTALL b/pixman/INSTALL index 5458714e1..cf1202b66 100644 --- a/pixman/INSTALL +++ b/pixman/INSTALL @@ -1,234 +1,234 @@ -Installation Instructions -************************* - -Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, -2006 Free Software Foundation, Inc. - -This file is free documentation; the Free Software Foundation gives -unlimited permission to copy, distribute and modify it. - -Basic Installation -================== - -Briefly, the shell commands `./configure; make; make install' should -configure, build, and install this package. The following -more-detailed instructions are generic; see the `README' file for -instructions specific to this package. - - The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, and a -file `config.log' containing compiler output (useful mainly for -debugging `configure'). - - It can also use an optional file (typically called `config.cache' -and enabled with `--cache-file=config.cache' or simply `-C') that saves -the results of its tests to speed up reconfiguring. Caching is -disabled by default to prevent problems with accidental use of stale -cache files. 
- - If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can -be considered for the next release. If you are using the cache, and at -some point `config.cache' contains results you don't want to keep, you -may remove or edit it. - - The file `configure.ac' (or `configure.in') is used to create -`configure' by a program called `autoconf'. You need `configure.ac' if -you want to change it or regenerate `configure' using a newer version -of `autoconf'. - -The simplest way to compile this package is: - - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. - - Running `configure' might take a while. While running, it prints - some messages telling which features it is checking for. - - 2. Type `make' to compile the package. - - 3. Optionally, type `make check' to run any self-tests that come with - the package. - - 4. Type `make install' to install the programs and any data files and - documentation. - - 5. You can remove the program binaries and object files from the - source code directory by typing `make clean'. To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. There is - also a `make maintainer-clean' target, but that is intended mainly - for the package's developers. If you use it, you may have to get - all sorts of other programs in order to regenerate files that came - with the distribution. - -Compilers and Options -===================== - -Some systems require unusual options for compilation or linking that the -`configure' script does not know about. Run `./configure --help' for -details on some of the pertinent environment variables. 
- - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. Here -is an example: - - ./configure CC=c99 CFLAGS=-g LIBS=-lposix - - *Note Defining Variables::, for more details. - -Compiling For Multiple Architectures -==================================== - -You can compile the package for more than one kind of computer at the -same time, by placing the object files for each architecture in their -own directory. To do this, you can use GNU `make'. `cd' to the -directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. - - With a non-GNU `make', it is safer to compile the package for one -architecture at a time in the source code directory. After you have -installed the package for one architecture, use `make distclean' before -reconfiguring for another architecture. - -Installation Names -================== - -By default, `make install' installs the package's commands under -`/usr/local/bin', include files under `/usr/local/include', etc. You -can specify an installation prefix other than `/usr/local' by giving -`configure' the option `--prefix=PREFIX'. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -pass the option `--exec-prefix=PREFIX' to `configure', the package uses -PREFIX as the prefix for installing programs and libraries. -Documentation and other data files still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like `--bindir=DIR' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. 
- - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. - -Optional Features -================= - -Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. -They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the -package recognizes. - - For packages that use the X Window System, `configure' can usually -find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. - -Specifying the System Type -========================== - -There may be some features `configure' cannot figure out automatically, -but needs to determine by the type of machine the package will run on. -Usually, assuming the package is built to be run on the _same_ -architectures, `configure' can figure that out, but if it prints a -message saying it cannot guess the machine type, give it the -`--build=TYPE' option. TYPE can either be a short name for the system -type, such as `sun4', or a canonical name which has the form: - - CPU-COMPANY-SYSTEM - -where SYSTEM can have one of these forms: - - OS KERNEL-OS - - See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't -need to know the machine type. - - If you are _building_ compiler tools for cross-compiling, you should -use the option `--target=TYPE' to select the type of system they will -produce code for. 
- - If you want to _use_ a cross compiler, that generates code for a -platform different from the build platform, you should specify the -"host" platform (i.e., that on which the generated programs will -eventually be run) with `--host=TYPE'. - -Sharing Defaults -================ - -If you want to set default values for `configure' scripts to share, you -can create a site shell script called `config.site' that gives default -values for variables like `CC', `cache_file', and `prefix'. -`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. - -Defining Variables -================== - -Variables not defined in a site shell script can be set in the -environment passed to `configure'. However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the `configure' command line, using `VAR=value'. For example: - - ./configure CC=/usr/local2/bin/gcc - -causes the specified `gcc' to be used as the C compiler (unless it is -overridden in the site shell script). - -Unfortunately, this technique does not work for `CONFIG_SHELL' due to -an Autoconf bug. Until the bug is fixed you can use this workaround: - - CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash - -`configure' Invocation -====================== - -`configure' recognizes the following options to control how it operates. - -`--help' -`-h' - Print a summary of the options to `configure', and exit. - -`--version' -`-V' - Print the version of Autoconf used to generate the `configure' - script, and exit. - -`--cache-file=FILE' - Enable the cache: use and save the results of the tests in FILE, - traditionally `config.cache'. FILE defaults to `/dev/null' to - disable caching. 
- -`--config-cache' -`-C' - Alias for `--cache-file=config.cache'. - -`--quiet' -`--silent' -`-q' - Do not print messages saying which checks are being made. To - suppress all normal output, redirect it to `/dev/null' (any error - messages will still be shown). - -`--srcdir=DIR' - Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. - -`configure' also accepts some other, not widely useful, options. Run -`configure --help' for more details. - +Installation Instructions
+*************************
+
+Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
+2006 Free Software Foundation, Inc.
+
+This file is free documentation; the Free Software Foundation gives
+unlimited permission to copy, distribute and modify it.
+
+Basic Installation
+==================
+
+Briefly, the shell commands `./configure; make; make install' should
+configure, build, and install this package. The following
+more-detailed instructions are generic; see the `README' file for
+instructions specific to this package.
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions. Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, and a
+file `config.log' containing compiler output (useful mainly for
+debugging `configure').
+
+ It can also use an optional file (typically called `config.cache'
+and enabled with `--cache-file=config.cache' or simply `-C') that saves
+the results of its tests to speed up reconfiguring. Caching is
+disabled by default to prevent problems with accidental use of stale
+cache files.
+
+ If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release. If you are using the cache, and at
+some point `config.cache' contains results you don't want to keep, you
+may remove or edit it.
+
+ The file `configure.ac' (or `configure.in') is used to create
+`configure' by a program called `autoconf'. You need `configure.ac' if
+you want to change it or regenerate `configure' using a newer version
+of `autoconf'.
+
+The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system.
+
+ Running `configure' might take a while. While running, it prints
+ some messages telling which features it is checking for.
+
+ 2. Type `make' to compile the package.
+
+ 3. Optionally, type `make check' to run any self-tests that come with
+ the package.
+
+ 4. Type `make install' to install the programs and any data files and
+ documentation.
+
+ 5. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'. There is
+ also a `make maintainer-clean' target, but that is intended mainly
+ for the package's developers. If you use it, you may have to get
+ all sorts of other programs in order to regenerate files that came
+ with the distribution.
+
+Compilers and Options
+=====================
+
+Some systems require unusual options for compilation or linking that the
+`configure' script does not know about. Run `./configure --help' for
+details on some of the pertinent environment variables.
+
+ You can give `configure' initial values for configuration parameters
+by setting variables in the command line or in the environment. Here
+is an example:
+
+ ./configure CC=c99 CFLAGS=-g LIBS=-lposix
+
+ *Note Defining Variables::, for more details.
+
+Compiling For Multiple Architectures
+====================================
+
+You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory. To do this, you can use GNU `make'. `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script. `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+
+ With a non-GNU `make', it is safer to compile the package for one
+architecture at a time in the source code directory. After you have
+installed the package for one architecture, use `make distclean' before
+reconfiguring for another architecture.
+
+Installation Names
+==================
+
+By default, `make install' installs the package's commands under
+`/usr/local/bin', include files under `/usr/local/include', etc. You
+can specify an installation prefix other than `/usr/local' by giving
+`configure' the option `--prefix=PREFIX'.
+
+ You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If you
+pass the option `--exec-prefix=PREFIX' to `configure', the package uses
+PREFIX as the prefix for installing programs and libraries.
+Documentation and other data files still use the regular prefix.
+
+ In addition, if you use an unusual directory layout you can give
+options like `--bindir=DIR' to specify different values for particular
+kinds of files. Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.
+
+ If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+Optional Features
+=================
+
+Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System). The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+ For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+Specifying the System Type
+==========================
+
+There may be some features `configure' cannot figure out automatically,
+but needs to determine by the type of machine the package will run on.
+Usually, assuming the package is built to be run on the _same_
+architectures, `configure' can figure that out, but if it prints a
+message saying it cannot guess the machine type, give it the
+`--build=TYPE' option. TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+
+ CPU-COMPANY-SYSTEM
+
+where SYSTEM can have one of these forms:
+
+ OS KERNEL-OS
+
+ See the file `config.sub' for the possible values of each field. If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+
+ If you are _building_ compiler tools for cross-compiling, you should
+use the option `--target=TYPE' to select the type of system they will
+produce code for.
+
+ If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+
+Sharing Defaults
+================
+
+If you want to set default values for `configure' scripts to share, you
+can create a site shell script called `config.site' that gives default
+values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Defining Variables
+==================
+
+Variables not defined in a site shell script can be set in the
+environment passed to `configure'. However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost. In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'. For example:
+
+ ./configure CC=/usr/local2/bin/gcc
+
+causes the specified `gcc' to be used as the C compiler (unless it is
+overridden in the site shell script).
+
+Unfortunately, this technique does not work for `CONFIG_SHELL' due to
+an Autoconf bug. Until the bug is fixed you can use this workaround:
+
+ CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
+
+`configure' Invocation
+======================
+
+`configure' recognizes the following options to control how it operates.
+
+`--help'
+`-h'
+ Print a summary of the options to `configure', and exit.
+
+`--version'
+`-V'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`--cache-file=FILE'
+ Enable the cache: use and save the results of the tests in FILE,
+ traditionally `config.cache'. FILE defaults to `/dev/null' to
+ disable caching.
+
+`--config-cache'
+`-C'
+ Alias for `--cache-file=config.cache'.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made. To
+ suppress all normal output, redirect it to `/dev/null' (any error
+ messages will still be shown).
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`configure' also accepts some other, not widely useful, options. Run
+`configure --help' for more details.
+
diff --git a/pixman/TODO b/pixman/TODO index 4434ec7cb..465abe7b5 100644 --- a/pixman/TODO +++ b/pixman/TODO @@ -1,271 +1,271 @@ - - Testing - - Test implementations against each other - - Test both with and without the operator strength reduction. - They shold be identical. - - - SSE 2 issues: - - - Use MM_HINT_NTA instead of MM_HINT_T0 - - - Use of fbCompositeOver_x888x8x8888sse2() - - - Update the RLEASING file - - - Things to keep in mind if breaking ABI: - - - There should be a guard #ifndef I_AM_EITHER_CAIRO_OR_THE_X_SERVER - - - X server will require 16.16 essentially forever. Can we get - the required precision by simply adding offset_x/y to the - relevant rendering API? - - - Get rid of workaround for X server bug. - - - pixman_image_set_indexed() should copy its argument, and X - should be ported over to use a pixman_image as the - representation of a Picture, rather than creating one on each - operation. - - - We should get rid of pixman_set_static_pointers() - - - We should get rid of the various trapezoid helper functions(). - (They only exist because they are theoretically available to - drivers). - - - 16 bit regions should be deleted - - - There should only be one trap rasterization API. - - - The PIXMAN_g8/c8/etc formats should use the A channel - to indicate the actual depth. That way PIXMAN_x4c4 and PIXMAN_c8 - won't collide. - - - Maybe bite the bullet and make configure.ac generate a pixman-types.h - file that can be included from pixman.h to avoid the #ifdef magic - in pixman.h - - - Make pixman_region_point_in() survive a NULL box, then fix up - pixman-compose.c - - - Possibly look into inlining the fetch functions - - - There is a bug with source clipping demonstrated by clip-test in the - test directory. If we interprete source clipping as given in - destination coordinates, which is probably the only sane choice, - then the result should have two red bars down the sides. 
- - - Test suite - - - Add a general way of dealing with architecture specific - fast-paths. The current idea is to have each operation that can - be optimized is called through a function pointer that is - initially set to an initialization function that is responsible for - setting the function pointer to the appropriate fast-path. - - - Go through things marked FIXME - - - Add calls to prepare and finish access where necessary. grep for - ACCESS_MEM, and make sure they are correctly wrapped in prepare - and finish. - - - restore READ/WRITE in the fbcompose combiners since they sometimes - store directly to destination drawables. - - - It probably makes sense to move the more strange X region API - into pixman as well, but guarded with PIXMAN_XORG_COMPATIBILITY - - - Reinstate the FbBits typedef? At the moment we don't - even have the FbBits type; we just use uint32_t everywhere. - - Keith says in bug 2335: - - The 64-bit code in fb (pixman) is probably broken; it hasn't been - used in quite some time as PCI (and AGP) is 32-bits wide, so - doing things 64-bits at a time is a net loss. To quickly fix - this, I suggest just using 32-bit datatypes by setting - IC_SHIFT to 5 for all machines. - - - Consider optimizing the 8/16 bit solid fills in pixman-util.c by - storing more than one value at a time. - - - Add an image cache to prevent excessive malloc/free. Note that pixman - needs to be thread safe when used from cairo. - - - Moving to 24.8 coordinates. This is tricky because X is still - defined as 16.16 and will be basically forever. It's possible we - could do this by adding extra offset_x/y parameters to the - trapezoid calls. The X server could then just call the API with - (0, 0). Cairo would have to make sure that the delta *within* a - batch of trapezoids does not exceed 16 bit. - - - Consider adding actual backends. 
Brain dump: - - A backend is something that knows how to - - - Create images - - Composite three images - - Rasterize trapezoids - - Do solid fills and blits - - These operations are provided by a vtable that the backend will - create when it is initialized. Initial backends: - - - VMX - - SSE2 - - MMX - - Plain Old C - - When the SIMD backends are initialized, they will be passed a - pointer to the Plain Old C backend that they can use for fallback - purposes. - - Images would gain a vtable as well that would contain things like - - - Read scanline - - Write scanline - - (Or even read_patch/write_patch as suggested by Keith a while - back). - - This could simplify the compositing code considerably. - - - Review the pixman_format_code_t enum to make sure it will support - future formats. Some formats we will probably need: - - ARGB/ABGR with 16/32/64 bit integer/floating channels - YUV2, - YV12 - - Also we may need the ability to distinguish between PICT_c8 and - PICT_x4c4. (This could be done by interpreting the A channel as - the depth for TYPE_COLOR and TYPE_GRAY formats). - - A possibility may be to reserve the two top bits and make them - encode "number of places to shift the channel widths given" Since - these bits are 00 at the moment everything will continue to work, - but these additional widths will be allowed: - - All even widths between 18-32 - All multiples of four widths between 33 and 64 - All multiples of eight between 64 and 128 - - This means things like r21g22b21 won't work - is that worth - worrying about? I don't think so. And of course the bpp field - can't handle a depth of over 256, so > 64 bit channels arent' - really all that useful. - - We could reserve one extra bit to indicate floating point, but - we may also just add - - PIXMAN_TYPE_ARGB_FLOAT - PIXMAN_TYPE_BGRA_FLOAT - PIXMAN_TYPE_A_FLOAT - - image types. 
With five bits we can support up to 32 different - format types, which should be enough for everybody, even if we - decide to support all the various video formats here: - - http://www.fourcc.org/yuv.php - - It may make sense to have a PIXMAN_TYPE_YUV, and then use the - channel bits to specify the exact subtype. - - Another possibility is to add - - PIXMAN_TYPE_ARGB_W - PIXMAN_TYPE_ARGB_WW - - where the channel widths would get 16 and 32 added to them, - respectively. - - What about color spaces such a linear vs. srGB etc.? - - -done: - -- Use pixmanFillsse2 and pixmanBltsse2 - -- Be consistent about calling sse2 sse2 - -- Rename "SSE" to "MMX_EXTENSIONS". (Deleted mmx extensions). - -- Commented-out uses of fbCompositeCopyAreasse2() - -- Consider whether calling regions region16 is really such a great - idea. Vlad wants 32 bit regions for Cairo. This will break X server - ABI, but should otherwise be mostly harmless, though a - pixman_region_get_boxes16() may be useful. - -- Altivec signal issue (Company has fix, there is also a patch by - dwmw2 in rawhide). - -- Behdad's MMX issue - see list - -- SSE2 issues: - - Crashes in Mozilla because of unaligned stack. Possible fixes - - Make use of gcc 4.2 feature to align the stack - - Write some sort of trampoline that aligns the stack - before calling SSE functions. - -- Get rid of the switch-of-doom; replace it with a big table - describing the various fast paths. - -- Make source clipping optional. - - done: source clipping happens through an indirection. - still needs to make the indirection settable. (And call it - from X) - -- Run cairo test suite; fix bugs - - one bug in source-scale-clip - - - Remove the warning suppression in the ACCESS_MEM macro and fix the - warnings that are real - - irrelevant now. 
- -- make the wrapper functions global instead of image specific - - this won't work since pixman is linked to both fb and wfb - -- Add non-mmx solid fill - -- Make sure the endian-ness macros are defined correctly. - -- The rectangles in a region probably shouldn't be returned const as - the X server will be changing them. - -- Right now we _always_ have a clip region, which is empty by default. - Why does this work at all? It probably doesn't. The server - distinguishes two cases, one where nothing is clipped (CT_NONE), and - one where there is a clip region (CT_REGION). - -- Default clip region should be the full image - - - Test if pseudo color still works. It does, but it also shows that - copying a pixman_indexed_t on every composite operation is not - going to fly. So, for now set_indexed() does not copy the - indexed table. - - Also just the malloc() to allocate a pixman image shows up pretty - high. - - Options include - - - Make all the setters not copy their arguments - - - Possibly combined with going back to the stack allocated - approach that we already use for regions. - - - Keep a cached pixman_image_t around for every picture. It would - have to be kept uptodate every time something changes about the - picture. - - - Break the X server ABI and simply have the relevant parameter - stored in the pixman image. This would have the additional benefits - that: - - - We can get rid of the annoying repeat field which is duplicated - elsewhere. - - - We can use pixman_color_t and pixman_gradient_stop_t - etc. instead of the types that are defined in - renderproto.h - + - Testing
+ - Test implementations against each other
+ - Test both with and without the operator strength reduction.
+ They should be identical.
+
+ - SSE 2 issues:
+
+ - Use MM_HINT_NTA instead of MM_HINT_T0
+
+ - Use of fbCompositeOver_x888x8x8888sse2()
+
+ - Update the RELEASING file
+
+ - Things to keep in mind if breaking ABI:
+
+ - There should be a guard #ifndef I_AM_EITHER_CAIRO_OR_THE_X_SERVER
+
+ - X server will require 16.16 essentially forever. Can we get
+ the required precision by simply adding offset_x/y to the
+ relevant rendering API?
+
+ - Get rid of workaround for X server bug.
+
+ - pixman_image_set_indexed() should copy its argument, and X
+ should be ported over to use a pixman_image as the
+ representation of a Picture, rather than creating one on each
+ operation.
+
+ - We should get rid of pixman_set_static_pointers()
+
+ - We should get rid of the various trapezoid helper functions().
+ (They only exist because they are theoretically available to
+ drivers).
+
+ - 16 bit regions should be deleted
+
+ - There should only be one trap rasterization API.
+
+ - The PIXMAN_g8/c8/etc formats should use the A channel
+ to indicate the actual depth. That way PIXMAN_x4c4 and PIXMAN_c8
+ won't collide.
+
+ - Maybe bite the bullet and make configure.ac generate a pixman-types.h
+ file that can be included from pixman.h to avoid the #ifdef magic
+ in pixman.h
+
+ - Make pixman_region_point_in() survive a NULL box, then fix up
+ pixman-compose.c
+
+ - Possibly look into inlining the fetch functions
+
+ - There is a bug with source clipping demonstrated by clip-test in the
+ test directory. If we interpret source clipping as given in
+ destination coordinates, which is probably the only sane choice,
+ then the result should have two red bars down the sides.
+
+ - Test suite
+
+ - Add a general way of dealing with architecture specific
+ fast-paths. The current idea is to have each operation that can
+ be optimized be called through a function pointer that is
+ initially set to an initialization function that is responsible for
+ setting the function pointer to the appropriate fast-path.
+
+ - Go through things marked FIXME
+
+ - Add calls to prepare and finish access where necessary. grep for
+ ACCESS_MEM, and make sure they are correctly wrapped in prepare
+ and finish.
+
+ - restore READ/WRITE in the fbcompose combiners since they sometimes
+ store directly to destination drawables.
+
+ - It probably makes sense to move the more strange X region API
+ into pixman as well, but guarded with PIXMAN_XORG_COMPATIBILITY
+
+ - Reinstate the FbBits typedef? At the moment we don't
+ even have the FbBits type; we just use uint32_t everywhere.
+
+ Keith says in bug 2335:
+
+ The 64-bit code in fb (pixman) is probably broken; it hasn't been
+ used in quite some time as PCI (and AGP) is 32-bits wide, so
+ doing things 64-bits at a time is a net loss. To quickly fix
+ this, I suggest just using 32-bit datatypes by setting
+ IC_SHIFT to 5 for all machines.
+
+ - Consider optimizing the 8/16 bit solid fills in pixman-util.c by
+ storing more than one value at a time.
+
+ - Add an image cache to prevent excessive malloc/free. Note that pixman
+ needs to be thread safe when used from cairo.
+
+ - Moving to 24.8 coordinates. This is tricky because X is still
+ defined as 16.16 and will be basically forever. It's possible we
+ could do this by adding extra offset_x/y parameters to the
+ trapezoid calls. The X server could then just call the API with
+ (0, 0). Cairo would have to make sure that the delta *within* a
+ batch of trapezoids does not exceed 16 bit.
+
+ - Consider adding actual backends. Brain dump:
+
+ A backend is something that knows how to
+
+ - Create images
+ - Composite three images
+ - Rasterize trapezoids
+ - Do solid fills and blits
+
+ These operations are provided by a vtable that the backend will
+ create when it is initialized. Initial backends:
+
+ - VMX
+ - SSE2
+ - MMX
+ - Plain Old C
+
+ When the SIMD backends are initialized, they will be passed a
+ pointer to the Plain Old C backend that they can use for fallback
+ purposes.
+
+ Images would gain a vtable as well that would contain things like
+
+ - Read scanline
+ - Write scanline
+
+ (Or even read_patch/write_patch as suggested by Keith a while
+ back).
+
+ This could simplify the compositing code considerably.
+
+ - Review the pixman_format_code_t enum to make sure it will support
+ future formats. Some formats we will probably need:
+
+ ARGB/ABGR with 16/32/64 bit integer/floating channels
+ YUV2,
+ YV12
+
+ Also we may need the ability to distinguish between PICT_c8 and
+ PICT_x4c4. (This could be done by interpreting the A channel as
+ the depth for TYPE_COLOR and TYPE_GRAY formats).
+
+ A possibility may be to reserve the two top bits and make them
+ encode "number of places to shift the channel widths given". Since
+ these bits are 00 at the moment everything will continue to work,
+ but these additional widths will be allowed:
+
+ All even widths between 18-32
+ All multiples of four widths between 33 and 64
+ All multiples of eight between 64 and 128
+
+ This means things like r21g22b21 won't work - is that worth
+ worrying about? I don't think so. And of course the bpp field
+ can't handle a depth of over 256, so > 64 bit channels aren't
+ really all that useful.
+
+ We could reserve one extra bit to indicate floating point, but
+ we may also just add
+
+ PIXMAN_TYPE_ARGB_FLOAT
+ PIXMAN_TYPE_BGRA_FLOAT
+ PIXMAN_TYPE_A_FLOAT
+
+ image types. With five bits we can support up to 32 different
+ format types, which should be enough for everybody, even if we
+ decide to support all the various video formats here:
+
+ http://www.fourcc.org/yuv.php
+
+ It may make sense to have a PIXMAN_TYPE_YUV, and then use the
+ channel bits to specify the exact subtype.
+
+ Another possibility is to add
+
+ PIXMAN_TYPE_ARGB_W
+ PIXMAN_TYPE_ARGB_WW
+
+ where the channel widths would get 16 and 32 added to them,
+ respectively.
+
+ What about color spaces such as linear vs. sRGB etc.?
+
+
+done:
+
+- Use pixmanFillsse2 and pixmanBltsse2
+
+- Be consistent about calling sse2 sse2
+
+- Rename "SSE" to "MMX_EXTENSIONS". (Deleted mmx extensions).
+
+- Commented-out uses of fbCompositeCopyAreasse2()
+
+- Consider whether calling regions region16 is really such a great
+ idea. Vlad wants 32 bit regions for Cairo. This will break X server
+ ABI, but should otherwise be mostly harmless, though a
+ pixman_region_get_boxes16() may be useful.
+
+- Altivec signal issue (Company has fix, there is also a patch by
+ dwmw2 in rawhide).
+
+- Behdad's MMX issue - see list
+
+- SSE2 issues:
+ - Crashes in Mozilla because of unaligned stack. Possible fixes
+ - Make use of gcc 4.2 feature to align the stack
+ - Write some sort of trampoline that aligns the stack
+ before calling SSE functions.
+
+- Get rid of the switch-of-doom; replace it with a big table
+ describing the various fast paths.
+
+- Make source clipping optional.
+ - done: source clipping happens through an indirection.
+ still needs to make the indirection settable. (And call it
+ from X)
+
+- Run cairo test suite; fix bugs
+ - one bug in source-scale-clip
+
+ - Remove the warning suppression in the ACCESS_MEM macro and fix the
+ warnings that are real
+ - irrelevant now.
+
+- make the wrapper functions global instead of image specific
+ - this won't work since pixman is linked to both fb and wfb
+
+- Add non-mmx solid fill
+
+- Make sure the endian-ness macros are defined correctly.
+
+- The rectangles in a region probably shouldn't be returned const as
+ the X server will be changing them.
+
+- Right now we _always_ have a clip region, which is empty by default.
+ Why does this work at all? It probably doesn't. The server
+ distinguishes two cases, one where nothing is clipped (CT_NONE), and
+ one where there is a clip region (CT_REGION).
+
+- Default clip region should be the full image
+
+ - Test if pseudo color still works. It does, but it also shows that
+ copying a pixman_indexed_t on every composite operation is not
+ going to fly. So, for now set_indexed() does not copy the
+ indexed table.
+
+ Also just the malloc() to allocate a pixman image shows up pretty
+ high.
+
+ Options include
+
+ - Make all the setters not copy their arguments
+
+ - Possibly combined with going back to the stack allocated
+ approach that we already use for regions.
+
+ - Keep a cached pixman_image_t around for every picture. It would
+ have to be kept up to date every time something changes about the
+ picture.
+
+ - Break the X server ABI and simply have the relevant parameter
+ stored in the pixman image. This would have the additional benefits
+ that:
+
+ - We can get rid of the annoying repeat field which is duplicated
+ elsewhere.
+
+ - We can use pixman_color_t and pixman_gradient_stop_t
+ etc. instead of the types that are defined in
+ renderproto.h
+
diff --git a/pixman/pixman-1-uninstalled.pc.in b/pixman/pixman-1-uninstalled.pc.in index e0347d010..c15e86547 100644 --- a/pixman/pixman-1-uninstalled.pc.in +++ b/pixman/pixman-1-uninstalled.pc.in @@ -1,5 +1,5 @@ -Name: Pixman -Description: The pixman library (version 1) -Version: @PACKAGE_VERSION@ -Cflags: -I${pc_top_builddir}/${pcfiledir}/pixman -Libs: ${pc_top_builddir}/${pcfiledir}/pixman/libpixman-1.la +Name: Pixman
+Description: The pixman library (version 1)
+Version: @PACKAGE_VERSION@
+Cflags: -I${pc_top_builddir}/${pcfiledir}/pixman
+Libs: ${pc_top_builddir}/${pcfiledir}/pixman/libpixman-1.la
diff --git a/pixman/pixman-1.pc.in b/pixman/pixman-1.pc.in index 936d95db0..e44361749 100644 --- a/pixman/pixman-1.pc.in +++ b/pixman/pixman-1.pc.in @@ -1,11 +1,11 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ - -Name: Pixman -Description: The pixman library (version 1) -Version: @PACKAGE_VERSION@ -Cflags: -I${includedir}/pixman-1 @DEP_CFLAGS@ -Libs: -L${libdir} -lpixman-1 @DEP_LIBS@ - +prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: Pixman
+Description: The pixman library (version 1)
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}/pixman-1 @DEP_CFLAGS@
+Libs: -L${libdir} -lpixman-1 @DEP_LIBS@
+
diff --git a/pixman/pixman/make-combine.pl b/pixman/pixman/make-combine.pl index 210a5da12..417bdf085 100644 --- a/pixman/pixman/make-combine.pl +++ b/pixman/pixman/make-combine.pl @@ -1,86 +1,86 @@ -$usage = "Usage: combine.pl { 8 | 16 } < pixman-combine.c.template"; - -$#ARGV == 0 or die $usage; - -# Get the component size. -$size = int($ARGV[0]); -$size == 8 or $size == 16 or die $usage; - -$pixel_size = $size * 4; -$half_pixel_size = $size * 2; - -sub mask { - my $str = shift; - my $suffix; - $suffix = "ULL" if $size > 8; - - return "0x" . $str . $suffix; -} - -# Generate mask strings. -$nibbles = $size / 4; -$mask = "f" x $nibbles; -$zero_mask = "0" x $nibbles; -$one_half = "8" . "0" x ($nibbles - 1); - -print "/* WARNING: This file is generated by combine.pl from combine.inc.\n"; -print " Please edit one of those files rather than this one. */\n"; -print "\n"; - -print "#line 1 \"pixman-combine.c.template\"\n"; - -$mask_ = mask($mask); -$one_half_ = mask($one_half); -$g_mask = mask($mask . $zero_mask); -$b_mask = mask($mask . $zero_mask x 2); -$a_mask = mask($mask . $zero_mask x 3); -$rb_mask = mask($mask . $zero_mask . $mask); -$ag_mask = mask($mask . $zero_mask . $mask . $zero_mask); -$rb_one_half = mask($one_half . $zero_mask . $one_half); -$rb_mask_plus_one = mask("1" . $zero_mask x 2 . "1" . $zero_mask); - -while (<STDIN>) { - # Mask and 1/2 value for a single component. - s/#define COMPONENT_SIZE\b/$& $size/; - s/#define MASK\b/$& $mask_/; - s/#define ONE_HALF\b/$& $one_half_/; - - # Shifts and masks for green, blue, and alpha. - s/#define G_SHIFT\b/$& $size/; - s/#define R_SHIFT\b/$& $size * 2/; - s/#define A_SHIFT\b/$& $size * 3/; - s/#define G_MASK\b/$& $g_mask/; - s/#define R_MASK\b/$& $b_mask/; - s/#define A_MASK\b/$& $a_mask/; - - # Special values for dealing with red + blue at the same time. 
- s/#define RB_MASK\b/$& $rb_mask/; - s/#define AG_MASK\b/$& $ag_mask/; - s/#define RB_ONE_HALF\b/$& $rb_one_half/; - s/#define RB_MASK_PLUS_ONE\b/$& $rb_mask_plus_one/; - - # Add 32/64 suffix to combining function types. - s/\bCombineFunc\b/CombineFunc$pixel_size/; - s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/; - s/combine_width/combine_$pixel_size/; - s/_pixman_setup_combiner_functions_width/_pixman_setup_combiner_functions_$pixel_size/; - s/UNc/UN$size/g; - s/ALPHA_c/ALPHA_$size/g; - s/RED_c/RED_$size/g; - s/GREEN_c/GREEN_$size/g; - s/BLUE_c/BLUE_$size/g; - - # Convert comp*_t values into the appropriate real types. - s/comp1_t/uint${size}_t/g; - s/comp2_t/uint${half_pixel_size}_t/g; - s/comp4_t/uint${pixel_size}_t/g; - - # Change the function table name for the 64-bit version. - s/pixman_composeFunctions/pixman_composeFunctions64/ if $size == 16; - - # Change the header for the 64-bit version - s/pixman-combine.h/pixman-combine64.h/ if $size == 16; - s/pixman-combine.h/pixman-combine32.h/ if $size == 8; - - print; -} +$usage = "Usage: combine.pl { 8 | 16 } < pixman-combine.c.template";
+
+$#ARGV == 0 or die $usage; # require exactly one command-line argument
+
+# Get the component size.
+$size = int($ARGV[0]);
+$size == 8 or $size == 16 or die $usage; # only 8 and 16 are accepted
+
+$pixel_size = $size * 4; # 32 or 64
+$half_pixel_size = $size * 2; # 16 or 32
+
+sub mask { # turn a string of hex digits into the text of a C hex literal
+ my $str = shift; # hex digits, without the "0x" prefix
+ my $suffix; # stays undef when $size is 8, so nothing is appended below
+ $suffix = "ULL" if $size > 8; # reads the global $size set from the command line
+
+ return "0x" . $str . $suffix; # e.g. "0xff00" for $size == 8, "0xffff0000ULL" for $size == 16
+}
+
+# Generate mask strings.
+$nibbles = $size / 4; # hex digits per component: 2 or 4
+$mask = "f" x $nibbles; # all-ones component, e.g. "ff"
+$zero_mask = "0" x $nibbles; # all-zero component, e.g. "00"
+$one_half = "8" . "0" x ($nibbles - 1); # only the top bit set, e.g. "80"
+
+print "/* WARNING: This file is generated by combine.pl from combine.inc.\n"; # NOTE(review): the usage string and the #line directive below name pixman-combine.c.template, not combine.inc - confirm the banner text
+print " Please edit one of those files rather than this one. */\n";
+print "\n";
+
+print "#line 1 \"pixman-combine.c.template\"\n"; # emitted before any template line is copied
+
+$mask_ = mask($mask); # e.g. 0xff (examples here are for $size == 8)
+$one_half_ = mask($one_half); # e.g. 0x80
+$g_mask = mask($mask . $zero_mask); # e.g. 0xff00
+$b_mask = mask($mask . $zero_mask x 2); # e.g. 0xff0000
+$a_mask = mask($mask . $zero_mask x 3); # e.g. 0xff000000
+$rb_mask = mask($mask . $zero_mask . $mask); # e.g. 0xff00ff
+$ag_mask = mask($mask . $zero_mask . $mask . $zero_mask); # e.g. 0xff00ff00
+$rb_one_half = mask($one_half . $zero_mask . $one_half); # e.g. 0x800080
+$rb_mask_plus_one = mask("1" . $zero_mask x 2 . "1" . $zero_mask); # "x" binds tighter than ".", giving e.g. 0x10000100
+
+while (<STDIN>) { # each template line is read into $_, rewritten in place, then printed
+ # Mask and 1/2 value for a single component.
+ s/#define COMPONENT_SIZE\b/$& $size/; # $& re-inserts the matched text, so the value is appended after the macro name
+ s/#define MASK\b/$& $mask_/;
+ s/#define ONE_HALF\b/$& $one_half_/;
+
+ # Shifts and masks for green, red, and alpha (the G_*, R_* and A_* macros).
+ s/#define G_SHIFT\b/$& $size/;
+ s/#define R_SHIFT\b/$& $size * 2/; # no /e flag: emits the text "8 * 2" or "16 * 2", not the product
+ s/#define A_SHIFT\b/$& $size * 3/; # likewise emits "8 * 3" or "16 * 3"
+ s/#define G_MASK\b/$& $g_mask/;
+ s/#define R_MASK\b/$& $b_mask/; # NOTE(review): R_MASK is filled from $b_mask - presumably only a naming mismatch; confirm
+ s/#define A_MASK\b/$& $a_mask/;
+
+ # Special values for dealing with red + blue at the same time.
+ s/#define RB_MASK\b/$& $rb_mask/;
+ s/#define AG_MASK\b/$& $ag_mask/;
+ s/#define RB_ONE_HALF\b/$& $rb_one_half/;
+ s/#define RB_MASK_PLUS_ONE\b/$& $rb_mask_plus_one/;
+
+ # Add 32/64 suffix to combining function types.
+ s/\bCombineFunc\b/CombineFunc$pixel_size/; # no /g on these four: only the first match on a line is replaced
+ s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/;
+ s/combine_width/combine_$pixel_size/;
+ s/_pixman_setup_combiner_functions_width/_pixman_setup_combiner_functions_$pixel_size/;
+ s/UNc/UN$size/g;
+ s/ALPHA_c/ALPHA_$size/g;
+ s/RED_c/RED_$size/g;
+ s/GREEN_c/GREEN_$size/g;
+ s/BLUE_c/BLUE_$size/g;
+
+ # Convert comp*_t values into the appropriate real types.
+ s/comp1_t/uint${size}_t/g; # uint8_t or uint16_t
+ s/comp2_t/uint${half_pixel_size}_t/g; # uint16_t or uint32_t
+ s/comp4_t/uint${pixel_size}_t/g; # uint32_t or uint64_t
+
+ # Change the function table name for the 64-bit version.
+ s/pixman_composeFunctions/pixman_composeFunctions64/ if $size == 16;
+
+ # Change the header for the 64-bit version
+ s/pixman-combine.h/pixman-combine64.h/ if $size == 16; # the "." in the pattern is unescaped and matches any character
+ s/pixman-combine.h/pixman-combine32.h/ if $size == 8;
+
+ print; # write the rewritten line ($_) to standard output
+}
diff --git a/pixman/pixman/pixman-access-accessors.c b/pixman/pixman/pixman-access-accessors.c index 3263582f1..bde67a70e 100644 --- a/pixman/pixman/pixman-access-accessors.c +++ b/pixman/pixman/pixman-access-accessors.c @@ -1,3 +1,3 @@ -#define PIXMAN_FB_ACCESSORS - -#include "pixman-access.c" +#define PIXMAN_FB_ACCESSORS
+
+#include "pixman-access.c"
diff --git a/pixman/pixman/pixman-arm-neon-asm.h b/pixman/pixman/pixman-arm-neon-asm.h index 0ba67d05f..97adc6a87 100644 --- a/pixman/pixman/pixman-arm-neon-asm.h +++ b/pixman/pixman/pixman-arm-neon-asm.h @@ -1,1177 +1,1177 @@ -/*
- * Copyright © 2009 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-/*
- * This file contains a macro ('generate_composite_function') which can
- * construct 2D image processing functions, based on a common template.
- * Any combinations of source, destination and mask images with 8bpp,
- * 16bpp, 24bpp, 32bpp color formats are supported.
- *
- * This macro takes care of:
- * - handling of leading and trailing unaligned pixels
- * - doing most of the work related to L2 cache preload
- * - encourages the use of software pipelining for better instructions
- * scheduling
- *
- * The user of this macro has to provide some configuration parameters
- * (bit depths for the images, prefetch distance, etc.) and a set of
- * macros, which should implement basic code chunks responsible for
- * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage
- * examples.
- *
- * TODO:
- * - try overlapped pixel method (from Ian Rickards) when processing
- * exactly two blocks of pixels
- * - maybe add an option to do reverse scanline processing
- */
-
-/*
- * Bit flags for 'generate_composite_function' macro which are used
- * to tune generated functions behavior.
- */
-.set FLAG_DST_WRITEONLY, 0
-.set FLAG_DST_READWRITE, 1
-.set FLAG_DEINTERLEAVE_32BPP, 2
-
-/*
- * Offset in stack where mask and source pointer/stride can be accessed
- * from 'init' macro. This is useful for doing special handling for solid mask.
- */
-.set ARGS_STACK_OFFSET, 40
-
-/*
- * Constants for selecting preferable prefetch type.
- */
-.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */
-.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */
-.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */
-
-/*
- * Definitions of supplementary pixld/pixst macros (for partial load/store of
- * pixel data).
- */
-
-.macro pixldst1 op, elem_size, reg1, mem_operand, abits
-.if abits > 0
- op&.&elem_size {d®1}, [&mem_operand&, :&abits&]!
-.else
- op&.&elem_size {d®1}, [&mem_operand&]!
-.endif
-.endm
-
-.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits
-.if abits > 0
- op&.&elem_size {d®1, d®2}, [&mem_operand&, :&abits&]!
-.else
- op&.&elem_size {d®1, d®2}, [&mem_operand&]!
-.endif
-.endm
-
-.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits
-.if abits > 0
- op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&, :&abits&]!
-.else
- op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&]!
-.endif
-.endm
-
-.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits
- op&.&elem_size {d®1[idx]}, [&mem_operand&]!
-.endm
-
-.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand
- op&.&elem_size {d®1, d®2, d®3}, [&mem_operand&]!
-.endm
-
-.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand
- op&.&elem_size {d®1[idx], d®2[idx], d®3[idx]}, [&mem_operand&]!
-.endm
-
-.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits
-.if numbytes == 32
- pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \
- %(basereg+6), %(basereg+7), mem_operand, abits
-.elseif numbytes == 16
- pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits
-.elseif numbytes == 8
- pixldst1 op, elem_size, %(basereg+1), mem_operand, abits
-.elseif numbytes == 4
- .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32)
- pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits
- .elseif elem_size == 16
- pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits
- pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits
- .else
- pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits
- pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits
- pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits
- pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits
- .endif
-.elseif numbytes == 2
- .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16)
- pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits
- .else
- pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits
- pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits
- .endif
-.elseif numbytes == 1
- pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits
-.else
- .error "unsupported size: numbytes"
-.endif
-.endm
-
-.macro pixld numpix, bpp, basereg, mem_operand, abits=0
-.if bpp > 0
-.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
- pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \
- %(basereg+6), %(basereg+7), mem_operand, abits
-.elseif (bpp == 24) && (numpix == 8)
- pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
-.elseif (bpp == 24) && (numpix == 4)
- pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
- pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
- pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
- pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
-.elseif (bpp == 24) && (numpix == 2)
- pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
- pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
-.elseif (bpp == 24) && (numpix == 1)
- pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
-.else
- pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits
-.endif
-.endif
-.endm
-
-.macro pixst numpix, bpp, basereg, mem_operand, abits=0
-.if bpp > 0
-.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
- pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \
- %(basereg+6), %(basereg+7), mem_operand, abits
-.elseif (bpp == 24) && (numpix == 8)
- pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
-.elseif (bpp == 24) && (numpix == 4)
- pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
- pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
- pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
- pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
-.elseif (bpp == 24) && (numpix == 2)
- pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
- pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
-.elseif (bpp == 24) && (numpix == 1)
- pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
-.else
- pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits
-.endif
-.endif
-.endm
-
-.macro pixld_a numpix, bpp, basereg, mem_operand
-.if (bpp * numpix) <= 128
- pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix)
-.else
- pixld numpix, bpp, basereg, mem_operand, 128
-.endif
-.endm
-
-.macro pixst_a numpix, bpp, basereg, mem_operand
-.if (bpp * numpix) <= 128
- pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix)
-.else
- pixst numpix, bpp, basereg, mem_operand, 128
-.endif
-.endm
-
-/*
- * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register
- * aliases to be defined)
- */
-.macro pixld1_s elem_size, reg1, mem_operand
-.if elem_size == 16
- mov TMP1, VX, asr #16
- add VX, VX, UNIT_X
- add TMP1, mem_operand, TMP1, asl #1
- mov TMP2, VX, asr #16
- add VX, VX, UNIT_X
- add TMP2, mem_operand, TMP2, asl #1
- vld1.16 {d®1&[0]}, [TMP1, :16]
- mov TMP1, VX, asr #16
- add VX, VX, UNIT_X
- add TMP1, mem_operand, TMP1, asl #1
- vld1.16 {d®1&[1]}, [TMP2, :16]
- mov TMP2, VX, asr #16
- add VX, VX, UNIT_X
- add TMP2, mem_operand, TMP2, asl #1
- vld1.16 {d®1&[2]}, [TMP1, :16]
- vld1.16 {d®1&[3]}, [TMP2, :16]
-.elseif elem_size == 32
- mov TMP1, VX, asr #16
- add VX, VX, UNIT_X
- add TMP1, mem_operand, TMP1, asl #2
- mov TMP2, VX, asr #16
- add VX, VX, UNIT_X
- add TMP2, mem_operand, TMP2, asl #2
- vld1.32 {d®1&[0]}, [TMP1, :32]
- vld1.32 {d®1&[1]}, [TMP2, :32]
-.else
- .error "unsupported"
-.endif
-.endm
-
-.macro pixld2_s elem_size, reg1, reg2, mem_operand
-.if elem_size == 32
- mov TMP1, VX, asr #16
- add VX, VX, UNIT_X, asl #1
- add TMP1, mem_operand, TMP1, asl #2
- mov TMP2, VX, asr #16
- sub VX, VX, UNIT_X
- add TMP2, mem_operand, TMP2, asl #2
- vld1.32 {d®1&[0]}, [TMP1, :32]
- mov TMP1, VX, asr #16
- add VX, VX, UNIT_X, asl #1
- add TMP1, mem_operand, TMP1, asl #2
- vld1.32 {d®2&[0]}, [TMP2, :32]
- mov TMP2, VX, asr #16
- add VX, VX, UNIT_X
- add TMP2, mem_operand, TMP2, asl #2
- vld1.32 {d®1&[1]}, [TMP1, :32]
- vld1.32 {d®2&[1]}, [TMP2, :32]
-.else
- pixld1_s elem_size, reg1, mem_operand
- pixld1_s elem_size, reg2, mem_operand
-.endif
-.endm
-
-.macro pixld0_s elem_size, reg1, idx, mem_operand
-.if elem_size == 16
- mov TMP1, VX, asr #16
- add VX, VX, UNIT_X
- add TMP1, mem_operand, TMP1, asl #1
- vld1.16 {d®1&[idx]}, [TMP1, :16]
-.elseif elem_size == 32
- mov TMP1, VX, asr #16
- add VX, VX, UNIT_X
- add TMP1, mem_operand, TMP1, asl #2
- vld1.32 {d®1&[idx]}, [TMP1, :32]
-.endif
-.endm
-
-.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand
-.if numbytes == 32
- pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand
- pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand
- pixdeinterleave elem_size, %(basereg+4)
-.elseif numbytes == 16
- pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand
-.elseif numbytes == 8
- pixld1_s elem_size, %(basereg+1), mem_operand
-.elseif numbytes == 4
- .if elem_size == 32
- pixld0_s elem_size, %(basereg+0), 1, mem_operand
- .elseif elem_size == 16
- pixld0_s elem_size, %(basereg+0), 2, mem_operand
- pixld0_s elem_size, %(basereg+0), 3, mem_operand
- .else
- pixld0_s elem_size, %(basereg+0), 4, mem_operand
- pixld0_s elem_size, %(basereg+0), 5, mem_operand
- pixld0_s elem_size, %(basereg+0), 6, mem_operand
- pixld0_s elem_size, %(basereg+0), 7, mem_operand
- .endif
-.elseif numbytes == 2
- .if elem_size == 16
- pixld0_s elem_size, %(basereg+0), 1, mem_operand
- .else
- pixld0_s elem_size, %(basereg+0), 2, mem_operand
- pixld0_s elem_size, %(basereg+0), 3, mem_operand
- .endif
-.elseif numbytes == 1
- pixld0_s elem_size, %(basereg+0), 1, mem_operand
-.else
- .error "unsupported size: numbytes"
-.endif
-.endm
-
-.macro pixld_s numpix, bpp, basereg, mem_operand
-.if bpp > 0
- pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand
-.endif
-.endm
-
-.macro vuzp8 reg1, reg2
- vuzp.8 d®1, d®2
-.endm
-
-.macro vzip8 reg1, reg2
- vzip.8 d®1, d®2
-.endm
-
-/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
-.macro pixdeinterleave bpp, basereg
-.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
- vuzp8 %(basereg+0), %(basereg+1)
- vuzp8 %(basereg+2), %(basereg+3)
- vuzp8 %(basereg+1), %(basereg+3)
- vuzp8 %(basereg+0), %(basereg+2)
-.endif
-.endm
-
-/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
-.macro pixinterleave bpp, basereg
-.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
- vzip8 %(basereg+0), %(basereg+2)
- vzip8 %(basereg+1), %(basereg+3)
- vzip8 %(basereg+2), %(basereg+3)
- vzip8 %(basereg+0), %(basereg+1)
-.endif
-.endm
-
-/*
- * This is a macro for implementing cache preload. The main idea is that
- * cache preload logic is mostly independent from the rest of pixels
- * processing code. It starts at the top left pixel and moves forward
- * across pixels and can jump across scanlines. Prefetch distance is
- * handled in an 'incremental' way: it starts from 0 and advances to the
- * optimal distance over time. After reaching optimal prefetch distance,
- * it is kept constant. There are some checks which prevent prefetching
- * unneeded pixel lines below the image (but it still can prefetch a bit
- * more data on the right side of the image - not a big issue and may
- * be actually helpful when rendering text glyphs). Additional trick is
- * the use of LDR instruction for prefetch instead of PLD when moving to
- * the next line, the point is that we have a high chance of getting TLB
- * miss in this case, and PLD would be useless.
- *
- * This sounds like it may introduce a noticeable overhead (when working with
- * fully cached data). But in reality, due to having a separate pipeline and
- * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can
- * execute simultaneously with NEON and be completely shadowed by it. Thus
- * we get no performance overhead at all (*). This looks like a very nice
- * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
- * but still can implement some rather advanced prefetch logic in software
- * for almost zero cost!
- *
- * (*) The overhead of the prefetcher is visible when running some trivial
- * pixels processing like simple copy. Anyway, having prefetch is a must
- * when working with the graphics data.
- */
-.macro PF a, x:vararg
-.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED)
- a x
-.endif
-.endm
-
-.macro cache_preload std_increment, boost_increment
-.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
-.if regs_shortage
- PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
-.endif
-.if std_increment != 0
- PF add PF_X, PF_X, #std_increment
-.endif
- PF tst PF_CTL, #0xF
- PF addne PF_X, PF_X, #boost_increment
- PF subne PF_CTL, PF_CTL, #1
- PF cmp PF_X, ORIG_W
-.if src_bpp_shift >= 0
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-.endif
-.if dst_r_bpp != 0
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-.endif
-.if mask_bpp_shift >= 0
- PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
-.endif
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
-.if src_bpp_shift >= 0
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-.endif
-.if dst_r_bpp != 0
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-.endif
-.if mask_bpp_shift >= 0
- PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
-.endif
-.endif
-.endm
-
-.macro cache_preload_simple
-.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE)
-.if src_bpp > 0
- pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)]
-.endif
-.if dst_r_bpp > 0
- pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)]
-.endif
-.if mask_bpp > 0
- pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)]
-.endif
-.endif
-.endm
-
-.macro fetch_mask_pixblock
- pixld pixblock_size, mask_bpp, \
- (mask_basereg - pixblock_size * mask_bpp / 64), MASK
-.endm
-
-/*
- * Macro which is used to process leading pixels until destination
- * pointer is properly aligned (at 16 bytes boundary). When destination
- * buffer uses 24bpp format, this is unnecessary, or even pointless.
- */
-.macro ensure_destination_ptr_alignment process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
-.if dst_w_bpp != 24
- tst DST_R, #0xF
- beq 2f
-
-.irp lowbit, 1, 2, 4, 8, 16
-local skip1
-.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
-.if lowbit < 16 /* we don't need more than 16-byte alignment */
- tst DST_R, #lowbit
- beq 1f
-.endif
- pixld_src (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC
- pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK
-.if dst_r_bpp > 0
- pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R
-.else
- add DST_R, DST_R, #lowbit
-.endif
- PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
- sub W, W, #(lowbit * 8 / dst_w_bpp)
-1:
-.endif
-.endr
- pixdeinterleave src_bpp, src_basereg
- pixdeinterleave mask_bpp, mask_basereg
- pixdeinterleave dst_r_bpp, dst_r_basereg
-
- process_pixblock_head
- cache_preload 0, pixblock_size
- cache_preload_simple
- process_pixblock_tail
-
- pixinterleave dst_w_bpp, dst_w_basereg
-.irp lowbit, 1, 2, 4, 8, 16
-.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
-.if lowbit < 16 /* we don't need more than 16-byte alignment */
- tst DST_W, #lowbit
- beq 1f
-.endif
- pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W
-1:
-.endif
-.endr
-.endif
-2:
-.endm
-
-/*
- * Special code for processing up to (pixblock_size - 1) remaining
- * trailing pixels. As SIMD processing performs operation on
- * pixblock_size pixels, anything smaller than this has to be loaded
- * and stored in a special way. Loading and storing of pixel data is
- * performed in such a way that we fill some 'slots' in the NEON
- * registers (some slots naturally are unused), then perform compositing
- * operation as usual. In the end, the data is taken from these 'slots'
- * and saved to memory.
- *
- * cache_preload_flag - allows to suppress prefetch if
- * set to 0
- * dst_aligned_flag - selects whether destination buffer
- * is aligned
- */
-.macro process_trailing_pixels cache_preload_flag, \
- dst_aligned_flag, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
- tst W, #(pixblock_size - 1)
- beq 2f
-.irp chunk_size, 16, 8, 4, 2, 1
-.if pixblock_size > chunk_size
- tst W, #chunk_size
- beq 1f
- pixld_src chunk_size, src_bpp, src_basereg, SRC
- pixld chunk_size, mask_bpp, mask_basereg, MASK
-.if dst_aligned_flag != 0
- pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R
-.else
- pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R
-.endif
-.if cache_preload_flag != 0
- PF add PF_X, PF_X, #chunk_size
-.endif
-1:
-.endif
-.endr
- pixdeinterleave src_bpp, src_basereg
- pixdeinterleave mask_bpp, mask_basereg
- pixdeinterleave dst_r_bpp, dst_r_basereg
-
- process_pixblock_head
-.if cache_preload_flag != 0
- cache_preload 0, pixblock_size
- cache_preload_simple
-.endif
- process_pixblock_tail
- pixinterleave dst_w_bpp, dst_w_basereg
-.irp chunk_size, 16, 8, 4, 2, 1
-.if pixblock_size > chunk_size
- tst W, #chunk_size
- beq 1f
-.if dst_aligned_flag != 0
- pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W
-.else
- pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W
-.endif
-1:
-.endif
-.endr
-2:
-.endm
-
-/*
- * Macro, which performs all the needed operations to switch to the next
- * scanline and start the next loop iteration unless all the scanlines
- * are already processed.
- */
-.macro advance_to_next_scanline start_of_loop_label
-.if regs_shortage
- ldrd W, [sp] /* load W and H (width and height) from stack */
-.else
- mov W, ORIG_W
-.endif
- add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift
-.if src_bpp != 0
- add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift
-.endif
-.if mask_bpp != 0
- add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift
-.endif
-.if (dst_w_bpp != 24)
- sub DST_W, DST_W, W, lsl #dst_bpp_shift
-.endif
-.if (src_bpp != 24) && (src_bpp != 0)
- sub SRC, SRC, W, lsl #src_bpp_shift
-.endif
-.if (mask_bpp != 24) && (mask_bpp != 0)
- sub MASK, MASK, W, lsl #mask_bpp_shift
-.endif
- subs H, H, #1
- mov DST_R, DST_W
-.if regs_shortage
- str H, [sp, #4] /* save updated height to stack */
-.endif
- bge start_of_loop_label
-.endm
-
-/*
- * Registers are allocated in the following way by default:
- * d0, d1, d2, d3 - reserved for loading source pixel data
- * d4, d5, d6, d7 - reserved for loading destination pixel data
- * d24, d25, d26, d27 - reserved for loading mask pixel data
- * d28, d29, d30, d31 - final destination pixel data for writeback to memory
- */
-.macro generate_composite_function fname, \
- src_bpp_, \
- mask_bpp_, \
- dst_w_bpp_, \
- flags, \
- pixblock_size_, \
- prefetch_distance, \
- init, \
- cleanup, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head, \
- dst_w_basereg_ = 28, \
- dst_r_basereg_ = 4, \
- src_basereg_ = 0, \
- mask_basereg_ = 24
-
- .func fname
- .global fname
- /* For ELF format also set function visibility to hidden */
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
-fname:
- push {r4-r12, lr} /* save all registers */
-
-/*
- * Select prefetch type for this function. If prefetch distance is
- * set to 0, prefetch is disabled (NONE); if one of the color formats
- * is 24bpp, SIMPLE prefetch has to be used instead of ADVANCED.
- */
- .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT
-.if prefetch_distance == 0
- .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
-.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \
- ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24))
- .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE
-.endif
-
-/*
- * Make some macro arguments globally visible and accessible
- * from other macros
- */
- .set src_bpp, src_bpp_
- .set mask_bpp, mask_bpp_
- .set dst_w_bpp, dst_w_bpp_
- .set pixblock_size, pixblock_size_
- .set dst_w_basereg, dst_w_basereg_
- .set dst_r_basereg, dst_r_basereg_
- .set src_basereg, src_basereg_
- .set mask_basereg, mask_basereg_
-
- .macro pixld_src x:vararg
- pixld x
- .endm
- .macro fetch_src_pixblock
- pixld_src pixblock_size, src_bpp, \
- (src_basereg - pixblock_size * src_bpp / 64), SRC
- .endm
-/*
- * Assign symbolic names to registers
- */
- W .req r0 /* width (is updated during processing) */
- H .req r1 /* height (is updated during processing) */
- DST_W .req r2 /* destination buffer pointer for writes */
- DST_STRIDE .req r3 /* destination image stride */
- SRC .req r4 /* source buffer pointer */
- SRC_STRIDE .req r5 /* source image stride */
- DST_R .req r6 /* destination buffer pointer for reads */
-
- MASK .req r7 /* mask pointer */
- MASK_STRIDE .req r8 /* mask stride */
-
- PF_CTL .req r9 /* combined lines counter and prefetch */
- /* distance increment counter */
- PF_X .req r10 /* pixel index in a scanline for current */
- /* prefetch position */
- PF_SRC .req r11 /* pointer to source scanline start */
- /* for prefetch purposes */
- PF_DST .req r12 /* pointer to destination scanline start */
- /* for prefetch purposes */
- PF_MASK .req r14 /* pointer to mask scanline start */
- /* for prefetch purposes */
-/*
- * Check whether we have enough registers for all the local variables.
- * If we don't have enough registers, original width and height are
- * kept on top of stack (and 'regs_shortage' variable is set to indicate
- * this for the rest of code). Even if there are enough registers, the
- * allocation scheme may be a bit different depending on whether source
- * or mask is not used.
- */
-.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED)
- ORIG_W .req r10 /* saved original width */
- DUMMY .req r12 /* temporary register */
- .set regs_shortage, 0
-.elseif mask_bpp == 0
- ORIG_W .req r7 /* saved original width */
- DUMMY .req r8 /* temporary register */
- .set regs_shortage, 0
-.elseif src_bpp == 0
- ORIG_W .req r4 /* saved original width */
- DUMMY .req r5 /* temporary register */
- .set regs_shortage, 0
-.else
- ORIG_W .req r1 /* saved original width */
- DUMMY .req r1 /* temporary register */
- .set regs_shortage, 1
-.endif
-
- .set mask_bpp_shift, -1
-.if src_bpp == 32
- .set src_bpp_shift, 2
-.elseif src_bpp == 24
- .set src_bpp_shift, 0
-.elseif src_bpp == 16
- .set src_bpp_shift, 1
-.elseif src_bpp == 8
- .set src_bpp_shift, 0
-.elseif src_bpp == 0
- .set src_bpp_shift, -1
-.else
- .error "requested src bpp (src_bpp) is not supported"
-.endif
-.if mask_bpp == 32
- .set mask_bpp_shift, 2
-.elseif mask_bpp == 24
- .set mask_bpp_shift, 0
-.elseif mask_bpp == 8
- .set mask_bpp_shift, 0
-.elseif mask_bpp == 0
- .set mask_bpp_shift, -1
-.else
- .error "requested mask bpp (mask_bpp) is not supported"
-.endif
-.if dst_w_bpp == 32
- .set dst_bpp_shift, 2
-.elseif dst_w_bpp == 24
- .set dst_bpp_shift, 0
-.elseif dst_w_bpp == 16
- .set dst_bpp_shift, 1
-.elseif dst_w_bpp == 8
- .set dst_bpp_shift, 0
-.else
- .error "requested dst bpp (dst_w_bpp) is not supported"
-.endif
-
-.if (((flags) & FLAG_DST_READWRITE) != 0)
- .set dst_r_bpp, dst_w_bpp
-.else
- .set dst_r_bpp, 0
-.endif
-.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
- .set DEINTERLEAVE_32BPP_ENABLED, 1
-.else
- .set DEINTERLEAVE_32BPP_ENABLED, 0
-.endif
-
-.if prefetch_distance < 0 || prefetch_distance > 15
- .error "invalid prefetch distance (prefetch_distance)"
-.endif
-
-.if src_bpp > 0
- ldr SRC, [sp, #40]
-.endif
-.if mask_bpp > 0
- ldr MASK, [sp, #48]
-.endif
- PF mov PF_X, #0
-.if src_bpp > 0
- ldr SRC_STRIDE, [sp, #44]
-.endif
-.if mask_bpp > 0
- ldr MASK_STRIDE, [sp, #52]
-.endif
- mov DST_R, DST_W
-
-.if src_bpp == 24
- sub SRC_STRIDE, SRC_STRIDE, W
- sub SRC_STRIDE, SRC_STRIDE, W, lsl #1
-.endif
-.if mask_bpp == 24
- sub MASK_STRIDE, MASK_STRIDE, W
- sub MASK_STRIDE, MASK_STRIDE, W, lsl #1
-.endif
-.if dst_w_bpp == 24
- sub DST_STRIDE, DST_STRIDE, W
- sub DST_STRIDE, DST_STRIDE, W, lsl #1
-.endif
-
-/*
- * Setup advanced prefetcher initial state
- */
- PF mov PF_SRC, SRC
- PF mov PF_DST, DST_R
- PF mov PF_MASK, MASK
- /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
- PF mov PF_CTL, H, lsl #4
- PF add PF_CTL, #(prefetch_distance - 0x10)
-
- init
-.if regs_shortage
- push {r0, r1}
-.endif
- subs H, H, #1
-.if regs_shortage
- str H, [sp, #4] /* save updated height to stack */
-.else
- mov ORIG_W, W
-.endif
- blt 9f
- cmp W, #(pixblock_size * 2)
- blt 8f
-/*
- * This is the start of the pipelined loop, which is optimized for
- * long scanlines
- */
-0:
- ensure_destination_ptr_alignment process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
-
- /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
- pixld_a pixblock_size, dst_r_bpp, \
- (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
- fetch_src_pixblock
- pixld pixblock_size, mask_bpp, \
- (mask_basereg - pixblock_size * mask_bpp / 64), MASK
- PF add PF_X, PF_X, #pixblock_size
- process_pixblock_head
- cache_preload 0, pixblock_size
- cache_preload_simple
- subs W, W, #(pixblock_size * 2)
- blt 2f
-1:
- process_pixblock_tail_head
- cache_preload_simple
- subs W, W, #pixblock_size
- bge 1b
-2:
- process_pixblock_tail
- pixst_a pixblock_size, dst_w_bpp, \
- (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
-
- /* Process the remaining trailing pixels in the scanline */
- process_trailing_pixels 1, 1, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
- advance_to_next_scanline 0b
-
-.if regs_shortage
- pop {r0, r1}
-.endif
- cleanup
- pop {r4-r12, pc} /* exit */
-/*
- * This is the start of the loop, designed to process images with small width
- * (less than pixblock_size * 2 pixels). In this case neither pipelining
- * nor prefetch are used.
- */
-8:
- /* Process exactly pixblock_size pixels if needed */
- tst W, #pixblock_size
- beq 1f
- pixld pixblock_size, dst_r_bpp, \
- (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
- fetch_src_pixblock
- pixld pixblock_size, mask_bpp, \
- (mask_basereg - pixblock_size * mask_bpp / 64), MASK
- process_pixblock_head
- process_pixblock_tail
- pixst pixblock_size, dst_w_bpp, \
- (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
-1:
- /* Process the remaining trailing pixels in the scanline */
- process_trailing_pixels 0, 0, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
- advance_to_next_scanline 8b
-9:
-.if regs_shortage
- pop {r0, r1}
-.endif
- cleanup
- pop {r4-r12, pc} /* exit */
-
- .purgem fetch_src_pixblock
- .purgem pixld_src
-
- .unreq SRC
- .unreq MASK
- .unreq DST_R
- .unreq DST_W
- .unreq ORIG_W
- .unreq W
- .unreq H
- .unreq SRC_STRIDE
- .unreq DST_STRIDE
- .unreq MASK_STRIDE
- .unreq PF_CTL
- .unreq PF_X
- .unreq PF_SRC
- .unreq PF_DST
- .unreq PF_MASK
- .unreq DUMMY
- .endfunc
-.endm
-
-/*
- * A simplified variant of function generation template for a single
- * scanline processing (for implementing pixman combine functions)
- */
-.macro generate_composite_function_scanline use_nearest_scaling, \
- fname, \
- src_bpp_, \
- mask_bpp_, \
- dst_w_bpp_, \
- flags, \
- pixblock_size_, \
- init, \
- cleanup, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head, \
- dst_w_basereg_ = 28, \
- dst_r_basereg_ = 4, \
- src_basereg_ = 0, \
- mask_basereg_ = 24
-
- .func fname
- .global fname
- /* For ELF format also set function visibility to hidden */
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
-fname:
- .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
-/*
- * Make some macro arguments globally visible and accessible
- * from other macros
- */
- .set src_bpp, src_bpp_
- .set mask_bpp, mask_bpp_
- .set dst_w_bpp, dst_w_bpp_
- .set pixblock_size, pixblock_size_
- .set dst_w_basereg, dst_w_basereg_
- .set dst_r_basereg, dst_r_basereg_
- .set src_basereg, src_basereg_
- .set mask_basereg, mask_basereg_
-
-.if use_nearest_scaling != 0
- /*
- * Assign symbolic names to registers for nearest scaling
- */
- W .req r0
- DST_W .req r1
- SRC .req r2
- VX .req r3
- UNIT_X .req ip
- MASK .req lr
- TMP1 .req r4
- TMP2 .req r5
- DST_R .req r6
-
- .macro pixld_src x:vararg
- pixld_s x
- .endm
-
- ldr UNIT_X, [sp]
- push {r4-r6, lr}
- .if mask_bpp != 0
- ldr MASK, [sp, #(16 + 4)]
- .endif
-.else
- /*
- * Assign symbolic names to registers
- */
- W .req r0 /* width (is updated during processing) */
- DST_W .req r1 /* destination buffer pointer for writes */
- SRC .req r2 /* source buffer pointer */
- DST_R .req ip /* destination buffer pointer for reads */
- MASK .req r3 /* mask pointer */
-
- .macro pixld_src x:vararg
- pixld x
- .endm
-.endif
-
-.if (((flags) & FLAG_DST_READWRITE) != 0)
- .set dst_r_bpp, dst_w_bpp
-.else
- .set dst_r_bpp, 0
-.endif
-.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
- .set DEINTERLEAVE_32BPP_ENABLED, 1
-.else
- .set DEINTERLEAVE_32BPP_ENABLED, 0
-.endif
-
- .macro fetch_src_pixblock
- pixld_src pixblock_size, src_bpp, \
- (src_basereg - pixblock_size * src_bpp / 64), SRC
- .endm
-
- init
- mov DST_R, DST_W
-
- cmp W, #pixblock_size
- blt 8f
-
- ensure_destination_ptr_alignment process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
-
- subs W, W, #pixblock_size
- blt 7f
-
- /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
- pixld_a pixblock_size, dst_r_bpp, \
- (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
- fetch_src_pixblock
- pixld pixblock_size, mask_bpp, \
- (mask_basereg - pixblock_size * mask_bpp / 64), MASK
- process_pixblock_head
- subs W, W, #pixblock_size
- blt 2f
-1:
- process_pixblock_tail_head
- subs W, W, #pixblock_size
- bge 1b
-2:
- process_pixblock_tail
- pixst_a pixblock_size, dst_w_bpp, \
- (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
-7:
- /* Process the remaining trailing pixels in the scanline (dst aligned) */
- process_trailing_pixels 0, 1, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
-
- cleanup
-.if use_nearest_scaling != 0
- pop {r4-r6, pc} /* exit */
-.else
- bx lr /* exit */
-.endif
-8:
- /* Process the remaining trailing pixels in the scanline (dst unaligned) */
- process_trailing_pixels 0, 0, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
-
- cleanup
-
-.if use_nearest_scaling != 0
- pop {r4-r6, pc} /* exit */
-
- .unreq DST_R
- .unreq SRC
- .unreq W
- .unreq VX
- .unreq UNIT_X
- .unreq TMP1
- .unreq TMP2
- .unreq DST_W
- .unreq MASK
-
-.else
- bx lr /* exit */
-
- .unreq SRC
- .unreq MASK
- .unreq DST_R
- .unreq DST_W
- .unreq W
-.endif
-
- .purgem fetch_src_pixblock
- .purgem pixld_src
-
- .endfunc
-.endm
-
-.macro generate_composite_function_single_scanline x:vararg
- generate_composite_function_scanline 0, x
-.endm
-
-.macro generate_composite_function_nearest_scanline x:vararg
- generate_composite_function_scanline 1, x
-.endm
-
-/* Default prologue/epilogue, nothing special needs to be done */
-
-.macro default_init
-.endm
-
-.macro default_cleanup
-.endm
-
-/*
- * Prologue/epilogue variant which additionally saves/restores d8-d15
- * registers (they need to be saved/restored by callee according to ABI).
- * This is required if the code needs to use all the NEON registers.
- */
-
-.macro default_init_need_all_regs
- vpush {d8-d15}
-.endm
-
-.macro default_cleanup_need_all_regs
- vpop {d8-d15}
-.endm
-
-/******************************************************************************/
-
-/*
- * Conversion of 8 r5g6b5 pixels packed in 128-bit register (in)
- * into a planar a8r8g8b8 format (with a, r, g, b color components
- * stored into 64-bit registers out_a, out_r, out_g, out_b respectively).
- *
- * Warning: the conversion is destructive and the original
- * value (in) is lost.
- */
-.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b
- vshrn.u16 out_r, in, #8
- vshrn.u16 out_g, in, #3
- vsli.u16 in, in, #5
- vmov.u8 out_a, #255
- vsri.u8 out_r, out_r, #5
- vsri.u8 out_g, out_g, #6
- vshrn.u16 out_b, in, #2
-.endm
-
-.macro convert_0565_to_x888 in, out_r, out_g, out_b
- vshrn.u16 out_r, in, #8
- vshrn.u16 out_g, in, #3
- vsli.u16 in, in, #5
- vsri.u8 out_r, out_r, #5
- vsri.u8 out_g, out_g, #6
- vshrn.u16 out_b, in, #2
-.endm
-
-/*
- * Conversion from planar a8r8g8b8 format (with a, r, g, b color components
- * in 64-bit registers in_a, in_r, in_g, in_b respectively) into 8 r5g6b5
- * pixels packed in 128-bit register (out). Requires two temporary 128-bit
- * registers (tmp1, tmp2)
- */
-.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2
- vshll.u8 tmp1, in_g, #8
- vshll.u8 out, in_r, #8
- vshll.u8 tmp2, in_b, #8
- vsri.u16 out, tmp1, #5
- vsri.u16 out, tmp2, #11
-.endm
-
-/*
- * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels
- * returned in (out0, out1) registers pair. Requires one temporary
- * 64-bit register (tmp). 'out1' and 'in' may overlap, the original
- * value from 'in' is lost
- */
-.macro convert_four_0565_to_x888_packed in, out0, out1, tmp
- vshl.u16 out0, in, #5 /* G top 6 bits */
- vshl.u16 tmp, in, #11 /* B top 5 bits */
- vsri.u16 in, in, #5 /* R is ready in top bits */
- vsri.u16 out0, out0, #6 /* G is ready in top bits */
- vsri.u16 tmp, tmp, #5 /* B is ready in top bits */
- vshr.u16 out1, in, #8 /* R is in place */
- vsri.u16 out0, tmp, #8 /* G & B is in place */
- vzip.u16 out0, out1 /* everything is in place */
-.endm
+/* + * Copyright © 2009 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +/* + * This file contains a macro ('generate_composite_function') which can + * construct 2D image processing functions, based on a common template. + * Any combinations of source, destination and mask images with 8bpp, + * 16bpp, 24bpp, 32bpp color formats are supported. + * + * This macro takes care of: + * - handling of leading and trailing unaligned pixels + * - doing most of the work related to L2 cache preload + * - encourages the use of software pipelining for better instructions + * scheduling + * + * The user of this macro has to provide some configuration parameters + * (bit depths for the images, prefetch distance, etc.) and a set of + * macros, which should implement basic code chunks responsible for + * pixels processing. 
See 'pixman-arm-neon-asm.S' file for the usage + * examples. + * + * TODO: + * - try overlapped pixel method (from Ian Rickards) when processing + * exactly two blocks of pixels + * - maybe add an option to do reverse scanline processing + */ + +/* + * Bit flags for 'generate_composite_function' macro which are used + * to tune generated functions behavior. + */ +.set FLAG_DST_WRITEONLY, 0 +.set FLAG_DST_READWRITE, 1 +.set FLAG_DEINTERLEAVE_32BPP, 2 + +/* + * Offset in stack where mask and source pointer/stride can be accessed + * from 'init' macro. This is useful for doing special handling for solid mask. + */ +.set ARGS_STACK_OFFSET, 40 + +/* + * Constants for selecting preferable prefetch type. + */ +.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */ +.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */ +.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */ + +/* + * Definitions of supplementary pixld/pixst macros (for partial load/store of + * pixel data). + */ + +.macro pixldst1 op, elem_size, reg1, mem_operand, abits +.if abits > 0 + op&.&elem_size {d®1}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d®1}, [&mem_operand&]! +.endif +.endm + +.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits +.if abits > 0 + op&.&elem_size {d®1, d®2}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d®1, d®2}, [&mem_operand&]! +.endif +.endm + +.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits +.if abits > 0 + op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&, :&abits&]! +.else + op&.&elem_size {d®1, d®2, d®3, d®4}, [&mem_operand&]! +.endif +.endm + +.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits + op&.&elem_size {d®1[idx]}, [&mem_operand&]! +.endm + +.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand + op&.&elem_size {d®1, d®2, d®3}, [&mem_operand&]! 
+.endm + +.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand + op&.&elem_size {d®1[idx], d®2[idx], d®3[idx]}, [&mem_operand&]! +.endm + +.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits +.if numbytes == 32 + pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif numbytes == 16 + pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits +.elseif numbytes == 8 + pixldst1 op, elem_size, %(basereg+1), mem_operand, abits +.elseif numbytes == 4 + .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32) + pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits + .elseif elem_size == 16 + pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits + pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits + .else + pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits + .endif +.elseif numbytes == 2 + .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16) + pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits + .else + pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits + pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits + .endif +.elseif numbytes == 1 + pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits +.else + .error "unsupported size: numbytes" +.endif +.endm + +.macro pixld numpix, bpp, basereg, mem_operand, abits=0 +.if bpp > 0 +.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) + pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif (bpp == 24) && (numpix == 8) + pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand +.elseif (bpp == 24) && (numpix == 4) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand + pixldst30 vld3, 8, 
%(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand +.elseif (bpp == 24) && (numpix == 2) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand +.elseif (bpp == 24) && (numpix == 1) + pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand +.else + pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits +.endif +.endif +.endm + +.macro pixst numpix, bpp, basereg, mem_operand, abits=0 +.if bpp > 0 +.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0) + pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \ + %(basereg+6), %(basereg+7), mem_operand, abits +.elseif (bpp == 24) && (numpix == 8) + pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand +.elseif (bpp == 24) && (numpix == 4) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand +.elseif (bpp == 24) && (numpix == 2) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand +.elseif (bpp == 24) && (numpix == 1) + pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand +.else + pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits +.endif +.endif +.endm + +.macro pixld_a numpix, bpp, basereg, mem_operand +.if (bpp * numpix) <= 128 + pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix) +.else + pixld numpix, bpp, basereg, mem_operand, 128 +.endif +.endm + +.macro pixst_a numpix, bpp, basereg, mem_operand +.if (bpp * numpix) <= 128 + pixst numpix, bpp, basereg, mem_operand, %(bpp 
* numpix) +.else + pixst numpix, bpp, basereg, mem_operand, 128 +.endif +.endm + +/* + * Pixel fetcher for nearest scaling (needs TMP1, TMP2, VX, UNIT_X register + * aliases to be defined) + */ +.macro pixld1_s elem_size, reg1, mem_operand +.if elem_size == 16 + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X + add TMP1, mem_operand, TMP1, asl #1 + mov TMP2, VX, asr #16 + add VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #1 + vld1.16 {d®1&[0]}, [TMP1, :16] + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X + add TMP1, mem_operand, TMP1, asl #1 + vld1.16 {d®1&[1]}, [TMP2, :16] + mov TMP2, VX, asr #16 + add VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #1 + vld1.16 {d®1&[2]}, [TMP1, :16] + vld1.16 {d®1&[3]}, [TMP2, :16] +.elseif elem_size == 32 + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X + add TMP1, mem_operand, TMP1, asl #2 + mov TMP2, VX, asr #16 + add VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d®1&[0]}, [TMP1, :32] + vld1.32 {d®1&[1]}, [TMP2, :32] +.else + .error "unsupported" +.endif +.endm + +.macro pixld2_s elem_size, reg1, reg2, mem_operand +.if elem_size == 32 + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X, asl #1 + add TMP1, mem_operand, TMP1, asl #2 + mov TMP2, VX, asr #16 + sub VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d®1&[0]}, [TMP1, :32] + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X, asl #1 + add TMP1, mem_operand, TMP1, asl #2 + vld1.32 {d®2&[0]}, [TMP2, :32] + mov TMP2, VX, asr #16 + add VX, VX, UNIT_X + add TMP2, mem_operand, TMP2, asl #2 + vld1.32 {d®1&[1]}, [TMP1, :32] + vld1.32 {d®2&[1]}, [TMP2, :32] +.else + pixld1_s elem_size, reg1, mem_operand + pixld1_s elem_size, reg2, mem_operand +.endif +.endm + +.macro pixld0_s elem_size, reg1, idx, mem_operand +.if elem_size == 16 + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X + add TMP1, mem_operand, TMP1, asl #1 + vld1.16 {d®1&[idx]}, [TMP1, :16] +.elseif elem_size == 32 + mov TMP1, VX, asr #16 + add VX, VX, UNIT_X + add TMP1, mem_operand, TMP1, asl #2 + vld1.32 
{d&reg1&[idx]}, [TMP1, :32] +.endif +.endm + +.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand +.if numbytes == 32 + pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand + pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand + pixdeinterleave elem_size, %(basereg+4) +.elseif numbytes == 16 + pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand +.elseif numbytes == 8 + pixld1_s elem_size, %(basereg+1), mem_operand +.elseif numbytes == 4 + .if elem_size == 32 + pixld0_s elem_size, %(basereg+0), 1, mem_operand + .elseif elem_size == 16 + pixld0_s elem_size, %(basereg+0), 2, mem_operand + pixld0_s elem_size, %(basereg+0), 3, mem_operand + .else + pixld0_s elem_size, %(basereg+0), 4, mem_operand + pixld0_s elem_size, %(basereg+0), 5, mem_operand + pixld0_s elem_size, %(basereg+0), 6, mem_operand + pixld0_s elem_size, %(basereg+0), 7, mem_operand + .endif +.elseif numbytes == 2 + .if elem_size == 16 + pixld0_s elem_size, %(basereg+0), 1, mem_operand + .else + pixld0_s elem_size, %(basereg+0), 2, mem_operand + pixld0_s elem_size, %(basereg+0), 3, mem_operand + .endif +.elseif numbytes == 1 + pixld0_s elem_size, %(basereg+0), 1, mem_operand +.else + .error "unsupported size: numbytes" +.endif +.endm + +.macro pixld_s numpix, bpp, basereg, mem_operand +.if bpp > 0 + pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand +.endif +.endm + +.macro vuzp8 reg1, reg2 + vuzp.8 d&reg1, d&reg2 +.endm + +.macro vzip8 reg1, reg2 + vzip.8 d&reg1, d&reg2 +.endm + +/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ +.macro pixdeinterleave bpp, basereg +.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) + vuzp8 %(basereg+0), %(basereg+1) + vuzp8 %(basereg+2), %(basereg+3) + vuzp8 %(basereg+1), %(basereg+3) + vuzp8 %(basereg+0), %(basereg+2) +.endif +.endm + +/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */ +.macro pixinterleave bpp, basereg +.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0) + vzip8
%(basereg+0), %(basereg+2) + vzip8 %(basereg+1), %(basereg+3) + vzip8 %(basereg+2), %(basereg+3) + vzip8 %(basereg+0), %(basereg+1) +.endif +.endm + +/* + * This is a macro for implementing cache preload. The main idea is that + * cache preload logic is mostly independent from the rest of pixels + * processing code. It starts at the top left pixel and moves forward + * across pixels and can jump across scanlines. Prefetch distance is + * handled in an 'incremental' way: it starts from 0 and advances to the + * optimal distance over time. After reaching optimal prefetch distance, + * it is kept constant. There are some checks which prevent prefetching + * unneeded pixel lines below the image (but it still can prefetch a bit + * more data on the right side of the image - not a big issue and may + * be actually helpful when rendering text glyphs). Additional trick is + * the use of LDR instruction for prefetch instead of PLD when moving to + * the next line, the point is that we have a high chance of getting TLB + * miss in this case, and PLD would be useless. + * + * This sounds like it may introduce a noticeable overhead (when working with + * fully cached data). But in reality, due to having a separate pipeline and + * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can + * execute simultaneously with NEON and be completely shadowed by it. Thus + * we get no performance overhead at all (*). This looks like a very nice + * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher, + * but still can implement some rather advanced prefetch logic in sofware + * for almost zero cost! + * + * (*) The overhead of the prefetcher is visible when running some trivial + * pixels processing like simple copy. Anyway, having prefetch is a must + * when working with the graphics data. 
+ */ +.macro PF a, x:vararg +.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED) + a x +.endif +.endm + +.macro cache_preload std_increment, boost_increment +.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0) +.if regs_shortage + PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */ +.endif +.if std_increment != 0 + PF add PF_X, PF_X, #std_increment +.endif + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #boost_increment + PF subne PF_CTL, PF_CTL, #1 + PF cmp PF_X, ORIG_W +.if src_bpp_shift >= 0 + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] +.endif +.if dst_r_bpp != 0 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] +.endif +.if mask_bpp_shift >= 0 + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] +.endif + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 +.if src_bpp_shift >= 0 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endif +.if dst_r_bpp != 0 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! +.endif +.if mask_bpp_shift >= 0 + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! +.endif +.endif +.endm + +.macro cache_preload_simple +.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE) +.if src_bpp > 0 + pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)] +.endif +.if dst_r_bpp > 0 + pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)] +.endif +.if mask_bpp > 0 + pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)] +.endif +.endif +.endm + +.macro fetch_mask_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK +.endm + +/* + * Macro which is used to process leading pixels until destination + * pointer is properly aligned (at 16 bytes boundary). When destination + * buffer uses 16bpp format, this is unnecessary, or even pointless. 
+ */ +.macro ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head +.if dst_w_bpp != 24 + tst DST_R, #0xF + beq 2f + +.irp lowbit, 1, 2, 4, 8, 16 +local skip1 +.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) +.if lowbit < 16 /* we don't need more than 16-byte alignment */ + tst DST_R, #lowbit + beq 1f +.endif + pixld_src (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC + pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK +.if dst_r_bpp > 0 + pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R +.else + add DST_R, DST_R, #lowbit +.endif + PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp) + sub W, W, #(lowbit * 8 / dst_w_bpp) +1: +.endif +.endr + pixdeinterleave src_bpp, src_basereg + pixdeinterleave mask_bpp, mask_basereg + pixdeinterleave dst_r_bpp, dst_r_basereg + + process_pixblock_head + cache_preload 0, pixblock_size + cache_preload_simple + process_pixblock_tail + + pixinterleave dst_w_bpp, dst_w_basereg +.irp lowbit, 1, 2, 4, 8, 16 +.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp)) +.if lowbit < 16 /* we don't need more than 16-byte alignment */ + tst DST_W, #lowbit + beq 1f +.endif + pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W +1: +.endif +.endr +.endif +2: +.endm + +/* + * Special code for processing up to (pixblock_size - 1) remaining + * trailing pixels. As SIMD processing performs operation on + * pixblock_size pixels, anything smaller than this has to be loaded + * and stored in a special way. Loading and storing of pixel data is + * performed in such a way that we fill some 'slots' in the NEON + * registers (some slots naturally are unused), then perform compositing + * operation as usual. In the end, the data is taken from these 'slots' + * and saved to memory. 
+ * + * cache_preload_flag - allows to suppress prefetch if + * set to 0 + * dst_aligned_flag - selects whether destination buffer + * is aligned + */ +.macro process_trailing_pixels cache_preload_flag, \ + dst_aligned_flag, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + tst W, #(pixblock_size - 1) + beq 2f +.irp chunk_size, 16, 8, 4, 2, 1 +.if pixblock_size > chunk_size + tst W, #chunk_size + beq 1f + pixld_src chunk_size, src_bpp, src_basereg, SRC + pixld chunk_size, mask_bpp, mask_basereg, MASK +.if dst_aligned_flag != 0 + pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R +.else + pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R +.endif +.if cache_preload_flag != 0 + PF add PF_X, PF_X, #chunk_size +.endif +1: +.endif +.endr + pixdeinterleave src_bpp, src_basereg + pixdeinterleave mask_bpp, mask_basereg + pixdeinterleave dst_r_bpp, dst_r_basereg + + process_pixblock_head +.if cache_preload_flag != 0 + cache_preload 0, pixblock_size + cache_preload_simple +.endif + process_pixblock_tail + pixinterleave dst_w_bpp, dst_w_basereg +.irp chunk_size, 16, 8, 4, 2, 1 +.if pixblock_size > chunk_size + tst W, #chunk_size + beq 1f +.if dst_aligned_flag != 0 + pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W +.else + pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W +.endif +1: +.endif +.endr +2: +.endm + +/* + * Macro, which performs all the needed operations to switch to the next + * scanline and start the next loop iteration unless all the scanlines + * are already processed. 
+ */ +.macro advance_to_next_scanline start_of_loop_label +.if regs_shortage + ldrd W, [sp] /* load W and H (width and height) from stack */ +.else + mov W, ORIG_W +.endif + add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift +.if src_bpp != 0 + add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift +.endif +.if mask_bpp != 0 + add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift +.endif +.if (dst_w_bpp != 24) + sub DST_W, DST_W, W, lsl #dst_bpp_shift +.endif +.if (src_bpp != 24) && (src_bpp != 0) + sub SRC, SRC, W, lsl #src_bpp_shift +.endif +.if (mask_bpp != 24) && (mask_bpp != 0) + sub MASK, MASK, W, lsl #mask_bpp_shift +.endif + subs H, H, #1 + mov DST_R, DST_W +.if regs_shortage + str H, [sp, #4] /* save updated height to stack */ +.endif + bge start_of_loop_label +.endm + +/* + * Registers are allocated in the following way by default: + * d0, d1, d2, d3 - reserved for loading source pixel data + * d4, d5, d6, d7 - reserved for loading destination pixel data + * d24, d25, d26, d27 - reserved for loading mask pixel data + * d28, d29, d30, d31 - final destination pixel data for writeback to memory + */ +.macro generate_composite_function fname, \ + src_bpp_, \ + mask_bpp_, \ + dst_w_bpp_, \ + flags, \ + pixblock_size_, \ + prefetch_distance, \ + init, \ + cleanup, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head, \ + dst_w_basereg_ = 28, \ + dst_r_basereg_ = 4, \ + src_basereg_ = 0, \ + mask_basereg_ = 24 + + .func fname + .global fname + /* For ELF format also set function visibility to hidden */ +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: + push {r4-r12, lr} /* save all registers */ + +/* + * Select prefetch type for this function. If prefetch distance is + * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch + * has to be used instead of ADVANCED. 
+ */ + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT +.if prefetch_distance == 0 + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE +.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \ + ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24)) + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE +.endif + +/* + * Make some macro arguments globally visible and accessible + * from other macros + */ + .set src_bpp, src_bpp_ + .set mask_bpp, mask_bpp_ + .set dst_w_bpp, dst_w_bpp_ + .set pixblock_size, pixblock_size_ + .set dst_w_basereg, dst_w_basereg_ + .set dst_r_basereg, dst_r_basereg_ + .set src_basereg, src_basereg_ + .set mask_basereg, mask_basereg_ + + .macro pixld_src x:vararg + pixld x + .endm + .macro fetch_src_pixblock + pixld_src pixblock_size, src_bpp, \ + (src_basereg - pixblock_size * src_bpp / 64), SRC + .endm +/* + * Assign symbolic names to registers + */ + W .req r0 /* width (is updated during processing) */ + H .req r1 /* height (is updated during processing) */ + DST_W .req r2 /* destination buffer pointer for writes */ + DST_STRIDE .req r3 /* destination image stride */ + SRC .req r4 /* source buffer pointer */ + SRC_STRIDE .req r5 /* source image stride */ + DST_R .req r6 /* destination buffer pointer for reads */ + + MASK .req r7 /* mask pointer */ + MASK_STRIDE .req r8 /* mask stride */ + + PF_CTL .req r9 /* combined lines counter and prefetch */ + /* distance increment counter */ + PF_X .req r10 /* pixel index in a scanline for current */ + /* pretetch position */ + PF_SRC .req r11 /* pointer to source scanline start */ + /* for prefetch purposes */ + PF_DST .req r12 /* pointer to destination scanline start */ + /* for prefetch purposes */ + PF_MASK .req r14 /* pointer to mask scanline start */ + /* for prefetch purposes */ +/* + * Check whether we have enough registers for all the local variables. 
+ * If we don't have enough registers, original width and height are + * kept on top of stack (and 'regs_shortage' variable is set to indicate + * this for the rest of code). Even if there are enough registers, the + * allocation scheme may be a bit different depending on whether source + * or mask is not used. + */ +.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED) + ORIG_W .req r10 /* saved original width */ + DUMMY .req r12 /* temporary register */ + .set regs_shortage, 0 +.elseif mask_bpp == 0 + ORIG_W .req r7 /* saved original width */ + DUMMY .req r8 /* temporary register */ + .set regs_shortage, 0 +.elseif src_bpp == 0 + ORIG_W .req r4 /* saved original width */ + DUMMY .req r5 /* temporary register */ + .set regs_shortage, 0 +.else + ORIG_W .req r1 /* saved original width */ + DUMMY .req r1 /* temporary register */ + .set regs_shortage, 1 +.endif + + .set mask_bpp_shift, -1 +.if src_bpp == 32 + .set src_bpp_shift, 2 +.elseif src_bpp == 24 + .set src_bpp_shift, 0 +.elseif src_bpp == 16 + .set src_bpp_shift, 1 +.elseif src_bpp == 8 + .set src_bpp_shift, 0 +.elseif src_bpp == 0 + .set src_bpp_shift, -1 +.else + .error "requested src bpp (src_bpp) is not supported" +.endif +.if mask_bpp == 32 + .set mask_bpp_shift, 2 +.elseif mask_bpp == 24 + .set mask_bpp_shift, 0 +.elseif mask_bpp == 8 + .set mask_bpp_shift, 0 +.elseif mask_bpp == 0 + .set mask_bpp_shift, -1 +.else + .error "requested mask bpp (mask_bpp) is not supported" +.endif +.if dst_w_bpp == 32 + .set dst_bpp_shift, 2 +.elseif dst_w_bpp == 24 + .set dst_bpp_shift, 0 +.elseif dst_w_bpp == 16 + .set dst_bpp_shift, 1 +.elseif dst_w_bpp == 8 + .set dst_bpp_shift, 0 +.else + .error "requested dst bpp (dst_w_bpp) is not supported" +.endif + +.if (((flags) & FLAG_DST_READWRITE) != 0) + .set dst_r_bpp, dst_w_bpp +.else + .set dst_r_bpp, 0 +.endif +.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) + .set DEINTERLEAVE_32BPP_ENABLED, 1 +.else + .set DEINTERLEAVE_32BPP_ENABLED, 0 +.endif + +.if prefetch_distance < 
0 || prefetch_distance > 15 + .error "invalid prefetch distance (prefetch_distance)" +.endif + +.if src_bpp > 0 + ldr SRC, [sp, #40] +.endif +.if mask_bpp > 0 + ldr MASK, [sp, #48] +.endif + PF mov PF_X, #0 +.if src_bpp > 0 + ldr SRC_STRIDE, [sp, #44] +.endif +.if mask_bpp > 0 + ldr MASK_STRIDE, [sp, #52] +.endif + mov DST_R, DST_W + +.if src_bpp == 24 + sub SRC_STRIDE, SRC_STRIDE, W + sub SRC_STRIDE, SRC_STRIDE, W, lsl #1 +.endif +.if mask_bpp == 24 + sub MASK_STRIDE, MASK_STRIDE, W + sub MASK_STRIDE, MASK_STRIDE, W, lsl #1 +.endif +.if dst_w_bpp == 24 + sub DST_STRIDE, DST_STRIDE, W + sub DST_STRIDE, DST_STRIDE, W, lsl #1 +.endif + +/* + * Setup advanced prefetcher initial state + */ + PF mov PF_SRC, SRC + PF mov PF_DST, DST_R + PF mov PF_MASK, MASK + /* PF_CTL = prefetch_distance | ((h - 1) << 4) */ + PF mov PF_CTL, H, lsl #4 + PF add PF_CTL, #(prefetch_distance - 0x10) + + init +.if regs_shortage + push {r0, r1} +.endif + subs H, H, #1 +.if regs_shortage + str H, [sp, #4] /* save updated height to stack */ +.else + mov ORIG_W, W +.endif + blt 9f + cmp W, #(pixblock_size * 2) + blt 8f +/* + * This is the start of the pipelined loop, which if optimized for + * long scanlines + */ +0: + ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + /* Implement "head (tail_head) ... 
(tail_head) tail" loop pattern */ + pixld_a pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + PF add PF_X, PF_X, #pixblock_size + process_pixblock_head + cache_preload 0, pixblock_size + cache_preload_simple + subs W, W, #(pixblock_size * 2) + blt 2f +1: + process_pixblock_tail_head + cache_preload_simple + subs W, W, #pixblock_size + bge 1b +2: + process_pixblock_tail + pixst_a pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W + + /* Process the remaining trailing pixels in the scanline */ + process_trailing_pixels 1, 1, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + advance_to_next_scanline 0b + +.if regs_shortage + pop {r0, r1} +.endif + cleanup + pop {r4-r12, pc} /* exit */ +/* + * This is the start of the loop, designed to process images with small width + * (less than pixblock_size * 2 pixels). In this case neither pipelining + * nor prefetch are used. 
+ */ +8: + /* Process exactly pixblock_size pixels if needed */ + tst W, #pixblock_size + beq 1f + pixld pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + process_pixblock_head + process_pixblock_tail + pixst pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W +1: + /* Process the remaining trailing pixels in the scanline */ + process_trailing_pixels 0, 0, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + advance_to_next_scanline 8b +9: +.if regs_shortage + pop {r0, r1} +.endif + cleanup + pop {r4-r12, pc} /* exit */ + + .purgem fetch_src_pixblock + .purgem pixld_src + + .unreq SRC + .unreq MASK + .unreq DST_R + .unreq DST_W + .unreq ORIG_W + .unreq W + .unreq H + .unreq SRC_STRIDE + .unreq DST_STRIDE + .unreq MASK_STRIDE + .unreq PF_CTL + .unreq PF_X + .unreq PF_SRC + .unreq PF_DST + .unreq PF_MASK + .unreq DUMMY + .endfunc +.endm + +/* + * A simplified variant of function generation template for a single + * scanline processing (for implementing pixman combine functions) + */ +.macro generate_composite_function_scanline use_nearest_scaling, \ + fname, \ + src_bpp_, \ + mask_bpp_, \ + dst_w_bpp_, \ + flags, \ + pixblock_size_, \ + init, \ + cleanup, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head, \ + dst_w_basereg_ = 28, \ + dst_r_basereg_ = 4, \ + src_basereg_ = 0, \ + mask_basereg_ = 24 + + .func fname + .global fname + /* For ELF format also set function visibility to hidden */ +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE +/* + * Make some macro arguments globally visible and accessible + * from other macros + */ + .set src_bpp, src_bpp_ + .set mask_bpp, mask_bpp_ + .set dst_w_bpp, dst_w_bpp_ + .set pixblock_size, pixblock_size_ + 
.set dst_w_basereg, dst_w_basereg_ + .set dst_r_basereg, dst_r_basereg_ + .set src_basereg, src_basereg_ + .set mask_basereg, mask_basereg_ + +.if use_nearest_scaling != 0 + /* + * Assign symbolic names to registers for nearest scaling + */ + W .req r0 + DST_W .req r1 + SRC .req r2 + VX .req r3 + UNIT_X .req ip + MASK .req lr + TMP1 .req r4 + TMP2 .req r5 + DST_R .req r6 + + .macro pixld_src x:vararg + pixld_s x + .endm + + ldr UNIT_X, [sp] + push {r4-r6, lr} + .if mask_bpp != 0 + ldr MASK, [sp, #(16 + 4)] + .endif +.else + /* + * Assign symbolic names to registers + */ + W .req r0 /* width (is updated during processing) */ + DST_W .req r1 /* destination buffer pointer for writes */ + SRC .req r2 /* source buffer pointer */ + DST_R .req ip /* destination buffer pointer for reads */ + MASK .req r3 /* mask pointer */ + + .macro pixld_src x:vararg + pixld x + .endm +.endif + +.if (((flags) & FLAG_DST_READWRITE) != 0) + .set dst_r_bpp, dst_w_bpp +.else + .set dst_r_bpp, 0 +.endif +.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0) + .set DEINTERLEAVE_32BPP_ENABLED, 1 +.else + .set DEINTERLEAVE_32BPP_ENABLED, 0 +.endif + + .macro fetch_src_pixblock + pixld_src pixblock_size, src_bpp, \ + (src_basereg - pixblock_size * src_bpp / 64), SRC + .endm + + init + mov DST_R, DST_W + + cmp W, #pixblock_size + blt 8f + + ensure_destination_ptr_alignment process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + subs W, W, #pixblock_size + blt 7f + + /* Implement "head (tail_head) ... 
(tail_head) tail" loop pattern */ + pixld_a pixblock_size, dst_r_bpp, \ + (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R + fetch_src_pixblock + pixld pixblock_size, mask_bpp, \ + (mask_basereg - pixblock_size * mask_bpp / 64), MASK + process_pixblock_head + subs W, W, #pixblock_size + blt 2f +1: + process_pixblock_tail_head + subs W, W, #pixblock_size + bge 1b +2: + process_pixblock_tail + pixst_a pixblock_size, dst_w_bpp, \ + (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W +7: + /* Process the remaining trailing pixels in the scanline (dst aligned) */ + process_trailing_pixels 0, 1, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + cleanup +.if use_nearest_scaling != 0 + pop {r4-r6, pc} /* exit */ +.else + bx lr /* exit */ +.endif +8: + /* Process the remaining trailing pixels in the scanline (dst unaligned) */ + process_trailing_pixels 0, 0, \ + process_pixblock_head, \ + process_pixblock_tail, \ + process_pixblock_tail_head + + cleanup + +.if use_nearest_scaling != 0 + pop {r4-r6, pc} /* exit */ + + .unreq DST_R + .unreq SRC + .unreq W + .unreq VX + .unreq UNIT_X + .unreq TMP1 + .unreq TMP2 + .unreq DST_W + .unreq MASK + +.else + bx lr /* exit */ + + .unreq SRC + .unreq MASK + .unreq DST_R + .unreq DST_W + .unreq W +.endif + + .purgem fetch_src_pixblock + .purgem pixld_src + + .endfunc +.endm + +.macro generate_composite_function_single_scanline x:vararg + generate_composite_function_scanline 0, x +.endm + +.macro generate_composite_function_nearest_scanline x:vararg + generate_composite_function_scanline 1, x +.endm + +/* Default prologue/epilogue, nothing special needs to be done */ + +.macro default_init +.endm + +.macro default_cleanup +.endm + +/* + * Prologue/epilogue variant which additionally saves/restores d8-d15 + * registers (they need to be saved/restored by callee according to ABI). + * This is required if the code needs to use all the NEON registers. 
+ */ + +.macro default_init_need_all_regs + vpush {d8-d15} +.endm + +.macro default_cleanup_need_all_regs + vpop {d8-d15} +.endm + +/******************************************************************************/ + +/* + * Conversion of 8 r5g6b5 pixels packed in 128-bit register (in) + * into a planar a8r8g8b8 format (with a, r, g, b color components + * stored into 64-bit registers out_a, out_r, out_g, out_b respectively). + * + * Warning: the conversion is destructive and the original + * value (in) is lost. + */ +.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b + vshrn.u16 out_r, in, #8 + vshrn.u16 out_g, in, #3 + vsli.u16 in, in, #5 + vmov.u8 out_a, #255 + vsri.u8 out_r, out_r, #5 + vsri.u8 out_g, out_g, #6 + vshrn.u16 out_b, in, #2 +.endm + +.macro convert_0565_to_x888 in, out_r, out_g, out_b + vshrn.u16 out_r, in, #8 + vshrn.u16 out_g, in, #3 + vsli.u16 in, in, #5 + vsri.u8 out_r, out_r, #5 + vsri.u8 out_g, out_g, #6 + vshrn.u16 out_b, in, #2 +.endm + +/* + * Conversion from planar a8r8g8b8 format (with a, r, g, b color components + * in 64-bit registers in_a, in_r, in_g, in_b respectively) into 8 r5g6b5 + * pixels packed in 128-bit register (out). Requires two temporary 128-bit + * registers (tmp1, tmp2) + */ +.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2 + vshll.u8 tmp1, in_g, #8 + vshll.u8 out, in_r, #8 + vshll.u8 tmp2, in_b, #8 + vsri.u16 out, tmp1, #5 + vsri.u16 out, tmp2, #11 +.endm + +/* + * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels + * returned in (out0, out1) registers pair. Requires one temporary + * 64-bit register (tmp).
'out1' and 'in' may overlap, the original + * value from 'in' is lost + */ +.macro convert_four_0565_to_x888_packed in, out0, out1, tmp + vshl.u16 out0, in, #5 /* G top 6 bits */ + vshl.u16 tmp, in, #11 /* B top 5 bits */ + vsri.u16 in, in, #5 /* R is ready in top bits */ + vsri.u16 out0, out0, #6 /* G is ready in top bits */ + vsri.u16 tmp, tmp, #5 /* B is ready in top bits */ + vshr.u16 out1, in, #8 /* R is in place */ + vsri.u16 out0, tmp, #8 /* G & B is in place */ + vzip.u16 out0, out1 /* everything is in place */ +.endm diff --git a/pixman/pixman/pixman-edge-accessors.c b/pixman/pixman/pixman-edge-accessors.c index ea3a31e2f..0f2c56e74 100644 --- a/pixman/pixman/pixman-edge-accessors.c +++ b/pixman/pixman/pixman-edge-accessors.c @@ -1,4 +1,4 @@ - -#define PIXMAN_FB_ACCESSORS - -#include "pixman-edge.c" +
+#define PIXMAN_FB_ACCESSORS
+
+#include "pixman-edge.c"
diff --git a/pixman/pixman/pixman-edge-imp.h b/pixman/pixman/pixman-edge-imp.h index a4698eddb..20ffda896 100644 --- a/pixman/pixman/pixman-edge-imp.h +++ b/pixman/pixman/pixman-edge-imp.h @@ -1,182 +1,182 @@ -/* - * Copyright © 2004 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. 
- */ - -#ifndef rasterize_span -#endif - -static void -RASTERIZE_EDGES (pixman_image_t *image, - pixman_edge_t *l, - pixman_edge_t *r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - pixman_fixed_t y = t; - uint32_t *line; - uint32_t *buf = (image)->bits.bits; - int stride = (image)->bits.rowstride; - int width = (image)->bits.width; - - line = buf + pixman_fixed_to_int (y) * stride; - - for (;;) - { - pixman_fixed_t lx; - pixman_fixed_t rx; - int lxi; - int rxi; - - lx = l->x; - rx = r->x; -#if N_BITS == 1 - /* For the non-antialiased case, round the coordinates up, in effect - * sampling just slightly to the left of the pixel. This is so that - * when the sample point lies exactly on the line, we round towards - * north-west. - * - * (The AA case does a similar adjustment in RENDER_SAMPLES_X) - */ - lx += X_FRAC_FIRST(1) - pixman_fixed_e; - rx += X_FRAC_FIRST(1) - pixman_fixed_e; -#endif - /* clip X */ - if (lx < 0) - lx = 0; - if (pixman_fixed_to_int (rx) >= width) -#if N_BITS == 1 - rx = pixman_int_to_fixed (width); -#else - /* Use the last pixel of the scanline, covered 100%. - * We can't use the first pixel following the scanline, - * because accessing it could result in a buffer overrun. - */ - rx = pixman_int_to_fixed (width) - 1; -#endif - - /* Skip empty (or backwards) sections */ - if (rx > lx) - { - - /* Find pixel bounds for span */ - lxi = pixman_fixed_to_int (lx); - rxi = pixman_fixed_to_int (rx); - -#if N_BITS == 1 - { - -#define LEFT_MASK(x) \ - (((x) & 0x1f) ? \ - SCREEN_SHIFT_RIGHT (0xffffffff, (x) & 0x1f) : 0) -#define RIGHT_MASK(x) \ - (((32 - (x)) & 0x1f) ? 
\ - SCREEN_SHIFT_LEFT (0xffffffff, (32 - (x)) & 0x1f) : 0) - -#define MASK_BITS(x,w,l,n,r) { \ - n = (w); \ - r = RIGHT_MASK ((x) + n); \ - l = LEFT_MASK (x); \ - if (l) { \ - n -= 32 - ((x) & 0x1f); \ - if (n < 0) { \ - n = 0; \ - l &= r; \ - r = 0; \ - } \ - } \ - n >>= 5; \ - } - - uint32_t *a = line; - uint32_t startmask; - uint32_t endmask; - int nmiddle; - int width = rxi - lxi; - int x = lxi; - - a += x >> 5; - x &= 0x1f; - - MASK_BITS (x, width, startmask, nmiddle, endmask); - - if (startmask) { - WRITE(image, a, READ(image, a) | startmask); - a++; - } - while (nmiddle--) - WRITE(image, a++, 0xffffffff); - if (endmask) - WRITE(image, a, READ(image, a) | endmask); - } -#else - { - DEFINE_ALPHA(line,lxi); - int lxs; - int rxs; - - /* Sample coverage for edge pixels */ - lxs = RENDER_SAMPLES_X (lx, N_BITS); - rxs = RENDER_SAMPLES_X (rx, N_BITS); - - /* Add coverage across row */ - if (lxi == rxi) - { - ADD_ALPHA (rxs - lxs); - } - else - { - int xi; - - ADD_ALPHA (N_X_FRAC(N_BITS) - lxs); - STEP_ALPHA; - for (xi = lxi + 1; xi < rxi; xi++) - { - ADD_ALPHA (N_X_FRAC(N_BITS)); - STEP_ALPHA; - } - ADD_ALPHA (rxs); - } - } -#endif - } - - if (y == b) - break; - -#if N_BITS > 1 - if (pixman_fixed_frac (y) != Y_FRAC_LAST(N_BITS)) - { - RENDER_EDGE_STEP_SMALL (l); - RENDER_EDGE_STEP_SMALL (r); - y += STEP_Y_SMALL(N_BITS); - } - else -#endif - { - RENDER_EDGE_STEP_BIG (l); - RENDER_EDGE_STEP_BIG (r); - y += STEP_Y_BIG(N_BITS); - line += stride; - } - } -} - -#undef rasterize_span +/*
+ * Copyright © 2004 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Keith Packard makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef rasterize_span
+#endif
+/*
+ * Fill the horizontal spans lying between the left edge 'l' and the right
+ * edge 'r' of 'image', one sample row at a time, starting at y = t and
+ * stopping once the row at y = b has been processed.
+ *
+ * This body is a template: N_BITS and the helper macros used below
+ * (READ/WRITE, SCREEN_SHIFT_*, DEFINE_ALPHA, ADD_ALPHA, STEP_ALPHA,
+ * RENDER_*) are not defined in this file; presumably the including file
+ * provides them together with the actual function name - TODO confirm.
+ *
+ * N_BITS == 1: the bits of every covered pixel are OR-ed into the 32-bit
+ * words of the scanline.
+ * N_BITS > 1: per-pixel coverage is accumulated through
+ * ADD_ALPHA / STEP_ALPHA.
+ *
+ * The loop only terminates when y becomes exactly equal to b, so the
+ * caller presumably passes t and b already snapped to the sample grid
+ * walked by STEP_Y_SMALL / STEP_Y_BIG - verify against callers.
+ */
+static void
+RASTERIZE_EDGES (pixman_image_t *image,
+ pixman_edge_t *l,
+ pixman_edge_t *r,
+ pixman_fixed_t t,
+ pixman_fixed_t b)
+{
+ pixman_fixed_t y = t; /* fixed-point y of the sample row being filled */
+ uint32_t *line; /* first 32-bit word of the scanline containing y */
+ uint32_t *buf = (image)->bits.bits;
+ int stride = (image)->bits.rowstride; /* added to a uint32_t pointer, so counted in 32-bit words */
+ int width = (image)->bits.width;
+
+ line = buf + pixman_fixed_to_int (y) * stride;
+
+ for (;;)
+ {
+ pixman_fixed_t lx; /* left end of the span (fixed point) */
+ pixman_fixed_t rx; /* right end of the span (fixed point) */
+ int lxi; /* integer pixel column of lx */
+ int rxi; /* integer pixel column of rx */
+
+ lx = l->x;
+ rx = r->x;
+#if N_BITS == 1
+ /* For the non-antialiased case, round the coordinates up, in effect
+ * sampling just slightly to the left of the pixel. This is so that
+ * when the sample point lies exactly on the line, we round towards
+ * north-west.
+ *
+ * (The AA case does a similar adjustment in RENDER_SAMPLES_X)
+ */
+ lx += X_FRAC_FIRST(1) - pixman_fixed_e;
+ rx += X_FRAC_FIRST(1) - pixman_fixed_e;
+#endif
+ /* clip X */
+ if (lx < 0)
+ lx = 0;
+ if (pixman_fixed_to_int (rx) >= width)
+#if N_BITS == 1
+ rx = pixman_int_to_fixed (width);
+#else
+ /* Use the last pixel of the scanline, covered 100%.
+ * We can't use the first pixel following the scanline,
+ * because accessing it could result in a buffer overrun.
+ */
+ rx = pixman_int_to_fixed (width) - 1;
+#endif
+
+ /* Skip empty (or backwards) sections */
+ if (rx > lx)
+ {
+
+ /* Find pixel bounds for span */
+ lxi = pixman_fixed_to_int (lx);
+ rxi = pixman_fixed_to_int (rx);
+
+#if N_BITS == 1
+ {
+ /*
+ * MASK_BITS (x, w, l, n, r): for a run of w bits starting at bit
+ * offset x, compute l (mask for a partial first word, 0 when the run
+ * starts on a 32-bit boundary), n (number of whole 32-bit words) and
+ * r (mask for a partial last word, 0 when the run ends on a 32-bit
+ * boundary). When the run ends inside its first partial word, l is
+ * narrowed to l & r and r is cleared. The masks themselves are built
+ * by SCREEN_SHIFT_RIGHT / SCREEN_SHIFT_LEFT, defined elsewhere.
+ */
+#define LEFT_MASK(x) \
+ (((x) & 0x1f) ? \
+ SCREEN_SHIFT_RIGHT (0xffffffff, (x) & 0x1f) : 0)
+#define RIGHT_MASK(x) \
+ (((32 - (x)) & 0x1f) ? \
+ SCREEN_SHIFT_LEFT (0xffffffff, (32 - (x)) & 0x1f) : 0)
+
+#define MASK_BITS(x,w,l,n,r) { \
+ n = (w); \
+ r = RIGHT_MASK ((x) + n); \
+ l = LEFT_MASK (x); \
+ if (l) { \
+ n -= 32 - ((x) & 0x1f); \
+ if (n < 0) { \
+ n = 0; \
+ l &= r; \
+ r = 0; \
+ } \
+ } \
+ n >>= 5; \
+ }
+
+ uint32_t *a = line; /* word currently being written */
+ uint32_t startmask;
+ uint32_t endmask;
+ int nmiddle;
+ int width = rxi - lxi; /* span length in pixels; NOTE: shadows the image width declared above */
+ int x = lxi;
+
+ a += x >> 5; /* word holding the first pixel of the span */
+ x &= 0x1f; /* bit offset of that pixel inside the word */
+
+ MASK_BITS (x, width, startmask, nmiddle, endmask);
+
+ if (startmask) { /* partial leading word: OR the span bits in */
+ WRITE(image, a, READ(image, a) | startmask);
+ a++;
+ }
+ while (nmiddle--) /* fully covered words */
+ WRITE(image, a++, 0xffffffff);
+ if (endmask) /* partial trailing word */
+ WRITE(image, a, READ(image, a) | endmask);
+ }
+#else
+ {
+ DEFINE_ALPHA(line,lxi);
+ int lxs; /* RENDER_SAMPLES_X result for the left end */
+ int rxs; /* RENDER_SAMPLES_X result for the right end */
+
+ /* Sample coverage for edge pixels */
+ lxs = RENDER_SAMPLES_X (lx, N_BITS);
+ rxs = RENDER_SAMPLES_X (rx, N_BITS);
+
+ /* Add coverage across row */
+ if (lxi == rxi)
+ {
+ /* span starts and ends inside the same pixel */
+ ADD_ALPHA (rxs - lxs);
+ }
+ else
+ {
+ int xi;
+
+ /* first pixel: N_X_FRAC(N_BITS) minus the left sample value */
+ ADD_ALPHA (N_X_FRAC(N_BITS) - lxs);
+ STEP_ALPHA;
+ /* pixels strictly between the two ends each get N_X_FRAC(N_BITS) */
+ for (xi = lxi + 1; xi < rxi; xi++)
+ {
+ ADD_ALPHA (N_X_FRAC(N_BITS));
+ STEP_ALPHA;
+ }
+ /* last pixel: the right sample value */
+ ADD_ALPHA (rxs);
+ }
+ }
+#endif
+ }
+
+ if (y == b) /* the row at y == b has just been handled: done */
+ break;
+
+#if N_BITS > 1
+ if (pixman_fixed_frac (y) != Y_FRAC_LAST(N_BITS))
+ {
+ /* small step: 'line' is not advanced, so we stay on this scanline */
+ RENDER_EDGE_STEP_SMALL (l);
+ RENDER_EDGE_STEP_SMALL (r);
+ y += STEP_Y_SMALL(N_BITS);
+ }
+ else
+#endif
+ {
+ /* big step: also advance 'line' to the next scanline */
+ RENDER_EDGE_STEP_BIG (l);
+ RENDER_EDGE_STEP_BIG (r);
+ y += STEP_Y_BIG(N_BITS);
+ line += stride;
+ }
+ }
+}
+
+#undef rasterize_span
diff --git a/pixman/pixman/pixman-edge.c b/pixman/pixman/pixman-edge.c index 8d498ab44..22b0158ba 100644 --- a/pixman/pixman/pixman-edge.c +++ b/pixman/pixman/pixman-edge.c @@ -1,384 +1,384 @@ -/* - * Copyright © 2004 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. 
- */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <string.h> - -#include "pixman-private.h" -#include "pixman-accessor.h" - -/* - * Step across a small sample grid gap - */ -#define RENDER_EDGE_STEP_SMALL(edge) \ - { \ - edge->x += edge->stepx_small; \ - edge->e += edge->dx_small; \ - if (edge->e > 0) \ - { \ - edge->e -= edge->dy; \ - edge->x += edge->signdx; \ - } \ - } - -/* - * Step across a large sample grid gap - */ -#define RENDER_EDGE_STEP_BIG(edge) \ - { \ - edge->x += edge->stepx_big; \ - edge->e += edge->dx_big; \ - if (edge->e > 0) \ - { \ - edge->e -= edge->dy; \ - edge->x += edge->signdx; \ - } \ - } - -#ifdef PIXMAN_FB_ACCESSORS -#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_accessors -#else -#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_no_accessors -#endif - -/* - * 4 bit alpha - */ - -#define N_BITS 4 -#define RASTERIZE_EDGES rasterize_edges_4 - -#ifndef WORDS_BIGENDIAN -#define SHIFT_4(o) ((o) << 2) -#else -#define SHIFT_4(o) ((1 - (o)) << 2) -#endif - -#define GET_4(x, o) (((x) >> SHIFT_4 (o)) & 0xf) -#define PUT_4(x, o, v) \ - (((x) & ~(0xf << SHIFT_4 (o))) | (((v) & 0xf) << SHIFT_4 (o))) - -#define DEFINE_ALPHA(line, x) \ - uint8_t *__ap = (uint8_t *) line + ((x) >> 1); \ - int __ao = (x) & 1 - -#define STEP_ALPHA ((__ap += __ao), (__ao ^= 1)) - -#define ADD_ALPHA(a) \ - { \ - uint8_t __o = READ (image, __ap); \ - uint8_t __a = (a) + GET_4 (__o, __ao); \ - WRITE (image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \ - } - -#include "pixman-edge-imp.h" - -#undef ADD_ALPHA -#undef STEP_ALPHA -#undef DEFINE_ALPHA -#undef RASTERIZE_EDGES -#undef N_BITS - - -/* - * 1 bit alpha - */ - -#define N_BITS 1 -#define RASTERIZE_EDGES rasterize_edges_1 - -#include "pixman-edge-imp.h" - -#undef RASTERIZE_EDGES -#undef N_BITS - -/* - * 8 bit alpha - */ - -static force_inline uint8_t -clip255 (int x) -{ - if (x > 255) - return 255; - - return x; -} - -#define ADD_SATURATE_8(buf, val, length) \ - do \ - { \ - int i__ = 
(length); \ - uint8_t *buf__ = (buf); \ - int val__ = (val); \ - \ - while (i__--) \ - { \ - WRITE (image, (buf__), clip255 (READ (image, (buf__)) + (val__))); \ - (buf__)++; \ - } \ - } while (0) - -/* - * We want to detect the case where we add the same value to a long - * span of pixels. The triangles on the end are filled in while we - * count how many sub-pixel scanlines contribute to the middle section. - * - * +--------------------------+ - * fill_height =| \ / - * +------------------+ - * |================| - * fill_start fill_end - */ -static void -rasterize_edges_8 (pixman_image_t *image, - pixman_edge_t * l, - pixman_edge_t * r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - pixman_fixed_t y = t; - uint32_t *line; - int fill_start = -1, fill_end = -1; - int fill_size = 0; - uint32_t *buf = (image)->bits.bits; - int stride = (image)->bits.rowstride; - int width = (image)->bits.width; - - line = buf + pixman_fixed_to_int (y) * stride; - - for (;;) - { - uint8_t *ap = (uint8_t *) line; - pixman_fixed_t lx, rx; - int lxi, rxi; - - /* clip X */ - lx = l->x; - if (lx < 0) - lx = 0; - - rx = r->x; - - if (pixman_fixed_to_int (rx) >= width) - { - /* Use the last pixel of the scanline, covered 100%. - * We can't use the first pixel following the scanline, - * because accessing it could result in a buffer overrun. - */ - rx = pixman_int_to_fixed (width) - 1; - } - - /* Skip empty (or backwards) sections */ - if (rx > lx) - { - int lxs, rxs; - - /* Find pixel bounds for span. 
*/ - lxi = pixman_fixed_to_int (lx); - rxi = pixman_fixed_to_int (rx); - - /* Sample coverage for edge pixels */ - lxs = RENDER_SAMPLES_X (lx, 8); - rxs = RENDER_SAMPLES_X (rx, 8); - - /* Add coverage across row */ - if (lxi == rxi) - { - WRITE (image, ap + lxi, - clip255 (READ (image, ap + lxi) + rxs - lxs)); - } - else - { - WRITE (image, ap + lxi, - clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs)); - - /* Move forward so that lxi/rxi is the pixel span */ - lxi++; - - /* Don't bother trying to optimize the fill unless - * the span is longer than 4 pixels. */ - if (rxi - lxi > 4) - { - if (fill_start < 0) - { - fill_start = lxi; - fill_end = rxi; - fill_size++; - } - else - { - if (lxi >= fill_end || rxi < fill_start) - { - /* We're beyond what we saved, just fill it */ - ADD_SATURATE_8 (ap + fill_start, - fill_size * N_X_FRAC (8), - fill_end - fill_start); - fill_start = lxi; - fill_end = rxi; - fill_size = 1; - } - else - { - /* Update fill_start */ - if (lxi > fill_start) - { - ADD_SATURATE_8 (ap + fill_start, - fill_size * N_X_FRAC (8), - lxi - fill_start); - fill_start = lxi; - } - else if (lxi < fill_start) - { - ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), - fill_start - lxi); - } - - /* Update fill_end */ - if (rxi < fill_end) - { - ADD_SATURATE_8 (ap + rxi, - fill_size * N_X_FRAC (8), - fill_end - rxi); - fill_end = rxi; - } - else if (fill_end < rxi) - { - ADD_SATURATE_8 (ap + fill_end, - N_X_FRAC (8), - rxi - fill_end); - } - fill_size++; - } - } - } - else - { - ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi); - } - - WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs)); - } - } - - if (y == b) - { - /* We're done, make sure we clean up any remaining fill. 
*/ - if (fill_start != fill_end) - { - if (fill_size == N_Y_FRAC (8)) - { - MEMSET_WRAPPED (image, ap + fill_start, - 0xff, fill_end - fill_start); - } - else - { - ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), - fill_end - fill_start); - } - } - break; - } - - if (pixman_fixed_frac (y) != Y_FRAC_LAST (8)) - { - RENDER_EDGE_STEP_SMALL (l); - RENDER_EDGE_STEP_SMALL (r); - y += STEP_Y_SMALL (8); - } - else - { - RENDER_EDGE_STEP_BIG (l); - RENDER_EDGE_STEP_BIG (r); - y += STEP_Y_BIG (8); - if (fill_start != fill_end) - { - if (fill_size == N_Y_FRAC (8)) - { - MEMSET_WRAPPED (image, ap + fill_start, - 0xff, fill_end - fill_start); - } - else - { - ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), - fill_end - fill_start); - } - - fill_start = fill_end = -1; - fill_size = 0; - } - - line += stride; - } - } -} - -#ifndef PIXMAN_FB_ACCESSORS -static -#endif -void -PIXMAN_RASTERIZE_EDGES (pixman_image_t *image, - pixman_edge_t * l, - pixman_edge_t * r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - switch (PIXMAN_FORMAT_BPP (image->bits.format)) - { - case 1: - rasterize_edges_1 (image, l, r, t, b); - break; - - case 4: - rasterize_edges_4 (image, l, r, t, b); - break; - - case 8: - rasterize_edges_8 (image, l, r, t, b); - break; - - default: - break; - } -} - -#ifndef PIXMAN_FB_ACCESSORS - -PIXMAN_EXPORT void -pixman_rasterize_edges (pixman_image_t *image, - pixman_edge_t * l, - pixman_edge_t * r, - pixman_fixed_t t, - pixman_fixed_t b) -{ - return_if_fail (image->type == BITS); - - if (image->bits.read_func || image->bits.write_func) - pixman_rasterize_edges_accessors (image, l, r, t, b); - else - pixman_rasterize_edges_no_accessors (image, l, r, t, b); -} - -#endif +/*
+ * Copyright © 2004 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Keith Packard makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include "pixman-private.h"
+#include "pixman-accessor.h"
+
+/*
+ * Step across a small sample grid gap
+ */
+#define RENDER_EDGE_STEP_SMALL(edge) \
+ { \
+ edge->x += edge->stepx_small; \
+ edge->e += edge->dx_small; \
+ if (edge->e > 0) \
+ { \
+ edge->e -= edge->dy; \
+ edge->x += edge->signdx; \
+ } \
+ }
+
+/*
+ * Step across a large sample grid gap
+ */
+#define RENDER_EDGE_STEP_BIG(edge) \
+ { \
+ edge->x += edge->stepx_big; \
+ edge->e += edge->dx_big; \
+ if (edge->e > 0) \
+ { \
+ edge->e -= edge->dy; \
+ edge->x += edge->signdx; \
+ } \
+ }
+
+#ifdef PIXMAN_FB_ACCESSORS
+#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_accessors
+#else
+#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_no_accessors
+#endif
+
+/*
+ * 4 bit alpha
+ */
+
+#define N_BITS 4
+#define RASTERIZE_EDGES rasterize_edges_4
+
+#ifndef WORDS_BIGENDIAN
+#define SHIFT_4(o) ((o) << 2)
+#else
+#define SHIFT_4(o) ((1 - (o)) << 2)
+#endif
+
+#define GET_4(x, o) (((x) >> SHIFT_4 (o)) & 0xf)
+#define PUT_4(x, o, v) \
+ (((x) & ~(0xf << SHIFT_4 (o))) | (((v) & 0xf) << SHIFT_4 (o)))
+
+#define DEFINE_ALPHA(line, x) \
+ uint8_t *__ap = (uint8_t *) line + ((x) >> 1); \
+ int __ao = (x) & 1
+
+#define STEP_ALPHA ((__ap += __ao), (__ao ^= 1))
+
+#define ADD_ALPHA(a) \
+ { \
+ uint8_t __o = READ (image, __ap); \
+ uint8_t __a = (a) + GET_4 (__o, __ao); \
+ WRITE (image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \
+ }
+
+#include "pixman-edge-imp.h"
+
+#undef ADD_ALPHA
+#undef STEP_ALPHA
+#undef DEFINE_ALPHA
+#undef RASTERIZE_EDGES
+#undef N_BITS
+
+
+/*
+ * 1 bit alpha
+ */
+
+#define N_BITS 1
+#define RASTERIZE_EDGES rasterize_edges_1
+
+#include "pixman-edge-imp.h"
+
+#undef RASTERIZE_EDGES
+#undef N_BITS
+
+/*
+ * 8 bit alpha
+ */
+
+static force_inline uint8_t
+clip255 (int x)
+{
+ if (x > 255)
+ return 255;
+
+ return x;
+}
+
+#define ADD_SATURATE_8(buf, val, length) \
+ do \
+ { \
+ int i__ = (length); \
+ uint8_t *buf__ = (buf); \
+ int val__ = (val); \
+ \
+ while (i__--) \
+ { \
+ WRITE (image, (buf__), clip255 (READ (image, (buf__)) + (val__))); \
+ (buf__)++; \
+ } \
+ } while (0)
+
+/*
+ * We want to detect the case where we add the same value to a long
+ * span of pixels. The triangles on the end are filled in while we
+ * count how many sub-pixel scanlines contribute to the middle section.
+ *
+ * +--------------------------+
+ * fill_height =| \ /
+ * +------------------+
+ * |================|
+ * fill_start fill_end
+ */
+static void
+rasterize_edges_8 (pixman_image_t *image,
+ pixman_edge_t * l,
+ pixman_edge_t * r,
+ pixman_fixed_t t,
+ pixman_fixed_t b)
+{
+ pixman_fixed_t y = t;
+ uint32_t *line;
+ int fill_start = -1, fill_end = -1;
+ int fill_size = 0;
+ uint32_t *buf = (image)->bits.bits;
+ int stride = (image)->bits.rowstride;
+ int width = (image)->bits.width;
+
+ line = buf + pixman_fixed_to_int (y) * stride;
+
+ for (;;)
+ {
+ uint8_t *ap = (uint8_t *) line;
+ pixman_fixed_t lx, rx;
+ int lxi, rxi;
+
+ /* clip X */
+ lx = l->x;
+ if (lx < 0)
+ lx = 0;
+
+ rx = r->x;
+
+ if (pixman_fixed_to_int (rx) >= width)
+ {
+ /* Use the last pixel of the scanline, covered 100%.
+ * We can't use the first pixel following the scanline,
+ * because accessing it could result in a buffer overrun.
+ */
+ rx = pixman_int_to_fixed (width) - 1;
+ }
+
+ /* Skip empty (or backwards) sections */
+ if (rx > lx)
+ {
+ int lxs, rxs;
+
+ /* Find pixel bounds for span. */
+ lxi = pixman_fixed_to_int (lx);
+ rxi = pixman_fixed_to_int (rx);
+
+ /* Sample coverage for edge pixels */
+ lxs = RENDER_SAMPLES_X (lx, 8);
+ rxs = RENDER_SAMPLES_X (rx, 8);
+
+ /* Add coverage across row */
+ if (lxi == rxi)
+ {
+ WRITE (image, ap + lxi,
+ clip255 (READ (image, ap + lxi) + rxs - lxs));
+ }
+ else
+ {
+ WRITE (image, ap + lxi,
+ clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs));
+
+ /* Move forward so that lxi/rxi is the pixel span */
+ lxi++;
+
+ /* Don't bother trying to optimize the fill unless
+ * the span is longer than 4 pixels. */
+ if (rxi - lxi > 4)
+ {
+ if (fill_start < 0)
+ {
+ fill_start = lxi;
+ fill_end = rxi;
+ fill_size++;
+ }
+ else
+ {
+ if (lxi >= fill_end || rxi < fill_start)
+ {
+ /* We're beyond what we saved, just fill it */
+ ADD_SATURATE_8 (ap + fill_start,
+ fill_size * N_X_FRAC (8),
+ fill_end - fill_start);
+ fill_start = lxi;
+ fill_end = rxi;
+ fill_size = 1;
+ }
+ else
+ {
+ /* Update fill_start */
+ if (lxi > fill_start)
+ {
+ ADD_SATURATE_8 (ap + fill_start,
+ fill_size * N_X_FRAC (8),
+ lxi - fill_start);
+ fill_start = lxi;
+ }
+ else if (lxi < fill_start)
+ {
+ ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8),
+ fill_start - lxi);
+ }
+
+ /* Update fill_end */
+ if (rxi < fill_end)
+ {
+ ADD_SATURATE_8 (ap + rxi,
+ fill_size * N_X_FRAC (8),
+ fill_end - rxi);
+ fill_end = rxi;
+ }
+ else if (fill_end < rxi)
+ {
+ ADD_SATURATE_8 (ap + fill_end,
+ N_X_FRAC (8),
+ rxi - fill_end);
+ }
+ fill_size++;
+ }
+ }
+ }
+ else
+ {
+ ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi);
+ }
+
+ WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs));
+ }
+ }
+
+ if (y == b)
+ {
+ /* We're done, make sure we clean up any remaining fill. */
+ if (fill_start != fill_end)
+ {
+ if (fill_size == N_Y_FRAC (8))
+ {
+ MEMSET_WRAPPED (image, ap + fill_start,
+ 0xff, fill_end - fill_start);
+ }
+ else
+ {
+ ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8),
+ fill_end - fill_start);
+ }
+ }
+ break;
+ }
+
+ if (pixman_fixed_frac (y) != Y_FRAC_LAST (8))
+ {
+ RENDER_EDGE_STEP_SMALL (l);
+ RENDER_EDGE_STEP_SMALL (r);
+ y += STEP_Y_SMALL (8);
+ }
+ else
+ {
+ RENDER_EDGE_STEP_BIG (l);
+ RENDER_EDGE_STEP_BIG (r);
+ y += STEP_Y_BIG (8);
+ if (fill_start != fill_end)
+ {
+ if (fill_size == N_Y_FRAC (8))
+ {
+ MEMSET_WRAPPED (image, ap + fill_start,
+ 0xff, fill_end - fill_start);
+ }
+ else
+ {
+ ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8),
+ fill_end - fill_start);
+ }
+
+ fill_start = fill_end = -1;
+ fill_size = 0;
+ }
+
+ line += stride;
+ }
+ }
+}
+
+#ifndef PIXMAN_FB_ACCESSORS
+static
+#endif
+void
+PIXMAN_RASTERIZE_EDGES (pixman_image_t *image,
+ pixman_edge_t * l,
+ pixman_edge_t * r,
+ pixman_fixed_t t,
+ pixman_fixed_t b)
+{
+ switch (PIXMAN_FORMAT_BPP (image->bits.format))
+ {
+ case 1:
+ rasterize_edges_1 (image, l, r, t, b);
+ break;
+
+ case 4:
+ rasterize_edges_4 (image, l, r, t, b);
+ break;
+
+ case 8:
+ rasterize_edges_8 (image, l, r, t, b);
+ break;
+
+ default:
+ break;
+ }
+}
+
+#ifndef PIXMAN_FB_ACCESSORS
+
+PIXMAN_EXPORT void
+pixman_rasterize_edges (pixman_image_t *image,
+ pixman_edge_t * l,
+ pixman_edge_t * r,
+ pixman_fixed_t t,
+ pixman_fixed_t b)
+{
+ return_if_fail (image->type == BITS);
+
+ if (image->bits.read_func || image->bits.write_func)
+ pixman_rasterize_edges_accessors (image, l, r, t, b);
+ else
+ pixman_rasterize_edges_no_accessors (image, l, r, t, b);
+}
+
+#endif
diff --git a/pixman/pixman/pixman-matrix.c b/pixman/pixman/pixman-matrix.c index 0b3ae78b3..8d0d97325 100644 --- a/pixman/pixman/pixman-matrix.c +++ b/pixman/pixman/pixman-matrix.c @@ -1,766 +1,766 @@ -/*
- * Copyright © 2008 Keith Packard
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that copyright
- * notice and this permission notice appear in supporting documentation, and
- * that the name of the copyright holders not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. The copyright holders make no representations
- * about the suitability of this software for any purpose. It is provided "as
- * is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- * OF THIS SOFTWARE.
- */
-
-/*
- * Matrix interfaces
- */
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <math.h>
-#include <string.h>
-#include "pixman-private.h"
-
-#define F(x) pixman_int_to_fixed (x)
-
-PIXMAN_EXPORT void
-pixman_transform_init_identity (struct pixman_transform *matrix)
-{
- int i;
-
- memset (matrix, '\0', sizeof (struct pixman_transform));
- for (i = 0; i < 3; i++)
- matrix->matrix[i][i] = F (1);
-}
-
-typedef pixman_fixed_32_32_t pixman_fixed_34_30_t;
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_point_3d (const struct pixman_transform *transform,
- struct pixman_vector * vector)
-{
- struct pixman_vector result;
- pixman_fixed_32_32_t partial;
- pixman_fixed_48_16_t v;
- int i, j;
-
- for (j = 0; j < 3; j++)
- {
- v = 0;
- for (i = 0; i < 3; i++)
- {
- partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] *
- (pixman_fixed_48_16_t) vector->vector[i]);
- v += partial >> 16;
- }
-
- if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
- return FALSE;
-
- result.vector[j] = (pixman_fixed_t) v;
- }
-
- *vector = result;
-
- if (!result.vector[2])
- return FALSE;
-
- return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_point (const struct pixman_transform *transform,
- struct pixman_vector * vector)
-{
- pixman_fixed_32_32_t partial;
- pixman_fixed_34_30_t v[3];
- pixman_fixed_48_16_t quo;
- int i, j;
-
- for (j = 0; j < 3; j++)
- {
- v[j] = 0;
-
- for (i = 0; i < 3; i++)
- {
- partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] *
- (pixman_fixed_32_32_t) vector->vector[i]);
- v[j] += partial >> 2;
- }
- }
-
- if (!(v[2] >> 16))
- return FALSE;
-
- for (j = 0; j < 2; j++)
- {
- quo = v[j] / (v[2] >> 16);
- if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16)
- return FALSE;
- vector->vector[j] = (pixman_fixed_t) quo;
- }
-
- vector->vector[2] = pixman_fixed_1;
- return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_multiply (struct pixman_transform * dst,
- const struct pixman_transform *l,
- const struct pixman_transform *r)
-{
- struct pixman_transform d;
- int dx, dy;
- int o;
-
- for (dy = 0; dy < 3; dy++)
- {
- for (dx = 0; dx < 3; dx++)
- {
- pixman_fixed_48_16_t v;
- pixman_fixed_32_32_t partial;
-
- v = 0;
- for (o = 0; o < 3; o++)
- {
- partial =
- (pixman_fixed_32_32_t) l->matrix[dy][o] *
- (pixman_fixed_32_32_t) r->matrix[o][dx];
-
- v += partial >> 16;
- }
-
- if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
- return FALSE;
-
- d.matrix[dy][dx] = (pixman_fixed_t) v;
- }
- }
-
- *dst = d;
- return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_transform_init_scale (struct pixman_transform *t,
- pixman_fixed_t sx,
- pixman_fixed_t sy)
-{
- memset (t, '\0', sizeof (struct pixman_transform));
-
- t->matrix[0][0] = sx;
- t->matrix[1][1] = sy;
- t->matrix[2][2] = F (1);
-}
-
-static pixman_fixed_t
-fixed_inverse (pixman_fixed_t x)
-{
- return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F (1)) * F (1)) / x);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_scale (struct pixman_transform *forward,
- struct pixman_transform *reverse,
- pixman_fixed_t sx,
- pixman_fixed_t sy)
-{
- struct pixman_transform t;
-
- if (sx == 0 || sy == 0)
- return FALSE;
-
- if (forward)
- {
- pixman_transform_init_scale (&t, sx, sy);
- if (!pixman_transform_multiply (forward, &t, forward))
- return FALSE;
- }
-
- if (reverse)
- {
- pixman_transform_init_scale (&t, fixed_inverse (sx),
- fixed_inverse (sy));
- if (!pixman_transform_multiply (reverse, reverse, &t))
- return FALSE;
- }
-
- return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_transform_init_rotate (struct pixman_transform *t,
- pixman_fixed_t c,
- pixman_fixed_t s)
-{
- memset (t, '\0', sizeof (struct pixman_transform));
-
- t->matrix[0][0] = c;
- t->matrix[0][1] = -s;
- t->matrix[1][0] = s;
- t->matrix[1][1] = c;
- t->matrix[2][2] = F (1);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_rotate (struct pixman_transform *forward,
- struct pixman_transform *reverse,
- pixman_fixed_t c,
- pixman_fixed_t s)
-{
- struct pixman_transform t;
-
- if (forward)
- {
- pixman_transform_init_rotate (&t, c, s);
- if (!pixman_transform_multiply (forward, &t, forward))
- return FALSE;
- }
-
- if (reverse)
- {
- pixman_transform_init_rotate (&t, c, -s);
- if (!pixman_transform_multiply (reverse, reverse, &t))
- return FALSE;
- }
-
- return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_transform_init_translate (struct pixman_transform *t,
- pixman_fixed_t tx,
- pixman_fixed_t ty)
-{
- memset (t, '\0', sizeof (struct pixman_transform));
-
- t->matrix[0][0] = F (1);
- t->matrix[0][2] = tx;
- t->matrix[1][1] = F (1);
- t->matrix[1][2] = ty;
- t->matrix[2][2] = F (1);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_translate (struct pixman_transform *forward,
- struct pixman_transform *reverse,
- pixman_fixed_t tx,
- pixman_fixed_t ty)
-{
- struct pixman_transform t;
-
- if (forward)
- {
- pixman_transform_init_translate (&t, tx, ty);
-
- if (!pixman_transform_multiply (forward, &t, forward))
- return FALSE;
- }
-
- if (reverse)
- {
- pixman_transform_init_translate (&t, -tx, -ty);
-
- if (!pixman_transform_multiply (reverse, reverse, &t))
- return FALSE;
- }
- return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_bounds (const struct pixman_transform *matrix,
- struct pixman_box16 * b)
-
-{
- struct pixman_vector v[4];
- int i;
- int x1, y1, x2, y2;
-
- v[0].vector[0] = F (b->x1);
- v[0].vector[1] = F (b->y1);
- v[0].vector[2] = F (1);
-
- v[1].vector[0] = F (b->x2);
- v[1].vector[1] = F (b->y1);
- v[1].vector[2] = F (1);
-
- v[2].vector[0] = F (b->x2);
- v[2].vector[1] = F (b->y2);
- v[2].vector[2] = F (1);
-
- v[3].vector[0] = F (b->x1);
- v[3].vector[1] = F (b->y2);
- v[3].vector[2] = F (1);
-
- for (i = 0; i < 4; i++)
- {
- if (!pixman_transform_point (matrix, &v[i]))
- return FALSE;
-
- x1 = pixman_fixed_to_int (v[i].vector[0]);
- y1 = pixman_fixed_to_int (v[i].vector[1]);
- x2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[0]));
- y2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[1]));
-
- if (i == 0)
- {
- b->x1 = x1;
- b->y1 = y1;
- b->x2 = x2;
- b->y2 = y2;
- }
- else
- {
- if (x1 < b->x1) b->x1 = x1;
- if (y1 < b->y1) b->y1 = y1;
- if (x2 > b->x2) b->x2 = x2;
- if (y2 > b->y2) b->y2 = y2;
- }
- }
-
- return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_invert (struct pixman_transform * dst,
- const struct pixman_transform *src)
-{
- struct pixman_f_transform m, r;
-
- pixman_f_transform_from_pixman_transform (&m, src);
-
- if (!pixman_f_transform_invert (&r, &m))
- return FALSE;
-
- if (!pixman_transform_from_pixman_f_transform (dst, &r))
- return FALSE;
-
- return TRUE;
-}
-
-static pixman_bool_t
-within_epsilon (pixman_fixed_t a,
- pixman_fixed_t b,
- pixman_fixed_t epsilon)
-{
- pixman_fixed_t t = a - b;
-
- if (t < 0)
- t = -t;
-
- return t <= epsilon;
-}
-
-#define EPSILON (pixman_fixed_t) (2)
-
-#define IS_SAME(a, b) (within_epsilon (a, b, EPSILON))
-#define IS_ZERO(a) (within_epsilon (a, 0, EPSILON))
-#define IS_ONE(a) (within_epsilon (a, F (1), EPSILON))
-#define IS_UNIT(a) \
- (within_epsilon (a, F (1), EPSILON) || \
- within_epsilon (a, F (-1), EPSILON) || \
- IS_ZERO (a))
-#define IS_INT(a) (IS_ZERO (pixman_fixed_frac (a)))
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_identity (const struct pixman_transform *t)
-{
- return (IS_SAME (t->matrix[0][0], t->matrix[1][1]) &&
- IS_SAME (t->matrix[0][0], t->matrix[2][2]) &&
- !IS_ZERO (t->matrix[0][0]) &&
- IS_ZERO (t->matrix[0][1]) &&
- IS_ZERO (t->matrix[0][2]) &&
- IS_ZERO (t->matrix[1][0]) &&
- IS_ZERO (t->matrix[1][2]) &&
- IS_ZERO (t->matrix[2][0]) &&
- IS_ZERO (t->matrix[2][1]));
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_scale (const struct pixman_transform *t)
-{
- return (!IS_ZERO (t->matrix[0][0]) &&
- IS_ZERO (t->matrix[0][1]) &&
- IS_ZERO (t->matrix[0][2]) &&
-
- IS_ZERO (t->matrix[1][0]) &&
- !IS_ZERO (t->matrix[1][1]) &&
- IS_ZERO (t->matrix[1][2]) &&
-
- IS_ZERO (t->matrix[2][0]) &&
- IS_ZERO (t->matrix[2][1]) &&
- !IS_ZERO (t->matrix[2][2]));
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_int_translate (const struct pixman_transform *t)
-{
- return (IS_ONE (t->matrix[0][0]) &&
- IS_ZERO (t->matrix[0][1]) &&
- IS_INT (t->matrix[0][2]) &&
-
- IS_ZERO (t->matrix[1][0]) &&
- IS_ONE (t->matrix[1][1]) &&
- IS_INT (t->matrix[1][2]) &&
-
- IS_ZERO (t->matrix[2][0]) &&
- IS_ZERO (t->matrix[2][1]) &&
- IS_ONE (t->matrix[2][2]));
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_inverse (const struct pixman_transform *a,
- const struct pixman_transform *b)
-{
- struct pixman_transform t;
-
- if (!pixman_transform_multiply (&t, a, b))
- return FALSE;
-
- return pixman_transform_is_identity (&t);
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_from_pixman_transform (struct pixman_f_transform * ft,
- const struct pixman_transform *t)
-{
- int i, j;
-
- for (j = 0; j < 3; j++)
- {
- for (i = 0; i < 3; i++)
- ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]);
- }
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_transform_from_pixman_f_transform (struct pixman_transform * t,
- const struct pixman_f_transform *ft)
-{
- int i, j;
-
- for (j = 0; j < 3; j++)
- {
- for (i = 0; i < 3; i++)
- {
- double d = ft->m[j][i];
- if (d < -32767.0 || d > 32767.0)
- return FALSE;
- d = d * 65536.0 + 0.5;
- t->matrix[j][i] = (pixman_fixed_t) floor (d);
- }
- }
-
- return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_invert (struct pixman_f_transform * dst,
- const struct pixman_f_transform *src)
-{
- double det;
- int i, j;
- static int a[3] = { 2, 2, 1 };
- static int b[3] = { 1, 0, 0 };
-
- det = 0;
- for (i = 0; i < 3; i++)
- {
- double p;
- int ai = a[i];
- int bi = b[i];
- p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] -
- src->m[ai][1] * src->m[bi][2]);
- if (i == 1)
- p = -p;
- det += p;
- }
-
- if (det == 0)
- return FALSE;
-
- det = 1 / det;
- for (j = 0; j < 3; j++)
- {
- for (i = 0; i < 3; i++)
- {
- double p;
- int ai = a[i];
- int aj = a[j];
- int bi = b[i];
- int bj = b[j];
-
- p = (src->m[ai][aj] * src->m[bi][bj] -
- src->m[ai][bj] * src->m[bi][aj]);
-
- if (((i + j) & 1) != 0)
- p = -p;
-
- dst->m[j][i] = det * p;
- }
- }
-
- return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_point (const struct pixman_f_transform *t,
- struct pixman_f_vector * v)
-{
- struct pixman_f_vector result;
- int i, j;
- double a;
-
- for (j = 0; j < 3; j++)
- {
- a = 0;
- for (i = 0; i < 3; i++)
- a += t->m[j][i] * v->v[i];
- result.v[j] = a;
- }
-
- if (!result.v[2])
- return FALSE;
-
- for (j = 0; j < 2; j++)
- v->v[j] = result.v[j] / result.v[2];
-
- v->v[2] = 1;
-
- return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_point_3d (const struct pixman_f_transform *t,
- struct pixman_f_vector * v)
-{
- struct pixman_f_vector result;
- int i, j;
- double a;
-
- for (j = 0; j < 3; j++)
- {
- a = 0;
- for (i = 0; i < 3; i++)
- a += t->m[j][i] * v->v[i];
- result.v[j] = a;
- }
-
- *v = result;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_multiply (struct pixman_f_transform * dst,
- const struct pixman_f_transform *l,
- const struct pixman_f_transform *r)
-{
- struct pixman_f_transform d;
- int dx, dy;
- int o;
-
- for (dy = 0; dy < 3; dy++)
- {
- for (dx = 0; dx < 3; dx++)
- {
- double v = 0;
- for (o = 0; o < 3; o++)
- v += l->m[dy][o] * r->m[o][dx];
- d.m[dy][dx] = v;
- }
- }
-
- *dst = d;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_init_scale (struct pixman_f_transform *t,
- double sx,
- double sy)
-{
- t->m[0][0] = sx;
- t->m[0][1] = 0;
- t->m[0][2] = 0;
- t->m[1][0] = 0;
- t->m[1][1] = sy;
- t->m[1][2] = 0;
- t->m[2][0] = 0;
- t->m[2][1] = 0;
- t->m[2][2] = 1;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_scale (struct pixman_f_transform *forward,
- struct pixman_f_transform *reverse,
- double sx,
- double sy)
-{
- struct pixman_f_transform t;
-
- if (sx == 0 || sy == 0)
- return FALSE;
-
- if (forward)
- {
- pixman_f_transform_init_scale (&t, sx, sy);
- pixman_f_transform_multiply (forward, &t, forward);
- }
-
- if (reverse)
- {
- pixman_f_transform_init_scale (&t, 1 / sx, 1 / sy);
- pixman_f_transform_multiply (reverse, reverse, &t);
- }
-
- return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_init_rotate (struct pixman_f_transform *t,
- double c,
- double s)
-{
- t->m[0][0] = c;
- t->m[0][1] = -s;
- t->m[0][2] = 0;
- t->m[1][0] = s;
- t->m[1][1] = c;
- t->m[1][2] = 0;
- t->m[2][0] = 0;
- t->m[2][1] = 0;
- t->m[2][2] = 1;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_rotate (struct pixman_f_transform *forward,
- struct pixman_f_transform *reverse,
- double c,
- double s)
-{
- struct pixman_f_transform t;
-
- if (forward)
- {
- pixman_f_transform_init_rotate (&t, c, s);
- pixman_f_transform_multiply (forward, &t, forward);
- }
-
- if (reverse)
- {
- pixman_f_transform_init_rotate (&t, c, -s);
- pixman_f_transform_multiply (reverse, reverse, &t);
- }
-
- return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_init_translate (struct pixman_f_transform *t,
- double tx,
- double ty)
-{
- t->m[0][0] = 1;
- t->m[0][1] = 0;
- t->m[0][2] = tx;
- t->m[1][0] = 0;
- t->m[1][1] = 1;
- t->m[1][2] = ty;
- t->m[2][0] = 0;
- t->m[2][1] = 0;
- t->m[2][2] = 1;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_translate (struct pixman_f_transform *forward,
- struct pixman_f_transform *reverse,
- double tx,
- double ty)
-{
- struct pixman_f_transform t;
-
- if (forward)
- {
- pixman_f_transform_init_translate (&t, tx, ty);
- pixman_f_transform_multiply (forward, &t, forward);
- }
-
- if (reverse)
- {
- pixman_f_transform_init_translate (&t, -tx, -ty);
- pixman_f_transform_multiply (reverse, reverse, &t);
- }
-
- return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_bounds (const struct pixman_f_transform *t,
- struct pixman_box16 * b)
-{
- struct pixman_f_vector v[4];
- int i;
- int x1, y1, x2, y2;
-
- v[0].v[0] = b->x1;
- v[0].v[1] = b->y1;
- v[0].v[2] = 1;
- v[1].v[0] = b->x2;
- v[1].v[1] = b->y1;
- v[1].v[2] = 1;
- v[2].v[0] = b->x2;
- v[2].v[1] = b->y2;
- v[2].v[2] = 1;
- v[3].v[0] = b->x1;
- v[3].v[1] = b->y2;
- v[3].v[2] = 1;
-
- for (i = 0; i < 4; i++)
- {
- if (!pixman_f_transform_point (t, &v[i]))
- return FALSE;
-
- x1 = floor (v[i].v[0]);
- y1 = floor (v[i].v[1]);
- x2 = ceil (v[i].v[0]);
- y2 = ceil (v[i].v[1]);
-
- if (i == 0)
- {
- b->x1 = x1;
- b->y1 = y1;
- b->x2 = x2;
- b->y2 = y2;
- }
- else
- {
- if (x1 < b->x1) b->x1 = x1;
- if (y1 < b->y1) b->y1 = y1;
- if (x2 > b->x2) b->x2 = x2;
- if (y2 > b->y2) b->y2 = y2;
- }
- }
-
- return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_f_transform_init_identity (struct pixman_f_transform *t)
-{
- int i, j;
-
- for (j = 0; j < 3; j++)
- {
- for (i = 0; i < 3; i++)
- t->m[j][i] = i == j ? 1 : 0;
- }
-}
+/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
+ */ + +/* + * Matrix interfaces + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <math.h> +#include <string.h> +#include "pixman-private.h" + +#define F(x) pixman_int_to_fixed (x) + +PIXMAN_EXPORT void +pixman_transform_init_identity (struct pixman_transform *matrix) +{ + int i; + + memset (matrix, '\0', sizeof (struct pixman_transform)); + for (i = 0; i < 3; i++) + matrix->matrix[i][i] = F (1); +} + +typedef pixman_fixed_32_32_t pixman_fixed_34_30_t; + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point_3d (const struct pixman_transform *transform, + struct pixman_vector * vector) +{ + struct pixman_vector result; + pixman_fixed_32_32_t partial; + pixman_fixed_48_16_t v; + int i, j; + + for (j = 0; j < 3; j++) + { + v = 0; + for (i = 0; i < 3; i++) + { + partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] * + (pixman_fixed_48_16_t) vector->vector[i]); + v += partial >> 16; + } + + if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) + return FALSE; + + result.vector[j] = (pixman_fixed_t) v; + } + + *vector = result; + + if (!result.vector[2]) + return FALSE; + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point (const struct pixman_transform *transform, + struct pixman_vector * vector) +{ + pixman_fixed_32_32_t partial; + pixman_fixed_34_30_t v[3]; + pixman_fixed_48_16_t quo; + int i, j; + + for (j = 0; j < 3; j++) + { + v[j] = 0; + + for (i = 0; i < 3; i++) + { + partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] * + (pixman_fixed_32_32_t) vector->vector[i]); + v[j] += partial >> 2; + } + } + + if (!(v[2] >> 16)) + return FALSE; + + for (j = 0; j < 2; j++) + { + quo = v[j] / (v[2] >> 16); + if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16) + return FALSE; + vector->vector[j] = (pixman_fixed_t) quo; + } + + vector->vector[2] = pixman_fixed_1; + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_multiply (struct pixman_transform * dst, + const struct pixman_transform *l, 
+ const struct pixman_transform *r) +{ + struct pixman_transform d; + int dx, dy; + int o; + + for (dy = 0; dy < 3; dy++) + { + for (dx = 0; dx < 3; dx++) + { + pixman_fixed_48_16_t v; + pixman_fixed_32_32_t partial; + + v = 0; + for (o = 0; o < 3; o++) + { + partial = + (pixman_fixed_32_32_t) l->matrix[dy][o] * + (pixman_fixed_32_32_t) r->matrix[o][dx]; + + v += partial >> 16; + } + + if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) + return FALSE; + + d.matrix[dy][dx] = (pixman_fixed_t) v; + } + } + + *dst = d; + return TRUE; +} + +PIXMAN_EXPORT void +pixman_transform_init_scale (struct pixman_transform *t, + pixman_fixed_t sx, + pixman_fixed_t sy) +{ + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = sx; + t->matrix[1][1] = sy; + t->matrix[2][2] = F (1); +} + +static pixman_fixed_t +fixed_inverse (pixman_fixed_t x) +{ + return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F (1)) * F (1)) / x); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_scale (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t sx, + pixman_fixed_t sy) +{ + struct pixman_transform t; + + if (sx == 0 || sy == 0) + return FALSE; + + if (forward) + { + pixman_transform_init_scale (&t, sx, sy); + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_scale (&t, fixed_inverse (sx), + fixed_inverse (sy)); + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_transform_init_rotate (struct pixman_transform *t, + pixman_fixed_t c, + pixman_fixed_t s) +{ + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = c; + t->matrix[0][1] = -s; + t->matrix[1][0] = s; + t->matrix[1][1] = c; + t->matrix[2][2] = F (1); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_rotate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t c, + pixman_fixed_t s) 
+{ + struct pixman_transform t; + + if (forward) + { + pixman_transform_init_rotate (&t, c, s); + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_rotate (&t, c, -s); + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_transform_init_translate (struct pixman_transform *t, + pixman_fixed_t tx, + pixman_fixed_t ty) +{ + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = F (1); + t->matrix[0][2] = tx; + t->matrix[1][1] = F (1); + t->matrix[1][2] = ty; + t->matrix[2][2] = F (1); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_translate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t tx, + pixman_fixed_t ty) +{ + struct pixman_transform t; + + if (forward) + { + pixman_transform_init_translate (&t, tx, ty); + + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_translate (&t, -tx, -ty); + + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_bounds (const struct pixman_transform *matrix, + struct pixman_box16 * b) + +{ + struct pixman_vector v[4]; + int i; + int x1, y1, x2, y2; + + v[0].vector[0] = F (b->x1); + v[0].vector[1] = F (b->y1); + v[0].vector[2] = F (1); + + v[1].vector[0] = F (b->x2); + v[1].vector[1] = F (b->y1); + v[1].vector[2] = F (1); + + v[2].vector[0] = F (b->x2); + v[2].vector[1] = F (b->y2); + v[2].vector[2] = F (1); + + v[3].vector[0] = F (b->x1); + v[3].vector[1] = F (b->y2); + v[3].vector[2] = F (1); + + for (i = 0; i < 4; i++) + { + if (!pixman_transform_point (matrix, &v[i])) + return FALSE; + + x1 = pixman_fixed_to_int (v[i].vector[0]); + y1 = pixman_fixed_to_int (v[i].vector[1]); + x2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[0])); + y2 = pixman_fixed_to_int 
(pixman_fixed_ceil (v[i].vector[1])); + + if (i == 0) + { + b->x1 = x1; + b->y1 = y1; + b->x2 = x2; + b->y2 = y2; + } + else + { + if (x1 < b->x1) b->x1 = x1; + if (y1 < b->y1) b->y1 = y1; + if (x2 > b->x2) b->x2 = x2; + if (y2 > b->y2) b->y2 = y2; + } + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_invert (struct pixman_transform * dst, + const struct pixman_transform *src) +{ + struct pixman_f_transform m, r; + + pixman_f_transform_from_pixman_transform (&m, src); + + if (!pixman_f_transform_invert (&r, &m)) + return FALSE; + + if (!pixman_transform_from_pixman_f_transform (dst, &r)) + return FALSE; + + return TRUE; +} + +static pixman_bool_t +within_epsilon (pixman_fixed_t a, + pixman_fixed_t b, + pixman_fixed_t epsilon) +{ + pixman_fixed_t t = a - b; + + if (t < 0) + t = -t; + + return t <= epsilon; +} + +#define EPSILON (pixman_fixed_t) (2) + +#define IS_SAME(a, b) (within_epsilon (a, b, EPSILON)) +#define IS_ZERO(a) (within_epsilon (a, 0, EPSILON)) +#define IS_ONE(a) (within_epsilon (a, F (1), EPSILON)) +#define IS_UNIT(a) \ + (within_epsilon (a, F (1), EPSILON) || \ + within_epsilon (a, F (-1), EPSILON) || \ + IS_ZERO (a)) +#define IS_INT(a) (IS_ZERO (pixman_fixed_frac (a))) + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_identity (const struct pixman_transform *t) +{ + return (IS_SAME (t->matrix[0][0], t->matrix[1][1]) && + IS_SAME (t->matrix[0][0], t->matrix[2][2]) && + !IS_ZERO (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_ZERO (t->matrix[0][2]) && + IS_ZERO (t->matrix[1][0]) && + IS_ZERO (t->matrix[1][2]) && + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1])); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_scale (const struct pixman_transform *t) +{ + return (!IS_ZERO (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_ZERO (t->matrix[0][2]) && + + IS_ZERO (t->matrix[1][0]) && + !IS_ZERO (t->matrix[1][1]) && + IS_ZERO (t->matrix[1][2]) && + + IS_ZERO (t->matrix[2][0]) && + IS_ZERO 
(t->matrix[2][1]) && + !IS_ZERO (t->matrix[2][2])); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_int_translate (const struct pixman_transform *t) +{ + return (IS_ONE (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_INT (t->matrix[0][2]) && + + IS_ZERO (t->matrix[1][0]) && + IS_ONE (t->matrix[1][1]) && + IS_INT (t->matrix[1][2]) && + + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1]) && + IS_ONE (t->matrix[2][2])); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_is_inverse (const struct pixman_transform *a, + const struct pixman_transform *b) +{ + struct pixman_transform t; + + if (!pixman_transform_multiply (&t, a, b)) + return FALSE; + + return pixman_transform_is_identity (&t); +} + +PIXMAN_EXPORT void +pixman_f_transform_from_pixman_transform (struct pixman_f_transform * ft, + const struct pixman_transform *t) +{ + int i, j; + + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]); + } +} + +PIXMAN_EXPORT pixman_bool_t +pixman_transform_from_pixman_f_transform (struct pixman_transform * t, + const struct pixman_f_transform *ft) +{ + int i, j; + + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + { + double d = ft->m[j][i]; + if (d < -32767.0 || d > 32767.0) + return FALSE; + d = d * 65536.0 + 0.5; + t->matrix[j][i] = (pixman_fixed_t) floor (d); + } + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_invert (struct pixman_f_transform * dst, + const struct pixman_f_transform *src) +{ + double det; + int i, j; + static int a[3] = { 2, 2, 1 }; + static int b[3] = { 1, 0, 0 }; + + det = 0; + for (i = 0; i < 3; i++) + { + double p; + int ai = a[i]; + int bi = b[i]; + p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] - + src->m[ai][1] * src->m[bi][2]); + if (i == 1) + p = -p; + det += p; + } + + if (det == 0) + return FALSE; + + det = 1 / det; + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + { + double p; + int ai = a[i]; + int aj = a[j]; + int 
bi = b[i]; + int bj = b[j]; + + p = (src->m[ai][aj] * src->m[bi][bj] - + src->m[ai][bj] * src->m[bi][aj]); + + if (((i + j) & 1) != 0) + p = -p; + + dst->m[j][i] = det * p; + } + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_point (const struct pixman_f_transform *t, + struct pixman_f_vector * v) +{ + struct pixman_f_vector result; + int i, j; + double a; + + for (j = 0; j < 3; j++) + { + a = 0; + for (i = 0; i < 3; i++) + a += t->m[j][i] * v->v[i]; + result.v[j] = a; + } + + if (!result.v[2]) + return FALSE; + + for (j = 0; j < 2; j++) + v->v[j] = result.v[j] / result.v[2]; + + v->v[2] = 1; + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_point_3d (const struct pixman_f_transform *t, + struct pixman_f_vector * v) +{ + struct pixman_f_vector result; + int i, j; + double a; + + for (j = 0; j < 3; j++) + { + a = 0; + for (i = 0; i < 3; i++) + a += t->m[j][i] * v->v[i]; + result.v[j] = a; + } + + *v = result; +} + +PIXMAN_EXPORT void +pixman_f_transform_multiply (struct pixman_f_transform * dst, + const struct pixman_f_transform *l, + const struct pixman_f_transform *r) +{ + struct pixman_f_transform d; + int dx, dy; + int o; + + for (dy = 0; dy < 3; dy++) + { + for (dx = 0; dx < 3; dx++) + { + double v = 0; + for (o = 0; o < 3; o++) + v += l->m[dy][o] * r->m[o][dx]; + d.m[dy][dx] = v; + } + } + + *dst = d; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_scale (struct pixman_f_transform *t, + double sx, + double sy) +{ + t->m[0][0] = sx; + t->m[0][1] = 0; + t->m[0][2] = 0; + t->m[1][0] = 0; + t->m[1][1] = sy; + t->m[1][2] = 0; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_scale (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double sx, + double sy) +{ + struct pixman_f_transform t; + + if (sx == 0 || sy == 0) + return FALSE; + + if (forward) + { + pixman_f_transform_init_scale (&t, sx, sy); + pixman_f_transform_multiply (forward, &t, 
forward); + } + + if (reverse) + { + pixman_f_transform_init_scale (&t, 1 / sx, 1 / sy); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_rotate (struct pixman_f_transform *t, + double c, + double s) +{ + t->m[0][0] = c; + t->m[0][1] = -s; + t->m[0][2] = 0; + t->m[1][0] = s; + t->m[1][1] = c; + t->m[1][2] = 0; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_rotate (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double c, + double s) +{ + struct pixman_f_transform t; + + if (forward) + { + pixman_f_transform_init_rotate (&t, c, s); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { + pixman_f_transform_init_rotate (&t, c, -s); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_translate (struct pixman_f_transform *t, + double tx, + double ty) +{ + t->m[0][0] = 1; + t->m[0][1] = 0; + t->m[0][2] = tx; + t->m[1][0] = 0; + t->m[1][1] = 1; + t->m[1][2] = ty; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_translate (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double tx, + double ty) +{ + struct pixman_f_transform t; + + if (forward) + { + pixman_f_transform_init_translate (&t, tx, ty); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { + pixman_f_transform_init_translate (&t, -tx, -ty); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_f_transform_bounds (const struct pixman_f_transform *t, + struct pixman_box16 * b) +{ + struct pixman_f_vector v[4]; + int i; + int x1, y1, x2, y2; + + v[0].v[0] = b->x1; + v[0].v[1] = b->y1; + v[0].v[2] = 1; + v[1].v[0] = b->x2; + v[1].v[1] = b->y1; + v[1].v[2] = 1; + v[2].v[0] = b->x2; + 
v[2].v[1] = b->y2; + v[2].v[2] = 1; + v[3].v[0] = b->x1; + v[3].v[1] = b->y2; + v[3].v[2] = 1; + + for (i = 0; i < 4; i++) + { + if (!pixman_f_transform_point (t, &v[i])) + return FALSE; + + x1 = floor (v[i].v[0]); + y1 = floor (v[i].v[1]); + x2 = ceil (v[i].v[0]); + y2 = ceil (v[i].v[1]); + + if (i == 0) + { + b->x1 = x1; + b->y1 = y1; + b->x2 = x2; + b->y2 = y2; + } + else + { + if (x1 < b->x1) b->x1 = x1; + if (y1 < b->y1) b->y1 = y1; + if (x2 > b->x2) b->x2 = x2; + if (y2 > b->y2) b->y2 = y2; + } + } + + return TRUE; +} + +PIXMAN_EXPORT void +pixman_f_transform_init_identity (struct pixman_f_transform *t) +{ + int i, j; + + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + t->m[j][i] = i == j ? 1 : 0; + } +} diff --git a/pixman/pixman/pixman-timer.c b/pixman/pixman/pixman-timer.c index f5ae18e89..c45d7b4fa 100644 --- a/pixman/pixman/pixman-timer.c +++ b/pixman/pixman/pixman-timer.c @@ -1,66 +1,66 @@ -/* - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Red Hat not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. Red Hat makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. 
- * - * RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL RED HAT - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <stdlib.h> -#include <stdio.h> -#include "pixman-private.h" - -#ifdef PIXMAN_TIMERS - -static pixman_timer_t *timers; - -static void -dump_timers (void) -{ - pixman_timer_t *timer; - - for (timer = timers; timer != NULL; timer = timer->next) - { - printf ("%s: total: %llu n: %llu avg: %f\n", - timer->name, - timer->total, - timer->n_times, - timer->total / (double)timer->n_times); - } -} - -void -pixman_timer_register (pixman_timer_t *timer) -{ - static int initialized; - - int atexit (void (*function)(void)); - - if (!initialized) - { - atexit (dump_timers); - initialized = 1; - } - - timer->next = timers; - timers = timer; -} - -#endif +/*
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. Red Hat makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL RED HAT
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "pixman-private.h"
+
+#ifdef PIXMAN_TIMERS
+
+static pixman_timer_t *timers;
+
+static void
+dump_timers (void)
+{
+ pixman_timer_t *timer;
+
+ for (timer = timers; timer != NULL; timer = timer->next)
+ {
+ printf ("%s: total: %llu n: %llu avg: %f\n",
+ timer->name,
+ timer->total,
+ timer->n_times,
+ timer->total / (double)timer->n_times);
+ }
+}
+
+void
+pixman_timer_register (pixman_timer_t *timer)
+{
+ static int initialized;
+
+ int atexit (void (*function)(void));
+
+ if (!initialized)
+ {
+ atexit (dump_timers);
+ initialized = 1;
+ }
+
+ timer->next = timers;
+ timers = timer;
+}
+
+#endif
diff --git a/pixman/pixman/pixman-version.h.in b/pixman/pixman/pixman-version.h.in index 256b2e6f1..022bf1a3c 100644 --- a/pixman/pixman/pixman-version.h.in +++ b/pixman/pixman/pixman-version.h.in @@ -1,50 +1,50 @@ -/* - * Copyright © 2008 Red Hat, Inc. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Author: Carl D. 
Worth <cworth@cworth.org> - */ - -#ifndef PIXMAN_VERSION_H__ -#define PIXMAN_VERSION_H__ - -#ifndef PIXMAN_H__ -# error pixman-version.h should only be included by pixman.h -#endif - -#define PIXMAN_VERSION_MAJOR @PIXMAN_VERSION_MAJOR@ -#define PIXMAN_VERSION_MINOR @PIXMAN_VERSION_MINOR@ -#define PIXMAN_VERSION_MICRO @PIXMAN_VERSION_MICRO@ - -#define PIXMAN_VERSION_STRING "@PIXMAN_VERSION_MAJOR@.@PIXMAN_VERSION_MINOR@.@PIXMAN_VERSION_MICRO@" - -#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \ - ((major) * 10000) \ - + ((minor) * 100) \ - + ((micro) * 1)) - -#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \ - PIXMAN_VERSION_MAJOR, \ - PIXMAN_VERSION_MINOR, \ - PIXMAN_VERSION_MICRO) - -#endif /* PIXMAN_VERSION_H__ */ +/*
+ * Copyright © 2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Carl D. Worth <cworth@cworth.org>
+ */
+
+#ifndef PIXMAN_VERSION_H__
+#define PIXMAN_VERSION_H__
+
+#ifndef PIXMAN_H__
+# error pixman-version.h should only be included by pixman.h
+#endif
+
+#define PIXMAN_VERSION_MAJOR @PIXMAN_VERSION_MAJOR@
+#define PIXMAN_VERSION_MINOR @PIXMAN_VERSION_MINOR@
+#define PIXMAN_VERSION_MICRO @PIXMAN_VERSION_MICRO@
+
+#define PIXMAN_VERSION_STRING "@PIXMAN_VERSION_MAJOR@.@PIXMAN_VERSION_MINOR@.@PIXMAN_VERSION_MICRO@"
+
+#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
+ ((major) * 10000) \
+ + ((minor) * 100) \
+ + ((micro) * 1))
+
+#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \
+ PIXMAN_VERSION_MAJOR, \
+ PIXMAN_VERSION_MINOR, \
+ PIXMAN_VERSION_MICRO)
+
+#endif /* PIXMAN_VERSION_H__ */
diff --git a/pixman/test/fuzzer-find-diff.pl b/pixman/test/fuzzer-find-diff.pl index a43f6fb83..53d9b8de1 100644 --- a/pixman/test/fuzzer-find-diff.pl +++ b/pixman/test/fuzzer-find-diff.pl @@ -1,68 +1,68 @@ -#!/usr/bin/env perl
-
-$usage = "Usage:
- fuzzer-find-diff.pl reference_binary new_binary [number_of_tests_to_run]
-
-The first two input arguments are the commands to run the test programs
-based on fuzzer_test_main() function from 'util.c' (preferably they should
-be statically compiled, this can be achieved via '--disable-shared' pixman
-configure option). The third optional argument is the number of test rounds
-to run (if not specified, then testing runs infinitely or until some problem
-is detected).
-
-Usage examples:
- fuzzer-find-diff.pl ./blitters-test-with-sse-disabled ./blitters-test 9000000
- fuzzer-find-diff.pl ./blitters-test \"ssh ppc64_host /path/to/blitters-test\"
-";
-
-$#ARGV >= 1 or die $usage;
-
-$batch_size = 10000;
-
-if ($#ARGV >= 2) {
- $number_of_tests = int($ARGV[2]);
-} else {
- $number_of_tests = -1
-}
-
-sub test_range {
- my $min = shift;
- my $max = shift;
-
- if (`$ARGV[0] $min $max 2>/dev/null` eq `$ARGV[1] $min $max 2>/dev/null`) {
- return;
- }
-
- while ($max != $min + 1) {
- my $avg = int(($min + $max) / 2);
- my $res1 = `$ARGV[0] $min $avg 2>/dev/null`;
- my $res2 = `$ARGV[1] $min $avg 2>/dev/null`;
- if ($res1 ne $res2) {
- $max = $avg;
- } else {
- $min = $avg;
- }
- }
- return $max;
-}
-
-$base = 1;
-while ($number_of_tests <= 0 || $base <= $number_of_tests) {
- printf("testing %-12d\r", $base + $batch_size - 1);
- my $res = test_range($base, $base + $batch_size - 1);
- if ($res) {
- printf("Failure: results are different for test %d:\n", $res);
-
- printf("\n-- ref --\n");
- print `$ARGV[0] $res`;
- printf("-- new --\n");
- print `$ARGV[1] $res`;
-
- printf("The problematic conditions can be reproduced by running:\n");
- printf("$ARGV[1] %d\n", $res);
-
- exit(1);
- }
- $base += $batch_size;
-}
-printf("Success: %d tests finished\n", $base - 1);
+#!/usr/bin/env perl + +$usage = "Usage: + fuzzer-find-diff.pl reference_binary new_binary [number_of_tests_to_run] + +The first two input arguments are the commands to run the test programs +based on fuzzer_test_main() function from 'util.c' (preferably they should +be statically compiled, this can be achieved via '--disable-shared' pixman +configure option). The third optional argument is the number of test rounds +to run (if not specified, then testing runs infinitely or until some problem +is detected). + +Usage examples: + fuzzer-find-diff.pl ./blitters-test-with-sse-disabled ./blitters-test 9000000 + fuzzer-find-diff.pl ./blitters-test \"ssh ppc64_host /path/to/blitters-test\" +"; + +$#ARGV >= 1 or die $usage; + +$batch_size = 10000; + +if ($#ARGV >= 2) { + $number_of_tests = int($ARGV[2]); +} else { + $number_of_tests = -1 +} + +sub test_range { + my $min = shift; + my $max = shift; + + if (`$ARGV[0] $min $max 2>/dev/null` eq `$ARGV[1] $min $max 2>/dev/null`) { + return; + } + + while ($max != $min + 1) { + my $avg = int(($min + $max) / 2); + my $res1 = `$ARGV[0] $min $avg 2>/dev/null`; + my $res2 = `$ARGV[1] $min $avg 2>/dev/null`; + if ($res1 ne $res2) { + $max = $avg; + } else { + $min = $avg; + } + } + return $max; +} + +$base = 1; +while ($number_of_tests <= 0 || $base <= $number_of_tests) { + printf("testing %-12d\r", $base + $batch_size - 1); + my $res = test_range($base, $base + $batch_size - 1); + if ($res) { + printf("Failure: results are different for test %d:\n", $res); + + printf("\n-- ref --\n"); + print `$ARGV[0] $res`; + printf("-- new --\n"); + print `$ARGV[1] $res`; + + printf("The problematic conditions can be reproduced by running:\n"); + printf("$ARGV[1] %d\n", $res); + + exit(1); + } + $base += $batch_size; +} +printf("Success: %d tests finished\n", $base - 1); diff --git a/pixman/test/region-test.c b/pixman/test/region-test.c index 9d5a41eb9..a1fc4a837 100644 --- a/pixman/test/region-test.c +++ b/pixman/test/region-test.c @@ -1,123 
+1,123 @@ -#include <assert.h> -#include <stdlib.h> -#include <stdio.h> -#include "utils.h" - -int -main () -{ - pixman_region32_t r1; - pixman_region32_t r2; - pixman_region32_t r3; - pixman_box32_t boxes[] = { - { 10, 10, 20, 20 }, - { 30, 30, 30, 40 }, - { 50, 45, 60, 44 }, - }; - pixman_box32_t boxes2[] = { - { 2, 6, 7, 6 }, - { 4, 1, 6, 7 }, - }; - pixman_box32_t boxes3[] = { - { 2, 6, 7, 6 }, - { 4, 1, 6, 1 }, - }; - int i, j; - pixman_box32_t *b; - pixman_image_t *image, *fill; - pixman_color_t white = { - 0xffff, - 0xffff, - 0xffff, - 0xffff - }; - - /* This used to go into an infinite loop before pixman-region.c - * was fixed to not use explict "short" variables - */ - pixman_region32_init_rect (&r1, 0, 0, 20, 64000); - pixman_region32_init_rect (&r2, 0, 0, 20, 64000); - pixman_region32_init_rect (&r3, 0, 0, 20, 64000); - - pixman_region32_subtract (&r1, &r2, &r3); - - - /* This would produce a region containing an empty - * rectangle in it. Such regions are considered malformed, - * but using an empty rectangle for initialization should - * work. - */ - pixman_region32_init_rects (&r1, boxes, 3); - - b = pixman_region32_rectangles (&r1, &i); - - assert (i == 1); - - while (i--) - { - assert (b[i].x1 < b[i].x2); - assert (b[i].y1 < b[i].y2); - } - - /* This would produce a rectangle containing the bounding box - * of the two rectangles. The correct result is to eliminate - * the broken rectangle. 
- */ - pixman_region32_init_rects (&r1, boxes2, 2); - - b = pixman_region32_rectangles (&r1, &i); - - assert (i == 1); - - assert (b[0].x1 == 4); - assert (b[0].y1 == 1); - assert (b[0].x2 == 6); - assert (b[0].y2 == 7); - - /* This should produce an empty region */ - pixman_region32_init_rects (&r1, boxes3, 2); - - b = pixman_region32_rectangles (&r1, &i); - - assert (i == 0); - - fill = pixman_image_create_solid_fill (&white); - for (i = 0; i < 100; i++) - { - int image_size = 128; - - pixman_region32_init (&r1); - - /* Add some random rectangles */ - for (j = 0; j < 64; j++) - pixman_region32_union_rect (&r1, &r1, - lcg_rand_n (image_size), - lcg_rand_n (image_size), - lcg_rand_n (25), - lcg_rand_n (25)); - - /* Clip to image size */ - pixman_region32_init_rect (&r2, 0, 0, image_size, image_size); - pixman_region32_intersect (&r1, &r1, &r2); - pixman_region32_fini (&r2); - - /* render region to a1 mask */ - image = pixman_image_create_bits (PIXMAN_a1, image_size, image_size, NULL, 0); - pixman_image_set_clip_region32 (image, &r1); - pixman_image_composite32 (PIXMAN_OP_SRC, - fill, NULL, image, - 0, 0, 0, 0, 0, 0, - image_size, image_size); - pixman_region32_init_from_image (&r2, image); - - pixman_image_unref (image); - - assert (pixman_region32_equal (&r1, &r2)); - pixman_region32_fini (&r1); - pixman_region32_fini (&r2); - - } - pixman_image_unref (fill); - - return 0; -} +#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+int
+main ()
+{ /* Region regression checks, then a randomized region -> a1 image -> region round trip. */
+ pixman_region32_t r1;
+ pixman_region32_t r2;
+ pixman_region32_t r3;
+ pixman_box32_t boxes[] = {
+ { 10, 10, 20, 20 }, /* the only box with x1 < x2 and y1 < y2 */
+ { 30, 30, 30, 40 }, /* x1 == x2 */
+ { 50, 45, 60, 44 }, /* y1 > y2 */
+ };
+ pixman_box32_t boxes2[] = {
+ { 2, 6, 7, 6 }, /* y1 == y2 */
+ { 4, 1, 6, 7 }, /* expected to be the single surviving rectangle */
+ };
+ pixman_box32_t boxes3[] = {
+ { 2, 6, 7, 6 }, /* y1 == y2 */
+ { 4, 1, 6, 1 }, /* y1 == y2 */
+ };
+ int i, j;
+ pixman_box32_t *b;
+ pixman_image_t *image, *fill;
+ pixman_color_t white = {
+ 0xffff,
+ 0xffff,
+ 0xffff,
+ 0xffff
+ };
+
+ /* This used to go into an infinite loop before pixman-region.c
+ * was fixed to not use explicit "short" variables.
+ * Note that 64000 does not fit in a signed 16-bit value.
+ * Nothing is asserted here: the check is that the call returns.
+ */
+ pixman_region32_init_rect (&r1, 0, 0, 20, 64000);
+ pixman_region32_init_rect (&r2, 0, 0, 20, 64000);
+ pixman_region32_init_rect (&r3, 0, 0, 20, 64000);
+
+ pixman_region32_subtract (&r1, &r2, &r3);
+
+
+ /* This would produce a region containing an empty
+ * rectangle in it. Such regions are considered malformed,
+ * but using an empty rectangle for initialization should
+ * work.
+ */
+ pixman_region32_init_rects (&r1, boxes, 3);
+
+ b = pixman_region32_rectangles (&r1, &i);
+
+ assert (i == 1);
+
+ while (i--) /* every rectangle left in the region must have positive width and height */
+ {
+ assert (b[i].x1 < b[i].x2);
+ assert (b[i].y1 < b[i].y2);
+ }
+
+ /* This would produce a rectangle containing the bounding box
+ * of the two rectangles. The correct result is to eliminate
+ * the broken rectangle, leaving exactly boxes2[1].
+ */
+ pixman_region32_init_rects (&r1, boxes2, 2);
+
+ b = pixman_region32_rectangles (&r1, &i);
+
+ assert (i == 1);
+
+ assert (b[0].x1 == 4);
+ assert (b[0].y1 == 1);
+ assert (b[0].x2 == 6);
+ assert (b[0].y2 == 7);
+
+ /* This should produce an empty region */
+ pixman_region32_init_rects (&r1, boxes3, 2);
+
+ b = pixman_region32_rectangles (&r1, &i);
+
+ assert (i == 0);
+
+ fill = pixman_image_create_solid_fill (&white);
+ for (i = 0; i < 100; i++) /* 100 independent round-trip iterations */
+ {
+ int image_size = 128;
+
+ pixman_region32_init (&r1);
+
+ /* Add some random rectangles */
+ for (j = 0; j < 64; j++)
+ pixman_region32_union_rect (&r1, &r1,
+ lcg_rand_n (image_size),
+ lcg_rand_n (image_size),
+ lcg_rand_n (25),
+ lcg_rand_n (25));
+
+ /* Clip to image size */
+ pixman_region32_init_rect (&r2, 0, 0, image_size, image_size);
+ pixman_region32_intersect (&r1, &r1, &r2);
+ pixman_region32_fini (&r2);
+
+ /* render region to a1 mask */
+ image = pixman_image_create_bits (PIXMAN_a1, image_size, image_size, NULL, 0);
+ pixman_image_set_clip_region32 (image, &r1);
+ pixman_image_composite32 (PIXMAN_OP_SRC,
+ fill, NULL, image,
+ 0, 0, 0, 0, 0, 0,
+ image_size, image_size);
+ pixman_region32_init_from_image (&r2, image); /* r2 is re-initialized from the rendered image */
+
+ pixman_image_unref (image);
+
+ assert (pixman_region32_equal (&r1, &r2)); /* the region built from the image must equal the clip region */
+ pixman_region32_fini (&r1);
+ pixman_region32_fini (&r2);
+
+ }
+ pixman_image_unref (fill);
+
+ return 0;
+}
|