Diffstat (limited to 'pixman')
-rw-r--r--  pixman/Makefile.am                 264
-rw-r--r--  pixman/RELEASING                   114
-rw-r--r--  pixman/configure.ac               1642
-rw-r--r--  pixman/pixman/Makefile.am          254
-rw-r--r--  pixman/pixman/Makefile.win32       292
-rw-r--r--  pixman/pixman/config.h             227
-rw-r--r--  pixman/pixman/makefile              76
-rw-r--r--  pixman/pixman/pixman-compiler.h     18
-rw-r--r--  pixman/pixman/pixman-fast-path.c  4456
-rw-r--r--  pixman/pixman/pixman-fast-path.h  2044
-rw-r--r--  pixman/pixman/pixman-general.c     622
-rw-r--r--  pixman/pixman/pixman-image.c      1562
-rw-r--r--  pixman/pixman/pixman-mmx.c          24
-rw-r--r--  pixman/pixman/pixman-sse2.c      12154
-rw-r--r--  pixman/pixman/pixman-trap.c          4
-rw-r--r--  pixman/pixman/pixman-version.h      50
-rw-r--r--  pixman/test/Makefile.am             96
-rw-r--r--  pixman/test/composite.c           1814
-rw-r--r--  pixman/test/fetch-test.c           392
-rw-r--r--  pixman/test/stress-test.c         1710
-rw-r--r--  pixman/test/trap-crasher.c          54
21 files changed, 14125 insertions, 13744 deletions
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index f479a6632..7eb9f59ac 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -1,132 +1,132 @@
-SUBDIRS = pixman demos test
-
-pkgconfigdir=$(libdir)/pkgconfig
-pkgconfig_DATA=pixman-1.pc
-
-$(pkgconfig_DATA): pixman-1.pc.in
-
-snapshot:
- distdir="$(distdir)-`date '+%Y%m%d'`"; \
- test -d "$(srcdir)/.git" && distdir=$$distdir-`cd "$(srcdir)" && git rev-parse HEAD | cut -c 1-6`; \
- $(MAKE) $(AM_MAKEFLAGS) distdir="$$distdir" dist
-
-GPGKEY=6FF7C1A8
-USERNAME=$$USER
-RELEASE_OR_SNAPSHOT = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo release; else echo snapshot; fi)
-RELEASE_CAIRO_HOST = $(USERNAME)@cairographics.org
-RELEASE_CAIRO_DIR = /srv/cairo.freedesktop.org/www/releases
-RELEASE_CAIRO_URL = http://cairographics.org/releases
-RELEASE_XORG_URL = http://xorg.freedesktop.org/archive/individual/lib
-RELEASE_XORG_HOST = $(USERNAME)@xorg.freedesktop.org
-RELEASE_XORG_DIR = /srv/xorg.freedesktop.org/archive/individual/lib
-RELEASE_ANNOUNCE_LIST = cairo-announce@cairographics.org, xorg-announce@lists.freedesktop.org
-
-tar_gz = $(PACKAGE)-$(VERSION).tar.gz
-tar_bz2 = $(PACKAGE)-$(VERSION).tar.bz2
-
-sha1_tgz = $(tar_gz).sha1
-md5_tgz = $(tar_gz).md5
-
-sha1_tbz2 = $(tar_bz2).sha1
-md5_tbz2 = $(tar_bz2).md5
-
-gpg_file = $(sha1_tgz).asc
-
-$(sha1_tgz): $(tar_gz)
- sha1sum $^ > $@
-
-$(md5_tgz): $(tar_gz)
- md5sum $^ > $@
-
-$(sha1_tbz2): $(tar_bz2)
- sha1sum $^ > $@
-
-$(md5_tbz2): $(tar_bz2)
- md5sum $^ > $@
-
-$(gpg_file): $(sha1_tgz)
- @echo "Please enter your GPG password to sign the checksum."
- gpg --armor --sign $^
-
-HASHFILES = $(sha1_tgz) $(sha1_tbz2) $(md5_tgz) $(md5_tbz2)
-
-release-verify-newer:
- @echo -n "Checking that no $(VERSION) release already exists at $(RELEASE_XORG_HOST)..."
- @ssh $(RELEASE_XORG_HOST) test ! -e $(RELEASE_XORG_DIR)/$(tar_gz) \
- || (echo "Ouch." && echo "Found: $(RELEASE_XORG_HOST):$(RELEASE_XORG_DIR)/$(tar_gz)" \
- && echo "Refusing to try to generate a new release of the same name." \
- && false)
- @ssh $(RELEASE_CAIRO_HOST) test ! -e $(RELEASE_CAIRO_DIR)/$(tar_gz) \
- || (echo "Ouch." && echo "Found: $(RELEASE_CAIRO_HOST):$(RELEASE_CAIRO_DIR)/$(tar_gz)" \
- && echo "Refusing to try to generate a new release of the same name." \
- && false)
- @echo "Good."
-
-release-remove-old:
- $(RM) $(tar_gz) $(tar_bz2) $(HASHFILES) $(gpg_file)
-
-ensure-prev:
-	@if test "x$(PREV)" = "x"; then \
- echo "" && \
- echo "You must set the PREV variable on the make command line to" && \
- echo "the last version." && \
- echo "" && \
- echo "For example:" && \
- echo " make PREV=0.7.3" && \
- echo "" && \
- false; \
- fi
-
-release-check: ensure-prev release-verify-newer release-remove-old distcheck
-
-release-tag:
- git tag -u $(GPGKEY) -m "$(PACKAGE) $(VERSION) release" $(PACKAGE)-$(VERSION)
-
-release-upload: release-check $(tar_gz) $(tar_bz2) $(sha1_tgz) $(sha1_tbz2) $(md5_tgz) $(gpg_file)
- mkdir -p releases
- scp $(tar_gz) $(sha1_tgz) $(gpg_file) $(RELEASE_CAIRO_HOST):$(RELEASE_CAIRO_DIR)
- scp $(tar_gz) $(tar_bz2) $(RELEASE_XORG_HOST):$(RELEASE_XORG_DIR)
- ssh $(RELEASE_CAIRO_HOST) "rm -f $(RELEASE_CAIRO_DIR)/LATEST-$(PACKAGE)-[0-9]* && ln -s $(tar_gz) $(RELEASE_CAIRO_DIR)/LATEST-$(PACKAGE)-$(VERSION)"
-
-release-publish-message: $(HASHFILES) ensure-prev
- @echo "Please follow the instructions in RELEASING to push stuff out and"
- @echo "send out the announcement mails. Here is the excerpt you need:"
- @echo ""
- @echo "Lists: $(RELEASE_ANNOUNCE_LIST)"
- @echo "Subject: [ANNOUNCE] $(PACKAGE) release $(VERSION) now available"
- @echo "============================== CUT HERE =============================="
- @echo "A new $(PACKAGE) release $(VERSION) is now available"
- @echo ""
- @echo "tar.gz:"
- @echo " $(RELEASE_CAIRO_URL)/$(tar_gz)"
- @echo " $(RELEASE_XORG_URL)/$(tar_gz)"
- @echo ""
- @echo "tar.bz2:"
- @echo " $(RELEASE_XORG_URL)/$(tar_bz2)"
- @echo ""
- @echo "Hashes:"
- @echo -n " MD5: "
- @cat $(md5_tgz)
- @echo -n " MD5: "
- @cat $(md5_tbz2)
- @echo -n " SHA1: "
- @cat $(sha1_tgz)
- @echo -n " SHA1: "
- @cat $(sha1_tbz2)
- @echo ""
- @echo "GPG signature:"
- @echo " $(RELEASE_CAIRO_URL)/$(gpg_file)"
- @echo " (signed by `git config --get user.name` <`git config --get user.email`>)"
- @echo ""
- @echo "Git:"
- @echo " git://git.freedesktop.org/git/pixman"
- @echo " tag: $(PACKAGE)-$(VERSION)"
- @echo ""
- @echo "Log:"
- @git log --no-merges "$(PACKAGE)-$(PREV)".."$(PACKAGE)-$(VERSION)" | git shortlog | awk '{ printf "\t"; print ; }' | cut -b1-80
- @echo "============================== CUT HERE =============================="
- @echo ""
-
-release-publish: release-upload release-tag release-publish-message
-
-.PHONY: release-upload release-publish release-publish-message release-tag
+SUBDIRS = pixman demos test
+
+pkgconfigdir=$(libdir)/pkgconfig
+pkgconfig_DATA=pixman-1.pc
+
+$(pkgconfig_DATA): pixman-1.pc.in
+
+snapshot:
+ distdir="$(distdir)-`date '+%Y%m%d'`"; \
+ test -d "$(srcdir)/.git" && distdir=$$distdir-`cd "$(srcdir)" && git rev-parse HEAD | cut -c 1-6`; \
+ $(MAKE) $(AM_MAKEFLAGS) distdir="$$distdir" dist
+
+GPGKEY=6FF7C1A8
+USERNAME=$$USER
+RELEASE_OR_SNAPSHOT = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo release; else echo snapshot; fi)
+RELEASE_CAIRO_HOST = $(USERNAME)@cairographics.org
+RELEASE_CAIRO_DIR = /srv/cairo.freedesktop.org/www/releases
+RELEASE_CAIRO_URL = http://cairographics.org/releases
+RELEASE_XORG_URL = http://xorg.freedesktop.org/archive/individual/lib
+RELEASE_XORG_HOST = $(USERNAME)@xorg.freedesktop.org
+RELEASE_XORG_DIR = /srv/xorg.freedesktop.org/archive/individual/lib
+RELEASE_ANNOUNCE_LIST = cairo-announce@cairographics.org, xorg-announce@lists.freedesktop.org
+
+tar_gz = $(PACKAGE)-$(VERSION).tar.gz
+tar_bz2 = $(PACKAGE)-$(VERSION).tar.bz2
+
+sha1_tgz = $(tar_gz).sha1
+md5_tgz = $(tar_gz).md5
+
+sha1_tbz2 = $(tar_bz2).sha1
+md5_tbz2 = $(tar_bz2).md5
+
+gpg_file = $(sha1_tgz).asc
+
+$(sha1_tgz): $(tar_gz)
+ sha1sum $^ > $@
+
+$(md5_tgz): $(tar_gz)
+ md5sum $^ > $@
+
+$(sha1_tbz2): $(tar_bz2)
+ sha1sum $^ > $@
+
+$(md5_tbz2): $(tar_bz2)
+ md5sum $^ > $@
+
+$(gpg_file): $(sha1_tgz)
+ @echo "Please enter your GPG password to sign the checksum."
+ gpg --armor --sign $^
+
+HASHFILES = $(sha1_tgz) $(sha1_tbz2) $(md5_tgz) $(md5_tbz2)
+
+release-verify-newer:
+ @echo -n "Checking that no $(VERSION) release already exists at $(RELEASE_XORG_HOST)..."
+ @ssh $(RELEASE_XORG_HOST) test ! -e $(RELEASE_XORG_DIR)/$(tar_gz) \
+ || (echo "Ouch." && echo "Found: $(RELEASE_XORG_HOST):$(RELEASE_XORG_DIR)/$(tar_gz)" \
+ && echo "Refusing to try to generate a new release of the same name." \
+ && false)
+ @ssh $(RELEASE_CAIRO_HOST) test ! -e $(RELEASE_CAIRO_DIR)/$(tar_gz) \
+ || (echo "Ouch." && echo "Found: $(RELEASE_CAIRO_HOST):$(RELEASE_CAIRO_DIR)/$(tar_gz)" \
+ && echo "Refusing to try to generate a new release of the same name." \
+ && false)
+ @echo "Good."
+
+release-remove-old:
+ $(RM) $(tar_gz) $(tar_bz2) $(HASHFILES) $(gpg_file)
+
+ensure-prev:
+	@if test "x$(PREV)" = "x"; then \
+ echo "" && \
+ echo "You must set the PREV variable on the make command line to" && \
+ echo "the last version." && \
+ echo "" && \
+ echo "For example:" && \
+ echo " make PREV=0.7.3" && \
+ echo "" && \
+ false; \
+ fi
+
+release-check: ensure-prev release-verify-newer release-remove-old distcheck
+
+release-tag:
+ git tag -u $(GPGKEY) -m "$(PACKAGE) $(VERSION) release" $(PACKAGE)-$(VERSION)
+
+release-upload: release-check $(tar_gz) $(tar_bz2) $(sha1_tgz) $(sha1_tbz2) $(md5_tgz) $(gpg_file)
+ mkdir -p releases
+ scp $(tar_gz) $(sha1_tgz) $(gpg_file) $(RELEASE_CAIRO_HOST):$(RELEASE_CAIRO_DIR)
+ scp $(tar_gz) $(tar_bz2) $(RELEASE_XORG_HOST):$(RELEASE_XORG_DIR)
+ ssh $(RELEASE_CAIRO_HOST) "rm -f $(RELEASE_CAIRO_DIR)/LATEST-$(PACKAGE)-[0-9]* && ln -s $(tar_gz) $(RELEASE_CAIRO_DIR)/LATEST-$(PACKAGE)-$(VERSION)"
+
+release-publish-message: $(HASHFILES) ensure-prev
+ @echo "Please follow the instructions in RELEASING to push stuff out and"
+ @echo "send out the announcement mails. Here is the excerpt you need:"
+ @echo ""
+ @echo "Lists: $(RELEASE_ANNOUNCE_LIST)"
+ @echo "Subject: [ANNOUNCE] $(PACKAGE) release $(VERSION) now available"
+ @echo "============================== CUT HERE =============================="
+ @echo "A new $(PACKAGE) release $(VERSION) is now available"
+ @echo ""
+ @echo "tar.gz:"
+ @echo " $(RELEASE_CAIRO_URL)/$(tar_gz)"
+ @echo " $(RELEASE_XORG_URL)/$(tar_gz)"
+ @echo ""
+ @echo "tar.bz2:"
+ @echo " $(RELEASE_XORG_URL)/$(tar_bz2)"
+ @echo ""
+ @echo "Hashes:"
+ @echo -n " MD5: "
+ @cat $(md5_tgz)
+ @echo -n " MD5: "
+ @cat $(md5_tbz2)
+ @echo -n " SHA1: "
+ @cat $(sha1_tgz)
+ @echo -n " SHA1: "
+ @cat $(sha1_tbz2)
+ @echo ""
+ @echo "GPG signature:"
+ @echo " $(RELEASE_CAIRO_URL)/$(gpg_file)"
+ @echo " (signed by `git config --get user.name` <`git config --get user.email`>)"
+ @echo ""
+ @echo "Git:"
+ @echo " git://git.freedesktop.org/git/pixman"
+ @echo " tag: $(PACKAGE)-$(VERSION)"
+ @echo ""
+ @echo "Log:"
+ @git log --no-merges "$(PACKAGE)-$(PREV)".."$(PACKAGE)-$(VERSION)" | git shortlog | awk '{ printf "\t"; print ; }' | cut -b1-80
+ @echo "============================== CUT HERE =============================="
+ @echo ""
+
+release-publish: release-upload release-tag release-publish-message
+
+.PHONY: release-upload release-publish release-publish-message release-tag
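
Note on the release rules above: what gets published next to $(tar_gz) is a
sha1sum file plus an armored GPG signature of that checksum file. Below is a
minimal sketch of how a downloaded tarball could be checked against them.
The version number is only an example, and sha1sum/gpg are the standard
tools; only the file-name pattern comes from this Makefile:

    # Fetch the tarball, its checksum, and the signed checksum.
    wget http://cairographics.org/releases/pixman-0.20.2.tar.gz
    wget http://cairographics.org/releases/pixman-0.20.2.tar.gz.sha1
    wget http://cairographics.org/releases/pixman-0.20.2.tar.gz.sha1.asc

    # Compare the tarball against the recorded checksum.
    sha1sum -c pixman-0.20.2.tar.gz.sha1

    # "gpg --armor --sign" embeds the signed data, so the .asc file
    # can be verified on its own.
    gpg --verify pixman-0.20.2.tar.gz.sha1.asc
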
diff --git a/pixman/RELEASING b/pixman/RELEASING
index fbe15813d..8644f2d8d 100644
--- a/pixman/RELEASING
+++ b/pixman/RELEASING
@@ -1,57 +1,57 @@
-Here are the steps to follow to create a new pixman release:
-
-1) Ensure that there are no uncommitted changes or unpushed commits,
- and that you are up to date with the latest commits in the central
- repository. Here are a couple of useful commands:
-
- git diff (no output)
-
- git status (should report "nothing to commit")
-
- git log master...origin (no output; note: *3* dots)
-
-2) Increment pixman_(major|minor|micro) in configure.ac according to
- the directions in that file.
-
-3) Make sure that the new version works, including
-
- - make distcheck passes
-
- - the X server still works with the new pixman version
- installed
-
- - the cairo test suite hasn't gained any new failures compared
-      to the last pixman version.
-
-4) Use "git commit" to record the changes made in steps 2 and 3.
-
-5) Generate and publish the tar files by running
-
- make PREV=<last version> GPGKEY=<your gpg key id> release-publish
-
- If your freedesktop user name is different from your local one,
- then also set the variable USER to your freedesktop user name.
-
-6) Run
-
- make release-publish-message
-
- to generate a draft release announcement. Edit it as appropriate and
- send it to
-
- cairo-announce@cairographics.org
-
- pixman@lists.freedesktop.org
-
- xorg-announce@lists.freedesktop.org
-
-7) Increment pixman_micro to the next larger (odd) number in
- configure.ac. Commit this change, and push all commits created
- during this process using
-
- git push
- git push --tags
-
- You must use "--tags" here; otherwise the new tag will not
- be pushed out.
-
+Here are the steps to follow to create a new pixman release:
+
+1) Ensure that there are no uncommitted changes or unpushed commits,
+ and that you are up to date with the latest commits in the central
+ repository. Here are a couple of useful commands:
+
+ git diff (no output)
+
+ git status (should report "nothing to commit")
+
+ git log master...origin (no output; note: *3* dots)
+
+2) Increment pixman_(major|minor|micro) in configure.ac according to
+ the directions in that file.
+
+3) Make sure that the new version works, including
+
+ - make distcheck passes
+
+ - the X server still works with the new pixman version
+ installed
+
+ - the cairo test suite hasn't gained any new failures compared
+      to the last pixman version.
+
+4) Use "git commit" to record the changes made in steps 2 and 3.
+
+5) Generate and publish the tar files by running
+
+ make PREV=<last version> GPGKEY=<your gpg key id> release-publish
+
+ If your freedesktop user name is different from your local one,
+ then also set the variable USER to your freedesktop user name.
+
+6) Run
+
+ make release-publish-message
+
+ to generate a draft release announcement. Edit it as appropriate and
+ send it to
+
+ cairo-announce@cairographics.org
+
+ pixman@lists.freedesktop.org
+
+ xorg-announce@lists.freedesktop.org
+
+7) Increment pixman_micro to the next larger (odd) number in
+ configure.ac. Commit this change, and push all commits created
+ during this process using
+
+ git push
+ git push --tags
+
+ You must use "--tags" here; otherwise the new tag will not
+ be pushed out.
+
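
For reference, the steps above condense to roughly this command sequence (a
sketch only: PREV=0.7.3 is the example value from the ensure-prev target and
GPGKEY=6FF7C1A8 is the default from pixman/Makefile.am; substitute your own):

    git diff                    # expect no output
    git status                  # expect "nothing to commit"
    git log master...origin     # expect no output; note the *3* dots
    # ...bump pixman_(major|minor|micro) in configure.ac, test, commit...
    make PREV=0.7.3 GPGKEY=6FF7C1A8 release-publish
    make release-publish-message
    # Bump pixman_micro to the next odd number in configure.ac, commit, then:
    git push
    git push --tags             # without --tags the new tag is not pushed
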
diff --git a/pixman/configure.ac b/pixman/configure.ac
index db9a883b6..d76e0a2a5 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -1,821 +1,821 @@
-dnl Copyright 2005 Red Hat, Inc.
-dnl
-dnl Permission to use, copy, modify, distribute, and sell this software and its
-dnl documentation for any purpose is hereby granted without fee, provided that
-dnl the above copyright notice appear in all copies and that both that
-dnl copyright notice and this permission notice appear in supporting
-dnl documentation, and that the name of Red Hat not be used in
-dnl advertising or publicity pertaining to distribution of the software without
-dnl specific, written prior permission. Red Hat makes no
-dnl representations about the suitability of this software for any purpose. It
-dnl is provided "as is" without express or implied warranty.
-dnl
-dnl RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
-dnl INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
-dnl EVENT SHALL RED HAT BE LIABLE FOR ANY SPECIAL, INDIRECT OR
-dnl CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
-dnl DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
-dnl TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
-dnl PERFORMANCE OF THIS SOFTWARE.
-dnl
-dnl Process this file with autoconf to create configure.
-
-AC_PREREQ([2.57])
-
-# Pixman versioning scheme
-#
-# - The version in git has an odd MICRO version number
-#
-# - Released versions, both development and stable, have an even MICRO
-#   version number
-#
-# - Released development versions have an odd MINOR number
-#
-# - Released stable versions have an even MINOR number
-#
-# - Versions that break ABI must have a new MAJOR number
-#
-# - If you break the ABI, then at least this must be done:
-#
-# - increment MAJOR
-#
-# - In the first development release where you break ABI, find
-# all instances of "pixman-n" and change them to pixman-(n+1)
-#
-# This needs to be done at least in
-# configure.ac
-# all Makefile.am's
-# pixman-n.pc.in
-#
-# This ensures that binary incompatible versions can be installed
-# in parallel. See http://www106.pair.com/rhp/parallel.html for
-# more information
-#
-
-m4_define([pixman_major], 0)
-m4_define([pixman_minor], 21)
-m4_define([pixman_micro], 7)
-
-m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
-
-AC_INIT(pixman, pixman_version, [pixman@lists.freedesktop.org], pixman)
-AM_INIT_AUTOMAKE([foreign dist-bzip2])
-
-# Suppress verbose compile lines
-m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
-
-AM_CONFIG_HEADER(config.h)
-
-AC_CANONICAL_HOST
-
-test_CFLAGS=${CFLAGS+set} # We may override autoconf default CFLAGS.
-
-AC_PROG_CC
-AM_PROG_AS
-AC_PROG_LIBTOOL
-AC_CHECK_FUNCS([getisax])
-AC_C_BIGENDIAN
-AC_C_INLINE
-
-dnl PIXMAN_LINK_WITH_ENV(env-setup, program, true-action, false-action)
-dnl
-dnl Compiles and links the given program in the environment setup by env-setup
-dnl and executes true-action on success and false-action on failure.
-AC_DEFUN([PIXMAN_LINK_WITH_ENV],[dnl
- save_CFLAGS="$CFLAGS"
- save_LDFLAGS="$LDFLAGS"
- save_LIBS="$LIBS"
- CFLAGS=""
- LDFLAGS=""
- LIBS=""
- $1
- AC_LINK_IFELSE(
- [$2],
- [pixman_cc_stderr=`test -f conftest.err && cat conftest.err`
- pixman_cc_flag=yes],
- [pixman_cc_stderr=`test -f conftest.err && cat conftest.err`
- pixman_cc_flag=no])
-
- if test "x$pixman_cc_stderr" != "x"; then
- pixman_cc_flag=no
- fi
-
- if test "x$pixman_cc_flag" = "xyes"; then
- ifelse([$3], , :, [$3])
- else
- ifelse([$4], , :, [$4])
- fi
- CFLAGS="$save_CFLAGS"
- LDFLAGS="$save_LDFLAGS"
- LIBS="$save_LIBS"
-])
-
-dnl Find a -Werror for catching warnings.
-WERROR=
-for w in -Werror -errwarn; do
- if test "z$WERROR" = "z"; then
- AC_MSG_CHECKING([whether the compiler supports $w])
- PIXMAN_LINK_WITH_ENV(
- [CFLAGS=$w],
- [int main(int c, char **v) { (void)c; (void)v; return 0; }],
-	[WERROR=$w; _yesno=yes], [_yesno=no])
- AC_MSG_RESULT($_yesno)
- fi
-done
-
-dnl PIXMAN_CHECK_CFLAG(flag, [program])
-dnl Adds flag to CFLAGS if the given program links without warnings or errors.
-AC_DEFUN([PIXMAN_CHECK_CFLAG], [dnl
- AC_MSG_CHECKING([whether the compiler supports $1])
- PIXMAN_LINK_WITH_ENV(
- [CFLAGS="$WERROR $1"],
- [$2
- int main(int c, char **v) { (void)c; (void)v; return 0; }
- ],
- [_yesno=yes],
- [_yesno=no])
- if test "x$_yesno" = xyes; then
- CFLAGS="$CFLAGS $1"
- fi
- AC_MSG_RESULT($_yesno)
-])
-
-AC_CHECK_SIZEOF(long)
-
-# Checks for Sun Studio compilers
-AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"])
-AC_CHECK_DECL([__amd64], [AMD64_ABI="yes"], [AMD64_ABI="no"])
-
-# Default CFLAGS to -O -g rather than just the -g from AC_PROG_CC
-# if we're using Sun Studio and neither the user nor a config.site
-# has set CFLAGS.
-if test $SUNCC = yes && \
-   test "$test_CFLAGS" = "" && \
- test "$CFLAGS" = "-g"
-then
- CFLAGS="-O -g"
-fi
-
-#
-# We ignore pixman_major in the version here because the major version should
-# always be encoded in the actual library name. I.e., the soname is:
-#
-# pixman-$(pixman_major).0.minor.micro
-#
-m4_define([lt_current], [pixman_minor])
-m4_define([lt_revision], [pixman_micro])
-m4_define([lt_age], [pixman_minor])
-
-LT_VERSION_INFO="lt_current:lt_revision:lt_age"
-
-PIXMAN_VERSION_MAJOR=pixman_major()
-AC_SUBST(PIXMAN_VERSION_MAJOR)
-PIXMAN_VERSION_MINOR=pixman_minor()
-AC_SUBST(PIXMAN_VERSION_MINOR)
-PIXMAN_VERSION_MICRO=pixman_micro()
-AC_SUBST(PIXMAN_VERSION_MICRO)
-
-AC_SUBST(LT_VERSION_INFO)
-
-# Check for dependencies
-
-PIXMAN_CHECK_CFLAG([-Wall])
-PIXMAN_CHECK_CFLAG([-fno-strict-aliasing])
-
-AC_PATH_PROG(PERL, perl, no)
-if test "x$PERL" = xno; then
- AC_MSG_ERROR([Perl is required to build pixman.])
-fi
-AC_SUBST(PERL)
-
-dnl =========================================================================
-dnl OpenMP for the test suite?
-dnl
-
-# Check for OpenMP support (only supported by autoconf >=2.62)
-OPENMP_CFLAGS=
-m4_ifdef([AC_OPENMP], [AC_OPENMP])
-
-m4_define([openmp_test_program],[dnl
-#include <stdio.h>
-
-extern unsigned int lcg_seed;
-#pragma omp threadprivate(lcg_seed)
-unsigned int lcg_seed;
-
-unsigned function(unsigned a, unsigned b)
-{
- lcg_seed ^= b;
- return ((a + b) ^ a ) + lcg_seed;
-}
-
-int main(int argc, char **argv)
-{
- int i;
- int n1 = 0, n2 = argc;
- unsigned checksum = 0;
- int verbose = argv != NULL;
- unsigned (*test_function)(unsigned, unsigned);
- test_function = function;
- #pragma omp parallel for reduction(+:checksum) default(none) \
- shared(n1, n2, test_function, verbose)
- for (i = n1; i < n2; i++)
- {
- unsigned crc = test_function (i, 0);
- if (verbose)
- printf ("%d: %08X\n", i, crc);
- checksum += crc;
- }
- printf("%u\n", checksum);
- return 0;
-}
-])
-
-PIXMAN_LINK_WITH_ENV(
- [CFLAGS="$OPENMP_CFLAGS" LDFLAGS="$OPENMP_CFLAGS"],
- [openmp_test_program],
- [have_openmp=yes],
- [have_openmp=no])
-if test "x$have_openmp" = "xyes"; then
- AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite])
-else
- OPENMP_CFLAGS=""
-fi
-AC_SUBST(OPENMP_CFLAGS)
-
-dnl =========================================================================
-dnl -fvisibility stuff
-
-PIXMAN_CHECK_CFLAG([-fvisibility=hidden], [dnl
-#if defined(__GNUC__) && (__GNUC__ >= 4)
-#ifdef _WIN32
-#error Have -fvisibility but it is ignored and generates a warning
-#endif
-#else
-error Need GCC 4.0 for visibility
-#endif
-])
-
-PIXMAN_CHECK_CFLAG([-xldscope=hidden], [dnl
-#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
-#else
-error Need Sun Studio 8 for visibility
-#endif
-])
-
-dnl ===========================================================================
-dnl Check for MMX
-
-if test "x$MMX_CFLAGS" = "x" ; then
- if test "x$SUNCC" = "xyes"; then
- # Sun Studio doesn't have an -xarch=mmx flag, so we have to use sse
- # but if we're building 64-bit, mmx & sse support is on by default and
- # -xarch=sse throws an error instead
- if test "$AMD64_ABI" = "no" ; then
- MMX_CFLAGS="-xarch=sse"
- fi
- else
- MMX_CFLAGS="-mmmx -Winline"
- fi
-fi
-
-have_mmx_intrinsics=no
-AC_MSG_CHECKING(whether to use MMX intrinsics)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$MMX_CFLAGS $CFLAGS"
-AC_COMPILE_IFELSE([
-#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
-error "Need GCC >= 3.4 for MMX intrinsics"
-#endif
-#include <mmintrin.h>
-int main () {
- __m64 v = _mm_cvtsi32_si64 (1);
- return _mm_cvtsi64_si32 (v);
-}], have_mmx_intrinsics=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(mmx,
- [AC_HELP_STRING([--disable-mmx],
- [disable MMX fast paths])],
- [enable_mmx=$enableval], [enable_mmx=auto])
-
-if test $enable_mmx = no ; then
- have_mmx_intrinsics=disabled
-fi
-
-if test $have_mmx_intrinsics = yes ; then
- AC_DEFINE(USE_MMX, 1, [use MMX compiler intrinsics])
-else
- MMX_CFLAGS=
-fi
-
-AC_MSG_RESULT($have_mmx_intrinsics)
-if test $enable_mmx = yes && test $have_mmx_intrinsics = no ; then
- AC_MSG_ERROR([MMX intrinsics not detected])
-fi
-
-AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
-
-dnl ===========================================================================
-dnl Check for SSE2
-
-if test "x$SSE2_CFLAGS" = "x" ; then
- if test "x$SUNCC" = "xyes"; then
- # SSE2 is enabled by default in the Sun Studio 64-bit environment
- if test "$AMD64_ABI" = "no" ; then
- SSE2_CFLAGS="-xarch=sse2"
- fi
- else
- SSE2_CFLAGS="-msse2 -Winline"
- fi
-fi
-
-have_sse2_intrinsics=no
-AC_MSG_CHECKING(whether to use SSE2 intrinsics)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$SSE2_CFLAGS $CFLAGS"
-
-AC_COMPILE_IFELSE([
-#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2))
-# if !defined(__amd64__) && !defined(__x86_64__)
-# error "Need GCC >= 4.2 for SSE2 intrinsics on x86"
-# endif
-#endif
-#include <mmintrin.h>
-#include <xmmintrin.h>
-#include <emmintrin.h>
-int main () {
- __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
- c = _mm_xor_si128 (a, b);
- return 0;
-}], have_sse2_intrinsics=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(sse2,
- [AC_HELP_STRING([--disable-sse2],
- [disable SSE2 fast paths])],
- [enable_sse2=$enableval], [enable_sse2=auto])
-
-if test $enable_sse2 = no ; then
- have_sse2_intrinsics=disabled
-fi
-
-if test $have_sse2_intrinsics = yes ; then
- AC_DEFINE(USE_SSE2, 1, [use SSE2 compiler intrinsics])
-fi
-
-AC_MSG_RESULT($have_sse2_intrinsics)
-if test $enable_sse2 = yes && test $have_sse2_intrinsics = no ; then
- AC_MSG_ERROR([SSE2 intrinsics not detected])
-fi
-
-AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes)
-
-dnl ===========================================================================
-dnl Other special flags needed when building code using MMX or SSE instructions
-case $host_os in
- solaris*)
- # When building 32-bit binaries, apply a mapfile to ensure that the
- # binaries aren't flagged as only able to run on MMX+SSE capable CPUs
- # since they check at runtime before using those instructions.
- # Not all linkers grok the mapfile format so we check for that first.
- if test "$AMD64_ABI" = "no" ; then
- use_hwcap_mapfile=no
- AC_MSG_CHECKING(whether to use a hardware capability map file)
- hwcap_save_LDFLAGS="$LDFLAGS"
- HWCAP_LDFLAGS='-Wl,-M,$(srcdir)/solaris-hwcap.mapfile'
- LDFLAGS="$LDFLAGS -Wl,-M,pixman/solaris-hwcap.mapfile"
- AC_LINK_IFELSE([int main() { return 0; }],
- use_hwcap_mapfile=yes,
- HWCAP_LDFLAGS="")
- LDFLAGS="$hwcap_save_LDFLAGS"
- AC_MSG_RESULT($use_hwcap_mapfile)
- fi
- if test "x$MMX_LDFLAGS" = "x" ; then
- MMX_LDFLAGS="$HWCAP_LDFLAGS"
- fi
- if test "x$SSE2_LDFLAGS" = "x" ; then
- SSE2_LDFLAGS="$HWCAP_LDFLAGS"
- fi
- ;;
-esac
-
-AC_SUBST(MMX_CFLAGS)
-AC_SUBST(MMX_LDFLAGS)
-AC_SUBST(SSE2_CFLAGS)
-AC_SUBST(SSE2_LDFLAGS)
-
-dnl ===========================================================================
-dnl Check for VMX/Altivec
-if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
- VMX_CFLAGS="-faltivec"
-else
- VMX_CFLAGS="-maltivec -mabi=altivec"
-fi
-
-have_vmx_intrinsics=no
-AC_MSG_CHECKING(whether to use VMX/Altivec intrinsics)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$VMX_CFLAGS $CFLAGS"
-AC_COMPILE_IFELSE([
-#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
-error "Need GCC >= 3.4 for sane altivec support"
-#endif
-#include <altivec.h>
-int main () {
- vector unsigned int v = vec_splat_u32 (1);
- v = vec_sub (v, v);
- return 0;
-}], have_vmx_intrinsics=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(vmx,
- [AC_HELP_STRING([--disable-vmx],
- [disable VMX fast paths])],
- [enable_vmx=$enableval], [enable_vmx=auto])
-
-if test $enable_vmx = no ; then
- have_vmx_intrinsics=disabled
-fi
-
-if test $have_vmx_intrinsics = yes ; then
- AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
-else
- VMX_CFLAGS=
-fi
-
-AC_MSG_RESULT($have_vmx_intrinsics)
-if test $enable_vmx = yes && test $have_vmx_intrinsics = no ; then
- AC_MSG_ERROR([VMX intrinsics not detected])
-fi
-
-AC_SUBST(VMX_CFLAGS)
-
-AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
-
-dnl ==========================================================================
-dnl Check if assembler is gas compatible and supports ARM SIMD instructions
-have_arm_simd=no
-AC_MSG_CHECKING(whether to use ARM SIMD assembler)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="-x assembler-with-cpp $CFLAGS"
-AC_COMPILE_IFELSE([[
-.text
-.arch armv6
-.object_arch armv4
-.arm
-.altmacro
-#ifndef __ARM_EABI__
-#error EABI is required (to be sure that calling conventions are compatible)
-#endif
-pld [r0]
-uqadd8 r0, r0, r0]], have_arm_simd=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(arm-simd,
- [AC_HELP_STRING([--disable-arm-simd],
- [disable ARM SIMD fast paths])],
- [enable_arm_simd=$enableval], [enable_arm_simd=auto])
-
-if test $enable_arm_simd = no ; then
- have_arm_simd=disabled
-fi
-
-if test $have_arm_simd = yes ; then
- AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD assembly optimizations])
-fi
-
-AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
-
-AC_MSG_RESULT($have_arm_simd)
-if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
- AC_MSG_ERROR([ARM SIMD intrinsics not detected])
-fi
-
-dnl ==========================================================================
-dnl Check if assembler is gas compatible and supports NEON instructions
-have_arm_neon=no
-AC_MSG_CHECKING(whether to use ARM NEON assembler)
-xserver_save_CFLAGS=$CFLAGS
-CFLAGS="-x assembler-with-cpp $CFLAGS"
-AC_COMPILE_IFELSE([[
-.text
-.fpu neon
-.arch armv7a
-.object_arch armv4
-.eabi_attribute 10, 0
-.arm
-.altmacro
-#ifndef __ARM_EABI__
-#error EABI is required (to be sure that calling conventions are compatible)
-#endif
-pld [r0]
-vmovn.u16 d0, q0]], have_arm_neon=yes)
-CFLAGS=$xserver_save_CFLAGS
-
-AC_ARG_ENABLE(arm-neon,
- [AC_HELP_STRING([--disable-arm-neon],
- [disable ARM NEON fast paths])],
- [enable_arm_neon=$enableval], [enable_arm_neon=auto])
-
-if test $enable_arm_neon = no ; then
- have_arm_neon=disabled
-fi
-
-if test $have_arm_neon = yes ; then
- AC_DEFINE(USE_ARM_NEON, 1, [use ARM NEON assembly optimizations])
-fi
-
-AM_CONDITIONAL(USE_ARM_NEON, test $have_arm_neon = yes)
-
-AC_MSG_RESULT($have_arm_neon)
-if test $enable_arm_neon = yes && test $have_arm_neon = no ; then
- AC_MSG_ERROR([ARM NEON intrinsics not detected])
-fi
-
-dnl =========================================================================================
-dnl Check for GNU-style inline assembly support
-
-have_gcc_inline_asm=no
-AC_MSG_CHECKING(whether to use GNU-style inline assembler)
-AC_COMPILE_IFELSE([
-int main () {
- /* Most modern architectures have a NOP instruction, so this is a fairly generic test. */
- asm volatile ( "\tnop\n" : : : "cc", "memory" );
- return 0;
-}], have_gcc_inline_asm=yes)
-
-AC_ARG_ENABLE(gcc-inline-asm,
- [AC_HELP_STRING([--disable-gcc-inline-asm],
- [disable GNU-style inline assembler])],
- [enable_gcc_inline_asm=$enableval], [enable_gcc_inline_asm=auto])
-
-if test $enable_gcc_inline_asm = no ; then
- have_gcc_inline_asm=disabled
-fi
-
-if test $have_gcc_inline_asm = yes ; then
- AC_DEFINE(USE_GCC_INLINE_ASM, 1, [use GNU-style inline assembler])
-fi
-
-AC_MSG_RESULT($have_gcc_inline_asm)
-if test $enable_gcc_inline_asm = yes && test $have_gcc_inline_asm = no ; then
- AC_MSG_ERROR([GNU-style inline assembler not detected])
-fi
-
-AM_CONDITIONAL(USE_GCC_INLINE_ASM, test $have_gcc_inline_asm = yes)
-
-dnl ==============================================
-dnl Static test programs
-
-AC_ARG_ENABLE(static-testprogs,
- [AC_HELP_STRING([--enable-static-testprogs],
- [build test programs as static binaries [default=no]])],
- [enable_static_testprogs=$enableval], [enable_static_testprogs=no])
-
-TESTPROGS_EXTRA_LDFLAGS=
-if test "x$enable_static_testprogs" = "xyes" ; then
- TESTPROGS_EXTRA_LDFLAGS="-all-static"
-fi
-AC_SUBST(TESTPROGS_EXTRA_LDFLAGS)
-
-dnl ==============================================
-dnl Timers
-
-AC_ARG_ENABLE(timers,
- [AC_HELP_STRING([--enable-timers],
- [enable TIMER_BEGIN and TIMER_END macros [default=no]])],
- [enable_timers=$enableval], [enable_timers=no])
-
-if test $enable_timers = yes ; then
- AC_DEFINE(PIXMAN_TIMERS, 1, [enable TIMER_BEGIN/TIMER_END macros])
-fi
-AC_SUBST(PIXMAN_TIMERS)
-
-dnl ===================================
-dnl GTK+
-
-AC_ARG_ENABLE(gtk,
- [AC_HELP_STRING([--enable-gtk],
- [enable tests using GTK+ [default=auto]])],
- [enable_gtk=$enableval], [enable_gtk=auto])
-
-PKG_PROG_PKG_CONFIG
-
-if test $enable_gtk = yes ; then
- AC_CHECK_LIB([pixman-1], [pixman_version_string])
- PKG_CHECK_MODULES(GTK, [gtk+-2.0 pixman-1])
-fi
-
-if test $enable_gtk = auto ; then
- AC_CHECK_LIB([pixman-1], [pixman_version_string], [enable_gtk=auto], [enable_gtk=no])
-fi
-
-if test $enable_gtk = auto ; then
- PKG_CHECK_MODULES(GTK, [gtk+-2.0 pixman-1], [enable_gtk=yes], [enable_gtk=no])
-fi
-
-AM_CONDITIONAL(HAVE_GTK, [test "x$enable_gtk" = xyes])
-
-AC_SUBST(GTK_CFLAGS)
-AC_SUBST(GTK_LIBS)
-AC_SUBST(DEP_CFLAGS)
-AC_SUBST(DEP_LIBS)
-
-dnl =====================================
-dnl posix_memalign, sigaction, alarm, gettimeofday
-
-AC_CHECK_FUNC(posix_memalign, have_posix_memalign=yes, have_posix_memalign=no)
-if test x$have_posix_memalign = xyes; then
- AC_DEFINE(HAVE_POSIX_MEMALIGN, 1, [Whether we have posix_memalign()])
-fi
-
-AC_CHECK_FUNC(sigaction, have_sigaction=yes, have_sigaction=no)
-if test x$have_sigaction = xyes; then
- AC_DEFINE(HAVE_SIGACTION, 1, [Whether we have sigaction()])
-fi
-
-AC_CHECK_FUNC(alarm, have_alarm=yes, have_alarm=no)
-if test x$have_alarm = xyes; then
- AC_DEFINE(HAVE_ALARM, 1, [Whether we have alarm()])
-fi
-
-AC_CHECK_HEADER([sys/mman.h],
- [AC_DEFINE(HAVE_SYS_MMAN_H, [1], [Define to 1 if we have <sys/mman.h>])])
-
-AC_CHECK_FUNC(mprotect, have_mprotect=yes, have_mprotect=no)
-if test x$have_mprotect = xyes; then
- AC_DEFINE(HAVE_MPROTECT, 1, [Whether we have mprotect()])
-fi
-
-AC_CHECK_FUNC(getpagesize, have_getpagesize=yes, have_getpagesize=no)
-if test x$have_getpagesize = xyes; then
- AC_DEFINE(HAVE_GETPAGESIZE, 1, [Whether we have getpagesize()])
-fi
-
-AC_CHECK_HEADER([fenv.h],
- [AC_DEFINE(HAVE_FENV_H, [1], [Define to 1 if we have <fenv.h>])])
-
-AC_CHECK_LIB(m, feenableexcept, have_feenableexcept=yes, have_feenableexcept=no)
-if test x$have_feenableexcept = xyes; then
- AC_DEFINE(HAVE_FEENABLEEXCEPT, 1, [Whether we have feenableexcept()])
-fi
-
-AC_CHECK_FUNC(gettimeofday, have_gettimeofday=yes, have_gettimeofday=no)
-AC_CHECK_HEADER(sys/time.h, have_sys_time_h=yes, have_sys_time_h=no)
-if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then
- AC_DEFINE(HAVE_GETTIMEOFDAY, 1, [Whether we have gettimeofday()])
-fi
-
-dnl =====================================
-dnl Thread local storage
-
-support_for__thread=no
-
-AC_MSG_CHECKING(for __thread)
-AC_LINK_IFELSE([
-#if defined(__MINGW32__) && !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
-#error This MinGW version has broken __thread support
-#endif
-#ifdef __OpenBSD__
-#error OpenBSD has broken __thread support
-#endif
-static __thread int x ;
-int main () { x = 123; return x; }
-], support_for__thread=yes)
-
-if test $support_for__thread = yes; then
- AC_DEFINE([TOOLCHAIN_SUPPORTS__THREAD],[],[Whether the tool chain supports __thread])
-fi
-
-AC_MSG_RESULT($support_for__thread)
-
-dnl
-dnl posix tls
-dnl
-
-m4_define([pthread_test_program],[dnl
-#include <stdlib.h>
-#include <pthread.h>
-
-static pthread_once_t once_control = PTHREAD_ONCE_INIT;
-static pthread_key_t key;
-
-static void
-make_key (void)
-{
- pthread_key_create (&key, NULL);
-}
-
-int
-main ()
-{
- void *value = NULL;
-
- if (pthread_once (&once_control, make_key) != 0)
- {
- value = NULL;
- }
- else
- {
- value = pthread_getspecific (key);
- if (!value)
- {
- value = malloc (100);
- pthread_setspecific (key, value);
- }
- }
- return 0;
-}
-])
-
-AC_DEFUN([PIXMAN_CHECK_PTHREAD],[dnl
- if test "z$support_for_pthread_setspecific" != "zyes"; then
- PIXMAN_LINK_WITH_ENV(
- [$1], [pthread_test_program],
- [PTHREAD_CFLAGS="$CFLAGS"
- PTHREAD_LIBS="$LIBS"
- PTHREAD_LDFLAGS="$LDFLAGS"
- support_for_pthread_setspecific=yes])
- fi
-])
-
-if test $support_for__thread = no; then
- support_for_pthread_setspecific=no
-
- AC_MSG_CHECKING(for pthread_setspecific)
-
- PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LIBS="-lpthread"])
- PIXMAN_CHECK_PTHREAD([CFLAGS="-pthread"; LDFLAGS="-pthread"])
- PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"])
-
- if test $support_for_pthread_setspecific = yes; then
- CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
- AC_DEFINE([HAVE_PTHREAD_SETSPECIFIC], [], [Whether pthread_setspecific() is supported])
- fi
-
- AC_MSG_RESULT($support_for_pthread_setspecific);
-fi
-
-AC_SUBST(TOOLCHAIN_SUPPORTS__THREAD)
-AC_SUBST(HAVE_PTHREAD_SETSPECIFIC)
-AC_SUBST(PTHREAD_LDFLAGS)
-AC_SUBST(PTHREAD_LIBS)
-
-dnl =====================================
-dnl __attribute__((constructor))
-
-support_for_attribute_constructor=no
-
-AC_MSG_CHECKING(for __attribute__((constructor)))
-AC_LINK_IFELSE([
-#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7))
-/* attribute 'constructor' is supported since gcc 2.7, but some compilers
- * may only pretend to be gcc, so let's try to actually use it
- */
-static int x = 1;
-static void __attribute__((constructor)) constructor_function () { x = 0; }
-int main (void) { return x; }
-#else
-#error not gcc or gcc version is older than 2.7
-#endif
-], support_for_attribute_constructor=yes)
-
-if test x$support_for_attribute_constructor = xyes; then
- AC_DEFINE([TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR],
- [],[Whether the tool chain supports __attribute__((constructor))])
-fi
-
-AC_MSG_RESULT($support_for_attribute_constructor)
-AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR)
-
-AC_OUTPUT([pixman-1.pc
- pixman-1-uninstalled.pc
- Makefile
- pixman/Makefile
- pixman/pixman-version.h
- demos/Makefile
- test/Makefile])
-
-m4_if(m4_eval(pixman_minor % 2), [1], [
- echo
- echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
- echo
- echo " Thanks for testing this development snapshot of pixman. Please"
- echo " report any problems you find, either by sending email to "
- echo
- echo " pixman@lists.freedesktop.org"
- echo
- echo " or by filing a bug at "
- echo
- echo " https://bugs.freedesktop.org/enter_bug.cgi?product=pixman "
- echo
- echo " If you are looking for a stable release of pixman, please note "
-	echo " that stable releases have _even_ minor version numbers. I.e., "
- echo " pixman-0.]m4_eval(pixman_minor & ~1)[.x are stable releases, whereas pixman-$PIXMAN_VERSION_MAJOR.$PIXMAN_VERSION_MINOR.$PIXMAN_VERSION_MICRO is a "
- echo " development snapshot that may contain bugs and experimental "
- echo " features. "
- echo
- echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
- echo
-])
+dnl Copyright 2005 Red Hat, Inc.
+dnl
+dnl Permission to use, copy, modify, distribute, and sell this software and its
+dnl documentation for any purpose is hereby granted without fee, provided that
+dnl the above copyright notice appear in all copies and that both that
+dnl copyright notice and this permission notice appear in supporting
+dnl documentation, and that the name of Red Hat not be used in
+dnl advertising or publicity pertaining to distribution of the software without
+dnl specific, written prior permission. Red Hat makes no
+dnl representations about the suitability of this software for any purpose. It
+dnl is provided "as is" without express or implied warranty.
+dnl
+dnl RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+dnl INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+dnl EVENT SHALL RED HAT BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+dnl CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+dnl DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+dnl TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+dnl PERFORMANCE OF THIS SOFTWARE.
+dnl
+dnl Process this file with autoconf to create configure.
+
+AC_PREREQ([2.57])
+
+# Pixman versioning scheme
+#
+# - The version in git has an odd MICRO version number
+#
+# - Released versions, both development and stable, have an even MICRO
+#   version number
+#
+# - Released development versions have an odd MINOR number
+#
+# - Released stable versions have an even MINOR number
+#
+# - Versions that break ABI must have a new MAJOR number
+#
+# - If you break the ABI, then at least this must be done:
+#
+# - increment MAJOR
+#
+# - In the first development release where you break ABI, find
+# all instances of "pixman-n" and change them to pixman-(n+1)
+#
+# This needs to be done at least in
+# configure.ac
+# all Makefile.am's
+# pixman-n.pc.in
+#
+# This ensures that binary incompatible versions can be installed
+# in parallel. See http://www106.pair.com/rhp/parallel.html for
+# more information
+#
+
+m4_define([pixman_major], 0)
+m4_define([pixman_minor], 21)
+m4_define([pixman_micro], 7)
+
+m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
+
+AC_INIT(pixman, pixman_version, [pixman@lists.freedesktop.org], pixman)
+AM_INIT_AUTOMAKE([foreign dist-bzip2])
+
+# Suppress verbose compile lines
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
+
+AM_CONFIG_HEADER(config.h)
+
+AC_CANONICAL_HOST
+
+test_CFLAGS=${CFLAGS+set} # We may override autoconf default CFLAGS.
+
+AC_PROG_CC
+AM_PROG_AS
+AC_PROG_LIBTOOL
+AC_CHECK_FUNCS([getisax])
+AC_C_BIGENDIAN
+AC_C_INLINE
+
+dnl PIXMAN_LINK_WITH_ENV(env-setup, program, true-action, false-action)
+dnl
+dnl Compiles and links the given program in the environment setup by env-setup
+dnl and executes true-action on success and false-action on failure.
+AC_DEFUN([PIXMAN_LINK_WITH_ENV],[dnl
+ save_CFLAGS="$CFLAGS"
+ save_LDFLAGS="$LDFLAGS"
+ save_LIBS="$LIBS"
+ CFLAGS=""
+ LDFLAGS=""
+ LIBS=""
+ $1
+ AC_LINK_IFELSE(
+ [$2],
+ [pixman_cc_stderr=`test -f conftest.err && cat conftest.err`
+ pixman_cc_flag=yes],
+ [pixman_cc_stderr=`test -f conftest.err && cat conftest.err`
+ pixman_cc_flag=no])
+
+ if test "x$pixman_cc_stderr" != "x"; then
+ pixman_cc_flag=no
+ fi
+
+ if test "x$pixman_cc_flag" = "xyes"; then
+ ifelse([$3], , :, [$3])
+ else
+ ifelse([$4], , :, [$4])
+ fi
+ CFLAGS="$save_CFLAGS"
+ LDFLAGS="$save_LDFLAGS"
+ LIBS="$save_LIBS"
+])
+
+dnl Find a -Werror for catching warnings.
+WERROR=
+for w in -Werror -errwarn; do
+ if test "z$WERROR" = "z"; then
+ AC_MSG_CHECKING([whether the compiler supports $w])
+ PIXMAN_LINK_WITH_ENV(
+ [CFLAGS=$w],
+ [int main(int c, char **v) { (void)c; (void)v; return 0; }],
+	[WERROR=$w; _yesno=yes], [_yesno=no])
+ AC_MSG_RESULT($_yesno)
+ fi
+done
+
+dnl PIXMAN_CHECK_CFLAG(flag, [program])
+dnl Adds flag to CFLAGS if the given program links without warnings or errors.
+AC_DEFUN([PIXMAN_CHECK_CFLAG], [dnl
+ AC_MSG_CHECKING([whether the compiler supports $1])
+ PIXMAN_LINK_WITH_ENV(
+ [CFLAGS="$WERROR $1"],
+ [$2
+ int main(int c, char **v) { (void)c; (void)v; return 0; }
+ ],
+ [_yesno=yes],
+ [_yesno=no])
+ if test "x$_yesno" = xyes; then
+ CFLAGS="$CFLAGS $1"
+ fi
+ AC_MSG_RESULT($_yesno)
+])
+
+AC_CHECK_SIZEOF(long)
+
+# Checks for Sun Studio compilers
+AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"])
+AC_CHECK_DECL([__amd64], [AMD64_ABI="yes"], [AMD64_ABI="no"])
+
+# Default CFLAGS to -O -g rather than just the -g from AC_PROG_CC
+# if we're using Sun Studio and neither the user nor a config.site
+# has set CFLAGS.
+if test $SUNCC = yes && \
+   test "$test_CFLAGS" = "" && \
+ test "$CFLAGS" = "-g"
+then
+ CFLAGS="-O -g"
+fi
+
+#
+# We ignore pixman_major in the version here because the major version should
+# always be encoded in the actual library name. I.e., the soname is:
+#
+# pixman-$(pixman_major).0.minor.micro
+#
+m4_define([lt_current], [pixman_minor])
+m4_define([lt_revision], [pixman_micro])
+m4_define([lt_age], [pixman_minor])
+
+LT_VERSION_INFO="lt_current:lt_revision:lt_age"
+
+PIXMAN_VERSION_MAJOR=pixman_major()
+AC_SUBST(PIXMAN_VERSION_MAJOR)
+PIXMAN_VERSION_MINOR=pixman_minor()
+AC_SUBST(PIXMAN_VERSION_MINOR)
+PIXMAN_VERSION_MICRO=pixman_micro()
+AC_SUBST(PIXMAN_VERSION_MICRO)
+
+AC_SUBST(LT_VERSION_INFO)
+
+# Check for dependencies
+
+PIXMAN_CHECK_CFLAG([-Wall])
+PIXMAN_CHECK_CFLAG([-fno-strict-aliasing])
+
+AC_PATH_PROG(PERL, perl, no)
+if test "x$PERL" = xno; then
+ AC_MSG_ERROR([Perl is required to build pixman.])
+fi
+AC_SUBST(PERL)
+
+dnl =========================================================================
+dnl OpenMP for the test suite?
+dnl
+
+# Check for OpenMP support (only supported by autoconf >=2.62)
+OPENMP_CFLAGS=
+m4_ifdef([AC_OPENMP], [AC_OPENMP])
+
+m4_define([openmp_test_program],[dnl
+#include <stdio.h>
+
+extern unsigned int lcg_seed;
+#pragma omp threadprivate(lcg_seed)
+unsigned int lcg_seed;
+
+unsigned function(unsigned a, unsigned b)
+{
+ lcg_seed ^= b;
+ return ((a + b) ^ a ) + lcg_seed;
+}
+
+int main(int argc, char **argv)
+{
+ int i;
+ int n1 = 0, n2 = argc;
+ unsigned checksum = 0;
+ int verbose = argv != NULL;
+ unsigned (*test_function)(unsigned, unsigned);
+ test_function = function;
+ #pragma omp parallel for reduction(+:checksum) default(none) \
+ shared(n1, n2, test_function, verbose)
+ for (i = n1; i < n2; i++)
+ {
+ unsigned crc = test_function (i, 0);
+ if (verbose)
+ printf ("%d: %08X\n", i, crc);
+ checksum += crc;
+ }
+ printf("%u\n", checksum);
+ return 0;
+}
+])
+
+PIXMAN_LINK_WITH_ENV(
+ [CFLAGS="$OPENMP_CFLAGS" LDFLAGS="$OPENMP_CFLAGS"],
+ [openmp_test_program],
+ [have_openmp=yes],
+ [have_openmp=no])
+if test "x$have_openmp" = "xyes"; then
+ AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite])
+else
+ OPENMP_CFLAGS=""
+fi
+AC_SUBST(OPENMP_CFLAGS)
+
+dnl =========================================================================
+dnl -fvisibility stuff
+
+PIXMAN_CHECK_CFLAG([-fvisibility=hidden], [dnl
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#ifdef _WIN32
+#error Have -fvisibility but it is ignored and generates a warning
+#endif
+#else
+error Need GCC 4.0 for visibility
+#endif
+])
+
+PIXMAN_CHECK_CFLAG([-xldscope=hidden], [dnl
+#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
+#else
+error Need Sun Studio 8 for visibility
+#endif
+])
+
+dnl ===========================================================================
+dnl Check for MMX
+
+if test "x$MMX_CFLAGS" = "x" ; then
+ if test "x$SUNCC" = "xyes"; then
+ # Sun Studio doesn't have an -xarch=mmx flag, so we have to use sse
+ # but if we're building 64-bit, mmx & sse support is on by default and
+ # -xarch=sse throws an error instead
+ if test "$AMD64_ABI" = "no" ; then
+ MMX_CFLAGS="-xarch=sse"
+ fi
+ else
+ MMX_CFLAGS="-mmmx -Winline"
+ fi
+fi
+
+have_mmx_intrinsics=no
+AC_MSG_CHECKING(whether to use MMX intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$MMX_CFLAGS $CFLAGS"
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
+error "Need GCC >= 3.4 for MMX intrinsics"
+#endif
+#include <mmintrin.h>
+int main () {
+ __m64 v = _mm_cvtsi32_si64 (1);
+ return _mm_cvtsi64_si32 (v);
+}], have_mmx_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(mmx,
+ [AC_HELP_STRING([--disable-mmx],
+ [disable MMX fast paths])],
+ [enable_mmx=$enableval], [enable_mmx=auto])
+
+if test $enable_mmx = no ; then
+ have_mmx_intrinsics=disabled
+fi
+
+if test $have_mmx_intrinsics = yes ; then
+ AC_DEFINE(USE_MMX, 1, [use MMX compiler intrinsics])
+else
+ MMX_CFLAGS=
+fi
+
+AC_MSG_RESULT($have_mmx_intrinsics)
+if test $enable_mmx = yes && test $have_mmx_intrinsics = no ; then
+ AC_MSG_ERROR([MMX intrinsics not detected])
+fi
+
+AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
+
+dnl ===========================================================================
+dnl Check for SSE2
+
+if test "x$SSE2_CFLAGS" = "x" ; then
+ if test "x$SUNCC" = "xyes"; then
+ # SSE2 is enabled by default in the Sun Studio 64-bit environment
+ if test "$AMD64_ABI" = "no" ; then
+ SSE2_CFLAGS="-xarch=sse2"
+ fi
+ else
+ SSE2_CFLAGS="-msse2 -Winline"
+ fi
+fi
+
+have_sse2_intrinsics=no
+AC_MSG_CHECKING(whether to use SSE2 intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$SSE2_CFLAGS $CFLAGS"
+
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2))
+# if !defined(__amd64__) && !defined(__x86_64__)
+# error "Need GCC >= 4.2 for SSE2 intrinsics on x86"
+# endif
+#endif
+#include <mmintrin.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+int main () {
+ __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
+ c = _mm_xor_si128 (a, b);
+ return 0;
+}], have_sse2_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(sse2,
+ [AC_HELP_STRING([--disable-sse2],
+ [disable SSE2 fast paths])],
+ [enable_sse2=$enableval], [enable_sse2=auto])
+
+if test $enable_sse2 = no ; then
+ have_sse2_intrinsics=disabled
+fi
+
+if test $have_sse2_intrinsics = yes ; then
+ AC_DEFINE(USE_SSE2, 1, [use SSE2 compiler intrinsics])
+fi
+
+AC_MSG_RESULT($have_sse2_intrinsics)
+if test $enable_sse2 = yes && test $have_sse2_intrinsics = no ; then
+ AC_MSG_ERROR([SSE2 intrinsics not detected])
+fi
+
+AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes)
+
+dnl ===========================================================================
+dnl Other special flags needed when building code using MMX or SSE instructions
+case $host_os in
+ solaris*)
+ # When building 32-bit binaries, apply a mapfile to ensure that the
+ # binaries aren't flagged as only able to run on MMX+SSE capable CPUs
+ # since they check at runtime before using those instructions.
+ # Not all linkers grok the mapfile format so we check for that first.
+ if test "$AMD64_ABI" = "no" ; then
+ use_hwcap_mapfile=no
+ AC_MSG_CHECKING(whether to use a hardware capability map file)
+ hwcap_save_LDFLAGS="$LDFLAGS"
+ HWCAP_LDFLAGS='-Wl,-M,$(srcdir)/solaris-hwcap.mapfile'
+ LDFLAGS="$LDFLAGS -Wl,-M,pixman/solaris-hwcap.mapfile"
+ AC_LINK_IFELSE([int main() { return 0; }],
+ use_hwcap_mapfile=yes,
+ HWCAP_LDFLAGS="")
+ LDFLAGS="$hwcap_save_LDFLAGS"
+ AC_MSG_RESULT($use_hwcap_mapfile)
+ fi
+ if test "x$MMX_LDFLAGS" = "x" ; then
+ MMX_LDFLAGS="$HWCAP_LDFLAGS"
+ fi
+ if test "x$SSE2_LDFLAGS" = "x" ; then
+ SSE2_LDFLAGS="$HWCAP_LDFLAGS"
+ fi
+ ;;
+esac
+
+AC_SUBST(MMX_CFLAGS)
+AC_SUBST(MMX_LDFLAGS)
+AC_SUBST(SSE2_CFLAGS)
+AC_SUBST(SSE2_LDFLAGS)
+
+dnl ===========================================================================
+dnl Check for VMX/Altivec
+if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
+ VMX_CFLAGS="-faltivec"
+else
+ VMX_CFLAGS="-maltivec -mabi=altivec"
+fi
+
+have_vmx_intrinsics=no
+AC_MSG_CHECKING(whether to use VMX/Altivec intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$VMX_CFLAGS $CFLAGS"
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
+error "Need GCC >= 3.4 for sane altivec support"
+#endif
+#include <altivec.h>
+int main () {
+ vector unsigned int v = vec_splat_u32 (1);
+ v = vec_sub (v, v);
+ return 0;
+}], have_vmx_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(vmx,
+ [AC_HELP_STRING([--disable-vmx],
+ [disable VMX fast paths])],
+ [enable_vmx=$enableval], [enable_vmx=auto])
+
+if test $enable_vmx = no ; then
+ have_vmx_intrinsics=disabled
+fi
+
+if test $have_vmx_intrinsics = yes ; then
+ AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
+else
+ VMX_CFLAGS=
+fi
+
+AC_MSG_RESULT($have_vmx_intrinsics)
+if test $enable_vmx = yes && test $have_vmx_intrinsics = no ; then
+ AC_MSG_ERROR([VMX intrinsics not detected])
+fi
+
+AC_SUBST(VMX_CFLAGS)
+
+AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
+
+dnl ==========================================================================
+dnl Check if assembler is gas compatible and supports ARM SIMD instructions
+have_arm_simd=no
+AC_MSG_CHECKING(whether to use ARM SIMD assembler)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="-x assembler-with-cpp $CFLAGS"
+AC_COMPILE_IFELSE([[
+.text
+.arch armv6
+.object_arch armv4
+.arm
+.altmacro
+#ifndef __ARM_EABI__
+#error EABI is required (to be sure that calling conventions are compatible)
+#endif
+pld [r0]
+uqadd8 r0, r0, r0]], have_arm_simd=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(arm-simd,
+ [AC_HELP_STRING([--disable-arm-simd],
+ [disable ARM SIMD fast paths])],
+ [enable_arm_simd=$enableval], [enable_arm_simd=auto])
+
+if test $enable_arm_simd = no ; then
+ have_arm_simd=disabled
+fi
+
+if test $have_arm_simd = yes ; then
+ AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD assembly optimizations])
+fi
+
+AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
+
+AC_MSG_RESULT($have_arm_simd)
+if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
+ AC_MSG_ERROR([ARM SIMD intrinsics not detected])
+fi
+
+dnl ==========================================================================
+dnl Check if assembler is gas compatible and supports NEON instructions
+have_arm_neon=no
+AC_MSG_CHECKING(whether to use ARM NEON assembler)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="-x assembler-with-cpp $CFLAGS"
+AC_COMPILE_IFELSE([[
+.text
+.fpu neon
+.arch armv7a
+.object_arch armv4
+.eabi_attribute 10, 0
+.arm
+.altmacro
+#ifndef __ARM_EABI__
+#error EABI is required (to be sure that calling conventions are compatible)
+#endif
+pld [r0]
+vmovn.u16 d0, q0]], have_arm_neon=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(arm-neon,
+ [AC_HELP_STRING([--disable-arm-neon],
+ [disable ARM NEON fast paths])],
+ [enable_arm_neon=$enableval], [enable_arm_neon=auto])
+
+if test $enable_arm_neon = no ; then
+ have_arm_neon=disabled
+fi
+
+if test $have_arm_neon = yes ; then
+ AC_DEFINE(USE_ARM_NEON, 1, [use ARM NEON assembly optimizations])
+fi
+
+AM_CONDITIONAL(USE_ARM_NEON, test $have_arm_neon = yes)
+
+AC_MSG_RESULT($have_arm_neon)
+if test $enable_arm_neon = yes && test $have_arm_neon = no ; then
+ AC_MSG_ERROR([ARM NEON intrinsics not detected])
+fi
+
+dnl =========================================================================================
+dnl Check for GNU-style inline assembly support
+
+have_gcc_inline_asm=no
+AC_MSG_CHECKING(whether to use GNU-style inline assembler)
+AC_COMPILE_IFELSE([
+int main () {
+ /* Most modern architectures have a NOP instruction, so this is a fairly generic test. */
+ asm volatile ( "\tnop\n" : : : "cc", "memory" );
+ return 0;
+}], have_gcc_inline_asm=yes)
+
+AC_ARG_ENABLE(gcc-inline-asm,
+ [AC_HELP_STRING([--disable-gcc-inline-asm],
+ [disable GNU-style inline assembler])],
+ [enable_gcc_inline_asm=$enableval], [enable_gcc_inline_asm=auto])
+
+if test $enable_gcc_inline_asm = no ; then
+ have_gcc_inline_asm=disabled
+fi
+
+if test $have_gcc_inline_asm = yes ; then
+ AC_DEFINE(USE_GCC_INLINE_ASM, 1, [use GNU-style inline assembler])
+fi
+
+AC_MSG_RESULT($have_gcc_inline_asm)
+if test $enable_gcc_inline_asm = yes && test $have_gcc_inline_asm = no ; then
+ AC_MSG_ERROR([GNU-style inline assembler not detected])
+fi
+
+AM_CONDITIONAL(USE_GCC_INLINE_ASM, test $have_gcc_inline_asm = yes)
+
+dnl ==============================================
+dnl Static test programs
+
+AC_ARG_ENABLE(static-testprogs,
+ [AC_HELP_STRING([--enable-static-testprogs],
+ [build test programs as static binaries [default=no]])],
+ [enable_static_testprogs=$enableval], [enable_static_testprogs=no])
+
+TESTPROGS_EXTRA_LDFLAGS=
+if test "x$enable_static_testprogs" = "xyes" ; then
+ TESTPROGS_EXTRA_LDFLAGS="-all-static"
+fi
+AC_SUBST(TESTPROGS_EXTRA_LDFLAGS)
+
+dnl ==============================================
+dnl Timers
+
+AC_ARG_ENABLE(timers,
+ [AC_HELP_STRING([--enable-timers],
+ [enable TIMER_BEGIN and TIMER_END macros [default=no]])],
+ [enable_timers=$enableval], [enable_timers=no])
+
+if test $enable_timers = yes ; then
+ AC_DEFINE(PIXMAN_TIMERS, 1, [enable TIMER_BEGIN/TIMER_END macros])
+fi
+AC_SUBST(PIXMAN_TIMERS)
+
+dnl ===================================
+dnl GTK+
+
+AC_ARG_ENABLE(gtk,
+ [AC_HELP_STRING([--enable-gtk],
+ [enable tests using GTK+ [default=auto]])],
+ [enable_gtk=$enableval], [enable_gtk=auto])
+
+PKG_PROG_PKG_CONFIG
+
+if test $enable_gtk = yes ; then
+ AC_CHECK_LIB([pixman-1], [pixman_version_string])
+ PKG_CHECK_MODULES(GTK, [gtk+-2.0 pixman-1])
+fi
+
+if test $enable_gtk = auto ; then
+ AC_CHECK_LIB([pixman-1], [pixman_version_string], [enable_gtk=auto], [enable_gtk=no])
+fi
+
+if test $enable_gtk = auto ; then
+ PKG_CHECK_MODULES(GTK, [gtk+-2.0 pixman-1], [enable_gtk=yes], [enable_gtk=no])
+fi
+
+AM_CONDITIONAL(HAVE_GTK, [test "x$enable_gtk" = xyes])
+
+AC_SUBST(GTK_CFLAGS)
+AC_SUBST(GTK_LIBS)
+AC_SUBST(DEP_CFLAGS)
+AC_SUBST(DEP_LIBS)
+
+dnl =====================================
+dnl posix_memalign, sigaction, alarm, gettimeofday
+
+AC_CHECK_FUNC(posix_memalign, have_posix_memalign=yes, have_posix_memalign=no)
+if test x$have_posix_memalign = xyes; then
+ AC_DEFINE(HAVE_POSIX_MEMALIGN, 1, [Whether we have posix_memalign()])
+fi
+
+AC_CHECK_FUNC(sigaction, have_sigaction=yes, have_sigaction=no)
+if test x$have_sigaction = xyes; then
+ AC_DEFINE(HAVE_SIGACTION, 1, [Whether we have sigaction()])
+fi
+
+AC_CHECK_FUNC(alarm, have_alarm=yes, have_alarm=no)
+if test x$have_alarm = xyes; then
+ AC_DEFINE(HAVE_ALARM, 1, [Whether we have alarm()])
+fi
+
+AC_CHECK_HEADER([sys/mman.h],
+ [AC_DEFINE(HAVE_SYS_MMAN_H, [1], [Define to 1 if we have <sys/mman.h>])])
+
+AC_CHECK_FUNC(mprotect, have_mprotect=yes, have_mprotect=no)
+if test x$have_mprotect = xyes; then
+ AC_DEFINE(HAVE_MPROTECT, 1, [Whether we have mprotect()])
+fi
+
+AC_CHECK_FUNC(getpagesize, have_getpagesize=yes, have_getpagesize=no)
+if test x$have_getpagesize = xyes; then
+ AC_DEFINE(HAVE_GETPAGESIZE, 1, [Whether we have getpagesize()])
+fi
+
+AC_CHECK_HEADER([fenv.h],
+ [AC_DEFINE(HAVE_FENV_H, [1], [Define to 1 if we have <fenv.h>])])
+
+AC_CHECK_LIB(m, feenableexcept, have_feenableexcept=yes, have_feenableexcept=no)
+if test x$have_feenableexcept = xyes; then
+ AC_DEFINE(HAVE_FEENABLEEXCEPT, 1, [Whether we have feenableexcept()])
+fi
+
+AC_CHECK_FUNC(gettimeofday, have_gettimeofday=yes, have_gettimeofday=no)
+AC_CHECK_HEADER(sys/time.h, have_sys_time_h=yes, have_sys_time_h=no)
+if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then
+ AC_DEFINE(HAVE_GETTIMEOFDAY, 1, [Whether we have gettimeofday()])
+fi
+
+dnl =====================================
+dnl Thread local storage
+
+support_for__thread=no
+
+AC_MSG_CHECKING(for __thread)
+AC_LINK_IFELSE([
+#if defined(__MINGW32__) && !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))
+#error This MinGW version has broken __thread support
+#endif
+#ifdef __OpenBSD__
+#error OpenBSD has broken __thread support
+#endif
+static __thread int x ;
+int main () { x = 123; return x; }
+], support_for__thread=yes)
+
+if test $support_for__thread = yes; then
+ AC_DEFINE([TOOLCHAIN_SUPPORTS__THREAD],[],[Whether the tool chain supports __thread])
+fi
+
+AC_MSG_RESULT($support_for__thread)
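For reference, a minimal sketch of the pattern this probe gates, assuming TOOLCHAIN_SUPPORTS__THREAD reaches config.h as defined above (the variable name is illustrative):

#include <stdio.h>

#ifdef TOOLCHAIN_SUPPORTS__THREAD
static __thread int call_count;    /* one instance per thread */
#else
static int call_count;             /* shared fallback; would need locking */
#endif

int
main (void)
{
    call_count++;
    printf ("calls seen by this thread: %d\n", call_count);
    return 0;
}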
+
+dnl
+dnl posix tls
+dnl
+
+m4_define([pthread_test_program],[dnl
+#include <stdlib.h>
+#include <pthread.h>
+
+static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+static pthread_key_t key;
+
+static void
+make_key (void)
+{
+ pthread_key_create (&key, NULL);
+}
+
+int
+main ()
+{
+ void *value = NULL;
+
+ if (pthread_once (&once_control, make_key) != 0)
+ {
+ value = NULL;
+ }
+ else
+ {
+ value = pthread_getspecific (key);
+ if (!value)
+ {
+ value = malloc (100);
+ pthread_setspecific (key, value);
+ }
+ }
+ return 0;
+}
+])
+
+AC_DEFUN([PIXMAN_CHECK_PTHREAD],[dnl
+ if test "z$support_for_pthread_setspecific" != "zyes"; then
+ PIXMAN_LINK_WITH_ENV(
+ [$1], [pthread_test_program],
+ [PTHREAD_CFLAGS="$CFLAGS"
+ PTHREAD_LIBS="$LIBS"
+ PTHREAD_LDFLAGS="$LDFLAGS"
+ support_for_pthread_setspecific=yes])
+ fi
+])
+
+if test $support_for__thread = no; then
+ support_for_pthread_setspecific=no
+
+ AC_MSG_CHECKING(for pthread_setspecific)
+
+ PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LIBS="-lpthread"])
+ PIXMAN_CHECK_PTHREAD([CFLAGS="-pthread"; LDFLAGS="-pthread"])
+ PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"])
+
+ if test $support_for_pthread_setspecific = yes; then
+ CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+ AC_DEFINE([HAVE_PTHREAD_SETSPECIFIC], [], [Whether pthread_setspecific() is supported])
+ fi
+
+  AC_MSG_RESULT($support_for_pthread_setspecific)
+fi
+
+AC_SUBST(TOOLCHAIN_SUPPORTS__THREAD)
+AC_SUBST(HAVE_PTHREAD_SETSPECIFIC)
+AC_SUBST(PTHREAD_LDFLAGS)
+AC_SUBST(PTHREAD_LIBS)
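When __thread is unavailable, the probes above pick whichever flag combination (-D_REENTRANT with -lpthread, -pthread, or -lroot on BeOS-style systems) links the test program. A hedged sketch of the pthread_setspecific()-based slot this enables; the helper names are illustrative, and the real dispatch lives in pixman-compiler.h:

#include <pthread.h>
#include <stdlib.h>

static pthread_once_t tls_once = PTHREAD_ONCE_INIT;
static pthread_key_t  tls_key;

static void
tls_make_key (void)
{
    pthread_key_create (&tls_key, free);
}

static int *
get_thread_local_int (void)
{
    int *value;

    pthread_once (&tls_once, tls_make_key);
    value = pthread_getspecific (tls_key);
    if (!value)
    {
        value = calloc (1, sizeof *value);
        pthread_setspecific (tls_key, value);
    }
    return value;
}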
+
+dnl =====================================
+dnl __attribute__((constructor))
+
+support_for_attribute_constructor=no
+
+AC_MSG_CHECKING(for __attribute__((constructor)))
+AC_LINK_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7))
+/* attribute 'constructor' is supported since gcc 2.7, but some compilers
+ * may only pretend to be gcc, so let's try to actually use it
+ */
+static int x = 1;
+static void __attribute__((constructor)) constructor_function () { x = 0; }
+int main (void) { return x; }
+#else
+#error not gcc or gcc version is older than 2.7
+#endif
+], support_for_attribute_constructor=yes)
+
+if test x$support_for_attribute_constructor = xyes; then
+ AC_DEFINE([TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR],
+ [],[Whether the tool chain supports __attribute__((constructor))])
+fi
+
+AC_MSG_RESULT($support_for_attribute_constructor)
+AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR)
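The intended use is one-time setup that runs before main(); a minimal sketch, assuming the define above is consumed from config.h (names are illustrative):

#include <stdio.h>

static int initialized;

#ifdef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR
static void __attribute__((constructor))
init_at_load_time (void)
{
    initialized = 1;
}
#endif

int
main (void)
{
    printf (initialized ? "constructor ran\n" : "no constructor support\n");
    return 0;
}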
+
+AC_OUTPUT([pixman-1.pc
+ pixman-1-uninstalled.pc
+ Makefile
+ pixman/Makefile
+ pixman/pixman-version.h
+ demos/Makefile
+ test/Makefile])
+
+m4_if(m4_eval(pixman_minor % 2), [1], [
+ echo
+ echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+ echo
+ echo " Thanks for testing this development snapshot of pixman. Please"
+ echo " report any problems you find, either by sending email to "
+ echo
+ echo " pixman@lists.freedesktop.org"
+ echo
+ echo " or by filing a bug at "
+ echo
+ echo " https://bugs.freedesktop.org/enter_bug.cgi?product=pixman "
+ echo
+ echo " If you are looking for a stable release of pixman, please note "
+ echo " that stable releases have _even_ minor version numbers. I.e., "
+ echo " pixman-0.]m4_eval(pixman_minor & ~1)[.x are stable releases, whereas pixman-$PIXMAN_VERSION_MAJOR.$PIXMAN_VERSION_MINOR.$PIXMAN_VERSION_MICRO is a "
+ echo " development snapshot that may contain bugs and experimental "
+ echo " features. "
+ echo
+ echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+ echo
+])
diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am
index d016e9f25..06a4dfb6a 100644
--- a/pixman/pixman/Makefile.am
+++ b/pixman/pixman/Makefile.am
@@ -1,127 +1,127 @@
-lib_LTLIBRARIES = libpixman-1.la
-libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined @PTHREAD_LDFLAGS@
-libpixman_1_la_LIBADD = @PTHREAD_LIBS@ @DEP_LIBS@ -lm
-libpixman_1_la_SOURCES = \
- pixman.h \
- pixman-accessor.h \
- pixman-access.c \
- pixman-access-accessors.c \
- pixman-cpu.c \
- pixman-gradient-walker.c \
- pixman-region16.c \
- pixman-region32.c \
- pixman-compiler.h \
- pixman-private.h \
- pixman-image.c \
- pixman-implementation.c \
- pixman-combine32.c \
- pixman-combine32.h \
- pixman-combine64.c \
- pixman-combine64.h \
- pixman-general.c \
- pixman.c \
- pixman-fast-path.c \
- pixman-fast-path.h \
- pixman-solid-fill.c \
- pixman-conical-gradient.c \
- pixman-linear-gradient.c \
- pixman-radial-gradient.c \
- pixman-bits-image.c \
- pixman-utils.c \
- pixman-edge.c \
- pixman-edge-accessors.c \
- pixman-edge-imp.h \
- pixman-trap.c \
- pixman-timer.c \
- pixman-matrix.c
-
-libpixmanincludedir = $(includedir)/pixman-1
-libpixmaninclude_HEADERS = pixman.h pixman-version.h
-noinst_LTLIBRARIES =
-
-BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c
-
-pixman-combine32.c : pixman-combine.c.template pixman-combine32.h make-combine.pl
- $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1)
-pixman-combine32.h : pixman-combine.h.template make-combine.pl
- $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1)
-
-pixman-combine64.c : pixman-combine.c.template pixman-combine64.h make-combine.pl
- $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1)
-pixman-combine64.h : pixman-combine.h.template make-combine.pl
- $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1)
-
-EXTRA_DIST = Makefile.win32 pixman-combine.c.template make-combine.pl pixman-region.c \
- pixman-combine.h.template solaris-hwcap.mapfile
-CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h
-
-# mmx code
-if USE_MMX
-noinst_LTLIBRARIES += libpixman-mmx.la
-libpixman_mmx_la_SOURCES = \
- pixman-mmx.c
-libpixman_mmx_la_CFLAGS = $(DEP_CFLAGS) $(MMX_CFLAGS)
-libpixman_mmx_la_LIBADD = $(DEP_LIBS)
-libpixman_1_la_LDFLAGS += $(MMX_LDFLAGS)
-libpixman_1_la_LIBADD += libpixman-mmx.la
-
-ASM_CFLAGS_mmx=$(MMX_CFLAGS)
-endif
-
-# vmx code
-if USE_VMX
-noinst_LTLIBRARIES += libpixman-vmx.la
-libpixman_vmx_la_SOURCES = \
- pixman-vmx.c \
- pixman-combine32.h
-libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
-libpixman_vmx_la_LIBADD = $(DEP_LIBS)
-libpixman_1_la_LIBADD += libpixman-vmx.la
-
-ASM_CFLAGS_vmx=$(VMX_CFLAGS)
-endif
-
-# sse2 code
-if USE_SSE2
-noinst_LTLIBRARIES += libpixman-sse2.la
-libpixman_sse2_la_SOURCES = \
- pixman-sse2.c
-libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS)
-libpixman_sse2_la_LIBADD = $(DEP_LIBS)
-libpixman_1_la_LDFLAGS += $(SSE2_LDFLAGS)
-libpixman_1_la_LIBADD += libpixman-sse2.la
-
-ASM_CFLAGS_sse2=$(SSE2_CFLAGS)
-endif
-
-# arm simd code
-if USE_ARM_SIMD
-noinst_LTLIBRARIES += libpixman-arm-simd.la
-libpixman_arm_simd_la_SOURCES = \
- pixman-arm-simd.c \
- pixman-arm-common.h \
- pixman-arm-simd-asm.S
-libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS)
-libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
-libpixman_1_la_LIBADD += libpixman-arm-simd.la
-
-ASM_CFLAGS_arm_simd=
-endif
-
-# arm neon code
-if USE_ARM_NEON
-noinst_LTLIBRARIES += libpixman-arm-neon.la
-libpixman_arm_neon_la_SOURCES = \
- pixman-arm-neon.c \
- pixman-arm-common.h \
- pixman-arm-neon-asm.S \
- pixman-arm-neon-asm.h
-libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS)
-libpixman_arm_neon_la_LIBADD = $(DEP_LIBS)
-libpixman_1_la_LIBADD += libpixman-arm-neon.la
-
-ASM_CFLAGS_arm_neon=
-endif
-
-.c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES)
- $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
+lib_LTLIBRARIES = libpixman-1.la
+libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined @PTHREAD_LDFLAGS@
+libpixman_1_la_LIBADD = @PTHREAD_LIBS@ @DEP_LIBS@ -lm
+libpixman_1_la_SOURCES = \
+ pixman.h \
+ pixman-accessor.h \
+ pixman-access.c \
+ pixman-access-accessors.c \
+ pixman-cpu.c \
+ pixman-gradient-walker.c \
+ pixman-region16.c \
+ pixman-region32.c \
+ pixman-compiler.h \
+ pixman-private.h \
+ pixman-image.c \
+ pixman-implementation.c \
+ pixman-combine32.c \
+ pixman-combine32.h \
+ pixman-combine64.c \
+ pixman-combine64.h \
+ pixman-general.c \
+ pixman.c \
+ pixman-fast-path.c \
+ pixman-fast-path.h \
+ pixman-solid-fill.c \
+ pixman-conical-gradient.c \
+ pixman-linear-gradient.c \
+ pixman-radial-gradient.c \
+ pixman-bits-image.c \
+ pixman-utils.c \
+ pixman-edge.c \
+ pixman-edge-accessors.c \
+ pixman-edge-imp.h \
+ pixman-trap.c \
+ pixman-timer.c \
+ pixman-matrix.c
+
+libpixmanincludedir = $(includedir)/pixman-1
+libpixmaninclude_HEADERS = pixman.h pixman-version.h
+noinst_LTLIBRARIES =
+
+BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c
+
+pixman-combine32.c : pixman-combine.c.template pixman-combine32.h make-combine.pl
+ $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1)
+pixman-combine32.h : pixman-combine.h.template make-combine.pl
+ $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1)
+
+pixman-combine64.c : pixman-combine.c.template pixman-combine64.h make-combine.pl
+ $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1)
+pixman-combine64.h : pixman-combine.h.template make-combine.pl
+ $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1)
+
+EXTRA_DIST = Makefile.win32 pixman-combine.c.template make-combine.pl pixman-region.c \
+ pixman-combine.h.template solaris-hwcap.mapfile
+CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h
+
+# mmx code
+if USE_MMX
+noinst_LTLIBRARIES += libpixman-mmx.la
+libpixman_mmx_la_SOURCES = \
+ pixman-mmx.c
+libpixman_mmx_la_CFLAGS = $(DEP_CFLAGS) $(MMX_CFLAGS)
+libpixman_mmx_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LDFLAGS += $(MMX_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-mmx.la
+
+ASM_CFLAGS_mmx=$(MMX_CFLAGS)
+endif
+
+# vmx code
+if USE_VMX
+noinst_LTLIBRARIES += libpixman-vmx.la
+libpixman_vmx_la_SOURCES = \
+ pixman-vmx.c \
+ pixman-combine32.h
+libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
+libpixman_vmx_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LIBADD += libpixman-vmx.la
+
+ASM_CFLAGS_vmx=$(VMX_CFLAGS)
+endif
+
+# sse2 code
+if USE_SSE2
+noinst_LTLIBRARIES += libpixman-sse2.la
+libpixman_sse2_la_SOURCES = \
+ pixman-sse2.c
+libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS)
+libpixman_sse2_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LDFLAGS += $(SSE2_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-sse2.la
+
+ASM_CFLAGS_sse2=$(SSE2_CFLAGS)
+endif
+
+# arm simd code
+if USE_ARM_SIMD
+noinst_LTLIBRARIES += libpixman-arm-simd.la
+libpixman_arm_simd_la_SOURCES = \
+ pixman-arm-simd.c \
+ pixman-arm-common.h \
+ pixman-arm-simd-asm.S
+libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS)
+libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LIBADD += libpixman-arm-simd.la
+
+ASM_CFLAGS_arm_simd=
+endif
+
+# arm neon code
+if USE_ARM_NEON
+noinst_LTLIBRARIES += libpixman-arm-neon.la
+libpixman_arm_neon_la_SOURCES = \
+ pixman-arm-neon.c \
+ pixman-arm-common.h \
+ pixman-arm-neon-asm.S \
+ pixman-arm-neon-asm.h
+libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS)
+libpixman_arm_neon_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LIBADD += libpixman-arm-neon.la
+
+ASM_CFLAGS_arm_neon=
+endif
+
+.c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES)
+ $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
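The closing .c.s suffix rule is a developer convenience: it compiles a single source file to assembly with the matching per-implementation SIMD flags, so something like `make pixman-sse2.s` in a configured build tree should let you inspect the code the compiler emits for a given fast path.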
diff --git a/pixman/pixman/Makefile.win32 b/pixman/pixman/Makefile.win32
index b5f9397a0..c5e8dfaf2 100644
--- a/pixman/pixman/Makefile.win32
+++ b/pixman/pixman/Makefile.win32
@@ -1,146 +1,146 @@
-LIBRARY = pixman-1
-
-CC = cl
-LINK = link
-
-CFG_VAR = $(CFG)
-ifeq ($(CFG_VAR),)
-CFG_VAR=release
-endif
-
-MMX_VAR = $(MMX)
-ifeq ($(MMX_VAR),)
-MMX_VAR=on
-endif
-
-SSE2_VAR = $(SSE2)
-ifeq ($(SSE2_VAR),)
-SSE2_VAR=on
-endif
-
-CFLAGS = -MD -nologo -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -I../pixman/src -I. -DPACKAGE=$(LIBRARY) -DPACKAGE_VERSION="" -DPACKAGE_BUGREPORT=""
-MMX_CFLAGS = -DUSE_MMX -w14710 -w14714
-SSE2_CFLAGS = -DUSE_SSE2
-
-# optimization flags
-ifeq ($(CFG_VAR),debug)
-CFLAGS += -Od -Zi
-else
-CFLAGS += -O2
-endif
-
-SOURCES = \
- pixman-image.c \
- pixman-access.c \
- pixman-access-accessors.c \
- pixman-region16.c \
- pixman-region32.c \
- pixman-combine32.c \
- pixman-combine64.c \
- pixman-utils.c \
- pixman-edge.c \
- pixman-edge-accessors.c \
- pixman-trap.c \
- pixman-timer.c \
- pixman-matrix.c \
- pixman-gradient-walker.c \
- pixman-conical-gradient.c \
- pixman-linear-gradient.c \
- pixman-radial-gradient.c \
- pixman-bits-image.c \
- pixman.c \
- pixman-cpu.c \
- pixman-fast-path.c \
- pixman-implementation.c \
- pixman-solid-fill.c \
- pixman-general.c \
- $(NULL)
-
-BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c
-
-# MMX compilation flags
-ifeq ($(MMX_VAR),on)
-CFLAGS += $(MMX_CFLAGS)
-SOURCES += pixman-mmx.c
-endif
-
-# SSE2 compilation flags
-ifeq ($(SSE2_VAR),on)
-CFLAGS += $(SSE2_CFLAGS)
-SOURCES += pixman-sse2.c
-endif
-
-OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(SOURCES))
-
-# targets
-all: inform informMMX informSSE2 $(CFG_VAR)/$(LIBRARY).lib
- @exit 0
-clean: inform clean_r
- @exit 0
-pixman: inform informMMX informSSE2 $(CFG_VAR)/$(LIBRARY).lib
- @exit 0
-
-inform:
-ifneq ($(CFG),release)
-ifneq ($(CFG),debug)
-ifneq ($(CFG),)
- @echo "Invalid specified configuration option : "$(CFG)"."
- @echo
- @echo -n "Possible choices for configuration are "
- @echo "'release' and 'debug'"
- @echo ""
- @exit 1
-endif
- @echo "Using default RELEASE configuration... (use CFG=release or CFG=debug)"
-endif
-endif
-
-informMMX:
-ifneq ($(MMX),off)
-ifneq ($(MMX),on)
-ifneq ($(MMX),)
- @echo "Invalid specified MMX option : "$(MMX_VAR)"."
- @echo
- @echo -n "Possible choices for MMX are 'on' or 'off'"
- @echo ""
- @exit 1
-endif
- @echo "Setting MMX flag to default value 'on'... (use MMX=on or MMX=off)"
-endif
-endif
-
-informSSE2:
-ifneq ($(SSE2),off)
-ifneq ($(SSE2),on)
-ifneq ($(SSE2),)
- @echo "Invalid specified SSE option : "$(SSE2)"."
- @echo
- @echo -n "Possible choices for SSE2 are 'on' or 'off'"
- @echo ""
- @exit 1
-endif
- @echo "Setting SSE2 flag to default value 'on'... (use SSE2=on or SSE2=off)"
-endif
-endif
-
-# pixman compilation and linking
-$(CFG_VAR)/%.obj: %.c $(BUILT_SOURCES)
- @mkdir -p $(CFG_VAR)
- @$(CC) -c $(CFLAGS) -Fo"$@" $<
-
-$(CFG_VAR)/$(LIBRARY).lib: $(OBJECTS)
- lib -NOLOGO -OUT:$@ $(OBJECTS) || exit 0
-
-pixman-combine32.c: pixman-combine.c.template pixman-combine32.h make-combine.pl
- perl ./make-combine.pl 8 < $< > $@ || ($(RM) $@; exit 1)
-pixman-combine32.h: pixman-combine.h.template make-combine.pl
- perl ./make-combine.pl 8 < $< > $@ || ($(RM) $@; exit 1)
-
-pixman-combine64.c: pixman-combine.c.template pixman-combine64.h make-combine.pl
- perl ./make-combine.pl 16 < $< > $@ || ($(RM) $@; exit 1)
-pixman-combine64.h: pixman-combine.h.template make-combine.pl
- perl ./make-combine.pl 16 < $< > $@ || ($(RM) $@; exit 1)
-
-clean_r:
- @rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.lib $(CFG_VAR)/*.pdb $(CFG)/*.ilk || exit 0
- @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk $(BUILT_SOURCES) || exit 0
+LIBRARY = pixman-1
+
+CC = cl
+LINK = link
+
+CFG_VAR = $(CFG)
+ifeq ($(CFG_VAR),)
+CFG_VAR=release
+endif
+
+MMX_VAR = $(MMX)
+ifeq ($(MMX_VAR),)
+MMX_VAR=on
+endif
+
+SSE2_VAR = $(SSE2)
+ifeq ($(SSE2_VAR),)
+SSE2_VAR=on
+endif
+
+CFLAGS = -MD -nologo -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -I../pixman/src -I. -DPACKAGE=$(LIBRARY) -DPACKAGE_VERSION="" -DPACKAGE_BUGREPORT=""
+MMX_CFLAGS = -DUSE_MMX -w14710 -w14714
+SSE2_CFLAGS = -DUSE_SSE2
+
+# optimization flags
+ifeq ($(CFG_VAR),debug)
+CFLAGS += -Od -Zi
+else
+CFLAGS += -O2
+endif
+
+SOURCES = \
+ pixman-image.c \
+ pixman-access.c \
+ pixman-access-accessors.c \
+ pixman-region16.c \
+ pixman-region32.c \
+ pixman-combine32.c \
+ pixman-combine64.c \
+ pixman-utils.c \
+ pixman-edge.c \
+ pixman-edge-accessors.c \
+ pixman-trap.c \
+ pixman-timer.c \
+ pixman-matrix.c \
+ pixman-gradient-walker.c \
+ pixman-conical-gradient.c \
+ pixman-linear-gradient.c \
+ pixman-radial-gradient.c \
+ pixman-bits-image.c \
+ pixman.c \
+ pixman-cpu.c \
+ pixman-fast-path.c \
+ pixman-implementation.c \
+ pixman-solid-fill.c \
+ pixman-general.c \
+ $(NULL)
+
+BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c
+
+# MMX compilation flags
+ifeq ($(MMX_VAR),on)
+CFLAGS += $(MMX_CFLAGS)
+SOURCES += pixman-mmx.c
+endif
+
+# SSE2 compilation flags
+ifeq ($(SSE2_VAR),on)
+CFLAGS += $(SSE2_CFLAGS)
+SOURCES += pixman-sse2.c
+endif
+
+OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(SOURCES))
+
+# targets
+all: inform informMMX informSSE2 $(CFG_VAR)/$(LIBRARY).lib
+ @exit 0
+clean: inform clean_r
+ @exit 0
+pixman: inform informMMX informSSE2 $(CFG_VAR)/$(LIBRARY).lib
+ @exit 0
+
+inform:
+ifneq ($(CFG),release)
+ifneq ($(CFG),debug)
+ifneq ($(CFG),)
+	@echo "Invalid configuration option specified: "$(CFG)"."
+ @echo
+ @echo -n "Possible choices for configuration are "
+ @echo "'release' and 'debug'"
+ @echo ""
+ @exit 1
+endif
+ @echo "Using default RELEASE configuration... (use CFG=release or CFG=debug)"
+endif
+endif
+
+informMMX:
+ifneq ($(MMX),off)
+ifneq ($(MMX),on)
+ifneq ($(MMX),)
+	@echo "Invalid MMX option specified: "$(MMX_VAR)"."
+ @echo
+ @echo -n "Possible choices for MMX are 'on' or 'off'"
+ @echo ""
+ @exit 1
+endif
+ @echo "Setting MMX flag to default value 'on'... (use MMX=on or MMX=off)"
+endif
+endif
+
+informSSE2:
+ifneq ($(SSE2),off)
+ifneq ($(SSE2),on)
+ifneq ($(SSE2),)
+	@echo "Invalid SSE2 option specified: "$(SSE2)"."
+ @echo
+ @echo -n "Possible choices for SSE2 are 'on' or 'off'"
+ @echo ""
+ @exit 1
+endif
+ @echo "Setting SSE2 flag to default value 'on'... (use SSE2=on or SSE2=off)"
+endif
+endif
+
+# pixman compilation and linking
+$(CFG_VAR)/%.obj: %.c $(BUILT_SOURCES)
+ @mkdir -p $(CFG_VAR)
+ @$(CC) -c $(CFLAGS) -Fo"$@" $<
+
+$(CFG_VAR)/$(LIBRARY).lib: $(OBJECTS)
+ lib -NOLOGO -OUT:$@ $(OBJECTS) || exit 0
+
+pixman-combine32.c: pixman-combine.c.template pixman-combine32.h make-combine.pl
+ perl ./make-combine.pl 8 < $< > $@ || ($(RM) $@; exit 1)
+pixman-combine32.h: pixman-combine.h.template make-combine.pl
+ perl ./make-combine.pl 8 < $< > $@ || ($(RM) $@; exit 1)
+
+pixman-combine64.c: pixman-combine.c.template pixman-combine64.h make-combine.pl
+ perl ./make-combine.pl 16 < $< > $@ || ($(RM) $@; exit 1)
+pixman-combine64.h: pixman-combine.h.template make-combine.pl
+ perl ./make-combine.pl 16 < $< > $@ || ($(RM) $@; exit 1)
+
+clean_r:
+ @rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.lib $(CFG_VAR)/*.pdb $(CFG)/*.ilk || exit 0
+ @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk $(BUILT_SOURCES) || exit 0
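Typical invocations are presumably `make -f Makefile.win32` for a default release build with MMX and SSE2 enabled, or `make -f Makefile.win32 CFG=debug SSE2=off` to override the defaults; the inform targets above validate exactly these three variables.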
diff --git a/pixman/pixman/config.h b/pixman/pixman/config.h
new file mode 100644
index 000000000..50adacc11
--- /dev/null
+++ b/pixman/pixman/config.h
@@ -0,0 +1,227 @@
+/* config.h. Generated from config.h.in by configure. */
+/* config.h.in. Generated from configure.ac by autoheader. */
+
+/* Define if building universal (internal helper macro) */
+#undef AC_APPLE_UNIVERSAL_BUILD
+
+/* Include compose table cache support */
+#define COMPOSECACHE 1
+
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <endian.h> header file. */
+#define HAVE_ENDIAN_H 1
+
+/* Use dlopen to load shared libraries */
+#define HAVE_DLOPEN 1
+
+
+/* Define to 1 if you have the `getpagesize' function. */
+#define HAVE_GETPAGESIZE 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* launchd support available */
+/* #undef HAVE_LAUNCHD */
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the `poll' function. */
+#define HAVE_POLL 1
+
+/* Define to 1 if you have a working `mmap' system call. */
+#define HAVE_MMAP 1
+
+/* Use shl_load to load shared libraries */
+/* #undef HAVE_SHL_LOAD */
+
+/* Define to 1 if the system has the type `socklen_t'. */
+#define HAVE_SOCKLEN_T 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/poll.h> header file. */
+#define HAVE_SYS_POLL_H 1
+
+/* Define to 1 if you have the `strtol' function. */
+#define HAVE_STRTOL 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Support IPv6 for TCP connections */
+/* #undef IPv6 */
+
+/* Support dynamically loaded font modules */
+#define LOADABLEFONTS 1
+
+/* Support os-specific local connections */
+/* #undef LOCALCONN */
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+ */
+#define LT_OBJDIR ".libs/"
+
+/* Disable XLOCALEDIR environment variable */
+#define NO_XLOCALEDIR 1
+
+/* Name of package */
+#define PACKAGE "pixman"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "sandmann@daimi.au.dk"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "pixman"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "pixman 0.19.1"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "pixman"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "0.19.1"
+
+/* Major version of this package */
+#define PACKAGE_VERSION_MAJOR 0
+
+/* Minor version of this package */
+#define PACKAGE_VERSION_MINOR 19
+
+/* Patch version of this package */
+#define PACKAGE_VERSION_PATCHLEVEL 1
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Support TCP socket connections */
+#define TCPCONN 1
+
+/* launchd support available */
+/* #undef TRANS_REOPEN */
+
+/* use MMX compiler intrinsics */
+#define USE_MMX 1
+
+/* Support UNIX socket connections */
+#define UNIXCONN 1
+
+/* Split some i18n functions into loadable modules */
+/* #undef USE_DYNAMIC_LC */
+
+/* Use the X cursor library to load cursors */
+#define USE_DYNAMIC_XCURSOR 1
+
+/* use SSE2 compiler intrinsics */
+#undef USE_SSE2
+
+/* use VMX compiler intrinsics */
+#undef USE_VMX
+
+/* poll() function is available */
+#define USE_POLL 1
+
+/* Use XCB for low-level protocol implementation */
+#define USE_XCB 1
+
+/* Version number of package */
+#define VERSION "0.19.1"
+
+/* Support bdf format bitmap font files */
+#define XFONT_BDFFORMAT 1
+
+/* Location of libX11 data */
+#define X11_DATADIR "/usr/share/X11"
+
+/* Location of libX11 library data */
+#define X11_LIBDIR "/usr/lib/X11"
+
+/* Include support for XCMS */
+#define XCMS 1
+
+/* Location of error message database */
+#define XERRORDB "XErrorDB"
+
+/* Enable XF86BIGFONT extension */
+/* #undef XF86BIGFONT */
+
+/* Use XKB */
+#define XKB 1
+
+/* Location of keysym database */
+#define XKEYSYMDB "XKeysymDB"
+
+/* support for X Locales */
+#define XLOCALE 1
+
+/* Location of libX11 locale data */
+#define XLOCALEDATADIR "locale"
+
+/* Location of libX11 locale data */
+#define XLOCALEDIR "locale"
+
+/* Location of libX11 locale libraries */
+#define XLOCALELIBDIR "locale"
+
+/* Whether libX11 is compiled with thread support */
+#define XTHREADS /**/
+
+/* Whether libX11 needs to use MT safe API's */
+#define XUSE_MTSAFE_API /**/
+
+/* Enable GNU and other extensions to the C environment for glibc */
+/* #undef _GNU_SOURCE */
+
+/* Support bitmap font files */
+#define XFONT_BITMAP 1
+
+/* Support built-in fonts */
+#define XFONT_BUILTINS 1
+
+/* Support the X Font Services Protocol */
+#define XFONT_FC 1
+
+/* Support fonts in files */
+#define XFONT_FONTFILE 1
+
+/* Support FreeType rasterizer for nearly all font file formats */
+#define XFONT_FREETYPE 1
+
+/* Support pcf format bitmap font files */
+#define XFONT_PCFFORMAT 1
+
+/* Support snf format bitmap font files */
+#define XFONT_SNFFORMAT 1
+
+/* Support Speedo font files */
+#define XFONT_SPEEDO 1
+
+/* Support IBM Type 1 rasterizer for Type1 font files */
+#define XFONT_TYPE1 1
+
+/* Support bzip2 for bitmap fonts */
+/* #undef X_BZIP2_FONT_COMPRESSION */
+
+/* Support gzip for bitmap fonts */
+#define X_GZIP_FONT_COMPRESSION 1
diff --git a/pixman/pixman/makefile b/pixman/pixman/makefile
new file mode 100644
index 000000000..eef65121f
--- /dev/null
+++ b/pixman/pixman/makefile
@@ -0,0 +1,76 @@
+LIBRARY = libpixman-1
+
+INCLUDES += $(OBJDIR)
+
+CSRCS = \
+ pixman-access.c \
+ pixman-access-accessors.c \
+ pixman-cpu.c \
+ pixman-gradient-walker.c \
+ pixman-region16.c \
+ pixman-region32.c \
+ pixman-image.c \
+ pixman-implementation.c \
+ pixman-combine32.c \
+ pixman-combine64.c \
+ pixman-general.c \
+ pixman.c \
+ pixman-fast-path.c \
+ pixman-solid-fill.c \
+ pixman-conical-gradient.c \
+ pixman-linear-gradient.c \
+ pixman-radial-gradient.c \
+ pixman-bits-image.c \
+ pixman-utils.c \
+ pixman-edge.c \
+ pixman-edge-accessors.c \
+ pixman-trap.c \
+ pixman-timer.c \
+ pixman-matrix.c
+
+BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c
+
+$(OBJDIR)\pixman-combine32.c : pixman-combine.c.template $(OBJDIR)\pixman-combine32.h make-combine.pl
+ perl make-combine.pl 8 < pixman-combine.c.template > $@
+$(OBJDIR)\pixman-combine32.h : pixman-combine.h.template make-combine.pl
+ perl make-combine.pl 8 < pixman-combine.h.template > $@
+
+$(OBJDIR)\pixman-combine64.c : pixman-combine.c.template $(OBJDIR)\pixman-combine64.h make-combine.pl
+ perl make-combine.pl 16 < pixman-combine.c.template > $@
+$(OBJDIR)\pixman-combine64.h : pixman-combine.h.template make-combine.pl
+ perl make-combine.pl 16 < pixman-combine.h.template > $@
+
+## mmx code
+#if USE_MMX
+#noinst_LTLIBRARIES += libpixman-mmx.la
+CSRCS += \
+ pixman-mmx.c
+# pixman-mmx.h
+#libpixman_mmx_la_CFLAGS = $(DEP_CFLAGS) $(MMX_CFLAGS)
+#libpixman_mmx_la_LIBADD = $(DEP_LIBS)
+#libpixman_1_la_LIBADD += libpixman-mmx.la
+#endif
+
+## vmx code
+#if USE_VMX
+#noinst_LTLIBRARIES += libpixman-vmx.la
+#libpixman_vmx_la_SOURCES = \
+# pixman-vmx.c \
+# pixman-vmx.h \
+# pixman-combine32.h
+#libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
+#libpixman_vmx_la_LIBADD = $(DEP_LIBS)
+#libpixman_1_la_LIBADD += libpixman-vmx.la
+#endif
+
+# sse2 code
+#if USE_SSE2
+#noinst_LTLIBRARIES += libpixman-sse2.la
+#libpixman_sse2_la_SOURCES = \
+# pixman-sse2.c \
+# pixman-sse2.h
+#libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS)
+#libpixman_sse2_la_LIBADD = $(DEP_LIBS)
+#libpixman_1_la_LIBADD += libpixman-sse2.la
+#endif
+
diff --git a/pixman/pixman/pixman-compiler.h b/pixman/pixman/pixman-compiler.h
index 8f6c787f6..ebbffc3f3 100644
--- a/pixman/pixman/pixman-compiler.h
+++ b/pixman/pixman/pixman-compiler.h
@@ -89,10 +89,22 @@
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
-#elif defined(__MINGW32__)
+#elif defined(__MINGW32__) && !defined(__WIN64)
-# define _NO_W32_PSEUDO_MODIFIERS
-# include <windows.h>
+/* We can't include <windows.h> as it causes various clashes with
+ * identifiers in pixman, sigh. So just declare the functions we need
+ * here.
+ */
+extern long __stdcall InterlockedCompareExchange(long volatile *, long, long);
+#define InterlockedCompareExchangePointer(d,e,c) \
+ (void *)InterlockedCompareExchange((long volatile *)(d),(long)(e),(long)(c))
+extern int __stdcall TlsAlloc (void);
+extern void * __stdcall TlsGetValue (unsigned);
+extern int __stdcall TlsSetValue (unsigned, void *);
+extern void * __stdcall CreateMutexA(void *, int, char *);
+extern int __stdcall CloseHandle(void *);
+extern unsigned __stdcall WaitForSingleObject (void *, unsigned);
+extern int __stdcall ReleaseMutex (void *);
# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
static volatile int tls_ ## name ## _initialized = 0; \
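With those prototypes in place the Win32 TLS primitives can be called directly; a hedged sketch using only the functions declared above (helper names illustrative, error handling omitted):

static unsigned tls_index;
static int      tls_index_ready;

static void *
tls_get_slot (void)
{
    if (!tls_index_ready)
    {
        tls_index = TlsAlloc ();
        tls_index_ready = 1;
    }
    return TlsGetValue (tls_index);
}

static void
tls_set_slot (void *value)
{
    (void) tls_get_slot ();        /* make sure the index exists */
    TlsSetValue (tls_index, value);
}

The lazy initialization above is not thread-safe by itself, which is presumably why CreateMutexA, WaitForSingleObject and ReleaseMutex are declared as well: the real PIXMAN_DEFINE_THREAD_LOCAL macros serialize the first-time setup.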
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c
index 92f030871..5c0965d83 100644
--- a/pixman/pixman/pixman-fast-path.c
+++ b/pixman/pixman/pixman-fast-path.c
@@ -1,2228 +1,2228 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Keith Packard, SuSE, Inc.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#include <string.h>
-#include <stdlib.h>
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-fast-path.h"
-
-static force_inline uint32_t
-fetch_24 (uint8_t *a)
-{
- if (((unsigned long)a) & 1)
- {
-#ifdef WORDS_BIGENDIAN
- return (*a << 16) | (*(uint16_t *)(a + 1));
-#else
- return *a | (*(uint16_t *)(a + 1) << 8);
-#endif
- }
- else
- {
-#ifdef WORDS_BIGENDIAN
- return (*(uint16_t *)a << 8) | *(a + 2);
-#else
- return *(uint16_t *)a | (*(a + 2) << 16);
-#endif
- }
-}
-
-static force_inline void
-store_24 (uint8_t *a,
- uint32_t v)
-{
- if (((unsigned long)a) & 1)
- {
-#ifdef WORDS_BIGENDIAN
- *a = (uint8_t) (v >> 16);
- *(uint16_t *)(a + 1) = (uint16_t) (v);
-#else
- *a = (uint8_t) (v);
- *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
-#endif
- }
- else
- {
-#ifdef WORDS_BIGENDIAN
- *(uint16_t *)a = (uint16_t)(v >> 8);
- *(a + 2) = (uint8_t)v;
-#else
- *(uint16_t *)a = (uint16_t)v;
- *(a + 2) = (uint8_t)(v >> 16);
-#endif
- }
-}
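The parity test keeps every 16-bit access aligned: at an odd address the lone byte sits at a and the aligned halfword at a + 1, and the even case mirrors that. A quick sanity harness, assuming fetch_24() and store_24() above are in scope:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
    uint32_t word[2] = { 0, 0 };
    uint8_t *buf = (uint8_t *) word;    /* 4-byte aligned scratch */

    store_24 (buf, 0x123456);           /* even-address path */
    assert (fetch_24 (buf) == 0x123456);

    store_24 (buf + 1, 0xabcdef);       /* odd-address path */
    assert (fetch_24 (buf + 1) == 0xabcdef);

    return 0;
}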
-
-static force_inline uint32_t
-over (uint32_t src,
- uint32_t dest)
-{
- uint32_t a = ~src >> 24;
-
- UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
-
- return dest;
-}
-
-static uint32_t
-in (uint32_t x,
- uint8_t y)
-{
- uint16_t a = y;
-
- UN8x4_MUL_UN8 (x, a);
-
- return x;
-}
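over() is the Porter-Duff OVER operator on four 8-bit channels at once: per channel, dest' = src + (255 - src_alpha) * dest / 255, since a above is ~src >> 24 = 255 - src_alpha. As a worked example, over (0x80404040, 0xffffffff) scales each 0xff destination channel by 0x7f / 0xff down to 0x7f and adds the source channel, giving 0xffbfbfbf. in() is the matching attenuation step: it multiplies all four channels of x by y / 255, which is how an a8 mask pixel scales a source pixel before the OVER.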
-
-/*
- * Naming convention:
- *
- * op_src_mask_dest
- *
- * e.g. fast_composite_over_x888_8_8888 composites with the OVER
- * operator, an x8r8g8b8 source, an a8 mask and an a8r8g8b8 destination.
- */
-static void
-fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *src, *src_line;
- uint32_t *dst, *dst_line;
- uint8_t *mask, *mask_line;
- int src_stride, mask_stride, dst_stride;
- uint8_t m;
- uint32_t s, d;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- src = src_line;
- src_line += src_stride;
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
-
- w = width;
- while (w--)
- {
- m = *mask++;
- if (m)
- {
- s = *src | 0xff000000;
-
- if (m == 0xff)
- {
- *dst = s;
- }
- else
- {
- d = in (s, m);
- *dst = over (d, *dst);
- }
- }
- src++;
- dst++;
- }
- }
-}
-
-static void
-fast_composite_in_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dest_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
- uint16_t t;
-
- src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
- srca = src >> 24;
-
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- if (srca == 0xff)
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
-
- if (m == 0)
- *dst = 0;
- else if (m != 0xff)
- *dst = MUL_UN8 (m, *dst, t);
-
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- m = MUL_UN8 (m, srca, t);
-
- if (m == 0)
- *dst = 0;
- else if (m != 0xff)
- *dst = MUL_UN8 (m, *dst, t);
-
- dst++;
- }
- }
- }
-}
-
-static void
-fast_composite_in_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dest_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint8_t s;
- uint16_t t;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
-
- if (s == 0)
- *dst = 0;
- else if (s != 0xff)
- *dst = MUL_UN8 (s, *dst, t);
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst_line, *dst, d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- *dst = src;
- else
- *dst = over (src, *dst);
- }
- else if (m)
- {
- d = in (src, m);
- *dst = over (d, *dst);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, s;
- uint32_t *dst_line, *dst, d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
-
- if (ma)
- {
- d = *dst;
- s = src;
-
- UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
-
- *dst = s;
- }
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca, s;
- uint32_t *dst_line, *dst, d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
- if (ma == 0xffffffff)
- {
- if (srca == 0xff)
- *dst = src;
- else
- *dst = over (src, *dst);
- }
- else if (ma)
- {
- d = *dst;
- s = src;
-
- UN8x4_MUL_UN8x4 (s, ma);
- UN8x4_MUL_UN8 (ma, srca);
- ma = ~ma;
- UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
-
- *dst = d;
- }
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint8_t *dst_line, *dst;
- uint32_t d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- {
- d = src;
- }
- else
- {
- d = fetch_24 (dst);
- d = over (src, d);
- }
- store_24 (dst, d);
- }
- else if (m)
- {
- d = over (in (src, m), fetch_24 (dst));
- store_24 (dst, d);
- }
- dst += 3;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- {
- d = src;
- }
- else
- {
- d = *dst;
- d = over (src, CONVERT_0565_TO_0888 (d));
- }
- *dst = CONVERT_8888_TO_0565 (d);
- }
- else if (m)
- {
- d = *dst;
- d = over (in (src, m), CONVERT_0565_TO_0888 (d));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca, s;
- uint16_t src16;
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- src16 = CONVERT_8888_TO_0565 (src);
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
- if (ma == 0xffffffff)
- {
- if (srca == 0xff)
- {
- *dst = src16;
- }
- else
- {
- d = *dst;
- d = over (src, CONVERT_0565_TO_0888 (d));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- }
- else if (ma)
- {
- d = *dst;
- d = CONVERT_0565_TO_0888 (d);
-
- s = src;
-
- UN8x4_MUL_UN8x4 (s, ma);
- UN8x4_MUL_UN8 (ma, srca);
- ma = ~ma;
- UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
-
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- uint8_t a;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (a == 0xff)
- *dst = s;
- else if (s)
- *dst = over (s, *dst);
- dst++;
- }
- }
-}
-
-static void
-fast_composite_src_x888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- *dst++ = (*src++) | 0xff000000;
- }
-}
-
-#if 0
-static void
-fast_composite_over_8888_0888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint32_t d;
- uint32_t *src_line, *src, s;
- uint8_t a;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (a)
- {
- if (a == 0xff)
- d = s;
- else
- d = over (s, fetch_24 (dst));
-
- store_24 (dst, d);
- }
- dst += 3;
- }
- }
-}
-#endif
-
-static void
-fast_composite_over_8888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint32_t *src_line, *src, s;
- uint8_t a;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (s)
- {
- if (a == 0xff)
- {
- d = s;
- }
- else
- {
- d = *dst;
- d = over (s, CONVERT_0565_TO_0888 (d));
- }
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_src_x888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- *dst = CONVERT_8888_TO_0565 (s);
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint8_t s, d;
- uint16_t t;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- if (s)
- {
- if (s != 0xff)
- {
- d = *dst;
- t = d + s;
- s = t | (0 - (t >> 8));
- }
- *dst = s;
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint32_t s, d;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- if (s)
- {
- if (s != 0xffffffff)
- {
- d = *dst;
- if (d)
- UN8x4_ADD_UN8x4 (s, d);
- }
- *dst = s;
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- int32_t w;
- uint32_t src;
- uint8_t sa;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- sa = (src >> 24);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- uint16_t tmp;
- uint16_t a;
- uint32_t m, d;
- uint32_t r;
-
- a = *mask++;
- d = *dst;
-
- m = MUL_UN8 (sa, a, tmp);
- r = ADD_UN8 (m, d, tmp);
-
- *dst++ = r;
- }
- }
-}
-
-#ifdef WORDS_BIGENDIAN
-#define CREATE_BITMASK(n) (0x80000000 >> (n))
-#define UPDATE_BITMASK(n) ((n) >> 1)
-#else
-#define CREATE_BITMASK(n) (1 << (n))
-#define UPDATE_BITMASK(n) ((n) << 1)
-#endif
-
-#define TEST_BIT(p, n) \
- (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
-#define SET_BIT(p, n) \
-    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0)
-
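These macros address single bits inside the 32-bit words of an a1 image: n >> 5 selects the word, n & 31 the bit within it, and CREATE_BITMASK hides the endian-dependent bit order. A small worked example, assuming the macros above are in scope on a little-endian build:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
    uint32_t bits[2] = { 0, 0 };

    SET_BIT (bits, 37);    /* word 37 >> 5 == 1, bit 37 & 31 == 5 */

    assert (bits[1] == (1u << 5));
    assert (TEST_BIT (bits, 37));
    return 0;
}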
-static void
-fast_composite_add_1000_1000 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
- src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t,
- dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- /*
- * TODO: improve performance by processing uint32_t data instead
- * of individual bits
- */
- if (TEST_BIT (src, src_x + w))
- SET_BIT (dst, dest_x + w);
- }
- }
-}
-
-static void
-fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst, *dst_line;
- uint32_t *mask, *mask_line;
- int mask_stride, dst_stride;
- uint32_t bitcache, bitmask;
- int32_t w;
-
- if (width <= 0)
- return;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
- dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
- mask_stride, mask_line, 1);
- mask_line += mask_x >> 5;
-
- if (srca == 0xff)
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = src;
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = over (src, *dst);
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
-}
-
-static void
-fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst, *dst_line;
- uint32_t *mask, *mask_line;
- int mask_stride, dst_stride;
- uint32_t bitcache, bitmask;
- int32_t w;
- uint32_t d;
- uint16_t src565;
-
- if (width <= 0)
- return;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
- dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
- mask_stride, mask_line, 1);
- mask_line += mask_x >> 5;
-
- if (srca == 0xff)
- {
- src565 = CONVERT_8888_TO_0565 (src);
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = src565;
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- {
- d = over (src, CONVERT_0565_TO_0888 (*dst));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
-}
-
-/*
- * Simple bitblt
- */
-
-static void
-fast_composite_solid_fill (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (dst_image->bits.format == PIXMAN_a1)
- {
- src = src >> 31;
- }
- else if (dst_image->bits.format == PIXMAN_a8)
- {
- src = src >> 24;
- }
- else if (dst_image->bits.format == PIXMAN_r5g6b5 ||
- dst_image->bits.format == PIXMAN_b5g6r5)
- {
- src = CONVERT_8888_TO_0565 (src);
- }
-
- pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
- PIXMAN_FORMAT_BPP (dst_image->bits.format),
- dest_x, dest_y,
- width, height,
- src);
-}
-
-static void
-fast_composite_src_memcpy (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8;
- uint32_t n_bytes = width * bpp;
- int dst_stride, src_stride;
- uint8_t *dst;
- uint8_t *src;
-
- src_stride = src_image->bits.rowstride * 4;
- dst_stride = dst_image->bits.rowstride * 4;
-
- src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
- dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
-
- while (height--)
- {
- memcpy (dst, src, n_bytes);
-
- dst += dst_stride;
- src += src_stride;
- }
-}
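Note the stride bookkeeping here: bits.rowstride counts uint32_t units, so multiplying by 4 converts it to bytes, while the copy length comes from the pixel width alone. For a 100-pixel-wide r5g6b5 row (bpp = 16, so bpp / 8 = 2 bytes per pixel) each memcpy moves n_bytes = 200 bytes, regardless of any padding the stride adds.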
-
-FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
-FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
-FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
-FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
-FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
-FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
-FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
-FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
-FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
-FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
-FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
-FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
-FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
-FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
-FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
-FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
-FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
-
-/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
-static force_inline void
-scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
- const uint16_t * src,
- int32_t w,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t fully_transparent_src)
-{
- uint16_t tmp1, tmp2, tmp3, tmp4;
- while ((w -= 4) >= 0)
- {
- tmp1 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp2 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp3 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp4 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- *dst++ = tmp1;
- *dst++ = tmp2;
- *dst++ = tmp3;
- *dst++ = tmp4;
- }
- if (w & 2)
- {
- tmp1 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp2 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- *dst++ = tmp1;
- *dst++ = tmp2;
- }
- if (w & 1)
- *dst++ = src[pixman_fixed_to_int (vx)];
-}
-
-FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, COVER)
-FAST_NEAREST_MAINLOOP (565_565_none_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, NONE)
-FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, PAD)
-
-static force_inline uint32_t
-fetch_nearest (pixman_repeat_t src_repeat,
- pixman_format_code_t format,
- uint32_t *src, int x, int src_width)
-{
- if (repeat (src_repeat, &x, src_width))
- {
- if (format == PIXMAN_x8r8g8b8)
- return *(src + x) | 0xff000000;
- else
- return *(src + x);
- }
- else
- {
- return 0;
- }
-}
-
-static force_inline void
-combine_over (uint32_t s, uint32_t *dst)
-{
- if (s)
- {
- uint8_t ia = 0xff - (s >> 24);
-
- if (ia)
- UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
- else
- *dst = s;
- }
-}
-
-static force_inline void
-combine_src (uint32_t s, uint32_t *dst)
-{
- *dst = s;
-}
-
-static void
-fast_composite_scaled_nearest (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line;
- uint32_t *src_line;
- int dst_stride, src_stride;
- int src_width, src_height;
- pixman_repeat_t src_repeat;
- pixman_fixed_t unit_x, unit_y;
- pixman_format_code_t src_format;
- pixman_vector_t v;
- pixman_fixed_t vy;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
- * transformed from destination space to source space
- */
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
-
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- if (!pixman_transform_point_3d (src_image->common.transform, &v))
- return;
-
- unit_x = src_image->common.transform->matrix[0][0];
- unit_y = src_image->common.transform->matrix[1][1];
-
- /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
- v.vector[0] -= pixman_fixed_e;
- v.vector[1] -= pixman_fixed_e;
-
- src_height = src_image->bits.height;
- src_width = src_image->bits.width;
- src_repeat = src_image->common.repeat;
- src_format = src_image->bits.format;
-
- vy = v.vector[1];
- while (height--)
- {
- pixman_fixed_t vx = v.vector[0];
- int y = pixman_fixed_to_int (vy);
- uint32_t *dst = dst_line;
-
- dst_line += dst_stride;
-
- /* adjust the y location by a unit vector in the y direction
- * this is equivalent to transforming y+1 of the destination point to source space */
- vy += unit_y;
-
- if (!repeat (src_repeat, &y, src_height))
- {
- if (op == PIXMAN_OP_SRC)
- memset (dst, 0, sizeof (*dst) * width);
- }
- else
- {
- int w = width;
-
- uint32_t *src = src_line + y * src_stride;
-
- while (w >= 2)
- {
- uint32_t s1, s2;
- int x1, x2;
-
- x1 = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- x2 = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- w -= 2;
-
- s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
- s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
-
- if (op == PIXMAN_OP_OVER)
- {
- combine_over (s1, dst++);
- combine_over (s2, dst++);
- }
- else
- {
- combine_src (s1, dst++);
- combine_src (s2, dst++);
- }
- }
-
- while (w--)
- {
- uint32_t s;
- int x;
-
- x = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- s = fetch_nearest (src_repeat, src_format, src, x, src_width);
-
- if (op == PIXMAN_OP_OVER)
- combine_over (s, dst++);
- else
- combine_src (s, dst++);
- }
- }
- }
-}
-
-#define CACHE_LINE_SIZE 64
-
-#define FAST_SIMPLE_ROTATE(suffix, pix_type) \
- \
-static void \
-blt_rotated_90_trivial_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int w, \
- int h) \
-{ \
- int x, y; \
- for (y = 0; y < h; y++) \
- { \
- const pix_type *s = src + (h - y - 1); \
- pix_type *d = dst + dst_stride * y; \
- for (x = 0; x < w; x++) \
- { \
- *d++ = *s; \
- s += src_stride; \
- } \
- } \
-} \
- \
-static void \
-blt_rotated_270_trivial_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int w, \
- int h) \
-{ \
- int x, y; \
- for (y = 0; y < h; y++) \
- { \
- const pix_type *s = src + src_stride * (w - 1) + y; \
- pix_type *d = dst + dst_stride * y; \
- for (x = 0; x < w; x++) \
- { \
- *d++ = *s; \
- s -= src_stride; \
- } \
- } \
-} \
- \
-static void \
-blt_rotated_90_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int W, \
- int H) \
-{ \
- int x; \
- int leading_pixels = 0, trailing_pixels = 0; \
- const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
- \
- /* \
- * split processing into handling destination as TILE_SIZExH cache line \
- * aligned vertical stripes (optimistically assuming that destination \
- * stride is a multiple of cache line, if not - it will be just a bit \
- * slower) \
- */ \
- \
- if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
- { \
- leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (leading_pixels > W) \
- leading_pixels = W; \
- \
- /* unaligned leading part NxH (where N < TILE_SIZE) */ \
- blt_rotated_90_trivial_##suffix ( \
- dst, \
- dst_stride, \
- src, \
- src_stride, \
- leading_pixels, \
- H); \
- \
- dst += leading_pixels; \
- src += leading_pixels * src_stride; \
- W -= leading_pixels; \
- } \
- \
- if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
- { \
- trailing_pixels = (((uintptr_t)(dst + W) & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (trailing_pixels > W) \
- trailing_pixels = W; \
- W -= trailing_pixels; \
- } \
- \
- for (x = 0; x < W; x += TILE_SIZE) \
- { \
- /* aligned middle part TILE_SIZExH */ \
- blt_rotated_90_trivial_##suffix ( \
- dst + x, \
- dst_stride, \
- src + src_stride * x, \
- src_stride, \
- TILE_SIZE, \
- H); \
- } \
- \
- if (trailing_pixels) \
- { \
- /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
- blt_rotated_90_trivial_##suffix ( \
- dst + W, \
- dst_stride, \
- src + W * src_stride, \
- src_stride, \
- trailing_pixels, \
- H); \
- } \
-} \
- \
-static void \
-blt_rotated_270_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int W, \
- int H) \
-{ \
- int x; \
- int leading_pixels = 0, trailing_pixels = 0; \
- const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
- \
- /* \
- * split processing into handling destination as TILE_SIZExH cache line \
- * aligned vertical stripes (optimistically assuming that destination \
- * stride is a multiple of cache line, if not - it will be just a bit \
- * slower) \
- */ \
- \
- if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
- { \
- leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (leading_pixels > W) \
- leading_pixels = W; \
- \
- /* unaligned leading part NxH (where N < TILE_SIZE) */ \
- blt_rotated_270_trivial_##suffix ( \
- dst, \
- dst_stride, \
- src + src_stride * (W - leading_pixels), \
- src_stride, \
- leading_pixels, \
- H); \
- \
- dst += leading_pixels; \
- W -= leading_pixels; \
- } \
- \
- if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
- { \
- trailing_pixels = (((uintptr_t)(dst + W) & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (trailing_pixels > W) \
- trailing_pixels = W; \
- W -= trailing_pixels; \
- src += trailing_pixels * src_stride; \
- } \
- \
- for (x = 0; x < W; x += TILE_SIZE) \
- { \
- /* aligned middle part TILE_SIZExH */ \
- blt_rotated_270_trivial_##suffix ( \
- dst + x, \
- dst_stride, \
- src + src_stride * (W - x - TILE_SIZE), \
- src_stride, \
- TILE_SIZE, \
- H); \
- } \
- \
- if (trailing_pixels) \
- { \
- /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
- blt_rotated_270_trivial_##suffix ( \
- dst + W, \
- dst_stride, \
- src - trailing_pixels * src_stride, \
- src_stride, \
- trailing_pixels, \
- H); \
- } \
-} \
- \
-static void \
-fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- pix_type *dst_line; \
- pix_type *src_line; \
- int dst_stride, src_stride; \
- int src_x_t, src_y_t; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \
- dst_stride, dst_line, 1); \
- src_x_t = -src_y + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[0][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
- src_y_t = src_x + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[1][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
- src_stride, src_line, 1); \
- blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \
- width, height); \
-} \
- \
-static void \
-fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- pix_type *dst_line; \
- pix_type *src_line; \
- int dst_stride, src_stride; \
- int src_x_t, src_y_t; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \
- dst_stride, dst_line, 1); \
- src_x_t = src_y + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[0][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e); \
- src_y_t = -src_x + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[1][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
- src_stride, src_line, 1); \
- blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \
- width, height); \
-}
-
-FAST_SIMPLE_ROTATE (8, uint8_t)
-FAST_SIMPLE_ROTATE (565, uint16_t)
-FAST_SIMPLE_ROTATE (8888, uint32_t)
-
-static const pixman_fast_path_t c_fast_paths[] =
-{
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
- PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
- PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
- PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
-
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
-
-#define NEAREST_FAST_PATH(op,s,d) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest, \
- }
-
- NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
- NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
-
- NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
- NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
-
- NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
- NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
-
- NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
- NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
-
-#define SIMPLE_ROTATE_FLAGS(angle) \
- (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \
- FAST_PATH_NEAREST_FILTER | \
- FAST_PATH_SAMPLES_COVER_CLIP | \
- FAST_PATH_STANDARD_FLAGS)
-
-#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_rotate_90_##suffix, \
- }, \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_rotate_270_##suffix, \
- }
-
- SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
- SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
- SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
- SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
- SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
-
- { PIXMAN_OP_NONE },
-};
-
-#ifdef WORDS_BIGENDIAN
-#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n)))
-#else
-#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs))
-#endif
-
-static force_inline void
-pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
-{
- if (offs)
- {
- int leading_pixels = 32 - offs;
- if (leading_pixels >= width)
- {
- if (v)
- *dst |= A1_FILL_MASK (width, offs);
- else
- *dst &= ~A1_FILL_MASK (width, offs);
- return;
- }
- else
- {
- if (v)
- *dst++ |= A1_FILL_MASK (leading_pixels, offs);
- else
- *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
- width -= leading_pixels;
- }
- }
- while (width >= 32)
- {
- if (v)
- *dst++ = 0xFFFFFFFF;
- else
- *dst++ = 0;
- width -= 32;
- }
- if (width > 0)
- {
- if (v)
- *dst |= A1_FILL_MASK (width, 0);
- else
- *dst &= ~A1_FILL_MASK (width, 0);
- }
-}
-
-static void
-pixman_fill1 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- uint32_t *dst = bits + y * stride + (x >> 5);
- int offs = x & 31;
-
- if (xor & 1)
- {
- while (height--)
- {
- pixman_fill1_line (dst, offs, width, 1);
- dst += stride;
- }
- }
- else
- {
- while (height--)
- {
- pixman_fill1_line (dst, offs, width, 0);
- dst += stride;
- }
- }
-}
-
-static void
-pixman_fill8 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- int byte_stride = stride * (int) sizeof (uint32_t);
- uint8_t *dst = (uint8_t *) bits;
- uint8_t v = xor & 0xff;
- int i;
-
- dst = dst + y * byte_stride + x;
-
- while (height--)
- {
- for (i = 0; i < width; ++i)
- dst[i] = v;
-
- dst += byte_stride;
- }
-}
-
-static void
-pixman_fill16 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- int short_stride =
- (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
- uint16_t *dst = (uint16_t *)bits;
- uint16_t v = xor & 0xffff;
- int i;
-
- dst = dst + y * short_stride + x;
-
- while (height--)
- {
- for (i = 0; i < width; ++i)
- dst[i] = v;
-
- dst += short_stride;
- }
-}
-
-static void
-pixman_fill32 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- int i;
-
- bits = bits + y * stride + x;
-
- while (height--)
- {
- for (i = 0; i < width; ++i)
- bits[i] = xor;
-
- bits += stride;
- }
-}
-
-static pixman_bool_t
-fast_path_fill (pixman_implementation_t *imp,
- uint32_t * bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- switch (bpp)
- {
- case 1:
- pixman_fill1 (bits, stride, x, y, width, height, xor);
- break;
-
- case 8:
- pixman_fill8 (bits, stride, x, y, width, height, xor);
- break;
-
- case 16:
- pixman_fill16 (bits, stride, x, y, width, height, xor);
- break;
-
- case 32:
- pixman_fill32 (bits, stride, x, y, width, height, xor);
- break;
-
- default:
- return _pixman_implementation_fill (
- imp->delegate, bits, stride, bpp, x, y, width, height, xor);
- break;
- }
-
- return TRUE;
-}
-
-pixman_implementation_t *
-_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
-{
- pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
-
- imp->fill = fast_path_fill;
-
- return imp;
-}
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. SuSE makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Author: Keith Packard, SuSE, Inc.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <string.h>
+#include <stdlib.h>
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+#include "pixman-fast-path.h"
+
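+/*
+ * 0888 (24bpp) pixels are three bytes wide, so they cannot be accessed
+ * as a single aligned word.  fetch_24()/store_24() below split the access
+ * into one byte plus one 16-bit halfword, choosing the split by address
+ * parity so that the 16-bit access is 2-byte aligned, and fix up the
+ * byte order for the host endianness.
+ */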
+static force_inline uint32_t
+fetch_24 (uint8_t *a)
+{
+ if (((unsigned long)a) & 1)
+ {
+#ifdef WORDS_BIGENDIAN
+ return (*a << 16) | (*(uint16_t *)(a + 1));
+#else
+ return *a | (*(uint16_t *)(a + 1) << 8);
+#endif
+ }
+ else
+ {
+#ifdef WORDS_BIGENDIAN
+ return (*(uint16_t *)a << 8) | *(a + 2);
+#else
+ return *(uint16_t *)a | (*(a + 2) << 16);
+#endif
+ }
+}
+
+static force_inline void
+store_24 (uint8_t *a,
+ uint32_t v)
+{
+ if (((unsigned long)a) & 1)
+ {
+#ifdef WORDS_BIGENDIAN
+ *a = (uint8_t) (v >> 16);
+ *(uint16_t *)(a + 1) = (uint16_t) (v);
+#else
+ *a = (uint8_t) (v);
+ *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
+#endif
+ }
+ else
+ {
+#ifdef WORDS_BIGENDIAN
+ *(uint16_t *)a = (uint16_t)(v >> 8);
+ *(a + 2) = (uint8_t)v;
+#else
+ *(uint16_t *)a = (uint16_t)v;
+ *(a + 2) = (uint8_t)(v >> 16);
+#endif
+ }
+}
+
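+/*
+ * Porter-Duff OVER for premultiplied a8r8g8b8 pixels: per component,
+ * dest = src + dest * (255 - src_alpha) / 255.  The
+ * UN8x4_MUL_UN8_ADD_UN8x4 helper from pixman-combine32.h performs the
+ * byte-wise multiply-and-add on all four components at once.
+ */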
+static force_inline uint32_t
+over (uint32_t src,
+ uint32_t dest)
+{
+ uint32_t a = ~src >> 24;
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
+
+ return dest;
+}
+
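+/*
+ * Porter-Duff IN against an 8-bit mask: per component, x = x * y / 255.
+ */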
+static uint32_t
+in (uint32_t x,
+ uint8_t y)
+{
+ uint16_t a = y;
+
+ UN8x4_MUL_UN8 (x, a);
+
+ return x;
+}
+
+/*
+ * Naming convention:
+ *
+ * op_src_mask_dest
+ */
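+/*
+ * For example, fast_composite_over_x888_8_8888() below applies OVER with
+ * an x888 source, an a8 mask, and an 8888 destination.
+ */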
+static void
+fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *src, *src_line;
+ uint32_t *dst, *dst_line;
+ uint8_t *mask, *mask_line;
+ int src_stride, mask_stride, dst_stride;
+ uint8_t m;
+ uint32_t s, d;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ src = src_line;
+ src_line += src_stride;
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+
+ w = width;
+ while (w--)
+ {
+ m = *mask++;
+ if (m)
+ {
+ s = *src | 0xff000000;
+
+ if (m == 0xff)
+ {
+ *dst = s;
+ }
+ else
+ {
+ d = in (s, m);
+ *dst = over (d, *dst);
+ }
+ }
+ src++;
+ dst++;
+ }
+ }
+}
+
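+/*
+ * IN with a solid source and an a8 mask onto an a8 destination:
+ * dst = dst * (src_alpha * mask / 255) / 255.  When the source is fully
+ * opaque (srca == 0xff) the first multiplication is skipped entirely.
+ */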
+static void
+fast_composite_in_n_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dest_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint8_t *dst_line, *dst;
+ uint8_t *mask_line, *mask, m;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint16_t t;
+
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+
+ srca = src >> 24;
+
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ if (srca == 0xff)
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ m = *mask++;
+
+ if (m == 0)
+ *dst = 0;
+ else if (m != 0xff)
+ *dst = MUL_UN8 (m, *dst, t);
+
+ dst++;
+ }
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ m = *mask++;
+ m = MUL_UN8 (m, srca, t);
+
+ if (m == 0)
+ *dst = 0;
+ else if (m != 0xff)
+ *dst = MUL_UN8 (m, *dst, t);
+
+ dst++;
+ }
+ }
+ }
+}
+
+static void
+fast_composite_in_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dest_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint8_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint8_t s;
+ uint16_t t;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+
+ if (s == 0)
+ *dst = 0;
+ else if (s != 0xff)
+ *dst = MUL_UN8 (s, *dst, t);
+
+ dst++;
+ }
+ }
+}
+
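+/*
+ * OVER with a solid source and an a8 mask.  Pixman colors are
+ * premultiplied, so src == 0 means fully transparent, which makes OVER a
+ * no-op and permits the early return below.  A fully opaque mask byte
+ * combined with a fully opaque source degenerates to a plain store;
+ * otherwise the source is first scaled by the mask (IN) and then
+ * composited (OVER).
+ */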
+static void
+fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint32_t *dst_line, *dst, d;
+ uint8_t *mask_line, *mask, m;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ m = *mask++;
+ if (m == 0xff)
+ {
+ if (srca == 0xff)
+ *dst = src;
+ else
+ *dst = over (src, *dst);
+ }
+ else if (m)
+ {
+ d = in (src, m);
+ *dst = over (d, *dst);
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, s;
+ uint32_t *dst_line, *dst, d;
+ uint32_t *mask_line, *mask, ma;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ ma = *mask++;
+
+ if (ma)
+ {
+ d = *dst;
+ s = src;
+
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
+
+ *dst = s;
+ }
+
+ dst++;
+ }
+ }
+}
+
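+/*
+ * Component-alpha OVER with a solid source: each mask channel scales the
+ * matching source channel separately, so per component
+ *     dest = src * mask / 255 + dest * (255 - mask * src_alpha / 255) / 255.
+ * The body below computes s = src * mask and ma = ~(mask * src_alpha),
+ * then dest = dest * ma / 255 + s using the UN8x4_* helpers.
+ */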
+static void
+fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca, s;
+ uint32_t *dst_line, *dst, d;
+ uint32_t *mask_line, *mask, ma;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ ma = *mask++;
+ if (ma == 0xffffffff)
+ {
+ if (srca == 0xff)
+ *dst = src;
+ else
+ *dst = over (src, *dst);
+ }
+ else if (ma)
+ {
+ d = *dst;
+ s = src;
+
+ UN8x4_MUL_UN8x4 (s, ma);
+ UN8x4_MUL_UN8 (ma, srca);
+ ma = ~ma;
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
+
+ *dst = d;
+ }
+
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint8_t *dst_line, *dst;
+ uint32_t d;
+ uint8_t *mask_line, *mask, m;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ m = *mask++;
+ if (m == 0xff)
+ {
+ if (srca == 0xff)
+ {
+ d = src;
+ }
+ else
+ {
+ d = fetch_24 (dst);
+ d = over (src, d);
+ }
+ store_24 (dst, d);
+ }
+ else if (m)
+ {
+ d = over (in (src, m), fetch_24 (dst));
+ store_24 (dst, d);
+ }
+ dst += 3;
+ }
+ }
+}
+
+static void
+fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint16_t *dst_line, *dst;
+ uint32_t d;
+ uint8_t *mask_line, *mask, m;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ m = *mask++;
+ if (m == 0xff)
+ {
+ if (srca == 0xff)
+ {
+ d = src;
+ }
+ else
+ {
+ d = *dst;
+ d = over (src, CONVERT_0565_TO_0888 (d));
+ }
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ else if (m)
+ {
+ d = *dst;
+ d = over (in (src, m), CONVERT_0565_TO_0888 (d));
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca, s;
+ uint16_t src16;
+ uint16_t *dst_line, *dst;
+ uint32_t d;
+ uint32_t *mask_line, *mask, ma;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ src16 = CONVERT_8888_TO_0565 (src);
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ ma = *mask++;
+ if (ma == 0xffffffff)
+ {
+ if (srca == 0xff)
+ {
+ *dst = src16;
+ }
+ else
+ {
+ d = *dst;
+ d = over (src, CONVERT_0565_TO_0888 (d));
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ }
+ else if (ma)
+ {
+ d = *dst;
+ d = CONVERT_0565_TO_0888 (d);
+
+ s = src;
+
+ UN8x4_MUL_UN8x4 (s, ma);
+ UN8x4_MUL_UN8 (ma, srca);
+ ma = ~ma;
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
+
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_over_8888_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src, s;
+ int dst_stride, src_stride;
+ uint8_t a;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ a = s >> 24;
+ if (a == 0xff)
+ *dst = s;
+ else if (s)
+ *dst = over (s, *dst);
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_src_x888_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ *dst++ = (*src++) | 0xff000000;
+ }
+}
+
+#if 0
+static void
+fast_composite_over_8888_0888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint32_t d;
+ uint32_t *src_line, *src, s;
+ uint8_t a;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ a = s >> 24;
+ if (a)
+ {
+ if (a == 0xff)
+ d = s;
+ else
+ d = over (s, fetch_24 (dst));
+
+ store_24 (dst, d);
+ }
+ dst += 3;
+ }
+ }
+}
+#endif
+
+static void
+fast_composite_over_8888_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint16_t *dst_line, *dst;
+ uint32_t d;
+ uint32_t *src_line, *src, s;
+ uint8_t a;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ a = s >> 24;
+ if (s)
+ {
+ if (a == 0xff)
+ {
+ d = s;
+ }
+ else
+ {
+ d = *dst;
+ d = over (s, CONVERT_0565_TO_0888 (d));
+ }
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_src_x888_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint16_t *dst_line, *dst;
+ uint32_t *src_line, *src, s;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ *dst = CONVERT_8888_TO_0565 (s);
+ dst++;
+ }
+ }
+}
+
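+/*
+ * ADD on a8 using a branch-free saturating add: t = d + s is a 9-bit sum,
+ * so (t >> 8) is the carry; (0 - (t >> 8)) is therefore 0xff.. on overflow
+ * and 0 otherwise, and OR-ing it in clamps the result at 0xff.
+ */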
+static void
+fast_composite_add_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint8_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint8_t s, d;
+ uint16_t t;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ if (s)
+ {
+ if (s != 0xff)
+ {
+ d = *dst;
+ t = d + s;
+ s = t | (0 - (t >> 8));
+ }
+ *dst = s;
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_add_8888_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint32_t s, d;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ if (s)
+ {
+ if (s != 0xffffffff)
+ {
+ d = *dst;
+ if (d)
+ UN8x4_ADD_UN8x4 (s, d);
+ }
+ *dst = s;
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_add_n_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint8_t *mask_line, *mask;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint32_t src;
+ uint8_t sa;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ sa = (src >> 24);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ uint16_t tmp;
+ uint16_t a;
+ uint32_t m, d;
+ uint32_t r;
+
+ a = *mask++;
+ d = *dst;
+
+ m = MUL_UN8 (sa, a, tmp);
+ r = ADD_UN8 (m, d, tmp);
+
+ *dst++ = r;
+ }
+ }
+}
+
+#ifdef WORDS_BIGENDIAN
+#define CREATE_BITMASK(n) (0x80000000 >> (n))
+#define UPDATE_BITMASK(n) ((n) >> 1)
+#else
+#define CREATE_BITMASK(n) (1 << (n))
+#define UPDATE_BITMASK(n) ((n) << 1)
+#endif
+
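+/*
+ * a1 images address individual bits inside 32-bit words; on big-endian
+ * hosts bit 0 is the most significant bit of the word, on little-endian
+ * hosts the least significant one.  TEST_BIT/SET_BIT select the word
+ * with (n >> 5) and the bit within it with (n & 31).
+ */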
+#define TEST_BIT(p, n) \
+ (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
+#define SET_BIT(p, n) \
+    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0)
+
+static void
+fast_composite_add_1000_1000 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
+ src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t,
+ dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ /*
+ * TODO: improve performance by processing uint32_t data instead
+ * of individual bits
+ */
+ if (TEST_BIT (src, src_x + w))
+ SET_BIT (dst, dest_x + w);
+ }
+ }
+}
+
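+/*
+ * OVER with a solid source and an a1 mask: the mask is walked one 32-bit
+ * word at a time through bitcache, with bitmask selecting the current
+ * pixel's bit; a pixel is touched only where its mask bit is set.  The
+ * (mask_x >> 5) word offset and CREATE_BITMASK (mask_x & 31) position the
+ * walk at the first mask bit of each scanline.
+ */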
+static void
+fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint32_t *dst, *dst_line;
+ uint32_t *mask, *mask_line;
+ int mask_stride, dst_stride;
+ uint32_t bitcache, bitmask;
+ int32_t w;
+
+ if (width <= 0)
+ return;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
+ dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
+ mask_stride, mask_line, 1);
+ mask_line += mask_x >> 5;
+
+ if (srca == 0xff)
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ *dst = src;
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ *dst = over (src, *dst);
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+}
+
+static void
+fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint16_t *dst, *dst_line;
+ uint32_t *mask, *mask_line;
+ int mask_stride, dst_stride;
+ uint32_t bitcache, bitmask;
+ int32_t w;
+ uint32_t d;
+ uint16_t src565;
+
+ if (width <= 0)
+ return;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
+ dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
+ mask_stride, mask_line, 1);
+ mask_line += mask_x >> 5;
+
+ if (srca == 0xff)
+ {
+ src565 = CONVERT_8888_TO_0565 (src);
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ *dst = src565;
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ {
+ d = over (src, CONVERT_0565_TO_0888 (*dst));
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+}
+
+/*
+ * Simple bitblt
+ */
+
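+/* Reduce the solid 8888 color to the destination's bit depth, then let
+ * pixman_fill() perform a plain rectangular fill.
+ */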
+static void
+fast_composite_solid_fill (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ if (dst_image->bits.format == PIXMAN_a1)
+ {
+ src = src >> 31;
+ }
+ else if (dst_image->bits.format == PIXMAN_a8)
+ {
+ src = src >> 24;
+ }
+ else if (dst_image->bits.format == PIXMAN_r5g6b5 ||
+ dst_image->bits.format == PIXMAN_b5g6r5)
+ {
+ src = CONVERT_8888_TO_0565 (src);
+ }
+
+ pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
+ PIXMAN_FORMAT_BPP (dst_image->bits.format),
+ dest_x, dest_y,
+ width, height,
+ src);
+}
+
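+/*
+ * SRC between identically laid out formats is a straight row-by-row
+ * memcpy.  Note that bits.rowstride is measured in uint32_t units, hence
+ * the multiplication by 4 to obtain a byte stride.
+ */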
+static void
+fast_composite_src_memcpy (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8;
+ uint32_t n_bytes = width * bpp;
+ int dst_stride, src_stride;
+ uint8_t *dst;
+ uint8_t *src;
+
+ src_stride = src_image->bits.rowstride * 4;
+ dst_stride = dst_image->bits.rowstride * 4;
+
+ src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
+ dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
+
+ while (height--)
+ {
+ memcpy (dst, src, n_bytes);
+
+ dst += dst_stride;
+ src += src_stride;
+ }
+}
+
+FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
+FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
+FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
+FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
+FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
+FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
+FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
+FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
+FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
+FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
+FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
+FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
+FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
+FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
+FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
+FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
+FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
+
+/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
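+/* After the 4-pixel loop below exits, w lies in [-4, -1] and is congruent
+ * to the original width modulo 4, so the (w & 2) and (w & 1) tests still
+ * see the correct number of leftover pixels. */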
+static force_inline void
+scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
+ const uint16_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t fully_transparent_src)
+{
+ uint16_t tmp1, tmp2, tmp3, tmp4;
+ while ((w -= 4) >= 0)
+ {
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp3 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp4 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ *dst++ = tmp1;
+ *dst++ = tmp2;
+ *dst++ = tmp3;
+ *dst++ = tmp4;
+ }
+ if (w & 2)
+ {
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ *dst++ = tmp1;
+ *dst++ = tmp2;
+ }
+ if (w & 1)
+ *dst++ = src[pixman_fixed_to_int (vx)];
+}
+
+FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, COVER)
+FAST_NEAREST_MAINLOOP (565_565_none_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, NONE)
+FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, PAD)
+
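+/*
+ * Fetch one source pixel for nearest-neighbour scaling.  The repeat()
+ * helper maps x into [0, src_width) according to the repeat mode and
+ * fails for coordinates that miss a non-repeating source, in which case
+ * transparent black is returned.  For x8r8g8b8 the undefined alpha byte
+ * is forced to 0xff.
+ */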
+static force_inline uint32_t
+fetch_nearest (pixman_repeat_t src_repeat,
+ pixman_format_code_t format,
+ uint32_t *src, int x, int src_width)
+{
+ if (repeat (src_repeat, &x, src_width))
+ {
+ if (format == PIXMAN_x8r8g8b8)
+ return *(src + x) | 0xff000000;
+ else
+ return *(src + x);
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+static force_inline void
+combine_over (uint32_t s, uint32_t *dst)
+{
+ if (s)
+ {
+ uint8_t ia = 0xff - (s >> 24);
+
+ if (ia)
+ UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
+ else
+ *dst = s;
+ }
+}
+
+static force_inline void
+combine_src (uint32_t s, uint32_t *dst)
+{
+ *dst = s;
+}
+
+static void
+fast_composite_scaled_nearest (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line;
+ uint32_t *src_line;
+ int dst_stride, src_stride;
+ int src_width, src_height;
+ pixman_repeat_t src_repeat;
+ pixman_fixed_t unit_x, unit_y;
+ pixman_format_code_t src_format;
+ pixman_vector_t v;
+ pixman_fixed_t vy;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    /* Pass 0 instead of src_x and src_y: those coordinates are in
+     * destination space and still have to be transformed into source space.
+     */
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
+
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (src_image->common.transform, &v))
+ return;
+
+ unit_x = src_image->common.transform->matrix[0][0];
+ unit_y = src_image->common.transform->matrix[1][1];
+
+ /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
+ v.vector[0] -= pixman_fixed_e;
+ v.vector[1] -= pixman_fixed_e;
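+    /* (pixman_fixed_e is the smallest representable fixed-point increment,
+     * 1/65536 in 16.16 fixed point, so this nudges exact half-way
+     * coordinates down before truncation.) */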
+
+ src_height = src_image->bits.height;
+ src_width = src_image->bits.width;
+ src_repeat = src_image->common.repeat;
+ src_format = src_image->bits.format;
+
+ vy = v.vector[1];
+ while (height--)
+ {
+ pixman_fixed_t vx = v.vector[0];
+ int y = pixman_fixed_to_int (vy);
+ uint32_t *dst = dst_line;
+
+ dst_line += dst_stride;
+
+	/* Advance the y location by one unit vector in the y direction;
+	 * this is equivalent to transforming y + 1 of the destination
+	 * point into source space. */
+ vy += unit_y;
+
+ if (!repeat (src_repeat, &y, src_height))
+ {
+ if (op == PIXMAN_OP_SRC)
+ memset (dst, 0, sizeof (*dst) * width);
+ }
+ else
+ {
+ int w = width;
+
+ uint32_t *src = src_line + y * src_stride;
+
+ while (w >= 2)
+ {
+ uint32_t s1, s2;
+ int x1, x2;
+
+ x1 = pixman_fixed_to_int (vx);
+ vx += unit_x;
+
+ x2 = pixman_fixed_to_int (vx);
+ vx += unit_x;
+
+ w -= 2;
+
+ s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
+ s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
+
+ if (op == PIXMAN_OP_OVER)
+ {
+ combine_over (s1, dst++);
+ combine_over (s2, dst++);
+ }
+ else
+ {
+ combine_src (s1, dst++);
+ combine_src (s2, dst++);
+ }
+ }
+
+ while (w--)
+ {
+ uint32_t s;
+ int x;
+
+ x = pixman_fixed_to_int (vx);
+ vx += unit_x;
+
+ s = fetch_nearest (src_repeat, src_format, src, x, src_width);
+
+ if (op == PIXMAN_OP_OVER)
+ combine_over (s, dst++);
+ else
+ combine_src (s, dst++);
+ }
+ }
+ }
+}
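+
+/*
+ * Illustrative sketch (hypothetical helper, not used by this file): the
+ * 16.16 fixed-point walk used above, in isolation.  pixman_fixed_t stores
+ * the source coordinate scaled by 65536; the sample index is its high
+ * 16 bits, and each destination pixel advances the coordinate by unit_x.
+ */
+static force_inline int
+nearest_src_index_sketch (pixman_fixed_t vx0, pixman_fixed_t unit_x, int i)
+{
+ /* e.g. a 2x upscale has unit_x == pixman_fixed_1 / 2, so consecutive
+ * destination pixels map to source indices 0, 0, 1, 1, 2, 2, ... */
+ return pixman_fixed_to_int (vx0 + i * unit_x);
+}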
+
+#define CACHE_LINE_SIZE 64
+
+#define FAST_SIMPLE_ROTATE(suffix, pix_type) \
+ \
+static void \
+blt_rotated_90_trivial_##suffix (pix_type *dst, \
+ int dst_stride, \
+ const pix_type *src, \
+ int src_stride, \
+ int w, \
+ int h) \
+{ \
+ int x, y; \
+ for (y = 0; y < h; y++) \
+ { \
+ const pix_type *s = src + (h - y - 1); \
+ pix_type *d = dst + dst_stride * y; \
+ for (x = 0; x < w; x++) \
+ { \
+ *d++ = *s; \
+ s += src_stride; \
+ } \
+ } \
+} \
+ \
+static void \
+blt_rotated_270_trivial_##suffix (pix_type *dst, \
+ int dst_stride, \
+ const pix_type *src, \
+ int src_stride, \
+ int w, \
+ int h) \
+{ \
+ int x, y; \
+ for (y = 0; y < h; y++) \
+ { \
+ const pix_type *s = src + src_stride * (w - 1) + y; \
+ pix_type *d = dst + dst_stride * y; \
+ for (x = 0; x < w; x++) \
+ { \
+ *d++ = *s; \
+ s -= src_stride; \
+ } \
+ } \
+} \
+ \
+static void \
+blt_rotated_90_##suffix (pix_type *dst, \
+ int dst_stride, \
+ const pix_type *src, \
+ int src_stride, \
+ int W, \
+ int H) \
+{ \
+ int x; \
+ int leading_pixels = 0, trailing_pixels = 0; \
+ const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
+ \
+ /* \
+ * Split processing of the destination into TILE_SIZExH cache-line \
+ * aligned vertical stripes (optimistically assuming that the \
+ * destination stride is a multiple of the cache line size; if it \
+ * is not, processing is just a bit slower) \
+ */ \
+ \
+ if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
+ { \
+ leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
+ (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (leading_pixels > W) \
+ leading_pixels = W; \
+ \
+ /* unaligned leading part NxH (where N < TILE_SIZE) */ \
+ blt_rotated_90_trivial_##suffix ( \
+ dst, \
+ dst_stride, \
+ src, \
+ src_stride, \
+ leading_pixels, \
+ H); \
+ \
+ dst += leading_pixels; \
+ src += leading_pixels * src_stride; \
+ W -= leading_pixels; \
+ } \
+ \
+ if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
+ { \
+ trailing_pixels = (((uintptr_t)(dst + W) & \
+ (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (trailing_pixels > W) \
+ trailing_pixels = W; \
+ W -= trailing_pixels; \
+ } \
+ \
+ for (x = 0; x < W; x += TILE_SIZE) \
+ { \
+ /* aligned middle part TILE_SIZExH */ \
+ blt_rotated_90_trivial_##suffix ( \
+ dst + x, \
+ dst_stride, \
+ src + src_stride * x, \
+ src_stride, \
+ TILE_SIZE, \
+ H); \
+ } \
+ \
+ if (trailing_pixels) \
+ { \
+ /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
+ blt_rotated_90_trivial_##suffix ( \
+ dst + W, \
+ dst_stride, \
+ src + W * src_stride, \
+ src_stride, \
+ trailing_pixels, \
+ H); \
+ } \
+} \
+ \
+static void \
+blt_rotated_270_##suffix (pix_type *dst, \
+ int dst_stride, \
+ const pix_type *src, \
+ int src_stride, \
+ int W, \
+ int H) \
+{ \
+ int x; \
+ int leading_pixels = 0, trailing_pixels = 0; \
+ const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
+ \
+ /* \
+ * Split processing of the destination into TILE_SIZExH cache-line \
+ * aligned vertical stripes (optimistically assuming that the \
+ * destination stride is a multiple of the cache line size; if it \
+ * is not, processing is just a bit slower) \
+ */ \
+ \
+ if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
+ { \
+ leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
+ (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (leading_pixels > W) \
+ leading_pixels = W; \
+ \
+ /* unaligned leading part NxH (where N < TILE_SIZE) */ \
+ blt_rotated_270_trivial_##suffix ( \
+ dst, \
+ dst_stride, \
+ src + src_stride * (W - leading_pixels), \
+ src_stride, \
+ leading_pixels, \
+ H); \
+ \
+ dst += leading_pixels; \
+ W -= leading_pixels; \
+ } \
+ \
+ if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
+ { \
+ trailing_pixels = (((uintptr_t)(dst + W) & \
+ (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (trailing_pixels > W) \
+ trailing_pixels = W; \
+ W -= trailing_pixels; \
+ src += trailing_pixels * src_stride; \
+ } \
+ \
+ for (x = 0; x < W; x += TILE_SIZE) \
+ { \
+ /* aligned middle part TILE_SIZExH */ \
+ blt_rotated_270_trivial_##suffix ( \
+ dst + x, \
+ dst_stride, \
+ src + src_stride * (W - x - TILE_SIZE), \
+ src_stride, \
+ TILE_SIZE, \
+ H); \
+ } \
+ \
+ if (trailing_pixels) \
+ { \
+ /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
+ blt_rotated_270_trivial_##suffix ( \
+ dst + W, \
+ dst_stride, \
+ src - trailing_pixels * src_stride, \
+ src_stride, \
+ trailing_pixels, \
+ H); \
+ } \
+} \
+ \
+static void \
+fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ pix_type *dst_line; \
+ pix_type *src_line; \
+ int dst_stride, src_stride; \
+ int src_x_t, src_y_t; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \
+ dst_stride, dst_line, 1); \
+ src_x_t = -src_y + pixman_fixed_to_int ( \
+ src_image->common.transform->matrix[0][2] + \
+ pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
+ src_y_t = src_x + pixman_fixed_to_int ( \
+ src_image->common.transform->matrix[1][2] + \
+ pixman_fixed_1 / 2 - pixman_fixed_e); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
+ src_stride, src_line, 1); \
+ blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \
+ width, height); \
+} \
+ \
+static void \
+fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ pix_type *dst_line; \
+ pix_type *src_line; \
+ int dst_stride, src_stride; \
+ int src_x_t, src_y_t; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \
+ dst_stride, dst_line, 1); \
+ src_x_t = src_y + pixman_fixed_to_int ( \
+ src_image->common.transform->matrix[0][2] + \
+ pixman_fixed_1 / 2 - pixman_fixed_e); \
+ src_y_t = -src_x + pixman_fixed_to_int ( \
+ src_image->common.transform->matrix[1][2] + \
+ pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
+ src_stride, src_line, 1); \
+ blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \
+ width, height); \
+}
+
+FAST_SIMPLE_ROTATE (8, uint8_t)
+FAST_SIMPLE_ROTATE (565, uint16_t)
+FAST_SIMPLE_ROTATE (8888, uint32_t)
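+
+/*
+ * Illustrative sketch (not used by this file): the index mapping that
+ * blt_rotated_90_trivial_* implements, written directly.  For a w x h
+ * destination, dst pixel (x, y) is read from column h - 1 - y of source
+ * row x, i.e. each destination row walks one source column; the tiled
+ * variants above only change the traversal order for cache behaviour.
+ */
+static void
+rotate_90_reference_sketch (uint32_t *dst, int dst_stride,
+ const uint32_t *src, int src_stride,
+ int w, int h)
+{
+ int x, y;
+
+ for (y = 0; y < h; y++)
+ {
+ for (x = 0; x < w; x++)
+ dst[y * dst_stride + x] = src[x * src_stride + (h - 1 - y)];
+ }
+}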
+
+static const pixman_fast_path_t c_fast_paths[] =
+{
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
+ PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
+ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
+
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
+
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
+
+#define NEAREST_FAST_PATH(op,s,d) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest, \
+ }
+
+ NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
+ NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
+
+ NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
+ NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
+
+ NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
+ NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
+
+ NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
+ NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
+
+#define SIMPLE_ROTATE_FLAGS(angle) \
+ (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \
+ FAST_PATH_NEAREST_FILTER | \
+ FAST_PATH_SAMPLES_COVER_CLIP | \
+ FAST_PATH_STANDARD_FLAGS)
+
+#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_rotate_90_##suffix, \
+ }, \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_rotate_270_##suffix, \
+ }
+
+ SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
+ SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
+ SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
+ SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
+ SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
+
+ { PIXMAN_OP_NONE },
+};
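+
+/*
+ * For reference: each entry above pairs an operator with a source format
+ * (plus required flags), a mask and a destination format, and the
+ * composite function that handles that combination; the { PIXMAN_OP_NONE }
+ * entry terminates the table.
+ */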
+
+#ifdef WORDS_BIGENDIAN
+#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n)))
+#else
+#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs))
+#endif
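+
+/*
+ * Worked example (little-endian case): A1_FILL_MASK (3, 5) is
+ * ((1 << 3) - 1) << 5 == 0x7 << 5 == 0xe0, i.e. a mask covering bit
+ * positions 5..7 of a 32-bit word.  pixman_fill1_line below ORs such a
+ * mask in to set pixels and ANDs its complement to clear them.
+ */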
+
+static force_inline void
+pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
+{
+ if (offs)
+ {
+ int leading_pixels = 32 - offs;
+ if (leading_pixels >= width)
+ {
+ if (v)
+ *dst |= A1_FILL_MASK (width, offs);
+ else
+ *dst &= ~A1_FILL_MASK (width, offs);
+ return;
+ }
+ else
+ {
+ if (v)
+ *dst++ |= A1_FILL_MASK (leading_pixels, offs);
+ else
+ *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
+ width -= leading_pixels;
+ }
+ }
+ while (width >= 32)
+ {
+ if (v)
+ *dst++ = 0xFFFFFFFF;
+ else
+ *dst++ = 0;
+ width -= 32;
+ }
+ if (width > 0)
+ {
+ if (v)
+ *dst |= A1_FILL_MASK (width, 0);
+ else
+ *dst &= ~A1_FILL_MASK (width, 0);
+ }
+}
+
+static void
+pixman_fill1 (uint32_t *bits,
+ int stride,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ uint32_t *dst = bits + y * stride + (x >> 5);
+ int offs = x & 31;
+
+ if (xor & 1)
+ {
+ while (height--)
+ {
+ pixman_fill1_line (dst, offs, width, 1);
+ dst += stride;
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ pixman_fill1_line (dst, offs, width, 0);
+ dst += stride;
+ }
+ }
+}
+
+static void
+pixman_fill8 (uint32_t *bits,
+ int stride,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ int byte_stride = stride * (int) sizeof (uint32_t);
+ uint8_t *dst = (uint8_t *) bits;
+ uint8_t v = xor & 0xff;
+ int i;
+
+ dst = dst + y * byte_stride + x;
+
+ while (height--)
+ {
+ for (i = 0; i < width; ++i)
+ dst[i] = v;
+
+ dst += byte_stride;
+ }
+}
+
+static void
+pixman_fill16 (uint32_t *bits,
+ int stride,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ int short_stride =
+ (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
+ uint16_t *dst = (uint16_t *)bits;
+ uint16_t v = xor & 0xffff;
+ int i;
+
+ dst = dst + y * short_stride + x;
+
+ while (height--)
+ {
+ for (i = 0; i < width; ++i)
+ dst[i] = v;
+
+ dst += short_stride;
+ }
+}
+
+static void
+pixman_fill32 (uint32_t *bits,
+ int stride,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ int i;
+
+ bits = bits + y * stride + x;
+
+ while (height--)
+ {
+ for (i = 0; i < width; ++i)
+ bits[i] = xor;
+
+ bits += stride;
+ }
+}
+
+static pixman_bool_t
+fast_path_fill (pixman_implementation_t *imp,
+ uint32_t * bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ switch (bpp)
+ {
+ case 1:
+ pixman_fill1 (bits, stride, x, y, width, height, xor);
+ break;
+
+ case 8:
+ pixman_fill8 (bits, stride, x, y, width, height, xor);
+ break;
+
+ case 16:
+ pixman_fill16 (bits, stride, x, y, width, height, xor);
+ break;
+
+ case 32:
+ pixman_fill32 (bits, stride, x, y, width, height, xor);
+ break;
+
+ default:
+ return _pixman_implementation_fill (
+ imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+ break;
+ }
+
+ return TRUE;
+}
+
+pixman_implementation_t *
+_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
+{
+ pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
+
+ imp->fill = fast_path_fill;
+
+ return imp;
+}
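+
+/*
+ * Usage sketch (illustrative values): the fill paths above are normally
+ * reached through the public pixman_fill() entry point, whose stride is
+ * counted in uint32_t units regardless of bpp:
+ *
+ *   uint32_t buf[32 * 32];        -- a 64x32 r5g6b5 buffer
+ *   pixman_fill (buf, 32, 16,     -- bits, stride in uint32s, bpp
+ *                4, 4, 8, 8,      -- x, y, width, height
+ *                0xf800);         -- pixel value (red in r5g6b5)
+ */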
diff --git a/pixman/pixman/pixman-fast-path.h b/pixman/pixman/pixman-fast-path.h
index 1885d47e7..89b25be88 100644
--- a/pixman/pixman/pixman-fast-path.h
+++ b/pixman/pixman/pixman-fast-path.h
@@ -1,1022 +1,1022 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Keith Packard, SuSE, Inc.
- */
-
-#ifndef PIXMAN_FAST_PATH_H__
-#define PIXMAN_FAST_PATH_H__
-
-#include "pixman-private.h"
-
-#define PIXMAN_REPEAT_COVER -1
-
-static force_inline pixman_bool_t
-repeat (pixman_repeat_t repeat, int *c, int size)
-{
- if (repeat == PIXMAN_REPEAT_NONE)
- {
- if (*c < 0 || *c >= size)
- return FALSE;
- }
- else if (repeat == PIXMAN_REPEAT_NORMAL)
- {
- while (*c >= size)
- *c -= size;
- while (*c < 0)
- *c += size;
- }
- else if (repeat == PIXMAN_REPEAT_PAD)
- {
- *c = CLIP (*c, 0, size - 1);
- }
- else /* REFLECT */
- {
- *c = MOD (*c, size * 2);
- if (*c >= size)
- *c = size * 2 - *c - 1;
- }
- return TRUE;
-}
-
-/*
- * For each scanline fetched from source image with PAD repeat:
- * - calculate how many pixels need to be padded on the left side
- * - calculate how many pixels need to be padded on the right side
- * - update width to only count pixels which are fetched from the image
- * All this information is returned via 'width', 'left_pad', 'right_pad'
- * arguments. The code assumes that 'unit_x' is positive.
- *
- * Note: 64-bit math is used in order to avoid potential overflows, which
- * is probably excessive in many cases. This particular function
- * may need its own correctness test and performance tuning.
- */
-static force_inline void
-pad_repeat_get_scanline_bounds (int32_t source_image_width,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- int32_t * width,
- int32_t * left_pad,
- int32_t * right_pad)
-{
- int64_t max_vx = (int64_t) source_image_width << 16;
- int64_t tmp;
- if (vx < 0)
- {
- tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
- if (tmp > *width)
- {
- *left_pad = *width;
- *width = 0;
- }
- else
- {
- *left_pad = (int32_t) tmp;
- *width -= (int32_t) tmp;
- }
- }
- else
- {
- *left_pad = 0;
- }
- tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
- if (tmp < 0)
- {
- *right_pad = *width;
- *width = 0;
- }
- else if (tmp >= *width)
- {
- *right_pad = 0;
- }
- else
- {
- *right_pad = *width - (int32_t) tmp;
- *width = (int32_t) tmp;
- }
-}
-
-/* A macroified version of specialized nearest scalers for some
- * common 8888 and 565 formats. It supports SRC and OVER ops.
- *
- * There are two repeat versions, one that handles repeat normal,
- * and one without repeat handling that only works if the src region
- * used is completely covered by the pre-repeated source samples.
- *
- * The loops are unrolled to process two pixels per iteration for better
- * performance on most CPU architectures (superscalar processors
- * can issue several operations simultaneously, other processors can hide
- * instruction latencies by pipelining operations). Unrolling more
- * does not make much sense because the compiler will start running out
- * of spare registers soon.
- */
-
-#define GET_8888_ALPHA(s) ((s) >> 24)
- /* This is not actually used since we don't have an OVER with
- 565 source, but it is needed to build. */
-#define GET_0565_ALPHA(s) 0xff
-
-#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
- src_type_t, dst_type_t, OP, repeat_mode) \
-static force_inline void \
-scanline_func_name (dst_type_t *dst, \
- const src_type_t *src, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t fully_transparent_src) \
-{ \
- uint32_t d; \
- src_type_t s1, s2; \
- uint8_t a1, a2; \
- int x1, x2; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
- return; \
- \
- if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
- abort(); \
- \
- while ((w -= 2) >= 0) \
- { \
- x1 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s1 = src[x1]; \
- \
- x2 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s2 = src[x2]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- \
- if (a2 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- else if (s2) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
- a2 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- } \
- \
- if (w & 1) \
- { \
- x1 = vx >> 16; \
- s1 = src[x1]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- } \
-}
-
-#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid) \
-static void \
-fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dst_x, \
- int32_t dst_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type_t *dst_line; \
- mask_type_t *mask_line; \
- src_type_t *src_first_line; \
- int y; \
- pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
- pixman_fixed_t max_vy; \
- pixman_vector_t v; \
- pixman_fixed_t vx, vy; \
- pixman_fixed_t unit_x, unit_y; \
- int32_t left_pad, right_pad; \
- \
- src_type_t *src; \
- dst_type_t *dst; \
- mask_type_t solid_mask; \
- const mask_type_t *mask = &solid_mask; \
- int src_stride, mask_stride, dst_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
- if (have_mask) \
- { \
- if (mask_is_solid) \
- solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
- else \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
- mask_stride, mask_line, 1); \
- } \
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
- * transformed from destination space to source space */ \
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
- \
- /* reference point is the center of the pixel */ \
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
- v.vector[2] = pixman_fixed_1; \
- \
- if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
- return; \
- \
- unit_x = src_image->common.transform->matrix[0][0]; \
- unit_y = src_image->common.transform->matrix[1][1]; \
- \
- /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
- v.vector[0] -= pixman_fixed_e; \
- v.vector[1] -= pixman_fixed_e; \
- \
- vx = v.vector[0]; \
- vy = v.vector[1]; \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* Clamp repeating positions inside the actual samples */ \
- max_vx = src_image->bits.width << 16; \
- max_vy = src_image->bits.height << 16; \
- \
- repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- } \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
- PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
- &width, &left_pad, &right_pad); \
- vx += left_pad * unit_x; \
- } \
- \
- while (--height >= 0) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- if (have_mask && !mask_is_solid) \
- { \
- mask = mask_line; \
- mask_line += mask_stride; \
- } \
- \
- y = vy >> 16; \
- vy += unit_y; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
- { \
- repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
- src = src_first_line + src_stride * y; \
- if (left_pad > 0) \
- { \
- scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \
- } \
- if (width > 0) \
- { \
- scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
- dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
- } \
- if (right_pad > 0) \
- { \
- scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
- dst + left_pad + width, src + src_image->bits.width - 1, \
- right_pad, 0, 0, 0, FALSE); \
- } \
- } \
- else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- static const src_type_t zero[1] = { 0 }; \
- if (y < 0 || y >= src_image->bits.height) \
- { \
- scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \
- continue; \
- } \
- src = src_first_line + src_stride * y; \
- if (left_pad > 0) \
- { \
- scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \
- } \
- if (width > 0) \
- { \
- scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
- dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
- } \
- if (right_pad > 0) \
- { \
- scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
- dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \
- } \
- } \
- else \
- { \
- src = src_first_line + src_stride * y; \
- scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \
- } \
- } \
-}
-
-/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid) \
- FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid)
-
-#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
- static force_inline void \
- scanline_func##scale_func_name##_wrapper ( \
- const uint8_t *mask, \
- dst_type_t *dst, \
- const src_type_t *src, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t fully_transparent_src) \
- { \
- scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
- } \
- FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
- src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
-
-#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
- FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
- dst_type_t, repeat_mode)
-
-#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
- src_type_t, dst_type_t, OP, repeat_mode) \
- FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
- SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
- OP, repeat_mode) \
- FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
- scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
- src_type_t, dst_type_t, repeat_mode)
-
-
-#define SCALED_NEAREST_FLAGS \
- (FAST_PATH_SCALE_TRANSFORM | \
- FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NEAREST_FILTER | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NARROW_FORMAT)
-
-#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NORMAL_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NORMAL_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NORMAL_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
- }
-
-/* Prefer the use of 'cover' variant, because it is faster */
-#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
-
-#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
-#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
-
-/*****************************************************************************/
-
-/*
- * Identify 5 zones in each scanline for bilinear scaling, depending on
- * whether the 2 pixels to be interpolated are fetched from the image
- * itself, from the padding area around it, or from both.
- */
-static force_inline void
-bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- int32_t * left_pad,
- int32_t * left_tz,
- int32_t * width,
- int32_t * right_tz,
- int32_t * right_pad)
-{
- int width1 = *width, left_pad1, right_pad1;
- int width2 = *width, left_pad2, right_pad2;
-
- pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
- &width1, &left_pad1, &right_pad1);
- pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
- unit_x, &width2, &left_pad2, &right_pad2);
-
- *left_pad = left_pad2;
- *left_tz = left_pad1 - left_pad2;
- *right_tz = right_pad2 - right_pad1;
- *right_pad = right_pad1;
- *width -= *left_pad + *left_tz + *right_tz + *right_pad;
-}
-
-/*
- * Main loop template for single pass bilinear scaling. It needs to be
- * provided with 'scanline_func' which should do the compositing operation.
- * The needed function has the following prototype:
- *
- * scanline_func (dst_type_t * dst,
- * const mask_type_t * mask,
- * const src_type_t * src_top,
- * const src_type_t * src_bottom,
- * int32_t width,
- * int weight_top,
- * int weight_bottom,
- * pixman_fixed_t vx,
- * pixman_fixed_t unit_x,
- * pixman_fixed_t max_vx,
- * pixman_bool_t zero_src)
- *
- * Where:
- * dst - destination scanline buffer for storing results
- * mask - mask buffer (or single value for solid mask)
- * src_top, src_bottom - two source scanlines
- * width - number of pixels to process
- * weight_top - weight of the top row for interpolation
- * weight_bottom - weight of the bottom row for interpolation
- * vx - initial position for fetching the first pair of
- * pixels from the source buffer
- * unit_x - position increment needed to move to the next pair
- * of pixels
- * max_vx - image size as a fixed point value, can be used for
- * implementing NORMAL repeat (when it is supported)
- * zero_src - boolean hint variable, which is set to TRUE when
- * all source pixels are fetched from zero padding
- * zone for NONE repeat
- *
- * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
- * but sometimes it may be less than that for NONE repeat when handling
- * fuzzy antialiased top or bottom image edges. Also both top and
- * bottom weight variables are guaranteed to have value in 0-255
- * range and can fit into unsigned byte or be used with 8-bit SIMD
- * multiplication instructions.
- */
-#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid) \
-static void \
-fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dst_x, \
- int32_t dst_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type_t *dst_line; \
- mask_type_t *mask_line; \
- src_type_t *src_first_line; \
- int y1, y2; \
- pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
- pixman_vector_t v; \
- pixman_fixed_t vx, vy; \
- pixman_fixed_t unit_x, unit_y; \
- int32_t left_pad, left_tz, right_tz, right_pad; \
- \
- dst_type_t *dst; \
- mask_type_t solid_mask; \
- const mask_type_t *mask = &solid_mask; \
- int src_stride, mask_stride, dst_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
- if (have_mask) \
- { \
- if (mask_is_solid) \
- { \
- solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
- mask_stride = 0; \
- } \
- else \
- { \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
- mask_stride, mask_line, 1); \
- } \
- } \
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
- * transformed from destination space to source space */ \
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
- \
- /* reference point is the center of the pixel */ \
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
- v.vector[2] = pixman_fixed_1; \
- \
- if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
- return; \
- \
- unit_x = src_image->common.transform->matrix[0][0]; \
- unit_y = src_image->common.transform->matrix[1][1]; \
- \
- v.vector[0] -= pixman_fixed_1 / 2; \
- v.vector[1] -= pixman_fixed_1 / 2; \
- \
- vy = v.vector[1]; \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
- PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
- &left_pad, &left_tz, &width, &right_tz, &right_pad); \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
- { \
- /* PAD repeat does not need special handling for 'transition zones' and */ \
- /* they can be combined with 'padding zones' safely */ \
- left_pad += left_tz; \
- right_pad += right_tz; \
- left_tz = right_tz = 0; \
- } \
- v.vector[0] += left_pad * unit_x; \
- } \
- \
- while (--height >= 0) \
- { \
- int weight1, weight2; \
- dst = dst_line; \
- dst_line += dst_stride; \
- vx = v.vector[0]; \
- if (have_mask && !mask_is_solid) \
- { \
- mask = mask_line; \
- mask_line += mask_stride; \
- } \
- \
- y1 = pixman_fixed_to_int (vy); \
- weight2 = (vy >> 8) & 0xff; \
- if (weight2) \
- { \
- /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \
- y2 = y1 + 1; \
- weight1 = 256 - weight2; \
- } \
- else \
- { \
- /* set both top and bottom row to the same scanline, and weights to 128+128 */ \
- y2 = y1; \
- weight1 = weight2 = 128; \
- } \
- vy += unit_y; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
- { \
- src_type_t *src1, *src2; \
- src_type_t buf1[2]; \
- src_type_t buf2[2]; \
- repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
- repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
- src1 = src_first_line + src_stride * y1; \
- src2 = src_first_line + src_stride * y2; \
- \
- if (left_pad > 0) \
- { \
- buf1[0] = buf1[1] = src1[0]; \
- buf2[0] = buf2[1] = src2[0]; \
- scanline_func (dst, mask, \
- buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
- dst += left_pad; \
- if (have_mask && !mask_is_solid) \
- mask += left_pad; \
- } \
- if (width > 0) \
- { \
- scanline_func (dst, mask, \
- src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
- dst += width; \
- if (have_mask && !mask_is_solid) \
- mask += width; \
- } \
- if (right_pad > 0) \
- { \
- buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
- buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
- scanline_func (dst, mask, \
- buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
- } \
- } \
- else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- src_type_t *src1, *src2; \
- src_type_t buf1[2]; \
- src_type_t buf2[2]; \
- /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
- if (y1 < 0) \
- { \
- weight1 = 0; \
- y1 = 0; \
- } \
- if (y1 >= src_image->bits.height) \
- { \
- weight1 = 0; \
- y1 = src_image->bits.height - 1; \
- } \
- if (y2 < 0) \
- { \
- weight2 = 0; \
- y2 = 0; \
- } \
- if (y2 >= src_image->bits.height) \
- { \
- weight2 = 0; \
- y2 = src_image->bits.height - 1; \
- } \
- src1 = src_first_line + src_stride * y1; \
- src2 = src_first_line + src_stride * y2; \
- \
- if (left_pad > 0) \
- { \
- buf1[0] = buf1[1] = 0; \
- buf2[0] = buf2[1] = 0; \
- scanline_func (dst, mask, \
- buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
- dst += left_pad; \
- if (have_mask && !mask_is_solid) \
- mask += left_pad; \
- } \
- if (left_tz > 0) \
- { \
- buf1[0] = 0; \
- buf1[1] = src1[0]; \
- buf2[0] = 0; \
- buf2[1] = src2[0]; \
- scanline_func (dst, mask, \
- buf1, buf2, left_tz, weight1, weight2, \
- pixman_fixed_frac (vx), unit_x, 0, FALSE); \
- dst += left_tz; \
- if (have_mask && !mask_is_solid) \
- mask += left_tz; \
- vx += left_tz * unit_x; \
- } \
- if (width > 0) \
- { \
- scanline_func (dst, mask, \
- src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
- dst += width; \
- if (have_mask && !mask_is_solid) \
- mask += width; \
- vx += width * unit_x; \
- } \
- if (right_tz > 0) \
- { \
- buf1[0] = src1[src_image->bits.width - 1]; \
- buf1[1] = 0; \
- buf2[0] = src2[src_image->bits.width - 1]; \
- buf2[1] = 0; \
- scanline_func (dst, mask, \
- buf1, buf2, right_tz, weight1, weight2, \
- pixman_fixed_frac (vx), unit_x, 0, FALSE); \
- dst += right_tz; \
- if (have_mask && !mask_is_solid) \
- mask += right_tz; \
- } \
- if (right_pad > 0) \
- { \
- buf1[0] = buf1[1] = 0; \
- buf2[0] = buf2[1] = 0; \
- scanline_func (dst, mask, \
- buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
- } \
- } \
- else \
- { \
- scanline_func (dst, mask, src_first_line + src_stride * y1, \
- src_first_line + src_stride * y2, width, \
- weight1, weight2, vx, unit_x, max_vx, FALSE); \
- } \
- } \
-}
-
-/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid) \
- FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
- dst_type_t, repeat_mode, have_mask, mask_is_solid)
-
-#define SCALED_BILINEAR_FLAGS \
- (FAST_PATH_SCALE_TRANSFORM | \
- FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_BILINEAR_FILTER | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NARROW_FORMAT)
-
-#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_BILINEAR_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
- }
-
-/* Prefer the use of 'cover' variant, because it is faster */
-#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
- SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func)
-
-#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
-#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
-
-#endif
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. SuSE makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Author: Keith Packard, SuSE, Inc.
+ */
+
+#ifndef PIXMAN_FAST_PATH_H__
+#define PIXMAN_FAST_PATH_H__
+
+#include "pixman-private.h"
+
+#define PIXMAN_REPEAT_COVER -1
+
+static force_inline pixman_bool_t
+repeat (pixman_repeat_t repeat, int *c, int size)
+{
+ if (repeat == PIXMAN_REPEAT_NONE)
+ {
+ if (*c < 0 || *c >= size)
+ return FALSE;
+ }
+ else if (repeat == PIXMAN_REPEAT_NORMAL)
+ {
+ while (*c >= size)
+ *c -= size;
+ while (*c < 0)
+ *c += size;
+ }
+ else if (repeat == PIXMAN_REPEAT_PAD)
+ {
+ *c = CLIP (*c, 0, size - 1);
+ }
+ else /* REFLECT */
+ {
+ *c = MOD (*c, size * 2);
+ if (*c >= size)
+ *c = size * 2 - *c - 1;
+ }
+ return TRUE;
+}
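+
+/*
+ * Worked example: with size == 4 and *c == 5, the repeat modes above
+ * give: NONE returns FALSE (no sample), NORMAL wraps to 1, PAD clamps
+ * to 3, and REFLECT mirrors to 2 (MOD (5, 8) == 5, then
+ * 2 * 4 - 5 - 1 == 2).
+ */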
+
+/*
+ * For each scanline fetched from source image with PAD repeat:
+ * - calculate how many pixels need to be padded on the left side
+ * - calculate how many pixels need to be padded on the right side
+ * - update width to only count pixels which are fetched from the image
+ * All this information is returned via 'width', 'left_pad', 'right_pad'
+ * arguments. The code assumes that 'unit_x' is positive.
+ *
+ * Note: 64-bit math is used in order to avoid potential overflows, which
+ * is probably excessive in many cases. This particular function
+ * may need its own correctness test and performance tuning.
+ */
+static force_inline void
+pad_repeat_get_scanline_bounds (int32_t source_image_width,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ int32_t * width,
+ int32_t * left_pad,
+ int32_t * right_pad)
+{
+ int64_t max_vx = (int64_t) source_image_width << 16;
+ int64_t tmp;
+ if (vx < 0)
+ {
+ tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
+ if (tmp > *width)
+ {
+ *left_pad = *width;
+ *width = 0;
+ }
+ else
+ {
+ *left_pad = (int32_t) tmp;
+ *width -= (int32_t) tmp;
+ }
+ }
+ else
+ {
+ *left_pad = 0;
+ }
+ tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
+ if (tmp < 0)
+ {
+ *right_pad = *width;
+ *width = 0;
+ }
+ else if (tmp >= *width)
+ {
+ *right_pad = 0;
+ }
+ else
+ {
+ *right_pad = *width - (int32_t) tmp;
+ *width = (int32_t) tmp;
+ }
+}
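+
+/*
+ * Worked example: with source_image_width == 10,
+ * vx == pixman_int_to_fixed (-2), unit_x == pixman_fixed_1 and
+ * *width == 20, the scanline samples source x = -2 .. 17, so this
+ * returns *left_pad == 2, *width == 10 and *right_pad == 8.
+ */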
+
+/* A macroified version of specialized nearest scalers for some
+ * common 8888 and 565 formats. It supports SRC and OVER ops.
+ *
+ * There are two repeat versions, one that handles repeat normal,
+ * and one without repeat handling that only works if the src region
+ * used is completely covered by the pre-repeated source samples.
+ *
+ * The loops are unrolled to process two pixels per iteration for better
+ * performance on most CPU architectures (superscalar processors
+ * can issue several operations simultaneously, other processors can hide
+ * instruction latencies by pipelining operations). Unrolling more
+ * does not make much sense because the compiler will start running out
+ * of spare registers soon.
+ */
+
+#define GET_8888_ALPHA(s) ((s) >> 24)
+ /* This is not actually used since we don't have an OVER with
+ 565 source, but it is needed to build. */
+#define GET_0565_ALPHA(s) 0xff
+
+#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
+ src_type_t, dst_type_t, OP, repeat_mode) \
+static force_inline void \
+scanline_func_name (dst_type_t *dst, \
+ const src_type_t *src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t fully_transparent_src) \
+{ \
+ uint32_t d; \
+ src_type_t s1, s2; \
+ uint8_t a1, a2; \
+ int x1, x2; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
+ return; \
+ \
+ if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
+ abort(); \
+ \
+ while ((w -= 2) >= 0) \
+ { \
+ x1 = vx >> 16; \
+ vx += unit_x; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s1 = src[x1]; \
+ \
+ x2 = vx >> 16; \
+ vx += unit_x; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s2 = src[x2]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
+ s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ \
+ if (a2 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ else if (s2) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
+ a2 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ } \
+ \
+ if (w & 1) \
+ { \
+ x1 = vx >> 16; \
+ s1 = src[x1]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ } \
+}
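+
+/* For illustration only (not used by the code): instantiated for the
+ * SRC operator, the scanline function above reduces to a plain nearest
+ * fetch with 16.16 fixed-point stepping, essentially:
+ *
+ *     while ((w -= 2) >= 0)
+ *     {
+ *         *dst++ = CONVERT (src[vx >> 16]); vx += unit_x;
+ *         *dst++ = CONVERT (src[vx >> 16]); vx += unit_x;
+ *     }
+ *     if (w & 1)
+ *         *dst = CONVERT (src[vx >> 16]);
+ *
+ * where CONVERT stands for the appropriate SRC_FORMAT to DST_FORMAT
+ * conversion, plus the NORMAL repeat wraparound of vx when enabled.
+ */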
+
+#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+static void \
+fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dst_x, \
+ int32_t dst_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type_t *dst_line; \
+ mask_type_t *mask_line; \
+ src_type_t *src_first_line; \
+ int y; \
+ pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
+ pixman_fixed_t max_vy; \
+ pixman_vector_t v; \
+ pixman_fixed_t vx, vy; \
+ pixman_fixed_t unit_x, unit_y; \
+ int32_t left_pad, right_pad; \
+ \
+ src_type_t *src; \
+ dst_type_t *dst; \
+ mask_type_t solid_mask; \
+ const mask_type_t *mask = &solid_mask; \
+ int src_stride, mask_stride, dst_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
+ if (have_mask) \
+ { \
+ if (mask_is_solid) \
+ solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
+ else \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
+ mask_stride, mask_line, 1); \
+ } \
+ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
+ * transformed from destination space to source space */ \
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
+ \
+ /* reference point is the center of the pixel */ \
+ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
+ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
+ v.vector[2] = pixman_fixed_1; \
+ \
+ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
+ return; \
+ \
+ unit_x = src_image->common.transform->matrix[0][0]; \
+ unit_y = src_image->common.transform->matrix[1][1]; \
+ \
+ /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
+ v.vector[0] -= pixman_fixed_e; \
+ v.vector[1] -= pixman_fixed_e; \
+ \
+ vx = v.vector[0]; \
+ vy = v.vector[1]; \
+ \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* Clamp repeating positions inside the actual samples */ \
+ max_vx = src_image->bits.width << 16; \
+ max_vy = src_image->bits.height << 16; \
+ \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ } \
+ \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
+ PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+ { \
+ pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
+ &width, &left_pad, &right_pad); \
+ vx += left_pad * unit_x; \
+ } \
+ \
+ while (--height >= 0) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ if (have_mask && !mask_is_solid) \
+ { \
+ mask = mask_line; \
+ mask_line += mask_stride; \
+ } \
+ \
+ y = vy >> 16; \
+ vy += unit_y; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
+ { \
+ repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
+ src = src_first_line + src_stride * y; \
+ if (left_pad > 0) \
+ { \
+ scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \
+ } \
+ if (width > 0) \
+ { \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
+ dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
+ } \
+ if (right_pad > 0) \
+ { \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
+ dst + left_pad + width, src + src_image->bits.width - 1, \
+ right_pad, 0, 0, 0, FALSE); \
+ } \
+ } \
+ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+ { \
+ static const src_type_t zero[1] = { 0 }; \
+ if (y < 0 || y >= src_image->bits.height) \
+ { \
+ scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \
+ continue; \
+ } \
+ src = src_first_line + src_stride * y; \
+ if (left_pad > 0) \
+ { \
+ scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \
+ } \
+ if (width > 0) \
+ { \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
+ dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
+ } \
+ if (right_pad > 0) \
+ { \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
+ dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \
+ } \
+ } \
+ else \
+ { \
+ src = src_first_line + src_stride * y; \
+ scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \
+ } \
+ } \
+}
+
+/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
+#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+ FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid)
+
+#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
+ repeat_mode) \
+ static force_inline void \
+ scanline_func##scale_func_name##_wrapper ( \
+ const uint8_t *mask, \
+ dst_type_t *dst, \
+ const src_type_t *src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t fully_transparent_src) \
+ { \
+ scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
+ } \
+ FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
+ src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
+
+#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
+ repeat_mode) \
+ FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
+ dst_type_t, repeat_mode)
+
+#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
+ src_type_t, dst_type_t, OP, repeat_mode) \
+ FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
+ SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
+ OP, repeat_mode) \
+ FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
+ scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
+ src_type_t, dst_type_t, repeat_mode)
+
+
+#define SCALED_NEAREST_FLAGS \
+ (FAST_PATH_SCALE_TRANSFORM | \
+ FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NEAREST_FILTER | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_NARROW_FORMAT)
+
+#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+ }
+
+/* Prefer the 'cover' variant because it is faster */
+#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
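+
+/* For illustration only (not used by the code): a fast path table
+ * typically instantiates these macros like
+ *
+ *     static const pixman_fast_path_t paths[] =
+ *     {
+ *         SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
+ *         { PIXMAN_OP_NONE },
+ *     };
+ *
+ * which expands to the COVER, NONE, PAD and NORMAL entries in that
+ * order, so the faster 'cover' variant is matched first.
+ */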
+
+/*****************************************************************************/
+
+/*
+ * Identify 5 zones in each scanline for bilinear scaling, depending on
+ * whether the 2 pixels to be interpolated are fetched from the image
+ * itself, from the padding area around it, or from both.
+ */
+static force_inline void
+bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ int32_t * left_pad,
+ int32_t * left_tz,
+ int32_t * width,
+ int32_t * right_tz,
+ int32_t * right_pad)
+{
+ int width1 = *width, left_pad1, right_pad1;
+ int width2 = *width, left_pad2, right_pad2;
+
+ pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
+ &width1, &left_pad1, &right_pad1);
+ pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
+ unit_x, &width2, &left_pad2, &right_pad2);
+
+ *left_pad = left_pad2;
+ *left_tz = left_pad1 - left_pad2;
+ *right_tz = right_pad2 - right_pad1;
+ *right_pad = right_pad1;
+ *width -= *left_pad + *left_tz + *right_tz + *right_pad;
+}
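+
+/* For illustration only (not used by the code): the resulting split of
+ * a scanline is
+ *
+ *     [ left_pad | left_tz | width | right_tz | right_pad ]
+ *
+ * where the transition zones ('tz') contain the pixels whose two
+ * horizontal samples straddle the image edge (one sample inside the
+ * image, one in the padding area), while the 'width' pixels take both
+ * samples from inside the image.
+ */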
+
+/*
+ * Main loop template for single-pass bilinear scaling. It must be
+ * provided with a 'scanline_func' that performs the compositing
+ * operation. The required function has the following prototype:
+ *
+ * scanline_func (dst_type_t * dst,
+ * const mask_type_t * mask,
+ * const src_type_t * src_top,
+ * const src_type_t * src_bottom,
+ * int32_t width,
+ * int weight_top,
+ * int weight_bottom,
+ * pixman_fixed_t vx,
+ * pixman_fixed_t unit_x,
+ * pixman_fixed_t max_vx,
+ * pixman_bool_t zero_src)
+ *
+ * Where:
+ * dst - destination scanline buffer for storing results
+ * mask - mask buffer (or single value for solid mask)
+ * src_top, src_bottom - two source scanlines
+ * width - number of pixels to process
+ * weight_top - weight of the top row for interpolation
+ * weight_bottom - weight of the bottom row for interpolation
+ * vx - initial position for fetching the first pair of
+ * pixels from the source buffer
+ * unit_x - position increment needed to move to the next pair
+ * of pixels
+ * max_vx - image size as a fixed point value, can be used for
+ * implementing NORMAL repeat (when it is supported)
+ * zero_src - boolean hint variable, which is set to TRUE when
+ * all source pixels are fetched from zero padding
+ * zone for NONE repeat
+ *
+ * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
+ * but sometimes it may be less than that for NONE repeat when handling
+ * fuzzy antialiased top or bottom image edges. Also, both weight
+ * variables are guaranteed to have values in the 0-255 range, so
+ * they fit into an unsigned byte and can be used with 8-bit SIMD
+ * multiplication instructions.
+ */
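+
+/* For illustration only (not used by the code), a minimal unoptimized
+ * scanline_func sketch for a 32-bit source with the SRC operator and
+ * no mask; bilinear_interpolation () is a hypothetical helper that
+ * blends four samples with the given horizontal and vertical weights
+ * (mask, max_vx and zero_src are ignored in this sketch):
+ *
+ *     static void
+ *     sketch_scanline_func (uint32_t *dst, const uint32_t *mask,
+ *                           const uint32_t *src_top,
+ *                           const uint32_t *src_bottom,
+ *                           int32_t width, int wt, int wb,
+ *                           pixman_fixed_t vx, pixman_fixed_t unit_x,
+ *                           pixman_fixed_t max_vx, pixman_bool_t zero_src)
+ *     {
+ *         while (width--)
+ *         {
+ *             int x = pixman_fixed_to_int (vx);
+ *             int wr = (vx >> 8) & 0xff;
+ *             int wl = 256 - wr;
+ *
+ *             *dst++ = bilinear_interpolation (src_top[x], src_top[x + 1],
+ *                                              src_bottom[x], src_bottom[x + 1],
+ *                                              wl, wr, wt, wb);
+ *             vx += unit_x;
+ *         }
+ *     }
+ */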
+#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+static void \
+fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dst_x, \
+ int32_t dst_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type_t *dst_line; \
+ mask_type_t *mask_line; \
+ src_type_t *src_first_line; \
+ int y1, y2; \
+ pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
+ pixman_vector_t v; \
+ pixman_fixed_t vx, vy; \
+ pixman_fixed_t unit_x, unit_y; \
+ int32_t left_pad, left_tz, right_tz, right_pad; \
+ \
+ dst_type_t *dst; \
+ mask_type_t solid_mask; \
+ const mask_type_t *mask = &solid_mask; \
+ int src_stride, mask_stride, dst_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
+ if (have_mask) \
+ { \
+ if (mask_is_solid) \
+ { \
+ solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
+ mask_stride = 0; \
+ } \
+ else \
+ { \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
+ mask_stride, mask_line, 1); \
+ } \
+ } \
+ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
+ * transformed from destination space to source space */ \
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
+ \
+ /* reference point is the center of the pixel */ \
+ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
+ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
+ v.vector[2] = pixman_fixed_1; \
+ \
+ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
+ return; \
+ \
+ unit_x = src_image->common.transform->matrix[0][0]; \
+ unit_y = src_image->common.transform->matrix[1][1]; \
+ \
+ v.vector[0] -= pixman_fixed_1 / 2; \
+ v.vector[1] -= pixman_fixed_1 / 2; \
+ \
+ vy = v.vector[1]; \
+ \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
+ PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+ { \
+ bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
+ &left_pad, &left_tz, &width, &right_tz, &right_pad); \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
+ { \
+ /* PAD repeat does not need special handling for 'transition zones' and */ \
+ /* they can be combined with 'padding zones' safely */ \
+ left_pad += left_tz; \
+ right_pad += right_tz; \
+ left_tz = right_tz = 0; \
+ } \
+ v.vector[0] += left_pad * unit_x; \
+ } \
+ \
+ while (--height >= 0) \
+ { \
+ int weight1, weight2; \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ vx = v.vector[0]; \
+ if (have_mask && !mask_is_solid) \
+ { \
+ mask = mask_line; \
+ mask_line += mask_stride; \
+ } \
+ \
+ y1 = pixman_fixed_to_int (vy); \
+ weight2 = (vy >> 8) & 0xff; \
+ if (weight2) \
+ { \
+ /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \
+ y2 = y1 + 1; \
+ weight1 = 256 - weight2; \
+ } \
+ else \
+ { \
+ /* set both top and bottom row to the same scanline, and weights to 128+128 */ \
+ y2 = y1; \
+ weight1 = weight2 = 128; \
+ } \
+ vy += unit_y; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
+ { \
+ src_type_t *src1, *src2; \
+ src_type_t buf1[2]; \
+ src_type_t buf2[2]; \
+ repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
+ repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
+ src1 = src_first_line + src_stride * y1; \
+ src2 = src_first_line + src_stride * y2; \
+ \
+ if (left_pad > 0) \
+ { \
+ buf1[0] = buf1[1] = src1[0]; \
+ buf2[0] = buf2[1] = src2[0]; \
+ scanline_func (dst, mask, \
+ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
+ dst += left_pad; \
+ if (have_mask && !mask_is_solid) \
+ mask += left_pad; \
+ } \
+ if (width > 0) \
+ { \
+ scanline_func (dst, mask, \
+ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
+ dst += width; \
+ if (have_mask && !mask_is_solid) \
+ mask += width; \
+ } \
+ if (right_pad > 0) \
+ { \
+ buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
+ buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
+ scanline_func (dst, mask, \
+ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
+ } \
+ } \
+ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+ { \
+ src_type_t *src1, *src2; \
+ src_type_t buf1[2]; \
+ src_type_t buf2[2]; \
+ /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
+ if (y1 < 0) \
+ { \
+ weight1 = 0; \
+ y1 = 0; \
+ } \
+ if (y1 >= src_image->bits.height) \
+ { \
+ weight1 = 0; \
+ y1 = src_image->bits.height - 1; \
+ } \
+ if (y2 < 0) \
+ { \
+ weight2 = 0; \
+ y2 = 0; \
+ } \
+ if (y2 >= src_image->bits.height) \
+ { \
+ weight2 = 0; \
+ y2 = src_image->bits.height - 1; \
+ } \
+ src1 = src_first_line + src_stride * y1; \
+ src2 = src_first_line + src_stride * y2; \
+ \
+ if (left_pad > 0) \
+ { \
+ buf1[0] = buf1[1] = 0; \
+ buf2[0] = buf2[1] = 0; \
+ scanline_func (dst, mask, \
+ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
+ dst += left_pad; \
+ if (have_mask && !mask_is_solid) \
+ mask += left_pad; \
+ } \
+ if (left_tz > 0) \
+ { \
+ buf1[0] = 0; \
+ buf1[1] = src1[0]; \
+ buf2[0] = 0; \
+ buf2[1] = src2[0]; \
+ scanline_func (dst, mask, \
+ buf1, buf2, left_tz, weight1, weight2, \
+ pixman_fixed_frac (vx), unit_x, 0, FALSE); \
+ dst += left_tz; \
+ if (have_mask && !mask_is_solid) \
+ mask += left_tz; \
+ vx += left_tz * unit_x; \
+ } \
+ if (width > 0) \
+ { \
+ scanline_func (dst, mask, \
+ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
+ dst += width; \
+ if (have_mask && !mask_is_solid) \
+ mask += width; \
+ vx += width * unit_x; \
+ } \
+ if (right_tz > 0) \
+ { \
+ buf1[0] = src1[src_image->bits.width - 1]; \
+ buf1[1] = 0; \
+ buf2[0] = src2[src_image->bits.width - 1]; \
+ buf2[1] = 0; \
+ scanline_func (dst, mask, \
+ buf1, buf2, right_tz, weight1, weight2, \
+ pixman_fixed_frac (vx), unit_x, 0, FALSE); \
+ dst += right_tz; \
+ if (have_mask && !mask_is_solid) \
+ mask += right_tz; \
+ } \
+ if (right_pad > 0) \
+ { \
+ buf1[0] = buf1[1] = 0; \
+ buf2[0] = buf2[1] = 0; \
+ scanline_func (dst, mask, \
+ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
+ } \
+ } \
+ else \
+ { \
+ scanline_func (dst, mask, src_first_line + src_stride * y1, \
+ src_first_line + src_stride * y2, width, \
+ weight1, weight2, vx, unit_x, max_vx, FALSE); \
+ } \
+ } \
+}
+
+/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
+#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+ FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
+ dst_type_t, repeat_mode, have_mask, mask_is_solid)
+
+#define SCALED_BILINEAR_FLAGS \
+ (FAST_PATH_SCALE_TRANSFORM | \
+ FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_BILINEAR_FILTER | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_NARROW_FORMAT)
+
+#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_BILINEAR_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_BILINEAR_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
+ }
+
+#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_BILINEAR_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_BILINEAR_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
+ }
+
+#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_BILINEAR_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_BILINEAR_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
+ }
+
+/* Prefer the 'cover' variant because it is faster */
+#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
+ SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func)
+
+#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+
+#endif
diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c
index 872fb7e9f..1e0091094 100644
--- a/pixman/pixman/pixman-general.c
+++ b/pixman/pixman/pixman-general.c
@@ -1,311 +1,311 @@
-/*
- * Copyright © 2009 Red Hat, Inc.
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- * 2005 Lars Knoll & Zack Rusin, Trolltech
- * 2008 Aaron Plattner, NVIDIA Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Red Hat not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. Red Hat makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "pixman-private.h"
-
-static void
-general_src_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x, int y, int width, int height,
- uint8_t *buffer, iter_flags_t flags)
-{
- iter->image = image;
- iter->x = x;
- iter->y = y;
- iter->width = width;
- iter->buffer = (uint32_t *)buffer;
-
- if (image->type == SOLID)
- {
- _pixman_solid_fill_iter_init (
- image, iter, x, y, width, height, buffer, flags);
- }
- else if (image->type == LINEAR)
- {
- _pixman_linear_gradient_iter_init (
- image, iter, x, y, width, height, buffer, flags);
- }
- else if (image->type == RADIAL)
- {
- _pixman_radial_gradient_iter_init (
- image, iter, x, y, width, height, buffer, flags);
- }
- else if (image->type == CONICAL)
- {
- _pixman_conical_gradient_iter_init (
- image, iter, x, y, width, height, buffer, flags);
- }
- else if (image->type == BITS)
- {
- _pixman_bits_image_src_iter_init (
- image, iter, x, y, width, height, buffer, flags);
- }
- else
- {
- _pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
- }
-}
-
-static void
-general_dest_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x, int y, int width, int height,
- uint8_t *buffer, iter_flags_t flags)
-{
- iter->image = image;
- iter->x = x;
- iter->y = y;
- iter->width = width;
- iter->buffer = (uint32_t *)buffer;
-
- if (image->type == BITS)
- {
- _pixman_bits_image_dest_iter_init (
- image, iter, x, y, width, height, buffer, flags);
- }
- else
- {
- _pixman_log_error (FUNC, "Trying to write to a non-writable image");
- }
-}
-
-typedef struct op_info_t op_info_t;
-struct op_info_t
-{
- uint8_t src, dst;
-};
-
-#define ITER_IGNORE_BOTH \
- (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_LOCALIZED_ALPHA)
-
-static const op_info_t op_flags[PIXMAN_N_OPERATORS] =
-{
- /* Src Dst */
- { ITER_IGNORE_BOTH, ITER_IGNORE_BOTH }, /* CLEAR */
- { ITER_LOCALIZED_ALPHA, ITER_IGNORE_BOTH }, /* SRC */
- { ITER_IGNORE_BOTH, ITER_LOCALIZED_ALPHA }, /* DST */
- { 0, ITER_LOCALIZED_ALPHA }, /* OVER */
- { ITER_LOCALIZED_ALPHA, 0 }, /* OVER_REVERSE */
- { ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* IN */
- { ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* IN_REVERSE */
- { ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* OUT */
- { ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* OUT_REVERSE */
- { 0, 0 }, /* ATOP */
- { 0, 0 }, /* ATOP_REVERSE */
- { 0, 0 }, /* XOR */
- { ITER_LOCALIZED_ALPHA, ITER_LOCALIZED_ALPHA }, /* ADD */
- { 0, 0 }, /* SATURATE */
-};
-
-#define SCANLINE_BUFFER_LENGTH 8192
-
-static void
-general_composite_rect (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8];
- uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;
- uint8_t *src_buffer, *mask_buffer, *dest_buffer;
- pixman_iter_t src_iter, mask_iter, dest_iter;
- pixman_combine_32_func_t compose;
- pixman_bool_t component_alpha;
- iter_flags_t narrow, src_flags;
- int Bpp;
- int i;
-
- if ((src->common.flags & FAST_PATH_NARROW_FORMAT) &&
- (!mask || mask->common.flags & FAST_PATH_NARROW_FORMAT) &&
- (dest->common.flags & FAST_PATH_NARROW_FORMAT))
- {
- narrow = ITER_NARROW;
- Bpp = 4;
- }
- else
- {
- narrow = 0;
- Bpp = 8;
- }
-
- if (width * Bpp > SCANLINE_BUFFER_LENGTH)
- {
- scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
-
- if (!scanline_buffer)
- return;
- }
-
- src_buffer = scanline_buffer;
- mask_buffer = src_buffer + width * Bpp;
- dest_buffer = mask_buffer + width * Bpp;
-
- /* src iter */
- src_flags = narrow | op_flags[op].src;
-
- _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src,
- src_x, src_y, width, height,
- src_buffer, src_flags);
-
- /* mask iter */
- if ((src_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
- (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
- {
- /* If it doesn't matter what the source is, then it doesn't matter
- * what the mask is
- */
- mask = NULL;
- }
-
- component_alpha =
- mask &&
- mask->common.type == BITS &&
- mask->common.component_alpha &&
- PIXMAN_FORMAT_RGB (mask->bits.format);
-
- _pixman_implementation_src_iter_init (
- imp->toplevel, &mask_iter, mask, mask_x, mask_y, width, height,
- mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB));
-
- /* dest iter */
- _pixman_implementation_dest_iter_init (imp->toplevel, &dest_iter, dest,
- dest_x, dest_y, width, height,
- dest_buffer,
- narrow | op_flags[op].dst);
-
- if (narrow)
- {
- if (component_alpha)
- compose = _pixman_implementation_combine_32_ca;
- else
- compose = _pixman_implementation_combine_32;
- }
- else
- {
- if (component_alpha)
- compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca;
- else
- compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64;
- }
-
- if (!compose)
- return;
-
- for (i = 0; i < height; ++i)
- {
- uint32_t *s, *m, *d;
-
- m = mask_iter.get_scanline (&mask_iter, NULL);
- s = src_iter.get_scanline (&src_iter, m);
- d = dest_iter.get_scanline (&dest_iter, NULL);
-
- compose (imp->toplevel, op, d, s, m, width);
-
- dest_iter.write_back (&dest_iter);
- }
-
- if (scanline_buffer != (uint8_t *) stack_scanline_buffer)
- free (scanline_buffer);
-}
-
-static const pixman_fast_path_t general_fast_path[] =
-{
- { PIXMAN_OP_any, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, general_composite_rect },
- { PIXMAN_OP_NONE }
-};
-
-static pixman_bool_t
-general_blt (pixman_implementation_t *imp,
- uint32_t * src_bits,
- uint32_t * dst_bits,
- int src_stride,
- int dst_stride,
- int src_bpp,
- int dst_bpp,
- int src_x,
- int src_y,
- int dst_x,
- int dst_y,
- int width,
- int height)
-{
- /* We can't blit unless we have sse2 or mmx */
-
- return FALSE;
-}
-
-static pixman_bool_t
-general_fill (pixman_implementation_t *imp,
- uint32_t * bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- return FALSE;
-}
-
-pixman_implementation_t *
-_pixman_implementation_create_general (void)
-{
- pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
-
- _pixman_setup_combiner_functions_32 (imp);
- _pixman_setup_combiner_functions_64 (imp);
-
- imp->blt = general_blt;
- imp->fill = general_fill;
- imp->src_iter_init = general_src_iter_init;
- imp->dest_iter_init = general_dest_iter_init;
-
- return imp;
-}
-
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ * 2005 Lars Knoll & Zack Rusin, Trolltech
+ * 2008 Aaron Plattner, NVIDIA Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. Red Hat makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pixman-private.h"
+
+static void
+general_src_iter_init (pixman_implementation_t *imp,
+ pixman_iter_t *iter,
+ pixman_image_t *image,
+ int x, int y, int width, int height,
+ uint8_t *buffer, iter_flags_t flags)
+{
+ iter->image = image;
+ iter->x = x;
+ iter->y = y;
+ iter->width = width;
+ iter->buffer = (uint32_t *)buffer;
+
+ if (image->type == SOLID)
+ {
+ _pixman_solid_fill_iter_init (
+ image, iter, x, y, width, height, buffer, flags);
+ }
+ else if (image->type == LINEAR)
+ {
+ _pixman_linear_gradient_iter_init (
+ image, iter, x, y, width, height, buffer, flags);
+ }
+ else if (image->type == RADIAL)
+ {
+ _pixman_radial_gradient_iter_init (
+ image, iter, x, y, width, height, buffer, flags);
+ }
+ else if (image->type == CONICAL)
+ {
+ _pixman_conical_gradient_iter_init (
+ image, iter, x, y, width, height, buffer, flags);
+ }
+ else if (image->type == BITS)
+ {
+ _pixman_bits_image_src_iter_init (
+ image, iter, x, y, width, height, buffer, flags);
+ }
+ else
+ {
+ _pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
+ }
+}
+
+static void
+general_dest_iter_init (pixman_implementation_t *imp,
+ pixman_iter_t *iter,
+ pixman_image_t *image,
+ int x, int y, int width, int height,
+ uint8_t *buffer, iter_flags_t flags)
+{
+ iter->image = image;
+ iter->x = x;
+ iter->y = y;
+ iter->width = width;
+ iter->buffer = (uint32_t *)buffer;
+
+ if (image->type == BITS)
+ {
+ _pixman_bits_image_dest_iter_init (
+ image, iter, x, y, width, height, buffer, flags);
+ }
+ else
+ {
+ _pixman_log_error (FUNC, "Trying to write to a non-writable image");
+ }
+}
+
+typedef struct op_info_t op_info_t;
+struct op_info_t
+{
+ uint8_t src, dst;
+};
+
+#define ITER_IGNORE_BOTH \
+ (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_LOCALIZED_ALPHA)
+
+static const op_info_t op_flags[PIXMAN_N_OPERATORS] =
+{
+ /* Src Dst */
+ { ITER_IGNORE_BOTH, ITER_IGNORE_BOTH }, /* CLEAR */
+ { ITER_LOCALIZED_ALPHA, ITER_IGNORE_BOTH }, /* SRC */
+ { ITER_IGNORE_BOTH, ITER_LOCALIZED_ALPHA }, /* DST */
+ { 0, ITER_LOCALIZED_ALPHA }, /* OVER */
+ { ITER_LOCALIZED_ALPHA, 0 }, /* OVER_REVERSE */
+ { ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* IN */
+ { ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* IN_REVERSE */
+ { ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* OUT */
+ { ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* OUT_REVERSE */
+ { 0, 0 }, /* ATOP */
+ { 0, 0 }, /* ATOP_REVERSE */
+ { 0, 0 }, /* XOR */
+ { ITER_LOCALIZED_ALPHA, ITER_LOCALIZED_ALPHA }, /* ADD */
+ { 0, 0 }, /* SATURATE */
+};
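+
+/* For example, the CLEAR entry above ignores both the source and the
+ * destination contents: the result is always transparent regardless of
+ * either image, so neither iterator needs to fetch real pixel data.
+ */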
+
+#define SCANLINE_BUFFER_LENGTH 8192
+
+static void
+general_composite_rect (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src,
+ pixman_image_t * mask,
+ pixman_image_t * dest,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8];
+ uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;
+ uint8_t *src_buffer, *mask_buffer, *dest_buffer;
+ pixman_iter_t src_iter, mask_iter, dest_iter;
+ pixman_combine_32_func_t compose;
+ pixman_bool_t component_alpha;
+ iter_flags_t narrow, src_flags;
+ int Bpp;
+ int i;
+
+ if ((src->common.flags & FAST_PATH_NARROW_FORMAT) &&
+ (!mask || mask->common.flags & FAST_PATH_NARROW_FORMAT) &&
+ (dest->common.flags & FAST_PATH_NARROW_FORMAT))
+ {
+ narrow = ITER_NARROW;
+ Bpp = 4;
+ }
+ else
+ {
+ narrow = 0;
+ Bpp = 8;
+ }
+
+ if (width * Bpp > SCANLINE_BUFFER_LENGTH)
+ {
+ scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
+
+ if (!scanline_buffer)
+ return;
+ }
+
+ src_buffer = scanline_buffer;
+ mask_buffer = src_buffer + width * Bpp;
+ dest_buffer = mask_buffer + width * Bpp;
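+
+ /* The three buffers are carved consecutively out of a single
+ * allocation; each holds one scanline of width * Bpp bytes (Bpp is
+ * 4 in the narrow 8-bits-per-channel case and 8 in the wide
+ * 16-bits-per-channel case). */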
+
+ /* src iter */
+ src_flags = narrow | op_flags[op].src;
+
+ _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src,
+ src_x, src_y, width, height,
+ src_buffer, src_flags);
+
+ /* mask iter */
+ if ((src_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
+ (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
+ {
+ /* If it doesn't matter what the source is, then it doesn't matter
+ * what the mask is
+ */
+ mask = NULL;
+ }
+
+ component_alpha =
+ mask &&
+ mask->common.type == BITS &&
+ mask->common.component_alpha &&
+ PIXMAN_FORMAT_RGB (mask->bits.format);
+
+ _pixman_implementation_src_iter_init (
+ imp->toplevel, &mask_iter, mask, mask_x, mask_y, width, height,
+ mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB));
+
+ /* dest iter */
+ _pixman_implementation_dest_iter_init (imp->toplevel, &dest_iter, dest,
+ dest_x, dest_y, width, height,
+ dest_buffer,
+ narrow | op_flags[op].dst);
+
+ if (narrow)
+ {
+ if (component_alpha)
+ compose = _pixman_implementation_combine_32_ca;
+ else
+ compose = _pixman_implementation_combine_32;
+ }
+ else
+ {
+ if (component_alpha)
+ compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca;
+ else
+ compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64;
+ }
+
+ if (!compose)
+ return;
+
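+ /* Composite one scanline per iteration: fetch the mask scanline
+ * first so that the source iterator can use it (e.g. to skip
+ * pixels the mask zeroes out), then fetch source and destination,
+ * combine them, and write the result back to the destination
+ * image. */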
+ for (i = 0; i < height; ++i)
+ {
+ uint32_t *s, *m, *d;
+
+ m = mask_iter.get_scanline (&mask_iter, NULL);
+ s = src_iter.get_scanline (&src_iter, m);
+ d = dest_iter.get_scanline (&dest_iter, NULL);
+
+ compose (imp->toplevel, op, d, s, m, width);
+
+ dest_iter.write_back (&dest_iter);
+ }
+
+ if (scanline_buffer != (uint8_t *) stack_scanline_buffer)
+ free (scanline_buffer);
+}
+
+static const pixman_fast_path_t general_fast_path[] =
+{
+ { PIXMAN_OP_any, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, general_composite_rect },
+ { PIXMAN_OP_NONE }
+};
+
+static pixman_bool_t
+general_blt (pixman_implementation_t *imp,
+ uint32_t * src_bits,
+ uint32_t * dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height)
+{
+ /* We can't blit unless we have sse2 or mmx */
+
+ return FALSE;
+}
+
+static pixman_bool_t
+general_fill (pixman_implementation_t *imp,
+ uint32_t * bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ return FALSE;
+}
+
+pixman_implementation_t *
+_pixman_implementation_create_general (void)
+{
+ pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
+
+ _pixman_setup_combiner_functions_32 (imp);
+ _pixman_setup_combiner_functions_64 (imp);
+
+ imp->blt = general_blt;
+ imp->fill = general_fill;
+ imp->src_iter_init = general_src_iter_init;
+ imp->dest_iter_init = general_dest_iter_init;
+
+ return imp;
+}
+
diff --git a/pixman/pixman/pixman-image.c b/pixman/pixman/pixman-image.c
index 84bacf87e..584150dca 100644
--- a/pixman/pixman/pixman-image.c
+++ b/pixman/pixman/pixman-image.c
@@ -1,781 +1,781 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pixman-private.h"
-
-pixman_bool_t
-_pixman_init_gradient (gradient_t * gradient,
- const pixman_gradient_stop_t *stops,
- int n_stops)
-{
- return_val_if_fail (n_stops > 0, FALSE);
-
- gradient->stops = pixman_malloc_ab (n_stops, sizeof (pixman_gradient_stop_t));
- if (!gradient->stops)
- return FALSE;
-
- memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t));
-
- gradient->n_stops = n_stops;
-
- return TRUE;
-}
-
-pixman_image_t *
-_pixman_image_allocate (void)
-{
- pixman_image_t *image = malloc (sizeof (pixman_image_t));
-
- if (image)
- {
- image_common_t *common = &image->common;
-
- pixman_region32_init (&common->clip_region);
-
- common->alpha_count = 0;
- common->have_clip_region = FALSE;
- common->clip_sources = FALSE;
- common->transform = NULL;
- common->repeat = PIXMAN_REPEAT_NONE;
- common->filter = PIXMAN_FILTER_NEAREST;
- common->filter_params = NULL;
- common->n_filter_params = 0;
- common->alpha_map = NULL;
- common->component_alpha = FALSE;
- common->ref_count = 1;
- common->property_changed = NULL;
- common->client_clip = FALSE;
- common->destroy_func = NULL;
- common->destroy_data = NULL;
- common->dirty = TRUE;
- }
-
- return image;
-}
-
-static void
-image_property_changed (pixman_image_t *image)
-{
- image->common.dirty = TRUE;
-}
-
-/* Ref Counting */
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_ref (pixman_image_t *image)
-{
- image->common.ref_count++;
-
- return image;
-}
-
-/* returns TRUE when the image is freed */
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_unref (pixman_image_t *image)
-{
- image_common_t *common = (image_common_t *)image;
-
- common->ref_count--;
-
- if (common->ref_count == 0)
- {
- if (image->common.destroy_func)
- image->common.destroy_func (image, image->common.destroy_data);
-
- pixman_region32_fini (&common->clip_region);
-
- if (common->transform)
- free (common->transform);
-
- if (common->filter_params)
- free (common->filter_params);
-
- if (common->alpha_map)
- pixman_image_unref ((pixman_image_t *)common->alpha_map);
-
- if (image->type == LINEAR ||
- image->type == RADIAL ||
- image->type == CONICAL)
- {
- if (image->gradient.stops)
- free (image->gradient.stops);
- }
-
- if (image->type == BITS && image->bits.free_me)
- free (image->bits.free_me);
-
- free (image);
-
- return TRUE;
- }
-
- return FALSE;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_destroy_function (pixman_image_t * image,
- pixman_image_destroy_func_t func,
- void * data)
-{
- image->common.destroy_func = func;
- image->common.destroy_data = data;
-}
-
-PIXMAN_EXPORT void *
-pixman_image_get_destroy_data (pixman_image_t *image)
-{
- return image->common.destroy_data;
-}
-
-void
-_pixman_image_reset_clip_region (pixman_image_t *image)
-{
- image->common.have_clip_region = FALSE;
-}
-
-/* Executive Summary: This function is a no-op that only exists
- * for historical reasons.
- *
- * There used to be a bug in the X server where it would rely on
- * out-of-bounds accesses when it was asked to composite with a
- * window as the source. It would create a pixman image pointing
- * to some bogus position in memory, but then set a clip region
- * to the position where the actual bits were.
- *
- * Due to a bug in old versions of pixman, where it would not clip
- * against the image bounds when a clip region was set, this would
- * actually work. So when the pixman bug was fixed, a workaround was
- * added to allow certain out-of-bound accesses. This function disabled
- * those workarounds.
- *
- * Since 0.21.2, pixman doesn't do these workarounds anymore, so now
- * this function is a no-op.
- */
-PIXMAN_EXPORT void
-pixman_disable_out_of_bounds_workaround (void)
-{
-}
-
-static void
-compute_image_info (pixman_image_t *image)
-{
- pixman_format_code_t code;
- uint32_t flags = 0;
-
- /* Transform */
- if (!image->common.transform)
- {
- flags |= (FAST_PATH_ID_TRANSFORM |
- FAST_PATH_X_UNIT_POSITIVE |
- FAST_PATH_Y_UNIT_ZERO |
- FAST_PATH_AFFINE_TRANSFORM);
- }
- else
- {
- flags |= FAST_PATH_HAS_TRANSFORM;
-
- if (image->common.transform->matrix[2][0] == 0 &&
- image->common.transform->matrix[2][1] == 0 &&
- image->common.transform->matrix[2][2] == pixman_fixed_1)
- {
- flags |= FAST_PATH_AFFINE_TRANSFORM;
-
- if (image->common.transform->matrix[0][1] == 0 &&
- image->common.transform->matrix[1][0] == 0)
- {
- if (image->common.transform->matrix[0][0] == -pixman_fixed_1 &&
- image->common.transform->matrix[1][1] == -pixman_fixed_1)
- {
- flags |= FAST_PATH_ROTATE_180_TRANSFORM;
- }
- flags |= FAST_PATH_SCALE_TRANSFORM;
- }
- else if (image->common.transform->matrix[0][0] == 0 &&
- image->common.transform->matrix[1][1] == 0)
- {
- pixman_fixed_t m01 = image->common.transform->matrix[0][1];
- if (m01 == -image->common.transform->matrix[1][0])
- {
- if (m01 == -pixman_fixed_1)
- flags |= FAST_PATH_ROTATE_90_TRANSFORM;
- else if (m01 == pixman_fixed_1)
- flags |= FAST_PATH_ROTATE_270_TRANSFORM;
- }
- }
- }
-
- if (image->common.transform->matrix[0][0] > 0)
- flags |= FAST_PATH_X_UNIT_POSITIVE;
-
- if (image->common.transform->matrix[1][0] == 0)
- flags |= FAST_PATH_Y_UNIT_ZERO;
- }
-
- /* Filter */
- switch (image->common.filter)
- {
- case PIXMAN_FILTER_NEAREST:
- case PIXMAN_FILTER_FAST:
- flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
- break;
-
- case PIXMAN_FILTER_BILINEAR:
- case PIXMAN_FILTER_GOOD:
- case PIXMAN_FILTER_BEST:
- flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
- break;
-
- case PIXMAN_FILTER_CONVOLUTION:
- break;
-
- default:
- flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
- break;
- }
-
- /* Repeat mode */
- switch (image->common.repeat)
- {
- case PIXMAN_REPEAT_NONE:
- flags |=
- FAST_PATH_NO_REFLECT_REPEAT |
- FAST_PATH_NO_PAD_REPEAT |
- FAST_PATH_NO_NORMAL_REPEAT;
- break;
-
- case PIXMAN_REPEAT_REFLECT:
- flags |=
- FAST_PATH_NO_PAD_REPEAT |
- FAST_PATH_NO_NONE_REPEAT |
- FAST_PATH_NO_NORMAL_REPEAT;
- break;
-
- case PIXMAN_REPEAT_PAD:
- flags |=
- FAST_PATH_NO_REFLECT_REPEAT |
- FAST_PATH_NO_NONE_REPEAT |
- FAST_PATH_NO_NORMAL_REPEAT;
- break;
-
- default:
- flags |=
- FAST_PATH_NO_REFLECT_REPEAT |
- FAST_PATH_NO_PAD_REPEAT |
- FAST_PATH_NO_NONE_REPEAT;
- break;
- }
-
- /* Component alpha */
- if (image->common.component_alpha)
- flags |= FAST_PATH_COMPONENT_ALPHA;
- else
- flags |= FAST_PATH_UNIFIED_ALPHA;
-
- flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NARROW_FORMAT);
-
- /* Type specific checks */
- switch (image->type)
- {
- case SOLID:
- code = PIXMAN_solid;
-
- if (image->solid.color.alpha == 0xffff)
- flags |= FAST_PATH_IS_OPAQUE;
- break;
-
- case BITS:
- if (image->bits.width == 1 &&
- image->bits.height == 1 &&
- image->common.repeat != PIXMAN_REPEAT_NONE)
- {
- code = PIXMAN_solid;
- }
- else
- {
- code = image->bits.format;
- }
-
- if (!PIXMAN_FORMAT_A (image->bits.format) &&
- PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY &&
- PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR)
- {
- flags |= FAST_PATH_SAMPLES_OPAQUE;
-
- if (image->common.repeat != PIXMAN_REPEAT_NONE)
- flags |= FAST_PATH_IS_OPAQUE;
- }
-
- if (image->bits.read_func || image->bits.write_func)
- flags &= ~FAST_PATH_NO_ACCESSORS;
-
- if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
- flags &= ~FAST_PATH_NARROW_FORMAT;
- break;
-
- case RADIAL:
- code = PIXMAN_unknown;
-
- /*
- * As explained in pixman-radial-gradient.c, every point of
- * the plane has a valid associated radius (and thus will be
- * colored) if and only if a is negative (i.e. one of the two
- * circles contains the other one).
- */
-
- if (image->radial.a >= 0)
- break;
-
- /* Fall through */
-
- case CONICAL:
- case LINEAR:
- code = PIXMAN_unknown;
-
- if (image->common.repeat != PIXMAN_REPEAT_NONE)
- {
- int i;
-
- flags |= FAST_PATH_IS_OPAQUE;
- for (i = 0; i < image->gradient.n_stops; ++i)
- {
- if (image->gradient.stops[i].color.alpha != 0xffff)
- {
- flags &= ~FAST_PATH_IS_OPAQUE;
- break;
- }
- }
- }
- break;
-
- default:
- code = PIXMAN_unknown;
- break;
- }
-
- /* Alpha map */
- if (!image->common.alpha_map)
- {
- flags |= FAST_PATH_NO_ALPHA_MAP;
- }
- else
- {
- if (PIXMAN_FORMAT_IS_WIDE (image->common.alpha_map->format))
- flags &= ~FAST_PATH_NARROW_FORMAT;
- }
-
- /* Both alpha maps and convolution filters can introduce
- * non-opaqueness in otherwise opaque images. Also
- * an image with component alpha turned on is only opaque
- * if all channels are opaque, so we simply turn it off
- * unconditionally for those images.
- */
- if (image->common.alpha_map ||
- image->common.filter == PIXMAN_FILTER_CONVOLUTION ||
- image->common.component_alpha)
- {
- flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE);
- }
-
- image->common.flags = flags;
- image->common.extended_format_code = code;
-}
-
-void
-_pixman_image_validate (pixman_image_t *image)
-{
- if (image->common.dirty)
- {
- compute_image_info (image);
-
- /* It is important that property_changed is
- * called *after* compute_image_info() because
- * property_changed() can make use of the flags
- * to set up accessors etc.
- */
- if (image->common.property_changed)
- image->common.property_changed (image);
-
- image->common.dirty = FALSE;
- }
-
- if (image->common.alpha_map)
- _pixman_image_validate ((pixman_image_t *)image->common.alpha_map);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_clip_region32 (pixman_image_t * image,
- pixman_region32_t *region)
-{
- image_common_t *common = (image_common_t *)image;
- pixman_bool_t result;
-
- if (region)
- {
- if ((result = pixman_region32_copy (&common->clip_region, region)))
- image->common.have_clip_region = TRUE;
- }
- else
- {
- _pixman_image_reset_clip_region (image);
-
- result = TRUE;
- }
-
- image_property_changed (image);
-
- return result;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_clip_region (pixman_image_t * image,
- pixman_region16_t *region)
-{
- image_common_t *common = (image_common_t *)image;
- pixman_bool_t result;
-
- if (region)
- {
- if ((result = pixman_region32_copy_from_region16 (&common->clip_region, region)))
- image->common.have_clip_region = TRUE;
- }
- else
- {
- _pixman_image_reset_clip_region (image);
-
- result = TRUE;
- }
-
- image_property_changed (image);
-
- return result;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_has_client_clip (pixman_image_t *image,
- pixman_bool_t client_clip)
-{
- image->common.client_clip = client_clip;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_transform (pixman_image_t * image,
- const pixman_transform_t *transform)
-{
- static const pixman_transform_t id =
- {
- { { pixman_fixed_1, 0, 0 },
- { 0, pixman_fixed_1, 0 },
- { 0, 0, pixman_fixed_1 } }
- };
-
- image_common_t *common = (image_common_t *)image;
- pixman_bool_t result;
-
- if (common->transform == transform)
- return TRUE;
-
- if (!transform || memcmp (&id, transform, sizeof (pixman_transform_t)) == 0)
- {
- free (common->transform);
- common->transform = NULL;
- result = TRUE;
-
- goto out;
- }
-
- if (common->transform &&
- memcmp (common->transform, transform, sizeof (pixman_transform_t)) == 0)
- {
- return TRUE;
- }
-
- if (common->transform == NULL)
- common->transform = malloc (sizeof (pixman_transform_t));
-
- if (common->transform == NULL)
- {
- result = FALSE;
-
- goto out;
- }
-
- memcpy (common->transform, transform, sizeof(pixman_transform_t));
-
- result = TRUE;
-
-out:
- image_property_changed (image);
-
- return result;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_repeat (pixman_image_t *image,
- pixman_repeat_t repeat)
-{
- if (image->common.repeat == repeat)
- return;
-
- image->common.repeat = repeat;
-
- image_property_changed (image);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_filter (pixman_image_t * image,
- pixman_filter_t filter,
- const pixman_fixed_t *params,
- int n_params)
-{
- image_common_t *common = (image_common_t *)image;
- pixman_fixed_t *new_params;
-
- if (params == common->filter_params && filter == common->filter)
- return TRUE;
-
- new_params = NULL;
- if (params)
- {
- new_params = pixman_malloc_ab (n_params, sizeof (pixman_fixed_t));
- if (!new_params)
- return FALSE;
-
- memcpy (new_params,
- params, n_params * sizeof (pixman_fixed_t));
- }
-
- common->filter = filter;
-
- if (common->filter_params)
- free (common->filter_params);
-
- common->filter_params = new_params;
- common->n_filter_params = n_params;
-
- image_property_changed (image);
- return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_source_clipping (pixman_image_t *image,
- pixman_bool_t clip_sources)
-{
- if (image->common.clip_sources == clip_sources)
- return;
-
- image->common.clip_sources = clip_sources;
-
- image_property_changed (image);
-}
-
-/* Unlike all the other property setters, this function does not
- * copy the content of indexed. Doing this copying is simply
- * way, way too expensive.
- */
-PIXMAN_EXPORT void
-pixman_image_set_indexed (pixman_image_t * image,
- const pixman_indexed_t *indexed)
-{
- bits_image_t *bits = (bits_image_t *)image;
-
- if (bits->indexed == indexed)
- return;
-
- bits->indexed = indexed;
-
- image_property_changed (image);
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_alpha_map (pixman_image_t *image,
- pixman_image_t *alpha_map,
- int16_t x,
- int16_t y)
-{
- image_common_t *common = (image_common_t *)image;
-
- return_if_fail (!alpha_map || alpha_map->type == BITS);
-
- if (alpha_map && common->alpha_count > 0)
- {
- /* If this image is being used as an alpha map itself,
- * then you can't give it an alpha map of its own.
- */
- return;
- }
-
- if (alpha_map && alpha_map->common.alpha_map)
- {
- /* If the image has an alpha map of its own,
-	 * then it can't be used as an alpha map itself.
- */
- return;
- }
-
- if (common->alpha_map != (bits_image_t *)alpha_map)
- {
- if (common->alpha_map)
- {
- common->alpha_map->common.alpha_count--;
-
- pixman_image_unref ((pixman_image_t *)common->alpha_map);
- }
-
- if (alpha_map)
- {
- common->alpha_map = (bits_image_t *)pixman_image_ref (alpha_map);
-
- common->alpha_map->common.alpha_count++;
- }
- else
- {
- common->alpha_map = NULL;
- }
- }
-
- common->alpha_origin_x = x;
- common->alpha_origin_y = y;
-
- image_property_changed (image);
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_component_alpha (pixman_image_t *image,
- pixman_bool_t component_alpha)
-{
- if (image->common.component_alpha == component_alpha)
- return;
-
- image->common.component_alpha = component_alpha;
-
- image_property_changed (image);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_get_component_alpha (pixman_image_t *image)
-{
- return image->common.component_alpha;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_accessors (pixman_image_t * image,
- pixman_read_memory_func_t read_func,
- pixman_write_memory_func_t write_func)
-{
- return_if_fail (image != NULL);
-
- if (image->type == BITS)
- {
- image->bits.read_func = read_func;
- image->bits.write_func = write_func;
-
- image_property_changed (image);
- }
-}
-
-PIXMAN_EXPORT uint32_t *
-pixman_image_get_data (pixman_image_t *image)
-{
- if (image->type == BITS)
- return image->bits.bits;
-
- return NULL;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_width (pixman_image_t *image)
-{
- if (image->type == BITS)
- return image->bits.width;
-
- return 0;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_height (pixman_image_t *image)
-{
- if (image->type == BITS)
- return image->bits.height;
-
- return 0;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_stride (pixman_image_t *image)
-{
- if (image->type == BITS)
- return image->bits.rowstride * (int) sizeof (uint32_t);
-
- return 0;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_depth (pixman_image_t *image)
-{
- if (image->type == BITS)
- return PIXMAN_FORMAT_DEPTH (image->bits.format);
-
- return 0;
-}
-
-PIXMAN_EXPORT pixman_format_code_t
-pixman_image_get_format (pixman_image_t *image)
-{
- if (image->type == BITS)
- return image->bits.format;
-
- return 0;
-}
-
-uint32_t
-_pixman_image_get_solid (pixman_implementation_t *imp,
- pixman_image_t * image,
- pixman_format_code_t format)
-{
- uint32_t result;
- pixman_iter_t iter;
-
- _pixman_implementation_src_iter_init (
- imp, &iter, image, 0, 0, 1, 1,
- (uint8_t *)&result, ITER_NARROW);
-
- result = *iter.get_scanline (&iter, NULL);
-
- /* If necessary, convert RGB <--> BGR. */
- if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB)
- {
- result = (((result & 0xff000000) >> 0) |
- ((result & 0x00ff0000) >> 16) |
- ((result & 0x0000ff00) >> 0) |
- ((result & 0x000000ff) << 16));
- }
-
- return result;
-}
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. SuSE makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "pixman-private.h"
+
+pixman_bool_t
+_pixman_init_gradient (gradient_t * gradient,
+ const pixman_gradient_stop_t *stops,
+ int n_stops)
+{
+ return_val_if_fail (n_stops > 0, FALSE);
+
+ gradient->stops = pixman_malloc_ab (n_stops, sizeof (pixman_gradient_stop_t));
+ if (!gradient->stops)
+ return FALSE;
+
+ memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t));
+
+ gradient->n_stops = n_stops;
+
+ return TRUE;
+}
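+
+/* pixman_malloc_ab is used instead of a bare malloc because it refuses
+ * allocations whose size product would overflow. A sketch of the idea
+ * (pixman's actual version lives in pixman-utils.c):
+ *
+ *     void *
+ *     malloc_ab_sketch (unsigned int a, unsigned int b)
+ *     {
+ *         if (b && a >= INT32_MAX / b)
+ *             return NULL;            // a * b would overflow
+ *
+ *         return malloc (a * b);
+ *     }
+ */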
+
+pixman_image_t *
+_pixman_image_allocate (void)
+{
+ pixman_image_t *image = malloc (sizeof (pixman_image_t));
+
+ if (image)
+ {
+ image_common_t *common = &image->common;
+
+ pixman_region32_init (&common->clip_region);
+
+ common->alpha_count = 0;
+ common->have_clip_region = FALSE;
+ common->clip_sources = FALSE;
+ common->transform = NULL;
+ common->repeat = PIXMAN_REPEAT_NONE;
+ common->filter = PIXMAN_FILTER_NEAREST;
+ common->filter_params = NULL;
+ common->n_filter_params = 0;
+ common->alpha_map = NULL;
+ common->component_alpha = FALSE;
+ common->ref_count = 1;
+ common->property_changed = NULL;
+ common->client_clip = FALSE;
+ common->destroy_func = NULL;
+ common->destroy_data = NULL;
+ common->dirty = TRUE;
+ }
+
+ return image;
+}
+
+static void
+image_property_changed (pixman_image_t *image)
+{
+ image->common.dirty = TRUE;
+}
+
+/* Ref Counting */
+PIXMAN_EXPORT pixman_image_t *
+pixman_image_ref (pixman_image_t *image)
+{
+ image->common.ref_count++;
+
+ return image;
+}
+
+/* returns TRUE when the image is freed */
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_unref (pixman_image_t *image)
+{
+ image_common_t *common = (image_common_t *)image;
+
+ common->ref_count--;
+
+ if (common->ref_count == 0)
+ {
+ if (image->common.destroy_func)
+ image->common.destroy_func (image, image->common.destroy_data);
+
+ pixman_region32_fini (&common->clip_region);
+
+ if (common->transform)
+ free (common->transform);
+
+ if (common->filter_params)
+ free (common->filter_params);
+
+ if (common->alpha_map)
+ pixman_image_unref ((pixman_image_t *)common->alpha_map);
+
+ if (image->type == LINEAR ||
+ image->type == RADIAL ||
+ image->type == CONICAL)
+ {
+ if (image->gradient.stops)
+ free (image->gradient.stops);
+ }
+
+ if (image->type == BITS && image->bits.free_me)
+ free (image->bits.free_me);
+
+ free (image);
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_destroy_function (pixman_image_t * image,
+ pixman_image_destroy_func_t func,
+ void * data)
+{
+ image->common.destroy_func = func;
+ image->common.destroy_data = data;
+}
+
+PIXMAN_EXPORT void *
+pixman_image_get_destroy_data (pixman_image_t *image)
+{
+ return image->common.destroy_data;
+}
+
+void
+_pixman_image_reset_clip_region (pixman_image_t *image)
+{
+ image->common.have_clip_region = FALSE;
+}
+
+/* Executive Summary: This function is a no-op that only exists
+ * for historical reasons.
+ *
+ * There used to be a bug in the X server where it would rely on
+ * out-of-bounds accesses when it was asked to composite with a
+ * window as the source. It would create a pixman image pointing
+ * to some bogus position in memory, but then set a clip region
+ * to the position where the actual bits were.
+ *
+ * Due to a bug in old versions of pixman, where it would not clip
+ * against the image bounds when a clip region was set, this would
+ * actually work. So when the pixman bug was fixed, a workaround was
+ * added to allow certain out-of-bounds accesses. This function disabled
+ * those workarounds.
+ *
+ * Since 0.21.2, pixman doesn't do these workarounds anymore, so now
+ * this function is a no-op.
+ */
+PIXMAN_EXPORT void
+pixman_disable_out_of_bounds_workaround (void)
+{
+}
+
+static void
+compute_image_info (pixman_image_t *image)
+{
+ pixman_format_code_t code;
+ uint32_t flags = 0;
+
+ /* Transform */
+ if (!image->common.transform)
+ {
+ flags |= (FAST_PATH_ID_TRANSFORM |
+ FAST_PATH_X_UNIT_POSITIVE |
+ FAST_PATH_Y_UNIT_ZERO |
+ FAST_PATH_AFFINE_TRANSFORM);
+ }
+ else
+ {
+ flags |= FAST_PATH_HAS_TRANSFORM;
+
+ if (image->common.transform->matrix[2][0] == 0 &&
+ image->common.transform->matrix[2][1] == 0 &&
+ image->common.transform->matrix[2][2] == pixman_fixed_1)
+ {
+ flags |= FAST_PATH_AFFINE_TRANSFORM;
+
+ if (image->common.transform->matrix[0][1] == 0 &&
+ image->common.transform->matrix[1][0] == 0)
+ {
+ if (image->common.transform->matrix[0][0] == -pixman_fixed_1 &&
+ image->common.transform->matrix[1][1] == -pixman_fixed_1)
+ {
+ flags |= FAST_PATH_ROTATE_180_TRANSFORM;
+ }
+ flags |= FAST_PATH_SCALE_TRANSFORM;
+ }
+ else if (image->common.transform->matrix[0][0] == 0 &&
+ image->common.transform->matrix[1][1] == 0)
+ {
+ pixman_fixed_t m01 = image->common.transform->matrix[0][1];
+ if (m01 == -image->common.transform->matrix[1][0])
+ {
+ if (m01 == -pixman_fixed_1)
+ flags |= FAST_PATH_ROTATE_90_TRANSFORM;
+ else if (m01 == pixman_fixed_1)
+ flags |= FAST_PATH_ROTATE_270_TRANSFORM;
+ }
+ }
+ }
+
+ if (image->common.transform->matrix[0][0] > 0)
+ flags |= FAST_PATH_X_UNIT_POSITIVE;
+
+ if (image->common.transform->matrix[1][0] == 0)
+ flags |= FAST_PATH_Y_UNIT_ZERO;
+ }
+
+ /* Filter */
+ switch (image->common.filter)
+ {
+ case PIXMAN_FILTER_NEAREST:
+ case PIXMAN_FILTER_FAST:
+ flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+ break;
+
+ case PIXMAN_FILTER_BILINEAR:
+ case PIXMAN_FILTER_GOOD:
+ case PIXMAN_FILTER_BEST:
+ flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+ break;
+
+ case PIXMAN_FILTER_CONVOLUTION:
+ break;
+
+ default:
+ flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
+ break;
+ }
+
+ /* Repeat mode */
+ switch (image->common.repeat)
+ {
+ case PIXMAN_REPEAT_NONE:
+ flags |=
+ FAST_PATH_NO_REFLECT_REPEAT |
+ FAST_PATH_NO_PAD_REPEAT |
+ FAST_PATH_NO_NORMAL_REPEAT;
+ break;
+
+ case PIXMAN_REPEAT_REFLECT:
+ flags |=
+ FAST_PATH_NO_PAD_REPEAT |
+ FAST_PATH_NO_NONE_REPEAT |
+ FAST_PATH_NO_NORMAL_REPEAT;
+ break;
+
+ case PIXMAN_REPEAT_PAD:
+ flags |=
+ FAST_PATH_NO_REFLECT_REPEAT |
+ FAST_PATH_NO_NONE_REPEAT |
+ FAST_PATH_NO_NORMAL_REPEAT;
+ break;
+
+ default:
+ flags |=
+ FAST_PATH_NO_REFLECT_REPEAT |
+ FAST_PATH_NO_PAD_REPEAT |
+ FAST_PATH_NO_NONE_REPEAT;
+ break;
+ }
+
+ /* Component alpha */
+ if (image->common.component_alpha)
+ flags |= FAST_PATH_COMPONENT_ALPHA;
+ else
+ flags |= FAST_PATH_UNIFIED_ALPHA;
+
+ flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NARROW_FORMAT);
+
+ /* Type specific checks */
+ switch (image->type)
+ {
+ case SOLID:
+ code = PIXMAN_solid;
+
+ if (image->solid.color.alpha == 0xffff)
+ flags |= FAST_PATH_IS_OPAQUE;
+ break;
+
+ case BITS:
+ if (image->bits.width == 1 &&
+ image->bits.height == 1 &&
+ image->common.repeat != PIXMAN_REPEAT_NONE)
+ {
+ code = PIXMAN_solid;
+ }
+ else
+ {
+ code = image->bits.format;
+ }
+
+ if (!PIXMAN_FORMAT_A (image->bits.format) &&
+ PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY &&
+ PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR)
+ {
+ flags |= FAST_PATH_SAMPLES_OPAQUE;
+
+ if (image->common.repeat != PIXMAN_REPEAT_NONE)
+ flags |= FAST_PATH_IS_OPAQUE;
+ }
+
+ if (image->bits.read_func || image->bits.write_func)
+ flags &= ~FAST_PATH_NO_ACCESSORS;
+
+ if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
+ flags &= ~FAST_PATH_NARROW_FORMAT;
+ break;
+
+ case RADIAL:
+ code = PIXMAN_unknown;
+
+ /*
+ * As explained in pixman-radial-gradient.c, every point of
+ * the plane has a valid associated radius (and thus will be
+ * colored) if and only if a is negative (i.e. one of the two
+ * circles contains the other one).
+ */
+
+ if (image->radial.a >= 0)
+ break;
+
+ /* Fall through */
+
+ case CONICAL:
+ case LINEAR:
+ code = PIXMAN_unknown;
+
+ if (image->common.repeat != PIXMAN_REPEAT_NONE)
+ {
+ int i;
+
+ flags |= FAST_PATH_IS_OPAQUE;
+ for (i = 0; i < image->gradient.n_stops; ++i)
+ {
+ if (image->gradient.stops[i].color.alpha != 0xffff)
+ {
+ flags &= ~FAST_PATH_IS_OPAQUE;
+ break;
+ }
+ }
+ }
+ break;
+
+ default:
+ code = PIXMAN_unknown;
+ break;
+ }
+
+ /* Alpha map */
+ if (!image->common.alpha_map)
+ {
+ flags |= FAST_PATH_NO_ALPHA_MAP;
+ }
+ else
+ {
+ if (PIXMAN_FORMAT_IS_WIDE (image->common.alpha_map->format))
+ flags &= ~FAST_PATH_NARROW_FORMAT;
+ }
+
+ /* Both alpha maps and convolution filters can introduce
+ * non-opaqueness in otherwise opaque images. Also
+ * an image with component alpha turned on is only opaque
+ * if all channels are opaque, so we simply turn it off
+ * unconditionally for those images.
+ */
+ if (image->common.alpha_map ||
+ image->common.filter == PIXMAN_FILTER_CONVOLUTION ||
+ image->common.component_alpha)
+ {
+ flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE);
+ }
+
+ image->common.flags = flags;
+ image->common.extended_format_code = code;
+}
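+
+/* Illustration (not pixman API): the rotation classification above keys
+ * off individual fixed-point matrix entries. Assuming pixman's usual
+ * matrix-times-column-vector convention, a 90-degree rotation is
+ *
+ *     static const pixman_transform_t rot90 = {{
+ *         { 0,               -pixman_fixed_1, 0 },
+ *         { pixman_fixed_1,   0,              0 },
+ *         { 0,                0,              pixman_fixed_1 }
+ *     }};
+ *
+ * Here matrix[0][0] == matrix[1][1] == 0 and
+ * matrix[0][1] == -matrix[1][0] == -pixman_fixed_1, so such an image
+ * gets FAST_PATH_ROTATE_90_TRANSFORM in addition to
+ * FAST_PATH_AFFINE_TRANSFORM.
+ */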
+
+void
+_pixman_image_validate (pixman_image_t *image)
+{
+ if (image->common.dirty)
+ {
+ compute_image_info (image);
+
+ /* It is important that property_changed is
+ * called *after* compute_image_info() because
+ * property_changed() can make use of the flags
+ * to set up accessors etc.
+ */
+ if (image->common.property_changed)
+ image->common.property_changed (image);
+
+ image->common.dirty = FALSE;
+ }
+
+ if (image->common.alpha_map)
+ _pixman_image_validate ((pixman_image_t *)image->common.alpha_map);
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_set_clip_region32 (pixman_image_t * image,
+ pixman_region32_t *region)
+{
+ image_common_t *common = (image_common_t *)image;
+ pixman_bool_t result;
+
+ if (region)
+ {
+ if ((result = pixman_region32_copy (&common->clip_region, region)))
+ image->common.have_clip_region = TRUE;
+ }
+ else
+ {
+ _pixman_image_reset_clip_region (image);
+
+ result = TRUE;
+ }
+
+ image_property_changed (image);
+
+ return result;
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_set_clip_region (pixman_image_t * image,
+ pixman_region16_t *region)
+{
+ image_common_t *common = (image_common_t *)image;
+ pixman_bool_t result;
+
+ if (region)
+ {
+ if ((result = pixman_region32_copy_from_region16 (&common->clip_region, region)))
+ image->common.have_clip_region = TRUE;
+ }
+ else
+ {
+ _pixman_image_reset_clip_region (image);
+
+ result = TRUE;
+ }
+
+ image_property_changed (image);
+
+ return result;
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_has_client_clip (pixman_image_t *image,
+ pixman_bool_t client_clip)
+{
+ image->common.client_clip = client_clip;
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_set_transform (pixman_image_t * image,
+ const pixman_transform_t *transform)
+{
+ static const pixman_transform_t id =
+ {
+ { { pixman_fixed_1, 0, 0 },
+ { 0, pixman_fixed_1, 0 },
+ { 0, 0, pixman_fixed_1 } }
+ };
+
+ image_common_t *common = (image_common_t *)image;
+ pixman_bool_t result;
+
+ if (common->transform == transform)
+ return TRUE;
+
+ if (!transform || memcmp (&id, transform, sizeof (pixman_transform_t)) == 0)
+ {
+ free (common->transform);
+ common->transform = NULL;
+ result = TRUE;
+
+ goto out;
+ }
+
+ if (common->transform &&
+	memcmp (common->transform, transform, sizeof (pixman_transform_t)) == 0)
+ {
+ return TRUE;
+ }
+
+ if (common->transform == NULL)
+ common->transform = malloc (sizeof (pixman_transform_t));
+
+ if (common->transform == NULL)
+ {
+ result = FALSE;
+
+ goto out;
+ }
+
+ memcpy (common->transform, transform, sizeof(pixman_transform_t));
+
+ result = TRUE;
+
+out:
+ image_property_changed (image);
+
+ return result;
+}
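+
+/* Usage sketch (hypothetical caller, not part of this file): NULL and
+ * the identity matrix are treated identically, so both of these calls
+ * leave the image with no stored transform:
+ *
+ *     pixman_transform_t t;
+ *
+ *     pixman_transform_init_identity (&t);
+ *     pixman_image_set_transform (image, &t);   // stored as NULL
+ *     pixman_image_set_transform (image, NULL); // same effect
+ */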
+
+PIXMAN_EXPORT void
+pixman_image_set_repeat (pixman_image_t *image,
+ pixman_repeat_t repeat)
+{
+ if (image->common.repeat == repeat)
+ return;
+
+ image->common.repeat = repeat;
+
+ image_property_changed (image);
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_set_filter (pixman_image_t * image,
+ pixman_filter_t filter,
+ const pixman_fixed_t *params,
+ int n_params)
+{
+ image_common_t *common = (image_common_t *)image;
+ pixman_fixed_t *new_params;
+
+ if (params == common->filter_params && filter == common->filter)
+ return TRUE;
+
+ new_params = NULL;
+ if (params)
+ {
+ new_params = pixman_malloc_ab (n_params, sizeof (pixman_fixed_t));
+ if (!new_params)
+ return FALSE;
+
+ memcpy (new_params,
+ params, n_params * sizeof (pixman_fixed_t));
+ }
+
+ common->filter = filter;
+
+ if (common->filter_params)
+ free (common->filter_params);
+
+ common->filter_params = new_params;
+ common->n_filter_params = n_params;
+
+ image_property_changed (image);
+ return TRUE;
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_source_clipping (pixman_image_t *image,
+ pixman_bool_t clip_sources)
+{
+ if (image->common.clip_sources == clip_sources)
+ return;
+
+ image->common.clip_sources = clip_sources;
+
+ image_property_changed (image);
+}
+
+/* Unlike all the other property setters, this function does not
+ * copy the content of indexed. Doing this copying is simply
+ * way, way too expensive.
+ */
+PIXMAN_EXPORT void
+pixman_image_set_indexed (pixman_image_t * image,
+ const pixman_indexed_t *indexed)
+{
+ bits_image_t *bits = (bits_image_t *)image;
+
+ if (bits->indexed == indexed)
+ return;
+
+ bits->indexed = indexed;
+
+ image_property_changed (image);
+}
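+
+/* Usage sketch (hypothetical caller): since only the pointer is kept,
+ * the pixman_indexed_t must outlive every use of the image:
+ *
+ *     static pixman_indexed_t palette;   // caller-owned, long-lived
+ *
+ *     void
+ *     attach_palette (pixman_image_t *image)
+ *     {
+ *         pixman_image_set_indexed (image, &palette);
+ *         // compositing with `image` is only valid while `palette`
+ *         // remains alive and unchanged
+ *     }
+ */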
+
+PIXMAN_EXPORT void
+pixman_image_set_alpha_map (pixman_image_t *image,
+ pixman_image_t *alpha_map,
+ int16_t x,
+ int16_t y)
+{
+ image_common_t *common = (image_common_t *)image;
+
+ return_if_fail (!alpha_map || alpha_map->type == BITS);
+
+ if (alpha_map && common->alpha_count > 0)
+ {
+ /* If this image is being used as an alpha map itself,
+ * then you can't give it an alpha map of its own.
+ */
+ return;
+ }
+
+ if (alpha_map && alpha_map->common.alpha_map)
+ {
+ /* If the image has an alpha map of its own,
+	 * then it can't be used as an alpha map itself.
+ */
+ return;
+ }
+
+ if (common->alpha_map != (bits_image_t *)alpha_map)
+ {
+ if (common->alpha_map)
+ {
+ common->alpha_map->common.alpha_count--;
+
+ pixman_image_unref ((pixman_image_t *)common->alpha_map);
+ }
+
+ if (alpha_map)
+ {
+ common->alpha_map = (bits_image_t *)pixman_image_ref (alpha_map);
+
+ common->alpha_map->common.alpha_count++;
+ }
+ else
+ {
+ common->alpha_map = NULL;
+ }
+ }
+
+ common->alpha_origin_x = x;
+ common->alpha_origin_y = y;
+
+ image_property_changed (image);
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_component_alpha (pixman_image_t *image,
+ pixman_bool_t component_alpha)
+{
+ if (image->common.component_alpha == component_alpha)
+ return;
+
+ image->common.component_alpha = component_alpha;
+
+ image_property_changed (image);
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_get_component_alpha (pixman_image_t *image)
+{
+ return image->common.component_alpha;
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_accessors (pixman_image_t * image,
+ pixman_read_memory_func_t read_func,
+ pixman_write_memory_func_t write_func)
+{
+ return_if_fail (image != NULL);
+
+ if (image->type == BITS)
+ {
+ image->bits.read_func = read_func;
+ image->bits.write_func = write_func;
+
+ image_property_changed (image);
+ }
+}
+
+PIXMAN_EXPORT uint32_t *
+pixman_image_get_data (pixman_image_t *image)
+{
+ if (image->type == BITS)
+ return image->bits.bits;
+
+ return NULL;
+}
+
+PIXMAN_EXPORT int
+pixman_image_get_width (pixman_image_t *image)
+{
+ if (image->type == BITS)
+ return image->bits.width;
+
+ return 0;
+}
+
+PIXMAN_EXPORT int
+pixman_image_get_height (pixman_image_t *image)
+{
+ if (image->type == BITS)
+ return image->bits.height;
+
+ return 0;
+}
+
+PIXMAN_EXPORT int
+pixman_image_get_stride (pixman_image_t *image)
+{
+ if (image->type == BITS)
+ return image->bits.rowstride * (int) sizeof (uint32_t);
+
+ return 0;
+}
+
+PIXMAN_EXPORT int
+pixman_image_get_depth (pixman_image_t *image)
+{
+ if (image->type == BITS)
+ return PIXMAN_FORMAT_DEPTH (image->bits.format);
+
+ return 0;
+}
+
+PIXMAN_EXPORT pixman_format_code_t
+pixman_image_get_format (pixman_image_t *image)
+{
+ if (image->type == BITS)
+ return image->bits.format;
+
+ return 0;
+}
+
+uint32_t
+_pixman_image_get_solid (pixman_implementation_t *imp,
+ pixman_image_t * image,
+ pixman_format_code_t format)
+{
+ uint32_t result;
+ pixman_iter_t iter;
+
+ _pixman_implementation_src_iter_init (
+ imp, &iter, image, 0, 0, 1, 1,
+ (uint8_t *)&result, ITER_NARROW);
+
+ result = *iter.get_scanline (&iter, NULL);
+
+ /* If necessary, convert RGB <--> BGR. */
+ if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB)
+ {
+ result = (((result & 0xff000000) >> 0) |
+ ((result & 0x00ff0000) >> 16) |
+ ((result & 0x0000ff00) >> 0) |
+ ((result & 0x000000ff) << 16));
+ }
+
+ return result;
+}
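+
+/* Worked example for the swizzle above: with result = 0xAARRGGBB, the
+ * alpha and green bytes stay put while red and blue trade places,
+ * giving 0xAABBGGRR -- i.e. an ARGB <-> ABGR conversion.
+ */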
diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c
index 027234773..88442f70e 100644
--- a/pixman/pixman/pixman-mmx.c
+++ b/pixman/pixman/pixman-mmx.c
@@ -152,6 +152,9 @@ to_m64 (uint64_t x)
#endif
}
+#ifdef _MSC_VER
+#define to_uint64(arg) arg.M64_MEMBER
+#else
static force_inline uint64_t
to_uint64 (__m64 x)
{
@@ -164,6 +167,7 @@ to_uint64 (__m64 x)
return (uint64_t)x;
#endif
}
+#endif
static force_inline __m64
shift (__m64 v,
@@ -310,11 +314,15 @@ pack8888 (__m64 lo, __m64 hi)
return _mm_packs_pu16 (lo, hi);
}
+#ifdef _MSC_VER
+#define store8888(v) _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()))
+#else
static force_inline uint32_t
store8888 (__m64 v)
{
return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()));
}
+#endif
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
*
@@ -417,6 +425,13 @@ pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
/* --------------- MMX code patch for fbcompose.c --------------------- */
+#ifdef _MSC_VER
+#define combine(src, mask) \
+ ((mask) ? \
+ store8888 (pix_multiply (load8888 (*src), expand_alpha (load8888 (*mask)))) \
+ : \
+ *src)
+#else
static force_inline uint32_t
combine (const uint32_t *src, const uint32_t *mask)
{
@@ -435,6 +450,7 @@ combine (const uint32_t *src, const uint32_t *mask)
return ssrc;
}
+#endif
static void
mmx_combine_over_u (pixman_implementation_t *imp,
@@ -1685,7 +1701,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
vdest = pack_565 (
over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
- *dst = to_uint64 (vdest);
+ *dst = to_uint64 (vdest) & 0xffff;
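+ /* vdest holds one packed r5g6b5 pixel in its low 16 bits; the
+ * 0xffff mask (presumably added for MSVC's benefit) makes the
+ * narrowing store to the uint16_t destination explicit. */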
w--;
dst++;
@@ -1728,7 +1744,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
vdest = pack_565 (over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
- *dst = to_uint64 (vdest);
+ *dst = to_uint64 (vdest) & 0xffff;
w--;
dst++;
@@ -1943,7 +1959,7 @@ pixman_fill_mmx (uint32_t *bits,
while (w >= 2 && ((unsigned long)d & 3))
{
- *(uint16_t *)d = xor;
+ *(uint16_t *)d = (xor & 0xffff);
w -= 2;
d += 2;
}
@@ -1996,7 +2012,7 @@ pixman_fill_mmx (uint32_t *bits,
}
while (w >= 2)
{
- *(uint16_t *)d = xor;
+ *(uint16_t *)d = (xor & 0xffff);
w -= 2;
d += 2;
}
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index 696005f75..a983164d4 100644
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -1,6077 +1,6077 @@
-/*
- * Copyright © 2008 Rodrigo Kumpera
- * Copyright © 2008 André Tupinambá
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Red Hat not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. Red Hat makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author: Rodrigo Kumpera (kumpera@gmail.com)
- * André Tupinambá (andrelrt@gmail.com)
- *
- * Based on work by Owen Taylor and Søren Sandmann
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
-#include <emmintrin.h> /* for SSE2 intrinsics */
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-fast-path.h"
-
-static __m128i mask_0080;
-static __m128i mask_00ff;
-static __m128i mask_0101;
-static __m128i mask_ffff;
-static __m128i mask_ff000000;
-static __m128i mask_alpha;
-
-static __m128i mask_565_r;
-static __m128i mask_565_g1, mask_565_g2;
-static __m128i mask_565_b;
-static __m128i mask_red;
-static __m128i mask_green;
-static __m128i mask_blue;
-
-static __m128i mask_565_fix_rb;
-static __m128i mask_565_fix_g;
-
-static force_inline __m128i
-unpack_32_1x128 (uint32_t data)
-{
- return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
-}
-
-static force_inline void
-unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi)
-{
- *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
- *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
-}
-
-static force_inline __m128i
-unpack_565_to_8888 (__m128i lo)
-{
- __m128i r, g, b, rb, t;
-
- r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
- g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
- b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);
-
- rb = _mm_or_si128 (r, b);
- t = _mm_and_si128 (rb, mask_565_fix_rb);
- t = _mm_srli_epi32 (t, 5);
- rb = _mm_or_si128 (rb, t);
-
- t = _mm_and_si128 (g, mask_565_fix_g);
- t = _mm_srli_epi32 (t, 6);
- g = _mm_or_si128 (g, t);
-
- return _mm_or_si128 (rb, g);
-}
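-
-/* The OR-ing of shifted copies above replicates each component's high
- * bits into its empty low bits (>> 5 for red/blue, >> 6 for green), so
- * full intensity expands to exactly 0xff. Scalar equivalent for one
- * 5-bit channel: r8 = (r5 << 3) | (r5 >> 2).
- */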
-
-static force_inline void
-unpack_565_128_4x128 (__m128i data,
- __m128i* data0,
- __m128i* data1,
- __m128i* data2,
- __m128i* data3)
-{
- __m128i lo, hi;
-
- lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
- hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
-
- lo = unpack_565_to_8888 (lo);
- hi = unpack_565_to_8888 (hi);
-
- unpack_128_2x128 (lo, data0, data1);
- unpack_128_2x128 (hi, data2, data3);
-}
-
-static force_inline uint16_t
-pack_565_32_16 (uint32_t pixel)
-{
- return (uint16_t) (((pixel >> 8) & 0xf800) |
- ((pixel >> 5) & 0x07e0) |
- ((pixel >> 3) & 0x001f));
-}
-
-static force_inline __m128i
-pack_2x128_128 (__m128i lo, __m128i hi)
-{
- return _mm_packus_epi16 (lo, hi);
-}
-
-static force_inline __m128i
-pack_565_2x128_128 (__m128i lo, __m128i hi)
-{
- __m128i data;
- __m128i r, g1, g2, b;
-
- data = pack_2x128_128 (lo, hi);
-
- r = _mm_and_si128 (data, mask_565_r);
- g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
- g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
- b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);
-
- return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
-}
-
-static force_inline __m128i
-pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
-{
- return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
- pack_565_2x128_128 (*xmm2, *xmm3));
-}
-
-static force_inline int
-is_opaque (__m128i x)
-{
- __m128i ffs = _mm_cmpeq_epi8 (x, x);
-
- return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
-}
-
-static force_inline int
-is_zero (__m128i x)
-{
- return _mm_movemask_epi8 (
- _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
-}
-
-static force_inline int
-is_transparent (__m128i x)
-{
- return (_mm_movemask_epi8 (
- _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
-}
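-
-/* These three predicates rely on _mm_movemask_epi8 gathering the top
- * bit of each byte: bits 3, 7, 11 and 15 (mask 0x8888) pick out the
- * alpha byte of each of the four ARGB pixels. So is_opaque checks that
- * all four alpha bytes are 0xff, is_transparent checks that they are
- * all zero, and is_zero (mask 0xffff) checks every byte of all four
- * pixels.
- */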
-
-static force_inline __m128i
-expand_pixel_32_1x128 (uint32_t data)
-{
- return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0));
-}
-
-static force_inline __m128i
-expand_alpha_1x128 (__m128i data)
-{
- return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,
- _MM_SHUFFLE (3, 3, 3, 3)),
- _MM_SHUFFLE (3, 3, 3, 3));
-}
-
-static force_inline void
-expand_alpha_2x128 (__m128i data_lo,
- __m128i data_hi,
- __m128i* alpha_lo,
- __m128i* alpha_hi)
-{
- __m128i lo, hi;
-
- lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3));
- hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3));
-
- *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3));
- *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3));
-}
-
-static force_inline void
-expand_alpha_rev_2x128 (__m128i data_lo,
- __m128i data_hi,
- __m128i* alpha_lo,
- __m128i* alpha_hi)
-{
- __m128i lo, hi;
-
- lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0));
- hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0));
- *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0));
- *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0));
-}
-
-static force_inline void
-pix_multiply_2x128 (__m128i* data_lo,
- __m128i* data_hi,
- __m128i* alpha_lo,
- __m128i* alpha_hi,
- __m128i* ret_lo,
- __m128i* ret_hi)
-{
- __m128i lo, hi;
-
- lo = _mm_mullo_epi16 (*data_lo, *alpha_lo);
- hi = _mm_mullo_epi16 (*data_hi, *alpha_hi);
- lo = _mm_adds_epu16 (lo, mask_0080);
- hi = _mm_adds_epu16 (hi, mask_0080);
- *ret_lo = _mm_mulhi_epu16 (lo, mask_0101);
- *ret_hi = _mm_mulhi_epu16 (hi, mask_0101);
-}
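-
-/* The adds/mulhi pair above is the classic exact divide-by-255: with
- * t = x * a for 8-bit x and a, ((t + 0x80) * 0x0101) >> 16 equals
- * t / 255 rounded to the nearest integer. mask_0080 supplies the 0x80
- * bias and mask_0101 the 0x0101 factor in every 16-bit lane.
- */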
-
-static force_inline void
-pix_add_multiply_2x128 (__m128i* src_lo,
- __m128i* src_hi,
- __m128i* alpha_dst_lo,
- __m128i* alpha_dst_hi,
- __m128i* dst_lo,
- __m128i* dst_hi,
- __m128i* alpha_src_lo,
- __m128i* alpha_src_hi,
- __m128i* ret_lo,
- __m128i* ret_hi)
-{
- __m128i t1_lo, t1_hi;
- __m128i t2_lo, t2_hi;
-
- pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi);
- pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi);
-
- *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo);
- *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi);
-}
-
-static force_inline void
-negate_2x128 (__m128i data_lo,
- __m128i data_hi,
- __m128i* neg_lo,
- __m128i* neg_hi)
-{
- *neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
- *neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
-}
-
-static force_inline void
-invert_colors_2x128 (__m128i data_lo,
- __m128i data_hi,
- __m128i* inv_lo,
- __m128i* inv_hi)
-{
- __m128i lo, hi;
-
- lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2));
- hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2));
- *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2));
- *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2));
-}
-
-static force_inline void
-over_2x128 (__m128i* src_lo,
- __m128i* src_hi,
- __m128i* alpha_lo,
- __m128i* alpha_hi,
- __m128i* dst_lo,
- __m128i* dst_hi)
-{
- __m128i t1, t2;
-
- negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
-
- pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
-
- *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo);
- *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi);
-}
-
-static force_inline void
-over_rev_non_pre_2x128 (__m128i src_lo,
- __m128i src_hi,
- __m128i* dst_lo,
- __m128i* dst_hi)
-{
- __m128i lo, hi;
- __m128i alpha_lo, alpha_hi;
-
- expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi);
-
- lo = _mm_or_si128 (alpha_lo, mask_alpha);
- hi = _mm_or_si128 (alpha_hi, mask_alpha);
-
- invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi);
-
- pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi);
-
- over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi);
-}
-
-static force_inline void
-in_over_2x128 (__m128i* src_lo,
- __m128i* src_hi,
- __m128i* alpha_lo,
- __m128i* alpha_hi,
- __m128i* mask_lo,
- __m128i* mask_hi,
- __m128i* dst_lo,
- __m128i* dst_hi)
-{
- __m128i s_lo, s_hi;
- __m128i a_lo, a_hi;
-
- pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
- pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
-
- over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
-}
-
-/* load 4 pixels from a 16-byte aligned address */
-static force_inline __m128i
-load_128_aligned (__m128i* src)
-{
- return _mm_load_si128 (src);
-}
-
-/* load 4 pixels from an unaligned address */
-static force_inline __m128i
-load_128_unaligned (const __m128i* src)
-{
- return _mm_loadu_si128 (src);
-}
-
-/* save 4 pixels with a non-temporal (write-combining) store to a
- * 16-byte aligned address
- */
-static force_inline void
-save_128_write_combining (__m128i* dst,
- __m128i data)
-{
- _mm_stream_si128 (dst, data);
-}
-
-/* save 4 pixels to a 16-byte aligned address */
-static force_inline void
-save_128_aligned (__m128i* dst,
- __m128i data)
-{
- _mm_store_si128 (dst, data);
-}
-
-/* save 4 pixels to an unaligned address */
-static force_inline void
-save_128_unaligned (__m128i* dst,
- __m128i data)
-{
- _mm_storeu_si128 (dst, data);
-}
-
-static force_inline __m128i
-load_32_1x128 (uint32_t data)
-{
- return _mm_cvtsi32_si128 (data);
-}
-
-static force_inline __m128i
-expand_alpha_rev_1x128 (__m128i data)
-{
- return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
-}
-
-static force_inline __m128i
-expand_pixel_8_1x128 (uint8_t data)
-{
- return _mm_shufflelo_epi16 (
- unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
-}
-
-static force_inline __m128i
-pix_multiply_1x128 (__m128i data,
- __m128i alpha)
-{
- return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha),
- mask_0080),
- mask_0101);
-}
-
-static force_inline __m128i
-pix_add_multiply_1x128 (__m128i* src,
- __m128i* alpha_dst,
- __m128i* dst,
- __m128i* alpha_src)
-{
- __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst);
- __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src);
-
- return _mm_adds_epu8 (t1, t2);
-}
-
-static force_inline __m128i
-negate_1x128 (__m128i data)
-{
- return _mm_xor_si128 (data, mask_00ff);
-}
-
-static force_inline __m128i
-invert_colors_1x128 (__m128i data)
-{
- return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
-}
-
-static force_inline __m128i
-over_1x128 (__m128i src, __m128i alpha, __m128i dst)
-{
- return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
-}
-
-static force_inline __m128i
-in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
-{
- return over_1x128 (pix_multiply_1x128 (*src, *mask),
- pix_multiply_1x128 (*alpha, *mask),
- *dst);
-}
-
-static force_inline __m128i
-over_rev_non_pre_1x128 (__m128i src, __m128i dst)
-{
- __m128i alpha = expand_alpha_1x128 (src);
-
- return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src),
- _mm_or_si128 (alpha, mask_alpha)),
- alpha,
- dst);
-}
-
-static force_inline uint32_t
-pack_1x128_32 (__m128i data)
-{
- return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
-}
-
-static force_inline __m128i
-expand565_16_1x128 (uint16_t pixel)
-{
- __m128i m = _mm_cvtsi32_si128 (pixel);
-
- m = unpack_565_to_8888 (m);
-
- return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
-}
-
-static force_inline uint32_t
-core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
-{
- uint8_t a;
- __m128i xmms;
-
- a = src >> 24;
-
- if (a == 0xff)
- {
- return src;
- }
- else if (src)
- {
- xmms = unpack_32_1x128 (src);
- return pack_1x128_32 (
- over_1x128 (xmms, expand_alpha_1x128 (xmms),
- unpack_32_1x128 (dst)));
- }
-
- return dst;
-}
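-
-/* Scalar Porter-Duff OVER for one pixel:
- * dst' = src + (1 - alpha (src)) * dst, per channel. The a == 0xff
- * and src == 0 tests above merely short-circuit the fully-opaque and
- * fully-transparent cases.
- */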
-
-static force_inline uint32_t
-combine1 (const uint32_t *ps, const uint32_t *pm)
-{
- uint32_t s = *ps;
-
- if (pm)
- {
- __m128i ms, mm;
-
- mm = unpack_32_1x128 (*pm);
- mm = expand_alpha_1x128 (mm);
-
- ms = unpack_32_1x128 (s);
- ms = pix_multiply_1x128 (ms, mm);
-
- s = pack_1x128_32 (ms);
- }
-
- return s;
-}
-
-static force_inline __m128i
-combine4 (const __m128i *ps, const __m128i *pm)
-{
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_msk_lo, xmm_msk_hi;
- __m128i s;
-
- if (pm)
- {
- xmm_msk_lo = load_128_unaligned (pm);
-
- if (is_transparent (xmm_msk_lo))
- return _mm_setzero_si128 ();
- }
-
- s = load_128_unaligned (ps);
-
- if (pm)
- {
- unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);
-
- expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);
-
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_msk_lo, &xmm_msk_hi,
- &xmm_src_lo, &xmm_src_hi);
-
- s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
- }
-
- return s;
-}
-
-static force_inline void
-core_combine_over_u_sse2_mask (uint32_t * pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
-{
- uint32_t s, d;
-
- /* Align dst on a 16-byte boundary */
- while (w && ((unsigned long)pd & 15))
- {
- d = *pd;
- s = combine1 (ps, pm);
-
- if (s)
- *pd = core_combine_over_u_pixel_sse2 (s, d);
- pd++;
- ps++;
- pm++;
- w--;
- }
-
- while (w >= 4)
- {
- __m128i mask = load_128_unaligned ((__m128i *)pm);
-
- if (!is_zero (mask))
- {
- __m128i src;
- __m128i src_hi, src_lo;
- __m128i mask_hi, mask_lo;
- __m128i alpha_hi, alpha_lo;
-
- src = load_128_unaligned ((__m128i *)ps);
-
- if (is_opaque (_mm_and_si128 (src, mask)))
- {
- save_128_aligned ((__m128i *)pd, src);
- }
- else
- {
- __m128i dst = load_128_aligned ((__m128i *)pd);
- __m128i dst_hi, dst_lo;
-
- unpack_128_2x128 (mask, &mask_lo, &mask_hi);
- unpack_128_2x128 (src, &src_lo, &src_hi);
-
- expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi);
- pix_multiply_2x128 (&src_lo, &src_hi,
- &mask_lo, &mask_hi,
- &src_lo, &src_hi);
-
- unpack_128_2x128 (dst, &dst_lo, &dst_hi);
-
- expand_alpha_2x128 (src_lo, src_hi,
- &alpha_lo, &alpha_hi);
-
- over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
- &dst_lo, &dst_hi);
-
- save_128_aligned (
- (__m128i *)pd,
- pack_2x128_128 (dst_lo, dst_hi));
- }
- }
-
- pm += 4;
- ps += 4;
- pd += 4;
- w -= 4;
- }
- while (w)
- {
- d = *pd;
- s = combine1 (ps, pm);
-
- if (s)
- *pd = core_combine_over_u_pixel_sse2 (s, d);
- pd++;
- ps++;
- pm++;
-
- w--;
- }
-}
-
-static force_inline void
-core_combine_over_u_sse2_no_mask (uint32_t * pd,
- const uint32_t* ps,
- int w)
-{
- uint32_t s, d;
-
- /* Align dst on a 16-byte boundary */
- while (w && ((unsigned long)pd & 15))
- {
- d = *pd;
- s = *ps;
-
- if (s)
- *pd = core_combine_over_u_pixel_sse2 (s, d);
- pd++;
- ps++;
- w--;
- }
-
- while (w >= 4)
- {
- __m128i src;
- __m128i src_hi, src_lo, dst_hi, dst_lo;
- __m128i alpha_hi, alpha_lo;
-
- src = load_128_unaligned ((__m128i *)ps);
-
- if (!is_zero (src))
- {
- if (is_opaque (src))
- {
- save_128_aligned ((__m128i *)pd, src);
- }
- else
- {
- __m128i dst = load_128_aligned ((__m128i *)pd);
-
- unpack_128_2x128 (src, &src_lo, &src_hi);
- unpack_128_2x128 (dst, &dst_lo, &dst_hi);
-
- expand_alpha_2x128 (src_lo, src_hi,
- &alpha_lo, &alpha_hi);
- over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
- &dst_lo, &dst_hi);
-
- save_128_aligned (
- (__m128i *)pd,
- pack_2x128_128 (dst_lo, dst_hi));
- }
- }
-
- ps += 4;
- pd += 4;
- w -= 4;
- }
- while (w)
- {
- d = *pd;
- s = *ps;
-
- if (s)
- *pd = core_combine_over_u_pixel_sse2 (s, d);
- pd++;
- ps++;
-
- w--;
- }
-}
-
-static force_inline void
-sse2_combine_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- if (pm)
- core_combine_over_u_sse2_mask (pd, ps, pm, w);
- else
- core_combine_over_u_sse2_no_mask (pd, ps, w);
-}
-
-static void
-sse2_combine_over_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, d;
-
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_alpha_lo, xmm_alpha_hi;
-
- /* Align dst on a 16-byte boundary */
- while (w &&
- ((unsigned long)pd & 15))
- {
- d = *pd;
- s = combine1 (ps, pm);
-
- *pd++ = core_combine_over_u_pixel_sse2 (d, s);
- w--;
- ps++;
- if (pm)
- pm++;
- }
-
- while (w >= 4)
- {
- /* I'm loading unaligned because I'm not sure
- * about the address alignment.
- */
- xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
- xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
-
- over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_src_lo, &xmm_src_hi);
-
- /* rebuild the 4 pixel data and save */
- save_128_aligned ((__m128i*)pd,
- pack_2x128_128 (xmm_src_lo, xmm_src_hi));
-
- w -= 4;
- ps += 4;
- pd += 4;
-
- if (pm)
- pm += 4;
- }
-
- while (w)
- {
- d = *pd;
- s = combine1 (ps, pm);
-
- *pd++ = core_combine_over_u_pixel_sse2 (d, s);
- ps++;
- w--;
- if (pm)
- pm++;
- }
-}
-
-static force_inline uint32_t
-core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst)
-{
- uint32_t maska = src >> 24;
-
- if (maska == 0)
- {
- return 0;
- }
- else if (maska != 0xff)
- {
- return pack_1x128_32 (
- pix_multiply_1x128 (unpack_32_1x128 (dst),
- expand_alpha_1x128 (unpack_32_1x128 (src))));
- }
-
- return dst;
-}
-
-static void
-sse2_combine_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, d;
-
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
-
- while (w && ((unsigned long) pd & 15))
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_in_u_pixel_sse2 (d, s);
- w--;
- ps++;
- if (pm)
- pm++;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*) pd);
- xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_dst_lo, &xmm_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned ((__m128i*)pd,
- pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- w -= 4;
- if (pm)
- pm += 4;
- }
-
- while (w)
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_in_u_pixel_sse2 (d, s);
- w--;
- ps++;
- if (pm)
- pm++;
- }
-}
-
-static void
-sse2_combine_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, d;
-
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
-
- while (w && ((unsigned long) pd & 15))
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_in_u_pixel_sse2 (s, d);
- ps++;
- w--;
- if (pm)
- pm++;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*) pd);
- xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
- &xmm_src_lo, &xmm_src_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- w -= 4;
- if (pm)
- pm += 4;
- }
-
- while (w)
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_in_u_pixel_sse2 (s, d);
- w--;
- ps++;
- if (pm)
- pm++;
- }
-}
-
-static void
-sse2_combine_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- while (w && ((unsigned long) pd & 15))
- {
- uint32_t s = combine1 (ps, pm);
- uint32_t d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- unpack_32_1x128 (d), negate_1x128 (
- expand_alpha_1x128 (unpack_32_1x128 (s)))));
-
- if (pm)
- pm++;
- ps++;
- w--;
- }
-
- while (w >= 4)
- {
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
-
- xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
- xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-
- pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
- &xmm_src_lo, &xmm_src_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- if (pm)
- pm += 4;
-
- w -= 4;
- }
-
- while (w)
- {
- uint32_t s = combine1 (ps, pm);
- uint32_t d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- unpack_32_1x128 (d), negate_1x128 (
- expand_alpha_1x128 (unpack_32_1x128 (s)))));
- ps++;
- if (pm)
- pm++;
- w--;
- }
-}
-
-static void
-sse2_combine_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- while (w && ((unsigned long) pd & 15))
- {
- uint32_t s = combine1 (ps, pm);
- uint32_t d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- unpack_32_1x128 (s), negate_1x128 (
- expand_alpha_1x128 (unpack_32_1x128 (d)))));
- w--;
- ps++;
- if (pm)
- pm++;
- }
-
- while (w >= 4)
- {
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
-
- xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
- xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_dst_lo, &xmm_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- w -= 4;
- if (pm)
- pm += 4;
- }
-
- while (w)
- {
- uint32_t s = combine1 (ps, pm);
- uint32_t d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- unpack_32_1x128 (s), negate_1x128 (
- expand_alpha_1x128 (unpack_32_1x128 (d)))));
- w--;
- ps++;
- if (pm)
- pm++;
- }
-}
-
-static force_inline uint32_t
-core_combine_atop_u_pixel_sse2 (uint32_t src,
- uint32_t dst)
-{
- __m128i s = unpack_32_1x128 (src);
- __m128i d = unpack_32_1x128 (dst);
-
- __m128i sa = negate_1x128 (expand_alpha_1x128 (s));
- __m128i da = expand_alpha_1x128 (d);
-
- return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
-}
-
-static void
-sse2_combine_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, d;
-
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
- __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
-
- while (w && ((unsigned long) pd & 15))
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
- w--;
- ps++;
- if (pm)
- pm++;
- }
-
- while (w >= 4)
- {
- xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
- xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi);
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
- &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
- negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-
- pix_add_multiply_2x128 (
- &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- w -= 4;
- if (pm)
- pm += 4;
- }
-
- while (w)
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
- w--;
- ps++;
- if (pm)
- pm++;
- }
-}
-
-static force_inline uint32_t
-core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
- uint32_t dst)
-{
- __m128i s = unpack_32_1x128 (src);
- __m128i d = unpack_32_1x128 (dst);
-
- __m128i sa = expand_alpha_1x128 (s);
- __m128i da = negate_1x128 (expand_alpha_1x128 (d));
-
- return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
-}
-
-static void
-sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, d;
-
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
- __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
-
- while (w && ((unsigned long) pd & 15))
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
- ps++;
- w--;
- if (pm)
- pm++;
- }
-
- while (w >= 4)
- {
- xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
- xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi);
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
- &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
- negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
- &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
- pix_add_multiply_2x128 (
- &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- w -= 4;
- if (pm)
- pm += 4;
- }
-
- while (w)
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
- ps++;
- w--;
- if (pm)
- pm++;
- }
-}
-
-static force_inline uint32_t
-core_combine_xor_u_pixel_sse2 (uint32_t src,
- uint32_t dst)
-{
- __m128i s = unpack_32_1x128 (src);
- __m128i d = unpack_32_1x128 (dst);
-
- __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d));
- __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s));
-
- return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
-}
-
-static void
-sse2_combine_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- int w = width;
- uint32_t s, d;
- uint32_t* pd = dst;
- const uint32_t* ps = src;
- const uint32_t* pm = mask;
-
- __m128i xmm_src, xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
- __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
-
- while (w && ((unsigned long) pd & 15))
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
- w--;
- ps++;
- if (pm)
- pm++;
- }
-
- while (w >= 4)
- {
- xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
- xmm_dst = load_128_aligned ((__m128i*) pd);
-
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi);
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
- &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
- negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi);
- negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
- &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
- pix_add_multiply_2x128 (
- &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- w -= 4;
- if (pm)
- pm += 4;
- }
-
- while (w)
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
- w--;
- ps++;
- if (pm)
- pm++;
- }
-}
-
-static force_inline void
-sse2_combine_add_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- int w = width;
- uint32_t s, d;
- uint32_t* pd = dst;
- const uint32_t* ps = src;
- const uint32_t* pm = mask;
-
- while (w && (unsigned long)pd & 15)
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- ps++;
- if (pm)
- pm++;
- *pd++ = _mm_cvtsi128_si32 (
- _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
- w--;
- }
-
- while (w >= 4)
- {
- __m128i s;
-
- s = combine4 ((__m128i*)ps, (__m128i*)pm);
-
- save_128_aligned (
- (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd)));
-
- pd += 4;
- ps += 4;
- if (pm)
- pm += 4;
- w -= 4;
- }
-
- while (w--)
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- ps++;
- *pd++ = _mm_cvtsi128_si32 (
- _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
- if (pm)
- pm++;
- }
-}
-
-static force_inline uint32_t
-core_combine_saturate_u_pixel_sse2 (uint32_t src,
- uint32_t dst)
-{
- __m128i ms = unpack_32_1x128 (src);
- __m128i md = unpack_32_1x128 (dst);
- uint32_t sa = src >> 24;
- uint32_t da = ~dst >> 24;
-
- if (sa > da)
- {
- ms = pix_multiply_1x128 (
- ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24)));
- }
-
- return pack_1x128_32 (_mm_adds_epu16 (md, ms));
-}
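-
-/* SATURATE: when the incoming alpha sa exceeds the remaining headroom
- * da = 255 - alpha (dst), the source is pre-scaled by da / sa (via
- * DIV_UN8) so that the final _mm_adds_epu16 does not clamp in the
- * alpha channel (up to rounding).
- */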
-
-static void
-sse2_combine_saturate_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, d;
-
- uint32_t pack_cmp;
- __m128i xmm_src, xmm_dst;
-
- while (w && (unsigned long)pd & 15)
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
- w--;
- ps++;
- if (pm)
- pm++;
- }
-
- while (w >= 4)
- {
- xmm_dst = load_128_aligned ((__m128i*)pd);
- xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
-
- pack_cmp = _mm_movemask_epi8 (
- _mm_cmpgt_epi32 (
- _mm_srli_epi32 (xmm_src, 24),
- _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
-
- /* if some src alpha is greater than the respective ~dst alpha */
- if (pack_cmp)
- {
- s = combine1 (ps++, pm);
- d = *pd;
- *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
- if (pm)
- pm++;
-
- s = combine1 (ps++, pm);
- d = *pd;
- *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
- if (pm)
- pm++;
-
- s = combine1 (ps++, pm);
- d = *pd;
- *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
- if (pm)
- pm++;
-
- s = combine1 (ps++, pm);
- d = *pd;
- *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
- if (pm)
- pm++;
- }
- else
- {
- save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
-
- pd += 4;
- ps += 4;
- if (pm)
- pm += 4;
- }
-
- w -= 4;
- }
-
- while (w--)
- {
- s = combine1 (ps, pm);
- d = *pd;
-
- *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
- ps++;
- if (pm)
- pm++;
- }
-}
-
-static void
-sse2_combine_src_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m;
-
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
- w--;
- }
-
- while (w >= 4)
- {
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
- w--;
- }
-}
-
-static force_inline uint32_t
-core_combine_over_ca_pixel_sse2 (uint32_t src,
- uint32_t mask,
- uint32_t dst)
-{
- __m128i s = unpack_32_1x128 (src);
- __m128i expAlpha = expand_alpha_1x128 (s);
- __m128i unpk_mask = unpack_32_1x128 (mask);
- __m128i unpk_dst = unpack_32_1x128 (dst);
-
- return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst));
-}
-
-static void
-sse2_combine_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m, d;
-
- __m128i xmm_alpha_lo, xmm_alpha_hi;
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
- w--;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*)pd);
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
-
- in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
- w--;
- }
-}
-
-static force_inline uint32_t
-core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
- uint32_t mask,
- uint32_t dst)
-{
- __m128i d = unpack_32_1x128 (dst);
-
- return pack_1x128_32 (
- over_1x128 (d, expand_alpha_1x128 (d),
- pix_multiply_1x128 (unpack_32_1x128 (src),
- unpack_32_1x128 (mask))));
-}
-
-static void
-sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m, d;
-
- __m128i xmm_alpha_lo, xmm_alpha_hi;
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
- w--;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*)pd);
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
- w--;
- }
-}
-
-static void
-sse2_combine_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m, d;
-
- __m128i xmm_alpha_lo, xmm_alpha_hi;
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)),
- expand_alpha_1x128 (unpack_32_1x128 (d))));
-
- w--;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*)pd);
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
-
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- pix_multiply_1x128 (
- unpack_32_1x128 (s), unpack_32_1x128 (m)),
- expand_alpha_1x128 (unpack_32_1x128 (d))));
-
- w--;
- }
-}
-
-static void
-sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m, d;
-
- __m128i xmm_alpha_lo, xmm_alpha_hi;
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- unpack_32_1x128 (d),
- pix_multiply_1x128 (unpack_32_1x128 (m),
- expand_alpha_1x128 (unpack_32_1x128 (s)))));
- w--;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*)pd);
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
- pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
-
- pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- unpack_32_1x128 (d),
- pix_multiply_1x128 (unpack_32_1x128 (m),
- expand_alpha_1x128 (unpack_32_1x128 (s)))));
- w--;
- }
-}
-
-static void
-sse2_combine_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m, d;
-
- __m128i xmm_alpha_lo, xmm_alpha_hi;
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- pix_multiply_1x128 (
- unpack_32_1x128 (s), unpack_32_1x128 (m)),
- negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
- w--;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*)pd);
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
- negate_2x128 (xmm_alpha_lo, xmm_alpha_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
-
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst_lo, &xmm_dst_hi);
- pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- pix_multiply_1x128 (
- unpack_32_1x128 (s), unpack_32_1x128 (m)),
- negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
-
- w--;
- }
-}
-
-static void
-sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m, d;
-
- __m128i xmm_alpha_lo, xmm_alpha_hi;
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- unpack_32_1x128 (d),
- negate_1x128 (pix_multiply_1x128 (
- unpack_32_1x128 (m),
- expand_alpha_1x128 (unpack_32_1x128 (s))))));
- w--;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*)pd);
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
-
- pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- negate_2x128 (xmm_mask_lo, xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- pix_multiply_1x128 (
- unpack_32_1x128 (d),
- negate_1x128 (pix_multiply_1x128 (
- unpack_32_1x128 (m),
- expand_alpha_1x128 (unpack_32_1x128 (s))))));
- w--;
- }
-}
-
-static force_inline uint32_t
-core_combine_atop_ca_pixel_sse2 (uint32_t src,
- uint32_t mask,
- uint32_t dst)
-{
- __m128i m = unpack_32_1x128 (mask);
- __m128i s = unpack_32_1x128 (src);
- __m128i d = unpack_32_1x128 (dst);
- __m128i sa = expand_alpha_1x128 (s);
- __m128i da = expand_alpha_1x128 (d);
-
- s = pix_multiply_1x128 (s, m);
- m = negate_1x128 (pix_multiply_1x128 (m, sa));
-
- return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
-}
-
-static void
-sse2_combine_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m, d;
-
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
- __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
- w--;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*)pd);
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi);
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
- &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_src_lo, &xmm_src_hi);
- pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- pix_add_multiply_2x128 (
- &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
- &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
- w--;
- }
-}
-
-static force_inline uint32_t
-core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
- uint32_t mask,
- uint32_t dst)
-{
- __m128i m = unpack_32_1x128 (mask);
- __m128i s = unpack_32_1x128 (src);
- __m128i d = unpack_32_1x128 (dst);
-
- __m128i da = negate_1x128 (expand_alpha_1x128 (d));
- __m128i sa = expand_alpha_1x128 (s);
-
- s = pix_multiply_1x128 (s, m);
- m = pix_multiply_1x128 (m, sa);
-
- return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
-}
-
-static void
-sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m, d;
-
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
- __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
- w--;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*)pd);
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi);
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
- &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_src_lo, &xmm_src_hi);
- pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
- &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
- pix_add_multiply_2x128 (
- &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
- &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
- w--;
- }
-}
-
-static force_inline uint32_t
-core_combine_xor_ca_pixel_sse2 (uint32_t src,
- uint32_t mask,
- uint32_t dst)
-{
- __m128i a = unpack_32_1x128 (mask);
- __m128i s = unpack_32_1x128 (src);
- __m128i d = unpack_32_1x128 (dst);
-
- __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 (
- a, expand_alpha_1x128 (s)));
- __m128i dest = pix_multiply_1x128 (s, a);
- __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d));
-
- return pack_1x128_32 (pix_add_multiply_1x128 (&d,
- &alpha_dst,
- &dest,
- &alpha_src));
-}
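-
-/* Spelled out, the helper above computes the component-alpha XOR:
- * dest * (1 - mask * src_alpha) + (src * mask) * (1 - dest_alpha),
- * per channel, assuming pix_add_multiply_1x128 (a, b, c, d) yields
- * a * b + c * d.
- */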
-
-static void
-sse2_combine_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m, d;
-
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
- __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
- w--;
- }
-
- while (w >= 4)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*)pd);
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi);
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
- &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_src_lo, &xmm_src_hi);
- pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
- &xmm_alpha_src_lo, &xmm_alpha_src_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
- &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
- negate_2x128 (xmm_mask_lo, xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- pix_add_multiply_2x128 (
- &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
- &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
- w--;
- }
-}
-
-static void
-sse2_combine_add_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * pd,
- const uint32_t * ps,
- const uint32_t * pm,
- int w)
-{
- uint32_t s, m, d;
-
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask_lo, xmm_mask_hi;
-
- while (w && (unsigned long)pd & 15)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
- unpack_32_1x128 (m)),
- unpack_32_1x128 (d)));
- w--;
- }
-
- while (w >= 4)
- {
- xmm_src_hi = load_128_unaligned ((__m128i*)ps);
- xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
- xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_src_lo, &xmm_src_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (
- _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
- _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
- unpack_32_1x128 (m)),
- unpack_32_1x128 (d)));
- w--;
- }
-}
-
-static force_inline __m128i
-create_mask_16_128 (uint16_t mask)
-{
- return _mm_set1_epi16 (mask);
-}
-
-/* Work around a code generation bug in Sun Studio 12. */
-#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
-# define create_mask_2x32_128(mask0, mask1) \
- (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
-#else
-static force_inline __m128i
-create_mask_2x32_128 (uint32_t mask0,
- uint32_t mask1)
-{
- return _mm_set_epi32 (mask0, mask1, mask0, mask1);
-}
-#endif
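-
-/* For example, create_mask_2x32_128 (0xff000000, 0xff000000) yields a
- * vector whose four 32-bit lanes all hold 0xff000000 -- the shape of
- * the mask_ff000000 constant used elsewhere in this file.
- */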
-
-static void
-sse2_composite_over_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src;
- uint32_t *dst_line, *dst, d;
- int32_t w;
- int dst_stride;
- __m128i xmm_src, xmm_alpha;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
- xmm_src = expand_pixel_32_1x128 (src);
- xmm_alpha = expand_alpha_1x128 (xmm_src);
-
- while (height--)
- {
- dst = dst_line;
-
- dst_line += dst_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- d = *dst;
- *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
- xmm_alpha,
- unpack_32_1x128 (d)));
- w--;
- }
-
- while (w >= 4)
- {
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- over_2x128 (&xmm_src, &xmm_src,
- &xmm_alpha, &xmm_alpha,
- &xmm_dst_lo, &xmm_dst_hi);
-
-            /* rebuild the 4 pixel data and save */
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- w -= 4;
- dst += 4;
- }
-
- while (w)
- {
- d = *dst;
- *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
- xmm_alpha,
- unpack_32_1x128 (d)));
- w--;
- }
-
- }
-}
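-
-/* Most fast paths below follow the shape of this one: a scalar head
- * loop runs until dst reaches a 16-byte boundary, a vector body then
- * handles one 128-bit register of pixels per iteration (4 at 32 bpp,
- * 8 at 16 bpp, 16 at 8 bpp) with aligned stores, and a scalar tail
- * loop finishes the line.
- */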
-
-static void
-sse2_composite_over_n_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src;
- uint16_t *dst_line, *dst, d;
- int32_t w;
- int dst_stride;
- __m128i xmm_src, xmm_alpha;
- __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- xmm_src = expand_pixel_32_1x128 (src);
- xmm_alpha = expand_alpha_1x128 (xmm_src);
-
- while (height--)
- {
- dst = dst_line;
-
- dst_line += dst_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- d = *dst;
-
- *dst++ = pack_565_32_16 (
- pack_1x128_32 (over_1x128 (xmm_src,
- xmm_alpha,
- expand565_16_1x128 (d))));
- w--;
- }
-
- while (w >= 8)
- {
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- unpack_565_128_4x128 (xmm_dst,
- &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-
- over_2x128 (&xmm_src, &xmm_src,
- &xmm_alpha, &xmm_alpha,
- &xmm_dst0, &xmm_dst1);
- over_2x128 (&xmm_src, &xmm_src,
- &xmm_alpha, &xmm_alpha,
- &xmm_dst2, &xmm_dst3);
-
- xmm_dst = pack_565_4x128_128 (
- &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-
- save_128_aligned ((__m128i*)dst, xmm_dst);
-
- dst += 8;
- w -= 8;
- }
-
- while (w--)
- {
- d = *dst;
- *dst++ = pack_565_32_16 (
- pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha,
- expand565_16_1x128 (d))));
- }
- }
-
-}
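-
-/* The r5g6b5 body above works on 8 pixels at a time: one aligned
- * 128-bit load carries eight 16-bit pixels, unpack_565_128_4x128
- * widens them into four registers of unpacked channel data, and
- * pack_565_4x128_128 narrows the blended result back.
- */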
-
-static void
-sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst_line, d;
- uint32_t *mask_line, m;
- uint32_t pack_cmp;
- int dst_stride, mask_stride;
-
- __m128i xmm_src, xmm_alpha;
- __m128i xmm_dst;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
- __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- srca = src >> 24;
-
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- xmm_src = _mm_unpacklo_epi8 (
- create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
- xmm_alpha = expand_alpha_1x128 (xmm_src);
- mmx_src = xmm_src;
- mmx_alpha = xmm_alpha;
-
- while (height--)
- {
- int w = width;
- const uint32_t *pm = (uint32_t *)mask_line;
- uint32_t *pd = (uint32_t *)dst_line;
-
- dst_line += dst_stride;
- mask_line += mask_stride;
-
- while (w && (unsigned long)pd & 15)
- {
- m = *pm++;
-
- if (m)
- {
- d = *pd;
-
- mmx_mask = unpack_32_1x128 (m);
- mmx_dest = unpack_32_1x128 (d);
-
- *pd = pack_1x128_32 (
- _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
- mmx_dest));
- }
-
- pd++;
- w--;
- }
-
- while (w >= 4)
- {
- xmm_mask = load_128_unaligned ((__m128i*)pm);
-
- pack_cmp =
- _mm_movemask_epi8 (
- _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
-
-            /* if all bits in mask are zero, pack_cmp is equal to 0xffff */
- if (pack_cmp != 0xffff)
- {
- xmm_dst = load_128_aligned ((__m128i*)pd);
-
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
- pix_multiply_2x128 (&xmm_src, &xmm_src,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
- xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi);
-
- save_128_aligned (
- (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst));
- }
-
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- m = *pm++;
-
- if (m)
- {
- d = *pd;
-
- mmx_mask = unpack_32_1x128 (m);
- mmx_dest = unpack_32_1x128 (d);
-
- *pd = pack_1x128_32 (
- _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
- mmx_dest));
- }
-
- pd++;
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src;
- uint32_t *dst_line, d;
- uint32_t *mask_line, m;
- uint32_t pack_cmp;
- int dst_stride, mask_stride;
-
- __m128i xmm_src, xmm_alpha;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
- __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- xmm_src = _mm_unpacklo_epi8 (
- create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
- xmm_alpha = expand_alpha_1x128 (xmm_src);
- mmx_src = xmm_src;
- mmx_alpha = xmm_alpha;
-
- while (height--)
- {
- int w = width;
- const uint32_t *pm = (uint32_t *)mask_line;
- uint32_t *pd = (uint32_t *)dst_line;
-
- dst_line += dst_stride;
- mask_line += mask_stride;
-
- while (w && (unsigned long)pd & 15)
- {
- m = *pm++;
-
- if (m)
- {
- d = *pd;
- mmx_mask = unpack_32_1x128 (m);
- mmx_dest = unpack_32_1x128 (d);
-
- *pd = pack_1x128_32 (in_over_1x128 (&mmx_src,
- &mmx_alpha,
- &mmx_mask,
- &mmx_dest));
- }
-
- pd++;
- w--;
- }
-
- while (w >= 4)
- {
- xmm_mask = load_128_unaligned ((__m128i*)pm);
-
- pack_cmp =
- _mm_movemask_epi8 (
- _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
-
-            /* if all bits in mask are zero, pack_cmp is equal to 0xffff */
- if (pack_cmp != 0xffff)
- {
- xmm_dst = load_128_aligned ((__m128i*)pd);
-
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- in_over_2x128 (&xmm_src, &xmm_src,
- &xmm_alpha, &xmm_alpha,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
- }
-
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- m = *pm++;
-
- if (m)
- {
- d = *pd;
- mmx_mask = unpack_32_1x128 (m);
- mmx_dest = unpack_32_1x128 (d);
-
- *pd = pack_1x128_32 (
- in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
- }
-
- pd++;
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- uint32_t mask;
- int32_t w;
- int dst_stride, src_stride;
-
- __m128i xmm_mask;
- __m128i xmm_src, xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_alpha_lo, xmm_alpha_hi;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
-
- xmm_mask = create_mask_16_128 (mask >> 24);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- uint32_t s = *src++;
-
- if (s)
- {
- uint32_t d = *dst;
-
- __m128i ms = unpack_32_1x128 (s);
- __m128i alpha = expand_alpha_1x128 (ms);
- __m128i dest = xmm_mask;
- __m128i alpha_dst = unpack_32_1x128 (d);
-
- *dst = pack_1x128_32 (
- in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
- }
- dst++;
- w--;
- }
-
- while (w >= 4)
- {
- xmm_src = load_128_unaligned ((__m128i*)src);
-
- if (!is_zero (xmm_src))
- {
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
-
- in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_mask, &xmm_mask,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
- }
-
- dst += 4;
- src += 4;
- w -= 4;
- }
-
- while (w)
- {
- uint32_t s = *src++;
-
- if (s)
- {
- uint32_t d = *dst;
-
- __m128i ms = unpack_32_1x128 (s);
- __m128i alpha = expand_alpha_1x128 (ms);
- __m128i mask = xmm_mask;
- __m128i dest = unpack_32_1x128 (d);
-
- *dst = pack_1x128_32 (
- in_over_1x128 (&ms, &alpha, &mask, &dest));
- }
-
- dst++;
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int32_t w;
- int dst_stride, src_stride;
-
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- *dst++ = *src++ | 0xff000000;
- w--;
- }
-
- while (w >= 16)
- {
- __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
-
- xmm_src1 = load_128_unaligned ((__m128i*)src + 0);
- xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
- xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
- xmm_src4 = load_128_unaligned ((__m128i*)src + 3);
-
- save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000));
- save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000));
- save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000));
- save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000));
-
- dst += 16;
- src += 16;
- w -= 16;
- }
-
- while (w)
- {
- *dst++ = *src++ | 0xff000000;
- w--;
- }
- }
-
-}
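-
-/* Converting x8r8g8b8 to a8r8g8b8 only requires forcing the alpha
- * byte to 0xff, so the vector body simply ORs mask_ff000000 into 16
- * pixels (four 128-bit registers) per iteration.
- */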
-
-static void
-sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- uint32_t mask;
- int dst_stride, src_stride;
- int32_t w;
-
- __m128i xmm_mask, xmm_alpha;
- __m128i xmm_src, xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
-
- xmm_mask = create_mask_16_128 (mask >> 24);
- xmm_alpha = mask_00ff;
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- uint32_t s = (*src++) | 0xff000000;
- uint32_t d = *dst;
-
- __m128i src = unpack_32_1x128 (s);
- __m128i alpha = xmm_alpha;
- __m128i mask = xmm_mask;
- __m128i dest = unpack_32_1x128 (d);
-
- *dst++ = pack_1x128_32 (
- in_over_1x128 (&src, &alpha, &mask, &dest));
-
- w--;
- }
-
- while (w >= 4)
- {
- xmm_src = _mm_or_si128 (
- load_128_unaligned ((__m128i*)src), mask_ff000000);
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_alpha, &xmm_alpha,
- &xmm_mask, &xmm_mask,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- dst += 4;
- src += 4;
- w -= 4;
-
- }
-
- while (w)
- {
- uint32_t s = (*src++) | 0xff000000;
- uint32_t d = *dst;
-
- __m128i src = unpack_32_1x128 (s);
- __m128i alpha = xmm_alpha;
- __m128i mask = xmm_mask;
- __m128i dest = unpack_32_1x128 (d);
-
- *dst++ = pack_1x128_32 (
- in_over_1x128 (&src, &alpha, &mask, &dest));
-
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- int dst_stride, src_stride;
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- dst = dst_line;
- src = src_line;
-
- while (height--)
- {
- sse2_combine_over_u (imp, op, dst, src, NULL, width);
-
- dst += dst_stride;
- src += src_stride;
- }
-}
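-
-/* OVER between two a8r8g8b8 images needs no per-pixel setup, so this
- * path just runs the generic sse2_combine_over_u combiner once per
- * scanline.
- */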
-
-static force_inline uint16_t
-composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
-{
- __m128i ms;
-
- ms = unpack_32_1x128 (src);
- return pack_565_32_16 (
- pack_1x128_32 (
- over_1x128 (
- ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst))));
-}
-
-static void
-sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *dst, d;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- int32_t w;
-
- __m128i xmm_alpha_lo, xmm_alpha_hi;
- __m128i xmm_src, xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- src = src_line;
-
- dst_line += dst_stride;
- src_line += src_stride;
- w = width;
-
- /* Align dst on a 16-byte boundary */
- while (w &&
- ((unsigned long)dst & 15))
- {
- s = *src++;
- d = *dst;
-
- *dst++ = composite_over_8888_0565pixel (s, d);
- w--;
- }
-
-        /* It's an 8-pixel loop */
- while (w >= 8)
- {
-            /* Load unaligned because the source address
-             * alignment is not guaranteed.
-             */
- xmm_src = load_128_unaligned ((__m128i*) src);
- xmm_dst = load_128_aligned ((__m128i*) dst);
-
- /* Unpacking */
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- unpack_565_128_4x128 (xmm_dst,
- &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
-
-            /* Load the next 4 pixels from memory ahead of
-             * time to optimize the memory read.
-             */
- xmm_src = load_128_unaligned ((__m128i*) (src + 4));
-
- over_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_dst0, &xmm_dst1);
-
- /* Unpacking */
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
-
- over_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_dst2, &xmm_dst3);
-
- save_128_aligned (
- (__m128i*)dst, pack_565_4x128_128 (
- &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
- w -= 8;
- dst += 8;
- src += 8;
- }
-
- while (w--)
- {
- s = *src++;
- d = *dst;
-
- *dst++ = composite_over_8888_0565pixel (s, d);
- }
- }
-
-}
-
-static void
-sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- int32_t w;
- uint32_t m, d;
-
- __m128i xmm_src, xmm_alpha, xmm_def;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
- __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- xmm_def = create_mask_2x32_128 (src, src);
- xmm_src = expand_pixel_32_1x128 (src);
- xmm_alpha = expand_alpha_1x128 (xmm_src);
- mmx_src = xmm_src;
- mmx_alpha = xmm_alpha;
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- uint8_t m = *mask++;
-
- if (m)
- {
- d = *dst;
- mmx_mask = expand_pixel_8_1x128 (m);
- mmx_dest = unpack_32_1x128 (d);
-
- *dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
- &mmx_alpha,
- &mmx_mask,
- &mmx_dest));
- }
-
- w--;
- dst++;
- }
-
- while (w >= 4)
- {
- m = *((uint32_t*)mask);
-
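-            /* Opaque solid source under a fully-set mask: the result
-             * is the solid color itself, pre-packed in xmm_def.
-             */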
- if (srca == 0xff && m == 0xffffffff)
- {
- save_128_aligned ((__m128i*)dst, xmm_def);
- }
- else if (m)
- {
- xmm_dst = load_128_aligned ((__m128i*) dst);
- xmm_mask = unpack_32_1x128 (m);
- xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
-
- /* Unpacking */
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- in_over_2x128 (&xmm_src, &xmm_src,
- &xmm_alpha, &xmm_alpha,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
- }
-
- w -= 4;
- dst += 4;
- mask += 4;
- }
-
- while (w)
- {
- uint8_t m = *mask++;
-
- if (m)
- {
- d = *dst;
- mmx_mask = expand_pixel_8_1x128 (m);
- mmx_dest = unpack_32_1x128 (d);
-
- *dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
- &mmx_alpha,
- &mmx_mask,
- &mmx_dest));
- }
-
- w--;
- dst++;
- }
- }
-
-}
-
-static pixman_bool_t
-pixman_fill_sse2 (uint32_t *bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t data)
-{
- uint32_t byte_width;
- uint8_t *byte_line;
-
- __m128i xmm_def;
-
- if (bpp == 8)
- {
- uint8_t b;
- uint16_t w;
-
- stride = stride * (int) sizeof (uint32_t) / 1;
- byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
- byte_width = width;
- stride *= 1;
-
- b = data & 0xff;
- w = (b << 8) | b;
- data = (w << 16) | w;
- }
- else if (bpp == 16)
- {
- stride = stride * (int) sizeof (uint32_t) / 2;
- byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
- byte_width = 2 * width;
- stride *= 2;
-
- data = (data & 0xffff) * 0x00010001;
- }
- else if (bpp == 32)
- {
- stride = stride * (int) sizeof (uint32_t) / 4;
- byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
- byte_width = 4 * width;
- stride *= 4;
- }
- else
- {
- return FALSE;
- }
-
- xmm_def = create_mask_2x32_128 (data, data);
-
- while (height--)
- {
- int w;
- uint8_t *d = byte_line;
- byte_line += stride;
- w = byte_width;
-
- while (w >= 1 && ((unsigned long)d & 1))
- {
- *(uint8_t *)d = data;
- w -= 1;
- d += 1;
- }
-
- while (w >= 2 && ((unsigned long)d & 3))
- {
- *(uint16_t *)d = data;
- w -= 2;
- d += 2;
- }
-
- while (w >= 4 && ((unsigned long)d & 15))
- {
- *(uint32_t *)d = data;
-
- w -= 4;
- d += 4;
- }
-
- while (w >= 128)
- {
- save_128_aligned ((__m128i*)(d), xmm_def);
- save_128_aligned ((__m128i*)(d + 16), xmm_def);
- save_128_aligned ((__m128i*)(d + 32), xmm_def);
- save_128_aligned ((__m128i*)(d + 48), xmm_def);
- save_128_aligned ((__m128i*)(d + 64), xmm_def);
- save_128_aligned ((__m128i*)(d + 80), xmm_def);
- save_128_aligned ((__m128i*)(d + 96), xmm_def);
- save_128_aligned ((__m128i*)(d + 112), xmm_def);
-
- d += 128;
- w -= 128;
- }
-
- if (w >= 64)
- {
- save_128_aligned ((__m128i*)(d), xmm_def);
- save_128_aligned ((__m128i*)(d + 16), xmm_def);
- save_128_aligned ((__m128i*)(d + 32), xmm_def);
- save_128_aligned ((__m128i*)(d + 48), xmm_def);
-
- d += 64;
- w -= 64;
- }
-
- if (w >= 32)
- {
- save_128_aligned ((__m128i*)(d), xmm_def);
- save_128_aligned ((__m128i*)(d + 16), xmm_def);
-
- d += 32;
- w -= 32;
- }
-
- if (w >= 16)
- {
- save_128_aligned ((__m128i*)(d), xmm_def);
-
- d += 16;
- w -= 16;
- }
-
- while (w >= 4)
- {
- *(uint32_t *)d = data;
-
- w -= 4;
- d += 4;
- }
-
- if (w >= 2)
- {
- *(uint16_t *)d = data;
- w -= 2;
- d += 2;
- }
-
- if (w >= 1)
- {
- *(uint8_t *)d = data;
- w -= 1;
- d += 1;
- }
- }
-
- return TRUE;
-}
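-
-/* pixman_fill_sse2 first replicates the fill value to 32 bits for the
- * 8 and 16 bpp cases, then walks each scanline with progressively
- * wider stores: byte/halfword/word stores until the pointer is
- * 16-byte aligned, 128-byte bursts of aligned stores, and then
- * smaller power-of-two chunks for the tail.
- */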
-
-static void
-sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- int32_t w;
- uint32_t m;
-
- __m128i xmm_src, xmm_def;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- {
- pixman_fill_sse2 (dst_image->bits.bits, dst_image->bits.rowstride,
- PIXMAN_FORMAT_BPP (dst_image->bits.format),
- dest_x, dest_y, width, height, 0);
- return;
- }
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- xmm_def = create_mask_2x32_128 (src, src);
- xmm_src = expand_pixel_32_1x128 (src);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- uint8_t m = *mask++;
-
- if (m)
- {
- *dst = pack_1x128_32 (
- pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)));
- }
- else
- {
- *dst = 0;
- }
-
- w--;
- dst++;
- }
-
- while (w >= 4)
- {
- m = *((uint32_t*)mask);
-
- if (srca == 0xff && m == 0xffffffff)
- {
- save_128_aligned ((__m128i*)dst, xmm_def);
- }
- else if (m)
- {
- xmm_mask = unpack_32_1x128 (m);
- xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
-
- /* Unpacking */
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- pix_multiply_2x128 (&xmm_src, &xmm_src,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
- }
- else
- {
- save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ());
- }
-
- w -= 4;
- dst += 4;
- mask += 4;
- }
-
- while (w)
- {
- uint8_t m = *mask++;
-
- if (m)
- {
- *dst = pack_1x128_32 (
- pix_multiply_1x128 (
- xmm_src, expand_pixel_8_1x128 (m)));
- }
- else
- {
- *dst = 0;
- }
-
- w--;
- dst++;
- }
- }
-
-}
-
-static void
-sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst_line, *dst, d;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- int32_t w;
- uint32_t m;
- __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
-
- __m128i xmm_src, xmm_alpha;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- xmm_src = expand_pixel_32_1x128 (src);
- xmm_alpha = expand_alpha_1x128 (xmm_src);
- mmx_src = xmm_src;
- mmx_alpha = xmm_alpha;
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- m = *mask++;
-
- if (m)
- {
- d = *dst;
- mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
- mmx_dest = expand565_16_1x128 (d);
-
- *dst = pack_565_32_16 (
- pack_1x128_32 (
- in_over_1x128 (
- &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
- }
-
- w--;
- dst++;
- }
-
- while (w >= 8)
- {
- xmm_dst = load_128_aligned ((__m128i*) dst);
- unpack_565_128_4x128 (xmm_dst,
- &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-
- m = *((uint32_t*)mask);
- mask += 4;
-
- if (m)
- {
- xmm_mask = unpack_32_1x128 (m);
- xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
-
- /* Unpacking */
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- in_over_2x128 (&xmm_src, &xmm_src,
- &xmm_alpha, &xmm_alpha,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst0, &xmm_dst1);
- }
-
- m = *((uint32_t*)mask);
- mask += 4;
-
- if (m)
- {
- xmm_mask = unpack_32_1x128 (m);
- xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
-
- /* Unpacking */
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
- expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
- in_over_2x128 (&xmm_src, &xmm_src,
- &xmm_alpha, &xmm_alpha,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst2, &xmm_dst3);
- }
-
- save_128_aligned (
- (__m128i*)dst, pack_565_4x128_128 (
- &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
- w -= 8;
- dst += 8;
- }
-
- while (w)
- {
- m = *mask++;
-
- if (m)
- {
- d = *dst;
- mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
- mmx_dest = expand565_16_1x128 (d);
-
- *dst = pack_565_32_16 (
- pack_1x128_32 (
- in_over_1x128 (
- &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
- }
-
- w--;
- dst++;
- }
- }
-
-}
-
-static void
-sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *dst, d;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- int32_t w;
- uint32_t opaque, zero;
-
- __m128i ms;
- __m128i xmm_src, xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- s = *src++;
- d = *dst;
-
- ms = unpack_32_1x128 (s);
-
- *dst++ = pack_565_32_16 (
- pack_1x128_32 (
- over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
- w--;
- }
-
- while (w >= 8)
- {
- /* First round */
- xmm_src = load_128_unaligned ((__m128i*)src);
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- opaque = is_opaque (xmm_src);
- zero = is_zero (xmm_src);
-
- unpack_565_128_4x128 (xmm_dst,
- &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-
-            /* preload next round */
- xmm_src = load_128_unaligned ((__m128i*)(src + 4));
-
- if (opaque)
- {
- invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_dst0, &xmm_dst1);
- }
- else if (!zero)
- {
- over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_dst0, &xmm_dst1);
- }
-
- /* Second round */
- opaque = is_opaque (xmm_src);
- zero = is_zero (xmm_src);
-
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-
- if (opaque)
- {
- invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_dst2, &xmm_dst3);
- }
- else if (!zero)
- {
- over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_dst2, &xmm_dst3);
- }
-
- save_128_aligned (
- (__m128i*)dst, pack_565_4x128_128 (
- &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
- w -= 8;
- src += 8;
- dst += 8;
- }
-
- while (w)
- {
- s = *src++;
- d = *dst;
-
- ms = unpack_32_1x128 (s);
-
- *dst++ = pack_565_32_16 (
- pack_1x128_32 (
- over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst, d;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- int32_t w;
- uint32_t opaque, zero;
-
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst_lo, xmm_dst_hi;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- s = *src++;
- d = *dst;
-
- *dst++ = pack_1x128_32 (
- over_rev_non_pre_1x128 (
- unpack_32_1x128 (s), unpack_32_1x128 (d)));
-
- w--;
- }
-
- while (w >= 4)
- {
- xmm_src_hi = load_128_unaligned ((__m128i*)src);
-
- opaque = is_opaque (xmm_src_hi);
- zero = is_zero (xmm_src_hi);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-
- if (opaque)
- {
- invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
- }
- else if (!zero)
- {
- xmm_dst_hi = load_128_aligned ((__m128i*)dst);
-
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
- }
-
- w -= 4;
- dst += 4;
- src += 4;
- }
-
- while (w)
- {
- s = *src++;
- d = *dst;
-
- *dst++ = pack_1x128_32 (
- over_rev_non_pre_1x128 (
- unpack_32_1x128 (s), unpack_32_1x128 (d)));
-
- w--;
- }
- }
-
-}
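-
-/* The pixbuf paths take a non-premultiplied source with R and B
- * swapped relative to dest.  Each vector group is classified first:
- * fully opaque groups only need the channel swap (invert_colors_2x128),
- * fully transparent groups are skipped, and mixed groups go through
- * over_rev_non_pre_2x128.
- */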
-
-static void
-sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src;
- uint16_t *dst_line, *dst, d;
- uint32_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int w;
- uint32_t pack_cmp;
-
- __m128i xmm_src, xmm_alpha;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
-
- __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- xmm_src = expand_pixel_32_1x128 (src);
- xmm_alpha = expand_alpha_1x128 (xmm_src);
- mmx_src = xmm_src;
- mmx_alpha = xmm_alpha;
-
- while (height--)
- {
- w = width;
- mask = mask_line;
- dst = dst_line;
- mask_line += mask_stride;
- dst_line += dst_stride;
-
- while (w && ((unsigned long)dst & 15))
- {
- m = *(uint32_t *) mask;
-
- if (m)
- {
- d = *dst;
- mmx_mask = unpack_32_1x128 (m);
- mmx_dest = expand565_16_1x128 (d);
-
- *dst = pack_565_32_16 (
- pack_1x128_32 (
- in_over_1x128 (
- &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
- }
-
- w--;
- dst++;
- mask++;
- }
-
- while (w >= 8)
- {
- /* First round */
- xmm_mask = load_128_unaligned ((__m128i*)mask);
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- pack_cmp = _mm_movemask_epi8 (
- _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
-
- unpack_565_128_4x128 (xmm_dst,
- &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
- /* preload next round */
- xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
-
- if (pack_cmp != 0xffff)
- {
- in_over_2x128 (&xmm_src, &xmm_src,
- &xmm_alpha, &xmm_alpha,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst0, &xmm_dst1);
- }
-
- /* Second round */
- pack_cmp = _mm_movemask_epi8 (
- _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
-
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
- if (pack_cmp != 0xffff)
- {
- in_over_2x128 (&xmm_src, &xmm_src,
- &xmm_alpha, &xmm_alpha,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst2, &xmm_dst3);
- }
-
- save_128_aligned (
- (__m128i*)dst, pack_565_4x128_128 (
- &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
- w -= 8;
- dst += 8;
- mask += 8;
- }
-
- while (w)
- {
- m = *(uint32_t *) mask;
-
- if (m)
- {
- d = *dst;
- mmx_mask = unpack_32_1x128 (m);
- mmx_dest = expand565_16_1x128 (d);
-
- *dst = pack_565_32_16 (
- pack_1x128_32 (
- in_over_1x128 (
- &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
- }
-
- w--;
- dst++;
- mask++;
- }
- }
-
-}
-
-static void
-sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- uint32_t d, m;
- uint32_t src;
- uint8_t sa;
- int32_t w;
-
- __m128i xmm_alpha;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- sa = src >> 24;
-
- xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w && ((unsigned long)dst & 15))
- {
- m = (uint32_t) *mask++;
- d = (uint32_t) *dst;
-
- *dst++ = (uint8_t) pack_1x128_32 (
- pix_multiply_1x128 (
- pix_multiply_1x128 (xmm_alpha,
- unpack_32_1x128 (m)),
- unpack_32_1x128 (d)));
- w--;
- }
-
- while (w >= 16)
- {
- xmm_mask = load_128_unaligned ((__m128i*)mask);
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst_lo, &xmm_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- mask += 16;
- dst += 16;
- w -= 16;
- }
-
- while (w)
- {
- m = (uint32_t) *mask++;
- d = (uint32_t) *dst;
-
- *dst++ = (uint8_t) pack_1x128_32 (
- pix_multiply_1x128 (
- pix_multiply_1x128 (
- xmm_alpha, unpack_32_1x128 (m)),
- unpack_32_1x128 (d)));
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_in_n_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- int dst_stride;
- uint32_t d;
- uint32_t src;
- int32_t w;
-
- __m128i xmm_alpha;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
-
- src = src >> 24;
-
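-    /* IN with an opaque solid source leaves dest unchanged, and a
-     * fully transparent one clears it; only intermediate alphas need
-     * the multiply loops below.
-     */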
- if (src == 0xff)
- return;
-
- if (src == 0x00)
- {
- pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
- 8, dest_x, dest_y, width, height, src);
-
- return;
- }
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- w = width;
-
- while (w && ((unsigned long)dst & 15))
- {
- d = (uint32_t) *dst;
-
- *dst++ = (uint8_t) pack_1x128_32 (
- pix_multiply_1x128 (
- xmm_alpha,
- unpack_32_1x128 (d)));
- w--;
- }
-
- while (w >= 16)
- {
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
- &xmm_dst_lo, &xmm_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- dst += 16;
- w -= 16;
- }
-
- while (w)
- {
- d = (uint32_t) *dst;
-
- *dst++ = (uint8_t) pack_1x128_32 (
- pix_multiply_1x128 (
- xmm_alpha,
- unpack_32_1x128 (d)));
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_in_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int src_stride, dst_stride;
- int32_t w;
- uint32_t s, d;
-
- __m128i xmm_src, xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w && ((unsigned long)dst & 15))
- {
- s = (uint32_t) *src++;
- d = (uint32_t) *dst;
-
- *dst++ = (uint8_t) pack_1x128_32 (
- pix_multiply_1x128 (
- unpack_32_1x128 (s), unpack_32_1x128 (d)));
- w--;
- }
-
- while (w >= 16)
- {
- xmm_src = load_128_unaligned ((__m128i*)src);
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_dst_lo, &xmm_dst_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- src += 16;
- dst += 16;
- w -= 16;
- }
-
- while (w)
- {
- s = (uint32_t) *src++;
- d = (uint32_t) *dst;
-
- *dst++ = (uint8_t) pack_1x128_32 (
- pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d)));
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- int32_t w;
- uint32_t src;
- uint8_t sa;
- uint32_t m, d;
-
- __m128i xmm_alpha;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- sa = src >> 24;
-
- xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w && ((unsigned long)dst & 15))
- {
- m = (uint32_t) *mask++;
- d = (uint32_t) *dst;
-
- *dst++ = (uint8_t) pack_1x128_32 (
- _mm_adds_epu16 (
- pix_multiply_1x128 (
- xmm_alpha, unpack_32_1x128 (m)),
- unpack_32_1x128 (d)));
- w--;
- }
-
- while (w >= 16)
- {
- xmm_mask = load_128_unaligned ((__m128i*)mask);
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
- &xmm_mask_lo, &xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
-
- xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
- xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
- mask += 16;
- dst += 16;
- w -= 16;
- }
-
- while (w)
- {
- m = (uint32_t) *mask++;
- d = (uint32_t) *dst;
-
- *dst++ = (uint8_t) pack_1x128_32 (
- _mm_adds_epu16 (
- pix_multiply_1x128 (
- xmm_alpha, unpack_32_1x128 (m)),
- unpack_32_1x128 (d)));
-
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_add_n_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- int dst_stride;
- int32_t w;
- uint32_t src;
-
- __m128i xmm_src;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- src >>= 24;
-
- if (src == 0x00)
- return;
-
- if (src == 0xff)
- {
- pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
- 8, dest_x, dest_y, width, height, 0xff);
-
- return;
- }
-
- src = (src << 24) | (src << 16) | (src << 8) | src;
- xmm_src = _mm_set_epi32 (src, src, src, src);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- w = width;
-
- while (w && ((unsigned long)dst & 15))
- {
- *dst = (uint8_t)_mm_cvtsi128_si32 (
- _mm_adds_epu8 (
- xmm_src,
- _mm_cvtsi32_si128 (*dst)));
-
- w--;
- dst++;
- }
-
- while (w >= 16)
- {
- save_128_aligned (
- (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
-
- dst += 16;
- w -= 16;
- }
-
- while (w)
- {
- *dst = (uint8_t)_mm_cvtsi128_si32 (
- _mm_adds_epu8 (
- xmm_src,
- _mm_cvtsi32_si128 (*dst)));
-
- w--;
- dst++;
- }
- }
-
-}
-
-static void
-sse2_composite_add_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint16_t t;
-
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- src = src_line;
-
- dst_line += dst_stride;
- src_line += src_stride;
- w = width;
-
- /* Small head */
- while (w && (unsigned long)dst & 3)
- {
- t = (*dst) + (*src++);
- *dst++ = t | (0 - (t >> 8));
- w--;
- }
-
- sse2_combine_add_u (imp, op,
- (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
-
- /* Small tail */
- dst += w & 0xfffc;
- src += w & 0xfffc;
-
- w &= 3;
-
- while (w)
- {
- t = (*dst) + (*src++);
- *dst++ = t | (0 - (t >> 8));
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
-
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
-
- sse2_combine_add_u (imp, op, dst, src, NULL, width);
- }
-
-}
-
-static pixman_bool_t
-pixman_blt_sse2 (uint32_t *src_bits,
- uint32_t *dst_bits,
- int src_stride,
- int dst_stride,
- int src_bpp,
- int dst_bpp,
- int src_x,
- int src_y,
- int dst_x,
- int dst_y,
- int width,
- int height)
-{
- uint8_t * src_bytes;
- uint8_t * dst_bytes;
- int byte_width;
-
- if (src_bpp != dst_bpp)
- return FALSE;
-
- if (src_bpp == 16)
- {
- src_stride = src_stride * (int) sizeof (uint32_t) / 2;
- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
- src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
- byte_width = 2 * width;
- src_stride *= 2;
- dst_stride *= 2;
- }
- else if (src_bpp == 32)
- {
- src_stride = src_stride * (int) sizeof (uint32_t) / 4;
- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
- byte_width = 4 * width;
- src_stride *= 4;
- dst_stride *= 4;
- }
- else
- {
- return FALSE;
- }
-
- while (height--)
- {
- int w;
- uint8_t *s = src_bytes;
- uint8_t *d = dst_bytes;
- src_bytes += src_stride;
- dst_bytes += dst_stride;
- w = byte_width;
-
- while (w >= 2 && ((unsigned long)d & 3))
- {
- *(uint16_t *)d = *(uint16_t *)s;
- w -= 2;
- s += 2;
- d += 2;
- }
-
- while (w >= 4 && ((unsigned long)d & 15))
- {
- *(uint32_t *)d = *(uint32_t *)s;
-
- w -= 4;
- s += 4;
- d += 4;
- }
-
- while (w >= 64)
- {
- __m128i xmm0, xmm1, xmm2, xmm3;
-
- xmm0 = load_128_unaligned ((__m128i*)(s));
- xmm1 = load_128_unaligned ((__m128i*)(s + 16));
- xmm2 = load_128_unaligned ((__m128i*)(s + 32));
- xmm3 = load_128_unaligned ((__m128i*)(s + 48));
-
- save_128_aligned ((__m128i*)(d), xmm0);
- save_128_aligned ((__m128i*)(d + 16), xmm1);
- save_128_aligned ((__m128i*)(d + 32), xmm2);
- save_128_aligned ((__m128i*)(d + 48), xmm3);
-
- s += 64;
- d += 64;
- w -= 64;
- }
-
- while (w >= 16)
- {
- save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
-
- w -= 16;
- d += 16;
- s += 16;
- }
-
- while (w >= 4)
- {
- *(uint32_t *)d = *(uint32_t *)s;
-
- w -= 4;
- s += 4;
- d += 4;
- }
-
- if (w >= 2)
- {
- *(uint16_t *)d = *(uint16_t *)s;
- w -= 2;
- s += 2;
- d += 2;
- }
- }
-
-
- return TRUE;
-}
-
-static void
-sse2_composite_copy_area (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- pixman_blt_sse2 (src_image->bits.bits,
- dst_image->bits.bits,
- src_image->bits.rowstride,
- dst_image->bits.rowstride,
- PIXMAN_FORMAT_BPP (src_image->bits.format),
- PIXMAN_FORMAT_BPP (dst_image->bits.format),
- src_x, src_y, dest_x, dest_y, width, height);
-}
-
-static void
-sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *src, *src_line, s;
- uint32_t *dst, *dst_line, d;
- uint8_t *mask, *mask_line;
- uint32_t m;
- int src_stride, mask_stride, dst_stride;
- int32_t w;
- __m128i ms;
-
- __m128i xmm_src, xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- src = src_line;
- src_line += src_stride;
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
-
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- s = 0xff000000 | *src++;
- m = (uint32_t) *mask++;
- d = *dst;
- ms = unpack_32_1x128 (s);
-
- if (m != 0xff)
- {
- __m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
- __m128i md = unpack_32_1x128 (d);
-
- ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md);
- }
-
- *dst++ = pack_1x128_32 (ms);
- w--;
- }
-
- while (w >= 4)
- {
- m = *(uint32_t*) mask;
- xmm_src = _mm_or_si128 (
- load_128_unaligned ((__m128i*)src), mask_ff000000);
-
- if (m == 0xffffffff)
- {
- save_128_aligned ((__m128i*)dst, xmm_src);
- }
- else
- {
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
-
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- expand_alpha_rev_2x128 (
- xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
- &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
- }
-
- src += 4;
- dst += 4;
- mask += 4;
- w -= 4;
- }
-
- while (w)
- {
- m = (uint32_t) *mask++;
-
- if (m)
- {
- s = 0xff000000 | *src;
-
- if (m == 0xff)
- {
- *dst = s;
- }
- else
- {
- __m128i ma, md, ms;
-
- d = *dst;
-
- ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
- md = unpack_32_1x128 (d);
- ms = unpack_32_1x128 (s);
-
- *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md));
- }
-
- }
-
- src++;
- dst++;
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *src, *src_line, s;
- uint32_t *dst, *dst_line, d;
- uint8_t *mask, *mask_line;
- uint32_t m;
- int src_stride, mask_stride, dst_stride;
- int32_t w;
-
- __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- src = src_line;
- src_line += src_stride;
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
-
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- uint32_t sa;
-
- s = *src++;
- m = (uint32_t) *mask++;
- d = *dst;
-
- sa = s >> 24;
-
- if (m)
- {
- if (sa == 0xff && m == 0xff)
- {
- *dst = s;
- }
- else
- {
- __m128i ms, md, ma, msa;
-
- ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
- ms = unpack_32_1x128 (s);
- md = unpack_32_1x128 (d);
-
- msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
-
- *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
- }
- }
-
- dst++;
- w--;
- }
-
- while (w >= 4)
- {
- m = *(uint32_t *) mask;
-
- if (m)
- {
- xmm_src = load_128_unaligned ((__m128i*)src);
-
- if (m == 0xffffffff && is_opaque (xmm_src))
- {
- save_128_aligned ((__m128i *)dst, xmm_src);
- }
- else
- {
- xmm_dst = load_128_aligned ((__m128i *)dst);
-
- xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
-
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
- expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
- &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
- }
- }
-
- src += 4;
- dst += 4;
- mask += 4;
- w -= 4;
- }
-
- while (w)
- {
- uint32_t sa;
-
- s = *src++;
- m = (uint32_t) *mask++;
- d = *dst;
-
- sa = s >> 24;
-
- if (m)
- {
- if (sa == 0xff && m == 0xff)
- {
- *dst = s;
- }
- else
- {
- __m128i ms, md, ma, msa;
-
- ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
- ms = unpack_32_1x128 (s);
- md = unpack_32_1x128 (d);
-
- msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
-
- *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
- }
- }
-
- dst++;
- w--;
- }
- }
-
-}
-
-static void
-sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src;
- uint32_t *dst_line, *dst;
- __m128i xmm_src;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_dsta_hi, xmm_dsta_lo;
- int dst_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
- xmm_src = expand_pixel_32_1x128 (src);
-
- while (height--)
- {
- dst = dst_line;
-
- dst_line += dst_stride;
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- __m128i vd;
-
- vd = unpack_32_1x128 (*dst);
-
- *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
- xmm_src));
- w--;
- dst++;
- }
-
- while (w >= 4)
- {
- __m128i tmp_lo, tmp_hi;
-
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
- expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi);
-
- tmp_lo = xmm_src;
- tmp_hi = xmm_src;
-
- over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
- &xmm_dsta_lo, &xmm_dsta_hi,
- &tmp_lo, &tmp_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi));
-
- w -= 4;
- dst += 4;
- }
-
- while (w)
- {
- __m128i vd;
-
- vd = unpack_32_1x128 (*dst);
-
- *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
- xmm_src));
- w--;
- dst++;
- }
-
- }
-
-}
-
-static void
-sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *src, *src_line, s;
- uint32_t *dst, *dst_line, d;
- uint32_t *mask, *mask_line;
- uint32_t m;
- int src_stride, mask_stride, dst_stride;
- int32_t w;
-
- __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
-
- PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
- PIXMAN_IMAGE_GET_LINE (
- src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- src = src_line;
- src_line += src_stride;
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
-
- w = width;
-
- while (w && (unsigned long)dst & 15)
- {
- uint32_t sa;
-
- s = *src++;
- m = (*mask++) >> 24;
- d = *dst;
-
- sa = s >> 24;
-
- if (m)
- {
- if (sa == 0xff && m == 0xff)
- {
- *dst = s;
- }
- else
- {
- __m128i ms, md, ma, msa;
-
- ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
- ms = unpack_32_1x128 (s);
- md = unpack_32_1x128 (d);
-
- msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
-
- *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
- }
- }
-
- dst++;
- w--;
- }
-
- while (w >= 4)
- {
- xmm_mask = load_128_unaligned ((__m128i*)mask);
-
- if (!is_transparent (xmm_mask))
- {
- xmm_src = load_128_unaligned ((__m128i*)src);
-
- if (is_opaque (xmm_mask) && is_opaque (xmm_src))
- {
- save_128_aligned ((__m128i *)dst, xmm_src);
- }
- else
- {
- xmm_dst = load_128_aligned ((__m128i *)dst);
-
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
- expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
- in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
- &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
- }
- }
-
- src += 4;
- dst += 4;
- mask += 4;
- w -= 4;
- }
-
- while (w)
- {
- uint32_t sa;
-
- s = *src++;
- m = (*mask++) >> 24;
- d = *dst;
-
- sa = s >> 24;
-
- if (m)
- {
- if (sa == 0xff && m == 0xff)
- {
- *dst = s;
- }
- else
- {
- __m128i ms, md, ma, msa;
-
- ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
- ms = unpack_32_1x128 (s);
- md = unpack_32_1x128 (d);
-
- msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
-
- *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
- }
- }
-
- dst++;
- w--;
- }
- }
-
-}
-
-/* A variant of 'sse2_combine_over_u' with minor tweaks */
-static force_inline void
-scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
- const uint32_t* ps,
- int32_t w,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t fully_transparent_src)
-{
- uint32_t s, d;
- const uint32_t* pm = NULL;
-
- __m128i xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_src_lo, xmm_src_hi;
- __m128i xmm_alpha_lo, xmm_alpha_hi;
-
- if (fully_transparent_src)
- return;
-
- /* Align dst on a 16-byte boundary */
- while (w && ((unsigned long)pd & 15))
- {
- d = *pd;
- s = combine1 (ps + (vx >> 16), pm);
- vx += unit_x;
-
- *pd++ = core_combine_over_u_pixel_sse2 (s, d);
- if (pm)
- pm++;
- w--;
- }
-
- while (w >= 4)
- {
- __m128i tmp;
- uint32_t tmp1, tmp2, tmp3, tmp4;
-
- tmp1 = ps[vx >> 16];
- vx += unit_x;
- tmp2 = ps[vx >> 16];
- vx += unit_x;
- tmp3 = ps[vx >> 16];
- vx += unit_x;
- tmp4 = ps[vx >> 16];
- vx += unit_x;
-
- tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
-
- xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
-
- if (is_opaque (xmm_src_hi))
- {
- save_128_aligned ((__m128i*)pd, xmm_src_hi);
- }
- else if (!is_zero (xmm_src_hi))
- {
- xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
- unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
- expand_alpha_2x128 (
- xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-
- over_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_dst_lo, &xmm_dst_hi);
-
- /* rebuild the 4 pixel data and save */
- save_128_aligned ((__m128i*)pd,
- pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
- }
-
- w -= 4;
- pd += 4;
- if (pm)
- pm += 4;
- }
-
- while (w)
- {
- d = *pd;
- s = combine1 (ps + (vx >> 16), pm);
- vx += unit_x;
-
- *pd++ = core_combine_over_u_pixel_sse2 (s, d);
- if (pm)
- pm++;
-
- w--;
- }
-}
-
-FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
- scaled_nearest_scanline_sse2_8888_8888_OVER,
- uint32_t, uint32_t, COVER)
-FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
- scaled_nearest_scanline_sse2_8888_8888_OVER,
- uint32_t, uint32_t, NONE)
-FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
- scaled_nearest_scanline_sse2_8888_8888_OVER,
- uint32_t, uint32_t, PAD)
-
-static force_inline void
-scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
- uint32_t * dst,
- const uint32_t * src,
- int32_t w,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t zero_src)
-{
- __m128i xmm_mask;
- __m128i xmm_src, xmm_src_lo, xmm_src_hi;
- __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
- __m128i xmm_alpha_lo, xmm_alpha_hi;
-
- if (zero_src || (*mask >> 24) == 0)
- return;
-
- xmm_mask = create_mask_16_128 (*mask >> 24);
-
- while (w && (unsigned long)dst & 15)
- {
- uint32_t s = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
-
- if (s)
- {
- uint32_t d = *dst;
-
- __m128i ms = unpack_32_1x128 (s);
- __m128i alpha = expand_alpha_1x128 (ms);
- __m128i dest = xmm_mask;
- __m128i alpha_dst = unpack_32_1x128 (d);
-
- *dst = pack_1x128_32 (
- in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
- }
- dst++;
- w--;
- }
-
- while (w >= 4)
- {
- uint32_t tmp1, tmp2, tmp3, tmp4;
-
- tmp1 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp2 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp3 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp4 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
-
- xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
-
- if (!is_zero (xmm_src))
- {
- xmm_dst = load_128_aligned ((__m128i*)dst);
-
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
- expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi);
-
- in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
- &xmm_alpha_lo, &xmm_alpha_hi,
- &xmm_mask, &xmm_mask,
- &xmm_dst_lo, &xmm_dst_hi);
-
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
- }
-
- dst += 4;
- w -= 4;
- }
-
- while (w)
- {
- uint32_t s = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
-
- if (s)
- {
- uint32_t d = *dst;
-
- __m128i ms = unpack_32_1x128 (s);
- __m128i alpha = expand_alpha_1x128 (ms);
- __m128i mask = xmm_mask;
- __m128i dest = unpack_32_1x128 (d);
-
- *dst = pack_1x128_32 (
- in_over_1x128 (&ms, &alpha, &mask, &dest));
- }
-
- dst++;
- w--;
- }
-
-}
-
-FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
- scaled_nearest_scanline_sse2_8888_n_8888_OVER,
- uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
-FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
- scaled_nearest_scanline_sse2_8888_n_8888_OVER,
- uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
-FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
- scaled_nearest_scanline_sse2_8888_n_8888_OVER,
- uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
-
-static void
-bilinear_interpolate_line_sse2 (uint32_t * out,
- const uint32_t * top,
- const uint32_t * bottom,
- int wt,
- int wb,
- pixman_fixed_t x,
- pixman_fixed_t ux,
- int width)
-{
- const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);
- const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);
- const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);
- const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);
- const __m128i xmm_ux = _mm_set_epi16 (ux, ux, ux, ux, ux, ux, ux, ux);
- const __m128i xmm_zero = _mm_setzero_si128 ();
- __m128i xmm_x = _mm_set_epi16 (x, x, x, x, x, x, x, x);
- uint32_t pix1, pix2, pix3, pix4;
-
- #define INTERPOLATE_ONE_PIXEL(pix) \
- do { \
- __m128i xmm_wh, xmm_lo, xmm_hi, a; \
- /* fetch 2x2 pixel block into sse2 register */ \
- uint32_t tl = top [pixman_fixed_to_int (x)]; \
- uint32_t tr = top [pixman_fixed_to_int (x) + 1]; \
- uint32_t bl = bottom [pixman_fixed_to_int (x)]; \
- uint32_t br = bottom [pixman_fixed_to_int (x) + 1]; \
- a = _mm_set_epi32 (tr, tl, br, bl); \
- x += ux; \
- /* vertical interpolation */ \
- a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \
- xmm_wt), \
- _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \
- xmm_wb)); \
- /* calculate horizontal weights */ \
- xmm_wh = _mm_add_epi16 (xmm_addc, \
- _mm_xor_si128 (xmm_xorc, \
- _mm_srli_epi16 (xmm_x, 8))); \
- xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
- /* horizontal interpolation */ \
- xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
- xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
- a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
- _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
- /* shift and pack the result */ \
- a = _mm_srli_epi32 (a, 16); \
- a = _mm_packs_epi32 (a, a); \
- a = _mm_packus_epi16 (a, a); \
- pix = _mm_cvtsi128_si32 (a); \
- } while (0)
-
- while ((width -= 4) >= 0)
- {
- INTERPOLATE_ONE_PIXEL (pix1);
- INTERPOLATE_ONE_PIXEL (pix2);
- INTERPOLATE_ONE_PIXEL (pix3);
- INTERPOLATE_ONE_PIXEL (pix4);
- *out++ = pix1;
- *out++ = pix2;
- *out++ = pix3;
- *out++ = pix4;
- }
- if (width & 2)
- {
- INTERPOLATE_ONE_PIXEL (pix1);
- INTERPOLATE_ONE_PIXEL (pix2);
- *out++ = pix1;
- *out++ = pix2;
- }
- if (width & 1)
- {
- INTERPOLATE_ONE_PIXEL (pix1);
- *out = pix1;
- }
-
- #undef INTERPOLATE_ONE_PIXEL
-}
-
-static force_inline void
-scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
- const uint32_t * mask,
- const uint32_t * src_top,
- const uint32_t * src_bottom,
- int32_t w,
- int wt,
- int wb,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx,
- pixman_bool_t zero_src)
-{
- bilinear_interpolate_line_sse2 (dst, src_top, src_bottom,
- wt, wb, vx, unit_x, w);
-}
-
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
- scaled_bilinear_scanline_sse2_8888_8888_SRC,
- uint32_t, uint32_t, uint32_t,
- COVER, FALSE, FALSE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
- scaled_bilinear_scanline_sse2_8888_8888_SRC,
- uint32_t, uint32_t, uint32_t,
- PAD, FALSE, FALSE)
-FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
- scaled_bilinear_scanline_sse2_8888_8888_SRC,
- uint32_t, uint32_t, uint32_t,
- NONE, FALSE, FALSE)
-
-static const pixman_fast_path_t sse2_fast_paths[] =
-{
- /* PIXMAN_OP_OVER */
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca),
- PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888),
- PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888),
- PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888),
- PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888),
- PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565),
- PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
-
- /* PIXMAN_OP_OVER_REVERSE */
- PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888),
- PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888),
-
- /* PIXMAN_OP_ADD */
- PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
- PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
-
- /* PIXMAN_OP_SRC */
- PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
- PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888),
- PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888),
- PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area),
-
- /* PIXMAN_OP_IN */
- PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8),
- PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
- PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
-
- SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
- SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
-
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
- SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
-
- SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
- SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
- SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
-
- { PIXMAN_OP_NONE },
-};
-
-static pixman_bool_t
-sse2_blt (pixman_implementation_t *imp,
- uint32_t * src_bits,
- uint32_t * dst_bits,
- int src_stride,
- int dst_stride,
- int src_bpp,
- int dst_bpp,
- int src_x,
- int src_y,
- int dst_x,
- int dst_y,
- int width,
- int height)
-{
- if (!pixman_blt_sse2 (
- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height))
-
- {
- return _pixman_implementation_blt (
- imp->delegate,
- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height);
- }
-
- return TRUE;
-}
-
-#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
-__attribute__((__force_align_arg_pointer__))
-#endif
-static pixman_bool_t
-sse2_fill (pixman_implementation_t *imp,
- uint32_t * bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- if (!pixman_fill_sse2 (bits, stride, bpp, x, y, width, height, xor))
- {
- return _pixman_implementation_fill (
- imp->delegate, bits, stride, bpp, x, y, width, height, xor);
- }
-
- return TRUE;
-}
-
-static uint32_t *
-sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
-{
- int w = iter->width;
- __m128i ff000000 = mask_ff000000;
- uint32_t *dst = iter->buffer;
- uint32_t *src = (uint32_t *)iter->bits;
-
- iter->bits += iter->stride;
-
- while (w && ((unsigned long)dst) & 0x0f)
- {
- *dst++ = (*src++) | 0xff000000;
- w--;
- }
-
- while (w >= 4)
- {
- save_128_aligned (
- (__m128i *)dst, _mm_or_si128 (
- load_128_unaligned ((__m128i *)src), ff000000));
-
- dst += 4;
- src += 4;
- w -= 4;
- }
-
- while (w)
- {
- *dst++ = (*src++) | 0xff000000;
- w--;
- }
-
- return iter->buffer;
-}
-
-static uint32_t *
-sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
-{
- int w = iter->width;
- uint32_t *dst = iter->buffer;
- uint16_t *src = (uint16_t *)iter->bits;
- __m128i ff000000 = mask_ff000000;
-
- iter->bits += iter->stride;
-
- while (w && ((unsigned long)dst) & 0x0f)
- {
- uint16_t s = *src++;
-
- *dst++ = CONVERT_0565_TO_8888 (s);
- w--;
- }
-
- while (w >= 8)
- {
- __m128i lo, hi, s;
-
- s = _mm_loadu_si128 ((__m128i *)src);
-
- lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ()));
- hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ()));
-
- save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000));
- save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000));
-
- dst += 8;
- src += 8;
- w -= 8;
- }
-
- while (w)
- {
- uint16_t s = *src++;
-
- *dst++ = CONVERT_0565_TO_8888 (s);
- w--;
- }
-
- return iter->buffer;
-}
-
-static uint32_t *
-sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
-{
- int w = iter->width;
- uint32_t *dst = iter->buffer;
- uint8_t *src = iter->bits;
- __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
-
- iter->bits += iter->stride;
-
- while (w && (((unsigned long)dst) & 15))
- {
- *dst++ = *(src++) << 24;
- w--;
- }
-
- while (w >= 16)
- {
- xmm0 = _mm_loadu_si128((__m128i *)src);
-
- xmm1 = _mm_unpacklo_epi8 (_mm_setzero_si128(), xmm0);
- xmm2 = _mm_unpackhi_epi8 (_mm_setzero_si128(), xmm0);
- xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1);
- xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1);
- xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2);
- xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2);
-
- _mm_store_si128(((__m128i *)(dst + 0)), xmm3);
- _mm_store_si128(((__m128i *)(dst + 4)), xmm4);
- _mm_store_si128(((__m128i *)(dst + 8)), xmm5);
- _mm_store_si128(((__m128i *)(dst + 12)), xmm6);
-
- dst += 16;
- src += 16;
- w -= 16;
- }
-
- while (w)
- {
- *dst++ = *(src++) << 24;
- w--;
- }
-
- return iter->buffer;
-}
-
-typedef struct
-{
- pixman_format_code_t format;
- pixman_iter_get_scanline_t get_scanline;
-} fetcher_info_t;
-
-static const fetcher_info_t fetchers[] =
-{
- { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
- { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 },
- { PIXMAN_a8, sse2_fetch_a8 },
- { PIXMAN_null }
-};
-
-static void
-sse2_src_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x, int y, int width, int height,
- uint8_t *buffer, iter_flags_t flags)
-{
-#define FLAGS \
- (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
-
- if ((flags & ITER_NARROW) &&
- (image->common.flags & FLAGS) == FLAGS &&
- x >= 0 && y >= 0 &&
- x + width <= image->bits.width &&
- y + height <= image->bits.height)
- {
- const fetcher_info_t *f;
-
- for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
- {
- if (image->common.extended_format_code == f->format)
- {
- uint8_t *b = (uint8_t *)image->bits.bits;
- int s = image->bits.rowstride * 4;
-
- iter->bits = b + s * y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
- iter->stride = s;
- iter->width = width;
- iter->buffer = (uint32_t *)buffer;
-
- iter->get_scanline = f->get_scanline;
- return;
- }
- }
- }
-
- _pixman_implementation_src_iter_init (
- imp->delegate, iter, image, x, y, width, height, buffer, flags);
-}
-
-#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
-__attribute__((__force_align_arg_pointer__))
-#endif
-pixman_implementation_t *
-_pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
-{
- pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths);
-
- /* SSE2 constants */
- mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000);
- mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000);
- mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0);
- mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f);
- mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000);
- mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00);
- mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8);
- mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0);
- mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000);
- mask_0080 = create_mask_16_128 (0x0080);
- mask_00ff = create_mask_16_128 (0x00ff);
- mask_0101 = create_mask_16_128 (0x0101);
- mask_ffff = create_mask_16_128 (0xffff);
- mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
- mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
-
- /* Set up function pointers */
- imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
- imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
- imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
- imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u;
- imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u;
- imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u;
- imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u;
- imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u;
- imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u;
- imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
-
- imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
-
- imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
- imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;
- imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca;
- imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca;
- imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca;
- imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca;
- imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca;
- imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca;
-
- imp->blt = sse2_blt;
- imp->fill = sse2_fill;
-
- imp->src_iter_init = sse2_src_iter_init;
-
- return imp;
-}
+/*
+ * Copyright © 2008 Rodrigo Kumpera
+ * Copyright © 2008 André Tupinambá
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. Red Hat makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Rodrigo Kumpera (kumpera@gmail.com)
+ * André Tupinambá (andrelrt@gmail.com)
+ *
+ * Based on work by Owen Taylor and Søren Sandmann
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
+#include <emmintrin.h> /* for SSE2 intrinsics */
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+#include "pixman-fast-path.h"
+
+static __m128i mask_0080;
+static __m128i mask_00ff;
+static __m128i mask_0101;
+static __m128i mask_ffff;
+static __m128i mask_ff000000;
+static __m128i mask_alpha;
+
+static __m128i mask_565_r;
+static __m128i mask_565_g1, mask_565_g2;
+static __m128i mask_565_b;
+static __m128i mask_red;
+static __m128i mask_green;
+static __m128i mask_blue;
+
+static __m128i mask_565_fix_rb;
+static __m128i mask_565_fix_g;
+
+static force_inline __m128i
+unpack_32_1x128 (uint32_t data)
+{
+ return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
+}
+
+static force_inline void
+unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi)
+{
+ *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
+ *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
+}
+
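+/* Expand four r5g6b5 pixels (one per 32-bit lane) to x8r8g8b8.  Each
+ * channel is shifted into its 8-bit slot and its top bits are OR'ed
+ * back into the low bits, so a full 5-bit value 0x1f widens to 0xff
+ * rather than 0xf8.
+ */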
+static force_inline __m128i
+unpack_565_to_8888 (__m128i lo)
+{
+ __m128i r, g, b, rb, t;
+
+ r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
+ g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
+ b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);
+
+ rb = _mm_or_si128 (r, b);
+ t = _mm_and_si128 (rb, mask_565_fix_rb);
+ t = _mm_srli_epi32 (t, 5);
+ rb = _mm_or_si128 (rb, t);
+
+ t = _mm_and_si128 (g, mask_565_fix_g);
+ t = _mm_srli_epi32 (t, 6);
+ g = _mm_or_si128 (g, t);
+
+ return _mm_or_si128 (rb, g);
+}
+
+static force_inline void
+unpack_565_128_4x128 (__m128i data,
+ __m128i* data0,
+ __m128i* data1,
+ __m128i* data2,
+ __m128i* data3)
+{
+ __m128i lo, hi;
+
+ lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
+ hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
+
+ lo = unpack_565_to_8888 (lo);
+ hi = unpack_565_to_8888 (hi);
+
+ unpack_128_2x128 (lo, data0, data1);
+ unpack_128_2x128 (hi, data2, data3);
+}
+
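+/* Pack one x8r8g8b8 pixel to r5g6b5 by keeping the top 5/6/5 bits of
+ * the red/green/blue channels. */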
+static force_inline uint16_t
+pack_565_32_16 (uint32_t pixel)
+{
+ return (uint16_t) (((pixel >> 8) & 0xf800) |
+ ((pixel >> 5) & 0x07e0) |
+ ((pixel >> 3) & 0x001f));
+}
+
+static force_inline __m128i
+pack_2x128_128 (__m128i lo, __m128i hi)
+{
+ return _mm_packus_epi16 (lo, hi);
+}
+
+static force_inline __m128i
+pack_565_2x128_128 (__m128i lo, __m128i hi)
+{
+ __m128i data;
+ __m128i r, g1, g2, b;
+
+ data = pack_2x128_128 (lo, hi);
+
+ r = _mm_and_si128 (data, mask_565_r);
+ g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
+ g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
+ b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);
+
+ return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
+}
+
+static force_inline __m128i
+pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
+{
+ return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
+ pack_565_2x128_128 (*xmm2, *xmm3));
+}
+
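+/* The three predicates below rely on _mm_movemask_epi8 returning one
+ * bit per byte: the mask 0x8888 selects the alpha byte of each of the
+ * four 32-bit pixels, so is_opaque and is_transparent inspect only the
+ * alpha channel, while is_zero tests all 16 bytes.
+ */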
+static force_inline int
+is_opaque (__m128i x)
+{
+ __m128i ffs = _mm_cmpeq_epi8 (x, x);
+
+ return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
+}
+
+static force_inline int
+is_zero (__m128i x)
+{
+ return _mm_movemask_epi8 (
+ _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
+}
+
+static force_inline int
+is_transparent (__m128i x)
+{
+ return (_mm_movemask_epi8 (
+ _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
+}
+
+static force_inline __m128i
+expand_pixel_32_1x128 (uint32_t data)
+{
+ return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0));
+}
+
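+/* Broadcast the alpha (highest) 16-bit lane of each unpacked pixel
+ * across all four lanes of that pixel. */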
+static force_inline __m128i
+expand_alpha_1x128 (__m128i data)
+{
+ return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,
+ _MM_SHUFFLE (3, 3, 3, 3)),
+ _MM_SHUFFLE (3, 3, 3, 3));
+}
+
+static force_inline void
+expand_alpha_2x128 (__m128i data_lo,
+ __m128i data_hi,
+ __m128i* alpha_lo,
+ __m128i* alpha_hi)
+{
+ __m128i lo, hi;
+
+ lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3));
+ hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3));
+
+ *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3));
+ *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3));
+}
+
+static force_inline void
+expand_alpha_rev_2x128 (__m128i data_lo,
+ __m128i data_hi,
+ __m128i* alpha_lo,
+ __m128i* alpha_hi)
+{
+ __m128i lo, hi;
+
+ lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0));
+ hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0));
+ *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0));
+ *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0));
+}
+
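+/* Component-wise multiply of two unpacked pixel pairs with an exact,
+ * rounded division by 255: t = a * b + 0x80, then (t * 0x0101) >> 16,
+ * which equals (t + (t >> 8)) >> 8.  The final step is done in a
+ * single high multiply by mask_0101.
+ */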
+static force_inline void
+pix_multiply_2x128 (__m128i* data_lo,
+ __m128i* data_hi,
+ __m128i* alpha_lo,
+ __m128i* alpha_hi,
+ __m128i* ret_lo,
+ __m128i* ret_hi)
+{
+ __m128i lo, hi;
+
+ lo = _mm_mullo_epi16 (*data_lo, *alpha_lo);
+ hi = _mm_mullo_epi16 (*data_hi, *alpha_hi);
+ lo = _mm_adds_epu16 (lo, mask_0080);
+ hi = _mm_adds_epu16 (hi, mask_0080);
+ *ret_lo = _mm_mulhi_epu16 (lo, mask_0101);
+ *ret_hi = _mm_mulhi_epu16 (hi, mask_0101);
+}
+
+static force_inline void
+pix_add_multiply_2x128 (__m128i* src_lo,
+ __m128i* src_hi,
+ __m128i* alpha_dst_lo,
+ __m128i* alpha_dst_hi,
+ __m128i* dst_lo,
+ __m128i* dst_hi,
+ __m128i* alpha_src_lo,
+ __m128i* alpha_src_hi,
+ __m128i* ret_lo,
+ __m128i* ret_hi)
+{
+ __m128i t1_lo, t1_hi;
+ __m128i t2_lo, t2_hi;
+
+ pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi);
+ pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi);
+
+ *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo);
+ *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi);
+}
+
+static force_inline void
+negate_2x128 (__m128i data_lo,
+ __m128i data_hi,
+ __m128i* neg_lo,
+ __m128i* neg_hi)
+{
+ *neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
+ *neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
+}
+
+static force_inline void
+invert_colors_2x128 (__m128i data_lo,
+ __m128i data_hi,
+ __m128i* inv_lo,
+ __m128i* inv_hi)
+{
+ __m128i lo, hi;
+
+ lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2));
+ hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2));
+ *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2));
+ *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2));
+}
+
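+/* Porter-Duff OVER for premultiplied pixels,
+ * dst = src + dst * (255 - alpha) / 255, computed on two unpacked
+ * registers (four pixels) at a time. */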
+static force_inline void
+over_2x128 (__m128i* src_lo,
+ __m128i* src_hi,
+ __m128i* alpha_lo,
+ __m128i* alpha_hi,
+ __m128i* dst_lo,
+ __m128i* dst_hi)
+{
+ __m128i t1, t2;
+
+ negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
+
+ pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
+
+ *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo);
+ *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi);
+}
+
+static force_inline void
+over_rev_non_pre_2x128 (__m128i src_lo,
+ __m128i src_hi,
+ __m128i* dst_lo,
+ __m128i* dst_hi)
+{
+ __m128i lo, hi;
+ __m128i alpha_lo, alpha_hi;
+
+ expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi);
+
+ lo = _mm_or_si128 (alpha_lo, mask_alpha);
+ hi = _mm_or_si128 (alpha_hi, mask_alpha);
+
+ invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi);
+
+ pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi);
+
+ over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi);
+}
+
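+/* (src IN mask) OVER dst: source and source alpha are both multiplied
+ * by the mask before the OVER against the destination. */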
+static force_inline void
+in_over_2x128 (__m128i* src_lo,
+ __m128i* src_hi,
+ __m128i* alpha_lo,
+ __m128i* alpha_hi,
+ __m128i* mask_lo,
+ __m128i* mask_hi,
+ __m128i* dst_lo,
+ __m128i* dst_hi)
+{
+ __m128i s_lo, s_hi;
+ __m128i a_lo, a_hi;
+
+ pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
+ pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
+
+ over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
+}
+
+/* load 4 pixels from a 16-byte-aligned address */
+static force_inline __m128i
+load_128_aligned (__m128i* src)
+{
+ return _mm_load_si128 (src);
+}
+
+/* load 4 pixels from an unaligned address */
+static force_inline __m128i
+load_128_unaligned (const __m128i* src)
+{
+ return _mm_loadu_si128 (src);
+}
+
+/* save 4 pixels with a non-temporal (write-combining) store to a
+ * 16-byte-aligned address
+ */
+static force_inline void
+save_128_write_combining (__m128i* dst,
+ __m128i data)
+{
+ _mm_stream_si128 (dst, data);
+}
+
+/* save 4 pixels to a 16-byte-aligned address */
+static force_inline void
+save_128_aligned (__m128i* dst,
+ __m128i data)
+{
+ _mm_store_si128 (dst, data);
+}
+
+/* save 4 pixels to an unaligned address */
+static force_inline void
+save_128_unaligned (__m128i* dst,
+ __m128i data)
+{
+ _mm_storeu_si128 (dst, data);
+}
+
+static force_inline __m128i
+load_32_1x128 (uint32_t data)
+{
+ return _mm_cvtsi32_si128 (data);
+}
+
+static force_inline __m128i
+expand_alpha_rev_1x128 (__m128i data)
+{
+ return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
+}
+
+static force_inline __m128i
+expand_pixel_8_1x128 (uint8_t data)
+{
+ return _mm_shufflelo_epi16 (
+ unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
+}
+
+static force_inline __m128i
+pix_multiply_1x128 (__m128i data,
+ __m128i alpha)
+{
+ return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha),
+ mask_0080),
+ mask_0101);
+}
+
+static force_inline __m128i
+pix_add_multiply_1x128 (__m128i* src,
+ __m128i* alpha_dst,
+ __m128i* dst,
+ __m128i* alpha_src)
+{
+ __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst);
+ __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src);
+
+ return _mm_adds_epu8 (t1, t2);
+}
+
+static force_inline __m128i
+negate_1x128 (__m128i data)
+{
+ return _mm_xor_si128 (data, mask_00ff);
+}
+
+static force_inline __m128i
+invert_colors_1x128 (__m128i data)
+{
+ return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
+}
+
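+/* Single-pixel OVER: src + dst * (255 - alpha) / 255 on one unpacked
+ * pixel. */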
+static force_inline __m128i
+over_1x128 (__m128i src, __m128i alpha, __m128i dst)
+{
+ return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
+}
+
+static force_inline __m128i
+in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
+{
+ return over_1x128 (pix_multiply_1x128 (*src, *mask),
+ pix_multiply_1x128 (*alpha, *mask),
+ *dst);
+}
+
+static force_inline __m128i
+over_rev_non_pre_1x128 (__m128i src, __m128i dst)
+{
+ __m128i alpha = expand_alpha_1x128 (src);
+
+ return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src),
+ _mm_or_si128 (alpha, mask_alpha)),
+ alpha,
+ dst);
+}
+
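+/* Repack one unpacked pixel: saturate the eight 16-bit components to
+ * bytes and return the low 32 bits. */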
+static force_inline uint32_t
+pack_1x128_32 (__m128i data)
+{
+ return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
+}
+
+static force_inline __m128i
+expand565_16_1x128 (uint16_t pixel)
+{
+ __m128i m = _mm_cvtsi32_si128 (pixel);
+
+ m = unpack_565_to_8888 (m);
+
+ return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
+}
+
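+/* Scalar OVER with the cheap cases handled first: an opaque source
+ * replaces the destination outright and a zero source leaves it
+ * untouched; only the remaining pixels pay for the full blend. */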
+static force_inline uint32_t
+core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
+{
+ uint8_t a;
+ __m128i xmms;
+
+ a = src >> 24;
+
+ if (a == 0xff)
+ {
+ return src;
+ }
+ else if (src)
+ {
+ xmms = unpack_32_1x128 (src);
+ return pack_1x128_32 (
+ over_1x128 (xmms, expand_alpha_1x128 (xmms),
+ unpack_32_1x128 (dst)));
+ }
+
+ return dst;
+}
+
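+/* Fetch one source pixel, multiplied by the mask alpha when a mask is
+ * present. */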
+static force_inline uint32_t
+combine1 (const uint32_t *ps, const uint32_t *pm)
+{
+ uint32_t s = *ps;
+
+ if (pm)
+ {
+ __m128i ms, mm;
+
+ mm = unpack_32_1x128 (*pm);
+ mm = expand_alpha_1x128 (mm);
+
+ ms = unpack_32_1x128 (s);
+ ms = pix_multiply_1x128 (ms, mm);
+
+ s = pack_1x128_32 (ms);
+ }
+
+ return s;
+}
+
+static force_inline __m128i
+combine4 (const __m128i *ps, const __m128i *pm)
+{
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_msk_lo, xmm_msk_hi;
+ __m128i s;
+
+ if (pm)
+ {
+ xmm_msk_lo = load_128_unaligned (pm);
+
+ if (is_transparent (xmm_msk_lo))
+ return _mm_setzero_si128 ();
+ }
+
+ s = load_128_unaligned (ps);
+
+ if (pm)
+ {
+ unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);
+
+ expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);
+
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_msk_lo, &xmm_msk_hi,
+ &xmm_src_lo, &xmm_src_hi);
+
+ s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
+ }
+
+ return s;
+}
+
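+/* The combiners below share one loop shape: a scalar head runs until
+ * dst reaches a 16-byte boundary, a four-pixel SSE2 body does the bulk
+ * of the work, and a scalar tail handles the remainder.  The body also
+ * skips fully transparent blocks and stores opaque blocks directly, so
+ * only partially transparent pixels pay for the full
+ * unpack/blend/pack sequence.
+ */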
+static force_inline void
+core_combine_over_u_sse2_mask (uint32_t * pd,
+ const uint32_t* ps,
+ const uint32_t* pm,
+ int w)
+{
+ uint32_t s, d;
+
+ /* Align dst on a 16-byte boundary */
+ while (w && ((unsigned long)pd & 15))
+ {
+ d = *pd;
+ s = combine1 (ps, pm);
+
+ if (s)
+ *pd = core_combine_over_u_pixel_sse2 (s, d);
+ pd++;
+ ps++;
+ pm++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ __m128i mask = load_128_unaligned ((__m128i *)pm);
+
+ if (!is_zero (mask))
+ {
+ __m128i src;
+ __m128i src_hi, src_lo;
+ __m128i mask_hi, mask_lo;
+ __m128i alpha_hi, alpha_lo;
+
+ src = load_128_unaligned ((__m128i *)ps);
+
+ if (is_opaque (_mm_and_si128 (src, mask)))
+ {
+ save_128_aligned ((__m128i *)pd, src);
+ }
+ else
+ {
+ __m128i dst = load_128_aligned ((__m128i *)pd);
+ __m128i dst_hi, dst_lo;
+
+ unpack_128_2x128 (mask, &mask_lo, &mask_hi);
+ unpack_128_2x128 (src, &src_lo, &src_hi);
+
+ expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi);
+ pix_multiply_2x128 (&src_lo, &src_hi,
+ &mask_lo, &mask_hi,
+ &src_lo, &src_hi);
+
+ unpack_128_2x128 (dst, &dst_lo, &dst_hi);
+
+ expand_alpha_2x128 (src_lo, src_hi,
+ &alpha_lo, &alpha_hi);
+
+ over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
+ &dst_lo, &dst_hi);
+
+ save_128_aligned (
+ (__m128i *)pd,
+ pack_2x128_128 (dst_lo, dst_hi));
+ }
+ }
+
+ pm += 4;
+ ps += 4;
+ pd += 4;
+ w -= 4;
+ }
+ while (w)
+ {
+ d = *pd;
+ s = combine1 (ps, pm);
+
+ if (s)
+ *pd = core_combine_over_u_pixel_sse2 (s, d);
+ pd++;
+ ps++;
+ pm++;
+
+ w--;
+ }
+}
+
+static force_inline void
+core_combine_over_u_sse2_no_mask (uint32_t * pd,
+ const uint32_t* ps,
+ int w)
+{
+ uint32_t s, d;
+
+ /* Align dst on a 16-byte boundary */
+ while (w && ((unsigned long)pd & 15))
+ {
+ d = *pd;
+ s = *ps;
+
+ if (s)
+ *pd = core_combine_over_u_pixel_sse2 (s, d);
+ pd++;
+ ps++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ __m128i src;
+ __m128i src_hi, src_lo, dst_hi, dst_lo;
+ __m128i alpha_hi, alpha_lo;
+
+ src = load_128_unaligned ((__m128i *)ps);
+
+ if (!is_zero (src))
+ {
+ if (is_opaque (src))
+ {
+ save_128_aligned ((__m128i *)pd, src);
+ }
+ else
+ {
+ __m128i dst = load_128_aligned ((__m128i *)pd);
+
+ unpack_128_2x128 (src, &src_lo, &src_hi);
+ unpack_128_2x128 (dst, &dst_lo, &dst_hi);
+
+ expand_alpha_2x128 (src_lo, src_hi,
+ &alpha_lo, &alpha_hi);
+ over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
+ &dst_lo, &dst_hi);
+
+ save_128_aligned (
+ (__m128i *)pd,
+ pack_2x128_128 (dst_lo, dst_hi));
+ }
+ }
+
+ ps += 4;
+ pd += 4;
+ w -= 4;
+ }
+ while (w)
+ {
+ d = *pd;
+ s = *ps;
+
+ if (s)
+ *pd = core_combine_over_u_pixel_sse2 (s, d);
+ pd++;
+ ps++;
+
+ w--;
+ }
+}
+
+static force_inline void
+sse2_combine_over_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ if (pm)
+ core_combine_over_u_sse2_mask (pd, ps, pm, w);
+ else
+ core_combine_over_u_sse2_no_mask (pd, ps, w);
+}
+
+static void
+sse2_combine_over_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, d;
+
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+ /* Align dst on a 16-byte boundary */
+ while (w &&
+ ((unsigned long)pd & 15))
+ {
+ d = *pd;
+ s = combine1 (ps, pm);
+
+ *pd++ = core_combine_over_u_pixel_sse2 (d, s);
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+
+ while (w >= 4)
+ {
+        /* combine4 loads the source (and mask) unaligned, since only
+         * dst is known to be 16-byte aligned at this point.
+         */
+ xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
+ xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_src_lo, &xmm_src_hi);
+
+        /* rebuild the 4 pixels and save */
+ save_128_aligned ((__m128i*)pd,
+ pack_2x128_128 (xmm_src_lo, xmm_src_hi));
+
+ w -= 4;
+ ps += 4;
+ pd += 4;
+
+ if (pm)
+ pm += 4;
+ }
+
+ while (w)
+ {
+ d = *pd;
+ s = combine1 (ps, pm);
+
+ *pd++ = core_combine_over_u_pixel_sse2 (d, s);
+ ps++;
+ w--;
+ if (pm)
+ pm++;
+ }
+}
+
+static force_inline uint32_t
+core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst)
+{
+ uint32_t maska = src >> 24;
+
+ if (maska == 0)
+ {
+ return 0;
+ }
+ else if (maska != 0xff)
+ {
+ return pack_1x128_32 (
+ pix_multiply_1x128 (unpack_32_1x128 (dst),
+ expand_alpha_1x128 (unpack_32_1x128 (src))));
+ }
+
+ return dst;
+}
+
+static void
+sse2_combine_in_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, d;
+
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+
+ while (w && ((unsigned long) pd & 15))
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_in_u_pixel_sse2 (d, s);
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+ xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_dst_lo, &xmm_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned ((__m128i*)pd,
+ pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ w -= 4;
+ if (pm)
+ pm += 4;
+ }
+
+ while (w)
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_in_u_pixel_sse2 (d, s);
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+}
+
+static void
+sse2_combine_in_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, d;
+
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+
+ while (w && ((unsigned long) pd & 15))
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_in_u_pixel_sse2 (s, d);
+ ps++;
+ w--;
+ if (pm)
+ pm++;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+ xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+ &xmm_src_lo, &xmm_src_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ w -= 4;
+ if (pm)
+ pm += 4;
+ }
+
+ while (w)
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_in_u_pixel_sse2 (s, d);
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+}
+
+static void
+sse2_combine_out_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ while (w && ((unsigned long) pd & 15))
+ {
+ uint32_t s = combine1 (ps, pm);
+ uint32_t d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d), negate_1x128 (
+ expand_alpha_1x128 (unpack_32_1x128 (s)))));
+
+ if (pm)
+ pm++;
+ ps++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+
+ xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
+ xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+
+ pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+ &xmm_src_lo, &xmm_src_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ if (pm)
+ pm += 4;
+
+ w -= 4;
+ }
+
+ while (w)
+ {
+ uint32_t s = combine1 (ps, pm);
+ uint32_t d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d), negate_1x128 (
+ expand_alpha_1x128 (unpack_32_1x128 (s)))));
+ ps++;
+ if (pm)
+ pm++;
+ w--;
+ }
+}
+
+static void
+sse2_combine_out_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ while (w && ((unsigned long) pd & 15))
+ {
+ uint32_t s = combine1 (ps, pm);
+ uint32_t d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), negate_1x128 (
+ expand_alpha_1x128 (unpack_32_1x128 (d)))));
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+
+ while (w >= 4)
+ {
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+
+ xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
+ xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_dst_lo, &xmm_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ w -= 4;
+ if (pm)
+ pm += 4;
+ }
+
+ while (w)
+ {
+ uint32_t s = combine1 (ps, pm);
+ uint32_t d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), negate_1x128 (
+ expand_alpha_1x128 (unpack_32_1x128 (d)))));
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+}
+
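+/* ATOP for premultiplied pixels, per channel (illustrative sketch):
+ *
+ *     result = DIV_UN8 (s * da) + DIV_UN8 (d * (255 - sa))
+ *
+ * pix_add_multiply_1x128 evaluates both products and the saturating add
+ * in one pass.
+ */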
+static force_inline uint32_t
+core_combine_atop_u_pixel_sse2 (uint32_t src,
+ uint32_t dst)
+{
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
+
+ __m128i sa = negate_1x128 (expand_alpha_1x128 (s));
+ __m128i da = expand_alpha_1x128 (d);
+
+ return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
+}
+
+static void
+sse2_combine_atop_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, d;
+
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
+ __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
+
+ while (w && ((unsigned long) pd & 15))
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+
+ while (w >= 4)
+ {
+ xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
+ xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+ &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+ negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+
+ pix_add_multiply_2x128 (
+ &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ w -= 4;
+ if (pm)
+ pm += 4;
+ }
+
+ while (w)
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+}
+
+static force_inline uint32_t
+core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
+ uint32_t dst)
+{
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
+
+ __m128i sa = expand_alpha_1x128 (s);
+ __m128i da = negate_1x128 (expand_alpha_1x128 (d));
+
+ return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
+}
+
+static void
+sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, d;
+
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
+ __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
+
+ while (w && ((unsigned long) pd & 15))
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
+ ps++;
+ w--;
+ if (pm)
+ pm++;
+ }
+
+ while (w >= 4)
+ {
+ xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
+ xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+ &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+ negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
+ &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+ pix_add_multiply_2x128 (
+ &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ w -= 4;
+ if (pm)
+ pm += 4;
+ }
+
+ while (w)
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
+ ps++;
+ w--;
+ if (pm)
+ pm++;
+ }
+}
+
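+/* XOR for premultiplied pixels, per channel (illustrative sketch):
+ *
+ *     result = DIV_UN8 (s * (255 - da)) + DIV_UN8 (d * (255 - sa))
+ */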
+static force_inline uint32_t
+core_combine_xor_u_pixel_sse2 (uint32_t src,
+ uint32_t dst)
+{
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
+
+ __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d));
+ __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s));
+
+ return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
+}
+
+static void
+sse2_combine_xor_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dst,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
+{
+ int w = width;
+ uint32_t s, d;
+ uint32_t* pd = dst;
+ const uint32_t* ps = src;
+ const uint32_t* pm = mask;
+
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
+ __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
+
+ while (w && ((unsigned long) pd & 15))
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+
+ while (w >= 4)
+ {
+ xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
+ xmm_dst = load_128_aligned ((__m128i*) pd);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+ &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+ negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+ negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
+ &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+ pix_add_multiply_2x128 (
+ &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ w -= 4;
+ if (pm)
+ pm += 4;
+ }
+
+ while (w)
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+}
+
+static force_inline void
+sse2_combine_add_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dst,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
+{
+ int w = width;
+ uint32_t s, d;
+ uint32_t* pd = dst;
+ const uint32_t* ps = src;
+ const uint32_t* pm = mask;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ ps++;
+ if (pm)
+ pm++;
+ *pd++ = _mm_cvtsi128_si32 (
+ _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ __m128i s;
+
+ s = combine4 ((__m128i*)ps, (__m128i*)pm);
+
+ save_128_aligned (
+ (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd)));
+
+ pd += 4;
+ ps += 4;
+ if (pm)
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w--)
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ ps++;
+ *pd++ = _mm_cvtsi128_si32 (
+ _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
+ if (pm)
+ pm++;
+ }
+}
+
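+/* SATURATE: da below is the complement of the destination alpha, i.e.
+ * the coverage still available.  When the source alpha exceeds it, the
+ * source is scaled by da/sa first, so the saturating add that follows
+ * cannot overflow any channel.  Roughly (illustrative sketch):
+ *
+ *     if (sa > da)
+ *         s = s * da / sa;          (per channel, rounded)
+ *     result = d + s                (saturating add)
+ */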
+static force_inline uint32_t
+core_combine_saturate_u_pixel_sse2 (uint32_t src,
+ uint32_t dst)
+{
+ __m128i ms = unpack_32_1x128 (src);
+ __m128i md = unpack_32_1x128 (dst);
+ uint32_t sa = src >> 24;
+ uint32_t da = ~dst >> 24;
+
+ if (sa > da)
+ {
+ ms = pix_multiply_1x128 (
+ ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24)));
+ }
+
+ return pack_1x128_32 (_mm_adds_epu16 (md, ms));
+}
+
+static void
+sse2_combine_saturate_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, d;
+
+ uint32_t pack_cmp;
+ __m128i xmm_src, xmm_dst;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+ w--;
+ ps++;
+ if (pm)
+ pm++;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst = load_128_aligned ((__m128i*)pd);
+ xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
+
+ pack_cmp = _mm_movemask_epi8 (
+ _mm_cmpgt_epi32 (
+ _mm_srli_epi32 (xmm_src, 24),
+ _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
+
+        /* if any source alpha is greater than the corresponding ~dst alpha */
+ if (pack_cmp)
+ {
+ s = combine1 (ps++, pm);
+ d = *pd;
+ *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+ if (pm)
+ pm++;
+
+ s = combine1 (ps++, pm);
+ d = *pd;
+ *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+ if (pm)
+ pm++;
+
+ s = combine1 (ps++, pm);
+ d = *pd;
+ *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+ if (pm)
+ pm++;
+
+ s = combine1 (ps++, pm);
+ d = *pd;
+ *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+ if (pm)
+ pm++;
+ }
+ else
+ {
+ save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
+
+ pd += 4;
+ ps += 4;
+ if (pm)
+ pm += 4;
+ }
+
+ w -= 4;
+ }
+
+ while (w--)
+ {
+ s = combine1 (ps, pm);
+ d = *pd;
+
+ *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+ ps++;
+ if (pm)
+ pm++;
+ }
+}
+
+static void
+sse2_combine_src_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m;
+
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
+ w--;
+ }
+}
+
+static force_inline uint32_t
+core_combine_over_ca_pixel_sse2 (uint32_t src,
+ uint32_t mask,
+ uint32_t dst)
+{
+ __m128i s = unpack_32_1x128 (src);
+ __m128i expAlpha = expand_alpha_1x128 (s);
+ __m128i unpk_mask = unpack_32_1x128 (mask);
+ __m128i unpk_dst = unpack_32_1x128 (dst);
+
+ return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst));
+}
+
+static void
+sse2_combine_over_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m, d;
+
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
+ w--;
+ }
+}
+
+static force_inline uint32_t
+core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
+ uint32_t mask,
+ uint32_t dst)
+{
+ __m128i d = unpack_32_1x128 (dst);
+
+ return pack_1x128_32 (
+ over_1x128 (d, expand_alpha_1x128 (d),
+ pix_multiply_1x128 (unpack_32_1x128 (src),
+ unpack_32_1x128 (mask))));
+}
+
+static void
+sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m, d;
+
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
+ w--;
+ }
+}
+
+static void
+sse2_combine_in_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m, d;
+
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)),
+ expand_alpha_1x128 (unpack_32_1x128 (d))));
+
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (m)),
+ expand_alpha_1x128 (unpack_32_1x128 (d))));
+
+ w--;
+ }
+}
+
+static void
+sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m, d;
+
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d),
+ pix_multiply_1x128 (unpack_32_1x128 (m),
+ expand_alpha_1x128 (unpack_32_1x128 (s)))));
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+ pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d),
+ pix_multiply_1x128 (unpack_32_1x128 (m),
+ expand_alpha_1x128 (unpack_32_1x128 (s)))));
+ w--;
+ }
+}
+
+static void
+sse2_combine_out_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m, d;
+
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (m)),
+ negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+ negate_2x128 (xmm_alpha_lo, xmm_alpha_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+ pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (m)),
+ negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
+
+ w--;
+ }
+}
+
+static void
+sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m, d;
+
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d),
+ negate_1x128 (pix_multiply_1x128 (
+ unpack_32_1x128 (m),
+ expand_alpha_1x128 (unpack_32_1x128 (s))))));
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ negate_2x128 (xmm_mask_lo, xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (d),
+ negate_1x128 (pix_multiply_1x128 (
+ unpack_32_1x128 (m),
+ expand_alpha_1x128 (unpack_32_1x128 (s))))));
+ w--;
+ }
+}
+
+static force_inline uint32_t
+core_combine_atop_ca_pixel_sse2 (uint32_t src,
+ uint32_t mask,
+ uint32_t dst)
+{
+ __m128i m = unpack_32_1x128 (mask);
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
+ __m128i sa = expand_alpha_1x128 (s);
+ __m128i da = expand_alpha_1x128 (d);
+
+ s = pix_multiply_1x128 (s, m);
+ m = negate_1x128 (pix_multiply_1x128 (m, sa));
+
+ return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
+}
+
+static void
+sse2_combine_atop_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m, d;
+
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
+ __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+ &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_src_lo, &xmm_src_hi);
+ pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ pix_add_multiply_2x128 (
+ &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
+ w--;
+ }
+}
+
+static force_inline uint32_t
+core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
+ uint32_t mask,
+ uint32_t dst)
+{
+ __m128i m = unpack_32_1x128 (mask);
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
+
+ __m128i da = negate_1x128 (expand_alpha_1x128 (d));
+ __m128i sa = expand_alpha_1x128 (s);
+
+ s = pix_multiply_1x128 (s, m);
+ m = pix_multiply_1x128 (m, sa);
+
+ return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
+}
+
+static void
+sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m, d;
+
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
+ __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+ &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_src_lo, &xmm_src_hi);
+ pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
+ &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+ pix_add_multiply_2x128 (
+ &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
+ w--;
+ }
+}
+
+static force_inline uint32_t
+core_combine_xor_ca_pixel_sse2 (uint32_t src,
+ uint32_t mask,
+ uint32_t dst)
+{
+ __m128i a = unpack_32_1x128 (mask);
+ __m128i s = unpack_32_1x128 (src);
+ __m128i d = unpack_32_1x128 (dst);
+
+ __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 (
+ a, expand_alpha_1x128 (s)));
+ __m128i dest = pix_multiply_1x128 (s, a);
+ __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d));
+
+ return pack_1x128_32 (pix_add_multiply_1x128 (&d,
+ &alpha_dst,
+ &dest,
+ &alpha_src));
+}
+
+static void
+sse2_combine_xor_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m, d;
+
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
+ __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+ &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_src_lo, &xmm_src_hi);
+ pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+ &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
+ &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+ negate_2x128 (xmm_mask_lo, xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ pix_add_multiply_2x128 (
+ &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
+ w--;
+ }
+}
+
+static void
+sse2_combine_add_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
+{
+ uint32_t s, m, d;
+
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask_lo, xmm_mask_hi;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
+ unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+ xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+ xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_src_lo, &xmm_src_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (
+ _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
+ _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
+
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
+
+ *pd++ = pack_1x128_32 (
+ _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
+ unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
+ w--;
+ }
+}
+
+static force_inline __m128i
+create_mask_16_128 (uint16_t mask)
+{
+ return _mm_set1_epi16 (mask);
+}
+
+/* Work around a code generation bug in Sun Studio 12. */
+#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
+# define create_mask_2x32_128(mask0, mask1) \
+ (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
+#else
+static force_inline __m128i
+create_mask_2x32_128 (uint32_t mask0,
+ uint32_t mask1)
+{
+ return _mm_set_epi32 (mask0, mask1, mask0, mask1);
+}
+#endif
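+
+/* create_mask_2x32_128 (x, x) broadcasts one 32-bit pixel across the
+ * whole register; _mm_unpacklo_epi8 against zero then yields that pixel
+ * expanded to 16-bit channels in both halves, which is how the solid
+ * compositors below prepare their source.
+ */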
+
+static void
+sse2_composite_over_n_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src;
+ uint32_t *dst_line, *dst, d;
+ int32_t w;
+ int dst_stride;
+ __m128i xmm_src, xmm_alpha;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+ xmm_src = expand_pixel_32_1x128 (src);
+ xmm_alpha = expand_alpha_1x128 (xmm_src);
+
+ while (height--)
+ {
+ dst = dst_line;
+
+ dst_line += dst_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ d = *dst;
+ *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
+ xmm_alpha,
+ unpack_32_1x128 (d)));
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ over_2x128 (&xmm_src, &xmm_src,
+ &xmm_alpha, &xmm_alpha,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+            /* rebuild the 4 pixels and save */
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ w -= 4;
+ dst += 4;
+ }
+
+ while (w)
+ {
+ d = *dst;
+ *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
+ xmm_alpha,
+ unpack_32_1x128 (d)));
+ w--;
+ }
+
+ }
+}
+
+static void
+sse2_composite_over_n_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src;
+ uint16_t *dst_line, *dst, d;
+ int32_t w;
+ int dst_stride;
+ __m128i xmm_src, xmm_alpha;
+ __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+
+ xmm_src = expand_pixel_32_1x128 (src);
+ xmm_alpha = expand_alpha_1x128 (xmm_src);
+
+ while (height--)
+ {
+ dst = dst_line;
+
+ dst_line += dst_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ d = *dst;
+
+ *dst++ = pack_565_32_16 (
+ pack_1x128_32 (over_1x128 (xmm_src,
+ xmm_alpha,
+ expand565_16_1x128 (d))));
+ w--;
+ }
+
+ while (w >= 8)
+ {
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_565_128_4x128 (xmm_dst,
+ &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+
+ over_2x128 (&xmm_src, &xmm_src,
+ &xmm_alpha, &xmm_alpha,
+ &xmm_dst0, &xmm_dst1);
+ over_2x128 (&xmm_src, &xmm_src,
+ &xmm_alpha, &xmm_alpha,
+ &xmm_dst2, &xmm_dst3);
+
+ xmm_dst = pack_565_4x128_128 (
+ &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+
+ save_128_aligned ((__m128i*)dst, xmm_dst);
+
+ dst += 8;
+ w -= 8;
+ }
+
+ while (w--)
+ {
+ d = *dst;
+ *dst++ = pack_565_32_16 (
+ pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha,
+ expand565_16_1x128 (d))));
+ }
+ }
+
+}
+
+static void
+sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint32_t *dst_line, d;
+ uint32_t *mask_line, m;
+ uint32_t pack_cmp;
+ int dst_stride, mask_stride;
+
+ __m128i xmm_src, xmm_alpha;
+ __m128i xmm_dst;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+
+ __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ srca = src >> 24;
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+ xmm_src = _mm_unpacklo_epi8 (
+ create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
+ xmm_alpha = expand_alpha_1x128 (xmm_src);
+ mmx_src = xmm_src;
+ mmx_alpha = xmm_alpha;
+
+ while (height--)
+ {
+ int w = width;
+ const uint32_t *pm = (uint32_t *)mask_line;
+ uint32_t *pd = (uint32_t *)dst_line;
+
+ dst_line += dst_stride;
+ mask_line += mask_stride;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ m = *pm++;
+
+ if (m)
+ {
+ d = *pd;
+
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
+
+ *pd = pack_1x128_32 (
+ _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
+ mmx_dest));
+ }
+
+ pd++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_mask = load_128_unaligned ((__m128i*)pm);
+
+ pack_cmp =
+ _mm_movemask_epi8 (
+ _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
+
+            /* if all bits in the mask are zero, pack_cmp is equal to 0xffff */
+ if (pack_cmp != 0xffff)
+ {
+ xmm_dst = load_128_aligned ((__m128i*)pd);
+
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+ pix_multiply_2x128 (&xmm_src, &xmm_src,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+ xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst));
+ }
+
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ m = *pm++;
+
+ if (m)
+ {
+ d = *pd;
+
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
+
+ *pd = pack_1x128_32 (
+ _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
+ mmx_dest));
+ }
+
+ pd++;
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src;
+ uint32_t *dst_line, d;
+ uint32_t *mask_line, m;
+ uint32_t pack_cmp;
+ int dst_stride, mask_stride;
+
+ __m128i xmm_src, xmm_alpha;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+
+ __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+ xmm_src = _mm_unpacklo_epi8 (
+ create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
+ xmm_alpha = expand_alpha_1x128 (xmm_src);
+ mmx_src = xmm_src;
+ mmx_alpha = xmm_alpha;
+
+ while (height--)
+ {
+ int w = width;
+ const uint32_t *pm = (uint32_t *)mask_line;
+ uint32_t *pd = (uint32_t *)dst_line;
+
+ dst_line += dst_stride;
+ mask_line += mask_stride;
+
+ while (w && (unsigned long)pd & 15)
+ {
+ m = *pm++;
+
+ if (m)
+ {
+ d = *pd;
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
+
+ *pd = pack_1x128_32 (in_over_1x128 (&mmx_src,
+ &mmx_alpha,
+ &mmx_mask,
+ &mmx_dest));
+ }
+
+ pd++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_mask = load_128_unaligned ((__m128i*)pm);
+
+ pack_cmp =
+ _mm_movemask_epi8 (
+ _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
+
+            /* if all bits in the mask are zero, pack_cmp is equal to 0xffff */
+ if (pack_cmp != 0xffff)
+ {
+ xmm_dst = load_128_aligned ((__m128i*)pd);
+
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ in_over_2x128 (&xmm_src, &xmm_src,
+ &xmm_alpha, &xmm_alpha,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ m = *pm++;
+
+ if (m)
+ {
+ d = *pd;
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
+
+ *pd = pack_1x128_32 (
+ in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
+ }
+
+ pd++;
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ uint32_t mask;
+ int32_t w;
+ int dst_stride, src_stride;
+
+ __m128i xmm_mask;
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
+
+ xmm_mask = create_mask_16_128 (mask >> 24);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ uint32_t s = *src++;
+
+ if (s)
+ {
+ uint32_t d = *dst;
+
+ __m128i ms = unpack_32_1x128 (s);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i dest = xmm_mask;
+ __m128i alpha_dst = unpack_32_1x128 (d);
+
+ *dst = pack_1x128_32 (
+ in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
+ }
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_src = load_128_unaligned ((__m128i*)src);
+
+ if (!is_zero (xmm_src))
+ {
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_mask, &xmm_mask,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ dst += 4;
+ src += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ uint32_t s = *src++;
+
+ if (s)
+ {
+ uint32_t d = *dst;
+
+ __m128i ms = unpack_32_1x128 (s);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i mask = xmm_mask;
+ __m128i dest = unpack_32_1x128 (d);
+
+ *dst = pack_1x128_32 (
+ in_over_1x128 (&ms, &alpha, &mask, &dest));
+ }
+
+ dst++;
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int32_t w;
+ int dst_stride, src_stride;
+
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ *dst++ = *src++ | 0xff000000;
+ w--;
+ }
+
+ while (w >= 16)
+ {
+ __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
+
+ xmm_src1 = load_128_unaligned ((__m128i*)src + 0);
+ xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
+ xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
+ xmm_src4 = load_128_unaligned ((__m128i*)src + 3);
+
+ save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000));
+ save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000));
+ save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000));
+ save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000));
+
+ dst += 16;
+ src += 16;
+ w -= 16;
+ }
+
+ while (w)
+ {
+ *dst++ = *src++ | 0xff000000;
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ uint32_t mask;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ __m128i xmm_mask, xmm_alpha;
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
+
+ xmm_mask = create_mask_16_128 (mask >> 24);
+ xmm_alpha = mask_00ff;
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ uint32_t s = (*src++) | 0xff000000;
+ uint32_t d = *dst;
+
+ __m128i src = unpack_32_1x128 (s);
+ __m128i alpha = xmm_alpha;
+ __m128i mask = xmm_mask;
+ __m128i dest = unpack_32_1x128 (d);
+
+ *dst++ = pack_1x128_32 (
+ in_over_1x128 (&src, &alpha, &mask, &dest));
+
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_src = _mm_or_si128 (
+ load_128_unaligned ((__m128i*)src), mask_ff000000);
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha, &xmm_alpha,
+ &xmm_mask, &xmm_mask,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ dst += 4;
+ src += 4;
+ w -= 4;
+
+ }
+
+ while (w)
+ {
+ uint32_t s = (*src++) | 0xff000000;
+ uint32_t d = *dst;
+
+ __m128i src = unpack_32_1x128 (s);
+ __m128i alpha = xmm_alpha;
+ __m128i mask = xmm_mask;
+ __m128i dest = unpack_32_1x128 (d);
+
+ *dst++ = pack_1x128_32 (
+ in_over_1x128 (&src, &alpha, &mask, &dest));
+
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ int dst_stride, src_stride;
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ dst = dst_line;
+ src = src_line;
+
+ while (height--)
+ {
+ sse2_combine_over_u (imp, op, dst, src, NULL, width);
+
+ dst += dst_stride;
+ src += src_stride;
+ }
+}
+
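+/* OVER one a8r8g8b8 pixel onto one r5g6b5 pixel: widen the destination
+ * to 8888, blend with the usual OVER, then pack back down to 565,
+ * truncating the low bits of each channel.
+ */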
+static force_inline uint16_t
+composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
+{
+ __m128i ms;
+
+ ms = unpack_32_1x128 (src);
+ return pack_565_32_16 (
+ pack_1x128_32 (
+ over_1x128 (
+ ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst))));
+}
+
+static void
+sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint16_t *dst_line, *dst, d;
+ uint32_t *src_line, *src, s;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ src = src_line;
+
+ dst_line += dst_stride;
+ src_line += src_stride;
+ w = width;
+
+ /* Align dst on a 16-byte boundary */
+ while (w &&
+ ((unsigned long)dst & 15))
+ {
+ s = *src++;
+ d = *dst;
+
+ *dst++ = composite_over_8888_0565pixel (s, d);
+ w--;
+ }
+
+        /* Process 8 pixels per iteration */
+ while (w >= 8)
+ {
+            /* Load the source unaligned; only dst is known to be
+             * 16-byte aligned.
+             */
+ xmm_src = load_128_unaligned ((__m128i*) src);
+ xmm_dst = load_128_aligned ((__m128i*) dst);
+
+ /* Unpacking */
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_565_128_4x128 (xmm_dst,
+ &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+            /* Load the next 4 source pixels early, so the memory read
+             * overlaps with the blending below.
+             */
+ xmm_src = load_128_unaligned ((__m128i*) (src + 4));
+
+ over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst0, &xmm_dst1);
+
+ /* Unpacking */
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst2, &xmm_dst3);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_565_4x128_128 (
+ &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
+
+ w -= 8;
+ dst += 8;
+ src += 8;
+ }
+
+ while (w--)
+ {
+ s = *src++;
+ d = *dst;
+
+ *dst++ = composite_over_8888_0565pixel (s, d);
+ }
+ }
+
+}
+
+static void
+sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint32_t *dst_line, *dst;
+ uint8_t *mask_line, *mask;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint32_t m, d;
+
+ __m128i xmm_src, xmm_alpha, xmm_def;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+
+ __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ xmm_def = create_mask_2x32_128 (src, src);
+ xmm_src = expand_pixel_32_1x128 (src);
+ xmm_alpha = expand_alpha_1x128 (xmm_src);
+ mmx_src = xmm_src;
+ mmx_alpha = xmm_alpha;
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ uint8_t m = *mask++;
+
+ if (m)
+ {
+ d = *dst;
+ mmx_mask = expand_pixel_8_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
+
+ *dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
+ &mmx_alpha,
+ &mmx_mask,
+ &mmx_dest));
+ }
+
+ w--;
+ dst++;
+ }
+
+ while (w >= 4)
+ {
+ m = *((uint32_t*)mask);
+
+ if (srca == 0xff && m == 0xffffffff)
+ {
+ save_128_aligned ((__m128i*)dst, xmm_def);
+ }
+ else if (m)
+ {
+ xmm_dst = load_128_aligned ((__m128i*) dst);
+ xmm_mask = unpack_32_1x128 (m);
+ xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
+
+ /* Unpacking */
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ in_over_2x128 (&xmm_src, &xmm_src,
+ &xmm_alpha, &xmm_alpha,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ w -= 4;
+ dst += 4;
+ mask += 4;
+ }
+
+ while (w)
+ {
+ uint8_t m = *mask++;
+
+ if (m)
+ {
+ d = *dst;
+ mmx_mask = expand_pixel_8_1x128 (m);
+ mmx_dest = unpack_32_1x128 (d);
+
+ *dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
+ &mmx_alpha,
+ &mmx_mask,
+ &mmx_dest));
+ }
+
+ w--;
+ dst++;
+ }
+ }
+
+}
+
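+/* Solid fill for 8, 16 and 32 bpp destinations.  The fill value is
+ * first replicated to a full 32-bit word, then broadcast into an XMM
+ * register; each scanline is written with a head loop that brings
+ * the pointer up to 16-byte alignment, an unrolled body of aligned
+ * stores (up to 128 bytes per iteration), and a tail for the
+ * remaining bytes.
+ */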
+static pixman_bool_t
+pixman_fill_sse2 (uint32_t *bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t data)
+{
+ uint32_t byte_width;
+ uint8_t *byte_line;
+
+ __m128i xmm_def;
+
+ if (bpp == 8)
+ {
+ uint8_t b;
+ uint16_t w;
+
+ stride = stride * (int) sizeof (uint32_t) / 1;
+ byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
+ byte_width = width;
+ stride *= 1;
+
+ b = data & 0xff;
+ w = (b << 8) | b;
+ data = (w << 16) | w;
+ }
+ else if (bpp == 16)
+ {
+ stride = stride * (int) sizeof (uint32_t) / 2;
+ byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+ byte_width = 2 * width;
+ stride *= 2;
+
+ data = (data & 0xffff) * 0x00010001;
+ }
+ else if (bpp == 32)
+ {
+ stride = stride * (int) sizeof (uint32_t) / 4;
+ byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+ byte_width = 4 * width;
+ stride *= 4;
+ }
+ else
+ {
+ return FALSE;
+ }
+
+ xmm_def = create_mask_2x32_128 (data, data);
+
+ while (height--)
+ {
+ int w;
+ uint8_t *d = byte_line;
+ byte_line += stride;
+ w = byte_width;
+
+ while (w >= 1 && ((unsigned long)d & 1))
+ {
+ *(uint8_t *)d = data;
+ w -= 1;
+ d += 1;
+ }
+
+ while (w >= 2 && ((unsigned long)d & 3))
+ {
+ *(uint16_t *)d = data;
+ w -= 2;
+ d += 2;
+ }
+
+ while (w >= 4 && ((unsigned long)d & 15))
+ {
+ *(uint32_t *)d = data;
+
+ w -= 4;
+ d += 4;
+ }
+
+ while (w >= 128)
+ {
+ save_128_aligned ((__m128i*)(d), xmm_def);
+ save_128_aligned ((__m128i*)(d + 16), xmm_def);
+ save_128_aligned ((__m128i*)(d + 32), xmm_def);
+ save_128_aligned ((__m128i*)(d + 48), xmm_def);
+ save_128_aligned ((__m128i*)(d + 64), xmm_def);
+ save_128_aligned ((__m128i*)(d + 80), xmm_def);
+ save_128_aligned ((__m128i*)(d + 96), xmm_def);
+ save_128_aligned ((__m128i*)(d + 112), xmm_def);
+
+ d += 128;
+ w -= 128;
+ }
+
+ if (w >= 64)
+ {
+ save_128_aligned ((__m128i*)(d), xmm_def);
+ save_128_aligned ((__m128i*)(d + 16), xmm_def);
+ save_128_aligned ((__m128i*)(d + 32), xmm_def);
+ save_128_aligned ((__m128i*)(d + 48), xmm_def);
+
+ d += 64;
+ w -= 64;
+ }
+
+ if (w >= 32)
+ {
+ save_128_aligned ((__m128i*)(d), xmm_def);
+ save_128_aligned ((__m128i*)(d + 16), xmm_def);
+
+ d += 32;
+ w -= 32;
+ }
+
+ if (w >= 16)
+ {
+ save_128_aligned ((__m128i*)(d), xmm_def);
+
+ d += 16;
+ w -= 16;
+ }
+
+ while (w >= 4)
+ {
+ *(uint32_t *)d = data;
+
+ w -= 4;
+ d += 4;
+ }
+
+ if (w >= 2)
+ {
+ *(uint16_t *)d = data;
+ w -= 2;
+ d += 2;
+ }
+
+ if (w >= 1)
+ {
+ *(uint8_t *)d = data;
+ w -= 1;
+ d += 1;
+ }
+ }
+
+ return TRUE;
+}
+
+static void
+sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint32_t *dst_line, *dst;
+ uint8_t *mask_line, *mask;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint32_t m;
+
+ __m128i xmm_src, xmm_def;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ {
+ pixman_fill_sse2 (dst_image->bits.bits, dst_image->bits.rowstride,
+ PIXMAN_FORMAT_BPP (dst_image->bits.format),
+ dest_x, dest_y, width, height, 0);
+ return;
+ }
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ xmm_def = create_mask_2x32_128 (src, src);
+ xmm_src = expand_pixel_32_1x128 (src);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ uint8_t m = *mask++;
+
+ if (m)
+ {
+ *dst = pack_1x128_32 (
+ pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)));
+ }
+ else
+ {
+ *dst = 0;
+ }
+
+ w--;
+ dst++;
+ }
+
+ while (w >= 4)
+ {
+ m = *((uint32_t*)mask);
+
+ if (srca == 0xff && m == 0xffffffff)
+ {
+ save_128_aligned ((__m128i*)dst, xmm_def);
+ }
+ else if (m)
+ {
+ xmm_mask = unpack_32_1x128 (m);
+ xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
+
+ /* Unpacking */
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ pix_multiply_2x128 (&xmm_src, &xmm_src,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
+ }
+ else
+ {
+ save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ());
+ }
+
+ w -= 4;
+ dst += 4;
+ mask += 4;
+ }
+
+ while (w)
+ {
+ uint8_t m = *mask++;
+
+ if (m)
+ {
+ *dst = pack_1x128_32 (
+ pix_multiply_1x128 (
+ xmm_src, expand_pixel_8_1x128 (m)));
+ }
+ else
+ {
+ *dst = 0;
+ }
+
+ w--;
+ dst++;
+ }
+ }
+
+}
+
+static void
+sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint16_t *dst_line, *dst, d;
+ uint8_t *mask_line, *mask;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint32_t m;
+ __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+
+ __m128i xmm_src, xmm_alpha;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+ __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ xmm_src = expand_pixel_32_1x128 (src);
+ xmm_alpha = expand_alpha_1x128 (xmm_src);
+ mmx_src = xmm_src;
+ mmx_alpha = xmm_alpha;
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ m = *mask++;
+
+ if (m)
+ {
+ d = *dst;
+ mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+ mmx_dest = expand565_16_1x128 (d);
+
+ *dst = pack_565_32_16 (
+ pack_1x128_32 (
+ in_over_1x128 (
+ &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
+ }
+
+ w--;
+ dst++;
+ }
+
+ while (w >= 8)
+ {
+ xmm_dst = load_128_aligned ((__m128i*) dst);
+ unpack_565_128_4x128 (xmm_dst,
+ &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+
+ m = *((uint32_t*)mask);
+ mask += 4;
+
+ if (m)
+ {
+ xmm_mask = unpack_32_1x128 (m);
+ xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
+
+ /* Unpacking */
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ in_over_2x128 (&xmm_src, &xmm_src,
+ &xmm_alpha, &xmm_alpha,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst0, &xmm_dst1);
+ }
+
+ m = *((uint32_t*)mask);
+ mask += 4;
+
+ if (m)
+ {
+ xmm_mask = unpack_32_1x128 (m);
+ xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
+
+ /* Unpacking */
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+ in_over_2x128 (&xmm_src, &xmm_src,
+ &xmm_alpha, &xmm_alpha,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst2, &xmm_dst3);
+ }
+
+ save_128_aligned (
+ (__m128i*)dst, pack_565_4x128_128 (
+ &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
+
+ w -= 8;
+ dst += 8;
+ }
+
+ while (w)
+ {
+ m = *mask++;
+
+ if (m)
+ {
+ d = *dst;
+ mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+ mmx_dest = expand565_16_1x128 (d);
+
+ *dst = pack_565_32_16 (
+ pack_1x128_32 (
+ in_over_1x128 (
+ &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
+ }
+
+ w--;
+ dst++;
+ }
+ }
+
+}
+
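+/* OVER from a non-premultiplied source with swapped R/B channels
+ * (the "pixbuf" layout) onto r5g6b5.  Blocks of four pixels that are
+ * fully opaque only need the channel swap (invert_colors), fully
+ * transparent blocks are skipped, and everything else goes through
+ * over_rev_non_pre.
+ */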
+static void
+sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint16_t *dst_line, *dst, d;
+ uint32_t *src_line, *src, s;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint32_t opaque, zero;
+
+ __m128i ms;
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ s = *src++;
+ d = *dst;
+
+ ms = unpack_32_1x128 (s);
+
+ *dst++ = pack_565_32_16 (
+ pack_1x128_32 (
+ over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
+ w--;
+ }
+
+ while (w >= 8)
+ {
+ /* First round */
+ xmm_src = load_128_unaligned ((__m128i*)src);
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ opaque = is_opaque (xmm_src);
+ zero = is_zero (xmm_src);
+
+ unpack_565_128_4x128 (xmm_dst,
+ &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+
+	    /* preload next round */
+ xmm_src = load_128_unaligned ((__m128i*)(src + 4));
+
+ if (opaque)
+ {
+ invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_dst0, &xmm_dst1);
+ }
+ else if (!zero)
+ {
+ over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_dst0, &xmm_dst1);
+ }
+
+ /* Second round */
+ opaque = is_opaque (xmm_src);
+ zero = is_zero (xmm_src);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+
+ if (opaque)
+ {
+ invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_dst2, &xmm_dst3);
+ }
+ else if (!zero)
+ {
+ over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_dst2, &xmm_dst3);
+ }
+
+ save_128_aligned (
+ (__m128i*)dst, pack_565_4x128_128 (
+ &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
+
+ w -= 8;
+ src += 8;
+ dst += 8;
+ }
+
+ while (w)
+ {
+ s = *src++;
+ d = *dst;
+
+ ms = unpack_32_1x128 (s);
+
+ *dst++ = pack_565_32_16 (
+ pack_1x128_32 (
+ over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst, d;
+ uint32_t *src_line, *src, s;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint32_t opaque, zero;
+
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst_lo, xmm_dst_hi;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ s = *src++;
+ d = *dst;
+
+ *dst++ = pack_1x128_32 (
+ over_rev_non_pre_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (d)));
+
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_src_hi = load_128_unaligned ((__m128i*)src);
+
+ opaque = is_opaque (xmm_src_hi);
+ zero = is_zero (xmm_src_hi);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+
+ if (opaque)
+ {
+ invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+ else if (!zero)
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ w -= 4;
+ dst += 4;
+ src += 4;
+ }
+
+ while (w)
+ {
+ s = *src++;
+ d = *dst;
+
+ *dst++ = pack_1x128_32 (
+ over_rev_non_pre_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (d)));
+
+ w--;
+ }
+ }
+
+}
+
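+/* Solid source OVER r5g6b5 with a per-component a8r8g8b8 mask.  The
+ * movemask/cmpeq test computes pack_cmp, which is 0xffff only when
+ * all four mask pixels are zero, so whole blocks can be skipped.
+ */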
+static void
+sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src;
+ uint16_t *dst_line, *dst, d;
+ uint32_t *mask_line, *mask, m;
+ int dst_stride, mask_stride;
+ int w;
+ uint32_t pack_cmp;
+
+ __m128i xmm_src, xmm_alpha;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+ __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
+
+ __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+ xmm_src = expand_pixel_32_1x128 (src);
+ xmm_alpha = expand_alpha_1x128 (xmm_src);
+ mmx_src = xmm_src;
+ mmx_alpha = xmm_alpha;
+
+ while (height--)
+ {
+ w = width;
+ mask = mask_line;
+ dst = dst_line;
+ mask_line += mask_stride;
+ dst_line += dst_stride;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ m = *(uint32_t *) mask;
+
+ if (m)
+ {
+ d = *dst;
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = expand565_16_1x128 (d);
+
+ *dst = pack_565_32_16 (
+ pack_1x128_32 (
+ in_over_1x128 (
+ &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
+ }
+
+ w--;
+ dst++;
+ mask++;
+ }
+
+ while (w >= 8)
+ {
+ /* First round */
+ xmm_mask = load_128_unaligned ((__m128i*)mask);
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ pack_cmp = _mm_movemask_epi8 (
+ _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
+
+ unpack_565_128_4x128 (xmm_dst,
+ &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+ /* preload next round */
+ xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
+
+ if (pack_cmp != 0xffff)
+ {
+ in_over_2x128 (&xmm_src, &xmm_src,
+ &xmm_alpha, &xmm_alpha,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst0, &xmm_dst1);
+ }
+
+ /* Second round */
+ pack_cmp = _mm_movemask_epi8 (
+ _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
+
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+ if (pack_cmp != 0xffff)
+ {
+ in_over_2x128 (&xmm_src, &xmm_src,
+ &xmm_alpha, &xmm_alpha,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst2, &xmm_dst3);
+ }
+
+ save_128_aligned (
+ (__m128i*)dst, pack_565_4x128_128 (
+ &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
+
+ w -= 8;
+ dst += 8;
+ mask += 8;
+ }
+
+ while (w)
+ {
+ m = *(uint32_t *) mask;
+
+ if (m)
+ {
+ d = *dst;
+ mmx_mask = unpack_32_1x128 (m);
+ mmx_dest = expand565_16_1x128 (d);
+
+ *dst = pack_565_32_16 (
+ pack_1x128_32 (
+ in_over_1x128 (
+ &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
+ }
+
+ w--;
+ dst++;
+ mask++;
+ }
+ }
+
+}
+
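+/* The IN family on a8 destinations is a pure multiply: here each
+ * destination byte becomes alpha(src) * mask * dst, with the solid
+ * source contributing only its alpha channel.
+ */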
+static void
+sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint8_t *mask_line, *mask;
+ int dst_stride, mask_stride;
+ uint32_t d, m;
+ uint32_t src;
+ uint8_t sa;
+ int32_t w;
+
+ __m128i xmm_alpha;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ sa = src >> 24;
+
+ xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ m = (uint32_t) *mask++;
+ d = (uint32_t) *dst;
+
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (xmm_alpha,
+ unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
+ w--;
+ }
+
+ while (w >= 16)
+ {
+ xmm_mask = load_128_unaligned ((__m128i*)mask);
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ mask += 16;
+ dst += 16;
+ w -= 16;
+ }
+
+ while (w)
+ {
+ m = (uint32_t) *mask++;
+ d = (uint32_t) *dst;
+
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (
+ pix_multiply_1x128 (
+ xmm_alpha, unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_in_n_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ int dst_stride;
+ uint32_t d;
+ uint32_t src;
+ int32_t w;
+
+ __m128i xmm_alpha;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
+
+ src = src >> 24;
+
+ if (src == 0xff)
+ return;
+
+ if (src == 0x00)
+ {
+ pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
+ 8, dest_x, dest_y, width, height, src);
+
+ return;
+ }
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ w = width;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ d = (uint32_t) *dst;
+
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (
+ xmm_alpha,
+ unpack_32_1x128 (d)));
+ w--;
+ }
+
+ while (w >= 16)
+ {
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
+ &xmm_dst_lo, &xmm_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ dst += 16;
+ w -= 16;
+ }
+
+ while (w)
+ {
+ d = (uint32_t) *dst;
+
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (
+ xmm_alpha,
+ unpack_32_1x128 (d)));
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_in_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint8_t *src_line, *src;
+ int src_stride, dst_stride;
+ int32_t w;
+ uint32_t s, d;
+
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ s = (uint32_t) *src++;
+ d = (uint32_t) *dst;
+
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (
+ unpack_32_1x128 (s), unpack_32_1x128 (d)));
+ w--;
+ }
+
+ while (w >= 16)
+ {
+ xmm_src = load_128_unaligned ((__m128i*)src);
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_dst_lo, &xmm_dst_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ src += 16;
+ dst += 16;
+ w -= 16;
+ }
+
+ while (w)
+ {
+ s = (uint32_t) *src++;
+ d = (uint32_t) *dst;
+
+ *dst++ = (uint8_t) pack_1x128_32 (
+ pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d)));
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint8_t *mask_line, *mask;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint32_t src;
+ uint8_t sa;
+ uint32_t m, d;
+
+ __m128i xmm_alpha;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ sa = src >> 24;
+
+ xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ m = (uint32_t) *mask++;
+ d = (uint32_t) *dst;
+
+ *dst++ = (uint8_t) pack_1x128_32 (
+ _mm_adds_epu16 (
+ pix_multiply_1x128 (
+ xmm_alpha, unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
+ w--;
+ }
+
+ while (w >= 16)
+ {
+ xmm_mask = load_128_unaligned ((__m128i*)mask);
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
+ &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_mask_lo, &xmm_mask_hi);
+
+ xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
+ xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+ mask += 16;
+ dst += 16;
+ w -= 16;
+ }
+
+ while (w)
+ {
+ m = (uint32_t) *mask++;
+ d = (uint32_t) *dst;
+
+ *dst++ = (uint8_t) pack_1x128_32 (
+ _mm_adds_epu16 (
+ pix_multiply_1x128 (
+ xmm_alpha, unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
+
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_add_n_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ int dst_stride;
+ int32_t w;
+ uint32_t src;
+
+ __m128i xmm_src;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ src >>= 24;
+
+ if (src == 0x00)
+ return;
+
+ if (src == 0xff)
+ {
+ pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
+ 8, dest_x, dest_y, width, height, 0xff);
+
+ return;
+ }
+
+ src = (src << 24) | (src << 16) | (src << 8) | src;
+ xmm_src = _mm_set_epi32 (src, src, src, src);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ w = width;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ *dst = (uint8_t)_mm_cvtsi128_si32 (
+ _mm_adds_epu8 (
+ xmm_src,
+ _mm_cvtsi32_si128 (*dst)));
+
+ w--;
+ dst++;
+ }
+
+ while (w >= 16)
+ {
+ save_128_aligned (
+ (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
+
+ dst += 16;
+ w -= 16;
+ }
+
+ while (w)
+ {
+ *dst = (uint8_t)_mm_cvtsi128_si32 (
+ _mm_adds_epu8 (
+ xmm_src,
+ _mm_cvtsi32_si128 (*dst)));
+
+ w--;
+ dst++;
+ }
+ }
+
+}
+
+static void
+sse2_composite_add_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint8_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint16_t t;
+
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ src = src_line;
+
+ dst_line += dst_stride;
+ src_line += src_stride;
+ w = width;
+
+ /* Small head */
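+	/* t | (0 - (t >> 8)) is a branch-free saturating add: when the
+	 * sum t exceeds 0xff, (t >> 8) is 1, so 0 - (t >> 8) becomes
+	 * 0xffff and the OR clamps the stored byte to 0xff.
+	 */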
+ while (w && (unsigned long)dst & 3)
+ {
+ t = (*dst) + (*src++);
+ *dst++ = t | (0 - (t >> 8));
+ w--;
+ }
+
+ sse2_combine_add_u (imp, op,
+ (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
+
+ /* Small tail */
+ dst += w & 0xfffc;
+ src += w & 0xfffc;
+
+ w &= 3;
+
+ while (w)
+ {
+ t = (*dst) + (*src++);
+ *dst++ = t | (0 - (t >> 8));
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int dst_stride, src_stride;
+
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+
+ sse2_combine_add_u (imp, op, dst, src, NULL, width);
+ }
+
+}
+
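+/* Rectangle copy (blt) for equal source and destination bpp (16 or
+ * 32).  Each scanline is copied with unaligned loads and aligned
+ * stores: a head loop aligns the destination pointer, the main body
+ * moves 64 bytes per iteration, and small tails finish the row.
+ */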
+static pixman_bool_t
+pixman_blt_sse2 (uint32_t *src_bits,
+ uint32_t *dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height)
+{
+ uint8_t * src_bytes;
+ uint8_t * dst_bytes;
+ int byte_width;
+
+ if (src_bpp != dst_bpp)
+ return FALSE;
+
+ if (src_bpp == 16)
+ {
+ src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+ dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+ src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+ dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+ byte_width = 2 * width;
+ src_stride *= 2;
+ dst_stride *= 2;
+ }
+ else if (src_bpp == 32)
+ {
+ src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+ dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+ src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+ dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+ byte_width = 4 * width;
+ src_stride *= 4;
+ dst_stride *= 4;
+ }
+ else
+ {
+ return FALSE;
+ }
+
+ while (height--)
+ {
+ int w;
+ uint8_t *s = src_bytes;
+ uint8_t *d = dst_bytes;
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ w = byte_width;
+
+ while (w >= 2 && ((unsigned long)d & 3))
+ {
+ *(uint16_t *)d = *(uint16_t *)s;
+ w -= 2;
+ s += 2;
+ d += 2;
+ }
+
+ while (w >= 4 && ((unsigned long)d & 15))
+ {
+ *(uint32_t *)d = *(uint32_t *)s;
+
+ w -= 4;
+ s += 4;
+ d += 4;
+ }
+
+ while (w >= 64)
+ {
+ __m128i xmm0, xmm1, xmm2, xmm3;
+
+ xmm0 = load_128_unaligned ((__m128i*)(s));
+ xmm1 = load_128_unaligned ((__m128i*)(s + 16));
+ xmm2 = load_128_unaligned ((__m128i*)(s + 32));
+ xmm3 = load_128_unaligned ((__m128i*)(s + 48));
+
+ save_128_aligned ((__m128i*)(d), xmm0);
+ save_128_aligned ((__m128i*)(d + 16), xmm1);
+ save_128_aligned ((__m128i*)(d + 32), xmm2);
+ save_128_aligned ((__m128i*)(d + 48), xmm3);
+
+ s += 64;
+ d += 64;
+ w -= 64;
+ }
+
+ while (w >= 16)
+ {
+ save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
+
+ w -= 16;
+ d += 16;
+ s += 16;
+ }
+
+ while (w >= 4)
+ {
+ *(uint32_t *)d = *(uint32_t *)s;
+
+ w -= 4;
+ s += 4;
+ d += 4;
+ }
+
+ if (w >= 2)
+ {
+ *(uint16_t *)d = *(uint16_t *)s;
+ w -= 2;
+ s += 2;
+ d += 2;
+ }
+ }
+
+ return TRUE;
+}
+
+static void
+sse2_composite_copy_area (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ pixman_blt_sse2 (src_image->bits.bits,
+ dst_image->bits.bits,
+ src_image->bits.rowstride,
+ dst_image->bits.rowstride,
+ PIXMAN_FORMAT_BPP (src_image->bits.format),
+ PIXMAN_FORMAT_BPP (dst_image->bits.format),
+ src_x, src_y, dest_x, dest_y, width, height);
+}
+
+static void
+sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *src, *src_line, s;
+ uint32_t *dst, *dst_line, d;
+ uint8_t *mask, *mask_line;
+ uint32_t m;
+ int src_stride, mask_stride, dst_stride;
+ int32_t w;
+ __m128i ms;
+
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ src = src_line;
+ src_line += src_stride;
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ s = 0xff000000 | *src++;
+ m = (uint32_t) *mask++;
+ d = *dst;
+ ms = unpack_32_1x128 (s);
+
+ if (m != 0xff)
+ {
+ __m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+ __m128i md = unpack_32_1x128 (d);
+
+ ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md);
+ }
+
+ *dst++ = pack_1x128_32 (ms);
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ m = *(uint32_t*) mask;
+ xmm_src = _mm_or_si128 (
+ load_128_unaligned ((__m128i*)src), mask_ff000000);
+
+ if (m == 0xffffffff)
+ {
+ save_128_aligned ((__m128i*)dst, xmm_src);
+ }
+ else
+ {
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_rev_2x128 (
+ xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ src += 4;
+ dst += 4;
+ mask += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ m = (uint32_t) *mask++;
+
+ if (m)
+ {
+ s = 0xff000000 | *src;
+
+ if (m == 0xff)
+ {
+ *dst = s;
+ }
+ else
+ {
+ __m128i ma, md, ms;
+
+ d = *dst;
+
+ ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+ md = unpack_32_1x128 (d);
+ ms = unpack_32_1x128 (s);
+
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md));
+ }
+
+ }
+
+ src++;
+ dst++;
+ w--;
+ }
+ }
+
+}
+
+static void
+sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *src, *src_line, s;
+ uint32_t *dst, *dst_line, d;
+ uint8_t *mask, *mask_line;
+ uint32_t m;
+ int src_stride, mask_stride, dst_stride;
+ int32_t w;
+
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ src = src_line;
+ src_line += src_stride;
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ uint32_t sa;
+
+ s = *src++;
+ m = (uint32_t) *mask++;
+ d = *dst;
+
+ sa = s >> 24;
+
+ if (m)
+ {
+ if (sa == 0xff && m == 0xff)
+ {
+ *dst = s;
+ }
+ else
+ {
+ __m128i ms, md, ma, msa;
+
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (s);
+ md = unpack_32_1x128 (d);
+
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
+
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
+ }
+ }
+
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ m = *(uint32_t *) mask;
+
+ if (m)
+ {
+ xmm_src = load_128_unaligned ((__m128i*)src);
+
+ if (m == 0xffffffff && is_opaque (xmm_src))
+ {
+ save_128_aligned ((__m128i *)dst, xmm_src);
+ }
+ else
+ {
+ xmm_dst = load_128_aligned ((__m128i *)dst);
+
+ xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
+ &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+ }
+
+ src += 4;
+ dst += 4;
+ mask += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ uint32_t sa;
+
+ s = *src++;
+ m = (uint32_t) *mask++;
+ d = *dst;
+
+ sa = s >> 24;
+
+ if (m)
+ {
+ if (sa == 0xff && m == 0xff)
+ {
+ *dst = s;
+ }
+ else
+ {
+ __m128i ms, md, ma, msa;
+
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (s);
+ md = unpack_32_1x128 (d);
+
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
+
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
+ }
+ }
+
+ dst++;
+ w--;
+ }
+ }
+
+}
+
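+/* OVER_REVERSE with a solid source: the destination stays on top,
+ * so each pixel computes dst + (1 - alpha(dst)) * src, i.e. over_1x128
+ * with the roles of source and destination swapped.
+ */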
+static void
+sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src;
+ uint32_t *dst_line, *dst;
+ __m128i xmm_src;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_dsta_hi, xmm_dsta_lo;
+ int dst_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+ xmm_src = expand_pixel_32_1x128 (src);
+
+ while (height--)
+ {
+ dst = dst_line;
+
+ dst_line += dst_stride;
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ __m128i vd;
+
+ vd = unpack_32_1x128 (*dst);
+
+ *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
+ xmm_src));
+ w--;
+ dst++;
+ }
+
+ while (w >= 4)
+ {
+ __m128i tmp_lo, tmp_hi;
+
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+ expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi);
+
+ tmp_lo = xmm_src;
+ tmp_hi = xmm_src;
+
+ over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+ &xmm_dsta_lo, &xmm_dsta_hi,
+ &tmp_lo, &tmp_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi));
+
+ w -= 4;
+ dst += 4;
+ }
+
+ while (w)
+ {
+ __m128i vd;
+
+ vd = unpack_32_1x128 (*dst);
+
+ *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
+ xmm_src));
+ w--;
+ dst++;
+ }
+
+ }
+
+}
+
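+/* OVER with an a8r8g8b8 mask: only the mask's alpha byte (m >> 24)
+ * is used.  Four-pixel blocks are skipped when the mask is fully
+ * transparent and stored directly when both mask and source are
+ * fully opaque.
+ */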
+static void
+sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *src, *src_line, s;
+ uint32_t *dst, *dst_line, d;
+ uint32_t *mask, *mask_line;
+ uint32_t m;
+ int src_stride, mask_stride, dst_stride;
+ int32_t w;
+
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ src = src_line;
+ src_line += src_stride;
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+
+ w = width;
+
+ while (w && (unsigned long)dst & 15)
+ {
+ uint32_t sa;
+
+ s = *src++;
+ m = (*mask++) >> 24;
+ d = *dst;
+
+ sa = s >> 24;
+
+ if (m)
+ {
+ if (sa == 0xff && m == 0xff)
+ {
+ *dst = s;
+ }
+ else
+ {
+ __m128i ms, md, ma, msa;
+
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (s);
+ md = unpack_32_1x128 (d);
+
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
+
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
+ }
+ }
+
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ xmm_mask = load_128_unaligned ((__m128i*)mask);
+
+ if (!is_transparent (xmm_mask))
+ {
+ xmm_src = load_128_unaligned ((__m128i*)src);
+
+ if (is_opaque (xmm_mask) && is_opaque (xmm_src))
+ {
+ save_128_aligned ((__m128i *)dst, xmm_src);
+ }
+ else
+ {
+ xmm_dst = load_128_aligned ((__m128i *)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
+ expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
+ &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+ }
+
+ src += 4;
+ dst += 4;
+ mask += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ uint32_t sa;
+
+ s = *src++;
+ m = (*mask++) >> 24;
+ d = *dst;
+
+ sa = s >> 24;
+
+ if (m)
+ {
+ if (sa == 0xff && m == 0xff)
+ {
+ *dst = s;
+ }
+ else
+ {
+ __m128i ms, md, ma, msa;
+
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (s);
+ md = unpack_32_1x128 (d);
+
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
+
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
+ }
+ }
+
+ dst++;
+ w--;
+ }
+ }
+
+}
+
+/* A variant of 'sse2_combine_over_u' with minor tweaks */
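+/* 'vx' is a 16.16 fixed-point source x coordinate: each output pixel
+ * steps it by 'unit_x' and reads the nearest source pixel at
+ * ps[vx >> 16].
+ */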
+static force_inline void
+scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
+ const uint32_t* ps,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t fully_transparent_src)
+{
+ uint32_t s, d;
+ const uint32_t* pm = NULL;
+
+ __m128i xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_src_lo, xmm_src_hi;
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+ if (fully_transparent_src)
+ return;
+
+ /* Align dst on a 16-byte boundary */
+ while (w && ((unsigned long)pd & 15))
+ {
+ d = *pd;
+ s = combine1 (ps + (vx >> 16), pm);
+ vx += unit_x;
+
+ *pd++ = core_combine_over_u_pixel_sse2 (s, d);
+ if (pm)
+ pm++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ __m128i tmp;
+ uint32_t tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = ps[vx >> 16];
+ vx += unit_x;
+ tmp2 = ps[vx >> 16];
+ vx += unit_x;
+ tmp3 = ps[vx >> 16];
+ vx += unit_x;
+ tmp4 = ps[vx >> 16];
+ vx += unit_x;
+
+ tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
+
+ xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
+
+ if (is_opaque (xmm_src_hi))
+ {
+ save_128_aligned ((__m128i*)pd, xmm_src_hi);
+ }
+ else if (!is_zero (xmm_src_hi))
+ {
+ xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+
+ unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (
+ xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+
+ over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+	    /* rebuild the 4 pixel data and save */
+ save_128_aligned ((__m128i*)pd,
+ pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ w -= 4;
+ pd += 4;
+ if (pm)
+ pm += 4;
+ }
+
+ while (w)
+ {
+ d = *pd;
+ s = combine1 (ps + (vx >> 16), pm);
+ vx += unit_x;
+
+ *pd++ = core_combine_over_u_pixel_sse2 (s, d);
+ if (pm)
+ pm++;
+
+ w--;
+ }
+}
+
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, COVER)
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, NONE)
+FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
+ scaled_nearest_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, PAD)
+
+static force_inline void
+scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
+ uint32_t * dst,
+ const uint32_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ __m128i xmm_mask;
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+ if (zero_src || (*mask >> 24) == 0)
+ return;
+
+ xmm_mask = create_mask_16_128 (*mask >> 24);
+
+ while (w && (unsigned long)dst & 15)
+ {
+ uint32_t s = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+
+ if (s)
+ {
+ uint32_t d = *dst;
+
+ __m128i ms = unpack_32_1x128 (s);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i dest = xmm_mask;
+ __m128i alpha_dst = unpack_32_1x128 (d);
+
+ *dst = pack_1x128_32 (
+ in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
+ }
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ uint32_t tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp3 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp4 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+
+ xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
+
+ if (!is_zero (xmm_src))
+ {
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_mask, &xmm_mask,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ dst += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ uint32_t s = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+
+ if (s)
+ {
+ uint32_t d = *dst;
+
+ __m128i ms = unpack_32_1x128 (s);
+ __m128i alpha = expand_alpha_1x128 (ms);
+ __m128i mask = xmm_mask;
+ __m128i dest = unpack_32_1x128 (d);
+
+ *dst = pack_1x128_32 (
+ in_over_1x128 (&ms, &alpha, &mask, &dest));
+ }
+
+ dst++;
+ w--;
+ }
+
+}
+
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+
+static void
+bilinear_interpolate_line_sse2 (uint32_t * out,
+ const uint32_t * top,
+ const uint32_t * bottom,
+ int wt,
+ int wb,
+ pixman_fixed_t x,
+ pixman_fixed_t ux,
+ int width)
+{
+ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);
+ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);
+ const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);
+ const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);
+ const __m128i xmm_ux = _mm_set_epi16 (ux, ux, ux, ux, ux, ux, ux, ux);
+ const __m128i xmm_zero = _mm_setzero_si128 ();
+ __m128i xmm_x = _mm_set_epi16 (x, x, x, x, x, x, x, x);
+ uint32_t pix1, pix2, pix3, pix4;
+
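+    /* The horizontal weights in the macro below are derived
+     * branch-free from the top 8 fraction bits of xmm_x: the four
+     * lanes holding the left pixel (where xmm_xorc is 0xff and
+     * xmm_addc is 1) compute (frac ^ 0xff) + 1 = 256 - frac, while
+     * the right-pixel lanes keep frac, giving the (256 - frac, frac)
+     * weight pair.
+     */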
+ #define INTERPOLATE_ONE_PIXEL(pix) \
+ do { \
+ __m128i xmm_wh, xmm_lo, xmm_hi, a; \
+ /* fetch 2x2 pixel block into sse2 register */ \
+ uint32_t tl = top [pixman_fixed_to_int (x)]; \
+ uint32_t tr = top [pixman_fixed_to_int (x) + 1]; \
+ uint32_t bl = bottom [pixman_fixed_to_int (x)]; \
+ uint32_t br = bottom [pixman_fixed_to_int (x) + 1]; \
+ a = _mm_set_epi32 (tr, tl, br, bl); \
+ x += ux; \
+ /* vertical interpolation */ \
+ a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \
+ xmm_wt), \
+ _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \
+ xmm_wb)); \
+ /* calculate horizontal weights */ \
+ xmm_wh = _mm_add_epi16 (xmm_addc, \
+ _mm_xor_si128 (xmm_xorc, \
+ _mm_srli_epi16 (xmm_x, 8))); \
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
+ /* horizontal interpolation */ \
+ xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
+ xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
+ a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
+ _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
+ /* shift and pack the result */ \
+ a = _mm_srli_epi32 (a, 16); \
+ a = _mm_packs_epi32 (a, a); \
+ a = _mm_packus_epi16 (a, a); \
+ pix = _mm_cvtsi128_si32 (a); \
+ } while (0)
+
+ while ((width -= 4) >= 0)
+ {
+ INTERPOLATE_ONE_PIXEL (pix1);
+ INTERPOLATE_ONE_PIXEL (pix2);
+ INTERPOLATE_ONE_PIXEL (pix3);
+ INTERPOLATE_ONE_PIXEL (pix4);
+ *out++ = pix1;
+ *out++ = pix2;
+ *out++ = pix3;
+ *out++ = pix4;
+ }
+ if (width & 2)
+ {
+ INTERPOLATE_ONE_PIXEL (pix1);
+ INTERPOLATE_ONE_PIXEL (pix2);
+ *out++ = pix1;
+ *out++ = pix2;
+ }
+ if (width & 1)
+ {
+ INTERPOLATE_ONE_PIXEL (pix1);
+ *out = pix1;
+ }
+
+ #undef INTERPOLATE_ONE_PIXEL
+}
+
+static force_inline void
+scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
+ const uint32_t * mask,
+ const uint32_t * src_top,
+ const uint32_t * src_bottom,
+ int32_t w,
+ int wt,
+ int wb,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ bilinear_interpolate_line_sse2 (dst, src_top, src_bottom,
+ wt, wb, vx, unit_x, w);
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ COVER, FALSE, FALSE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ PAD, FALSE, FALSE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ NONE, FALSE, FALSE)
+
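+/* Fast path table: each entry matches an operator plus source, mask
+ * and destination formats and routes the composite to one of the
+ * SSE2 routines above.  In the function names, 'n' stands for a
+ * solid source, '8' for an a8 mask, '8888' and '0565' for 32-bit
+ * and r5g6b5 pixels, and '_ca' for component alpha.
+ */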
+static const pixman_fast_path_t sse2_fast_paths[] =
+{
+ /* PIXMAN_OP_OVER */
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565),
+ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
+
+ /* PIXMAN_OP_OVER_REVERSE */
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888),
+
+ /* PIXMAN_OP_ADD */
+ PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
+
+ /* PIXMAN_OP_SRC */
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888),
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888),
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area),
+
+ /* PIXMAN_OP_IN */
+ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8),
+ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
+ PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
+
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
+
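+    /* The implementation walks this table in order and uses the first
+     * matching entry, so more specific paths must come before more general
+     * ones, and the PIXMAN_OP_NONE sentinel below must stay last.
+     */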
+ { PIXMAN_OP_NONE },
+};
+
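+/* Each entry point below follows the same pattern: try the SSE2 routine
+ * first and, when it cannot handle the request (for example an unsupported
+ * bpp), delegate to the next implementation in the fallback chain.
+ */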
+static pixman_bool_t
+sse2_blt (pixman_implementation_t *imp,
+ uint32_t * src_bits,
+ uint32_t * dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height)
+{
+ if (!pixman_blt_sse2 (
+ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+ src_x, src_y, dst_x, dst_y, width, height))
+    {
+ return _pixman_implementation_blt (
+ imp->delegate,
+ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+ src_x, src_y, dst_x, dst_y, width, height);
+ }
+
+ return TRUE;
+}
+
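+/* On 32-bit x86 the incoming stack is not guaranteed to be 16-byte
+ * aligned, so ask GCC to realign the argument pointer before this
+ * function uses SSE2 loads and stores.
+ */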
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+static pixman_bool_t
+sse2_fill (pixman_implementation_t *imp,
+ uint32_t * bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ if (!pixman_fill_sse2 (bits, stride, bpp, x, y, width, height, xor))
+ {
+ return _pixman_implementation_fill (
+ imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+ }
+
+ return TRUE;
+}
+
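+/* Scanline fetcher for x8r8g8b8: copy each pixel while forcing the unused
+ * alpha byte to 0xff.  A scalar head loop aligns the destination buffer to
+ * 16 bytes, a 4-pixel SSE2 body then does aligned stores, and a scalar
+ * tail handles the remainder.
+ */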
+static uint32_t *
+sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int w = iter->width;
+ __m128i ff000000 = mask_ff000000;
+ uint32_t *dst = iter->buffer;
+ uint32_t *src = (uint32_t *)iter->bits;
+
+ iter->bits += iter->stride;
+
+ while (w && ((unsigned long)dst) & 0x0f)
+ {
+ *dst++ = (*src++) | 0xff000000;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ save_128_aligned (
+ (__m128i *)dst, _mm_or_si128 (
+ load_128_unaligned ((__m128i *)src), ff000000));
+
+ dst += 4;
+ src += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ *dst++ = (*src++) | 0xff000000;
+ w--;
+ }
+
+ return iter->buffer;
+}
+
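+/* Scanline fetcher for r5g6b5: widen each 16-bit pixel to a8r8g8b8.  The
+ * SSE2 body unpacks eight 565 pixels against zero, expands the channels
+ * with unpack_565_to_8888 () and ORs in an opaque alpha byte.
+ */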
+static uint32_t *
+sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int w = iter->width;
+ uint32_t *dst = iter->buffer;
+ uint16_t *src = (uint16_t *)iter->bits;
+ __m128i ff000000 = mask_ff000000;
+
+ iter->bits += iter->stride;
+
+ while (w && ((unsigned long)dst) & 0x0f)
+ {
+ uint16_t s = *src++;
+
+ *dst++ = CONVERT_0565_TO_8888 (s);
+ w--;
+ }
+
+ while (w >= 8)
+ {
+ __m128i lo, hi, s;
+
+ s = _mm_loadu_si128 ((__m128i *)src);
+
+ lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ()));
+ hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ()));
+
+ save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000));
+ save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000));
+
+ dst += 8;
+ src += 8;
+ w -= 8;
+ }
+
+ while (w)
+ {
+ uint16_t s = *src++;
+
+ *dst++ = CONVERT_0565_TO_8888 (s);
+ w--;
+ }
+
+ return iter->buffer;
+}
+
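+/* Scanline fetcher for a8: move each alpha byte into bits 31:24 of a
+ * 32-bit pixel.  Interleaving the source with a zero register twice
+ * (bytes to words, then words to dwords) leaves a << 24 in each lane
+ * with the color channels zeroed.
+ */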
+static uint32_t *
+sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int w = iter->width;
+ uint32_t *dst = iter->buffer;
+ uint8_t *src = iter->bits;
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
+
+ iter->bits += iter->stride;
+
+ while (w && (((unsigned long)dst) & 15))
+ {
+ *dst++ = *(src++) << 24;
+ w--;
+ }
+
+ while (w >= 16)
+ {
+ xmm0 = _mm_loadu_si128((__m128i *)src);
+
+ xmm1 = _mm_unpacklo_epi8 (_mm_setzero_si128(), xmm0);
+ xmm2 = _mm_unpackhi_epi8 (_mm_setzero_si128(), xmm0);
+ xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1);
+ xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1);
+ xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2);
+ xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2);
+
+ _mm_store_si128(((__m128i *)(dst + 0)), xmm3);
+ _mm_store_si128(((__m128i *)(dst + 4)), xmm4);
+ _mm_store_si128(((__m128i *)(dst + 8)), xmm5);
+ _mm_store_si128(((__m128i *)(dst + 12)), xmm6);
+
+ dst += 16;
+ src += 16;
+ w -= 16;
+ }
+
+ while (w)
+ {
+ *dst++ = *(src++) << 24;
+ w--;
+ }
+
+ return iter->buffer;
+}
+
+typedef struct
+{
+ pixman_format_code_t format;
+ pixman_iter_get_scanline_t get_scanline;
+} fetcher_info_t;
+
+static const fetcher_info_t fetchers[] =
+{
+ { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
+ { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 },
+ { PIXMAN_a8, sse2_fetch_a8 },
+ { PIXMAN_null }
+};
+
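+/* Use one of the SSE2 fetchers only for a narrow iterator over a standard,
+ * untransformed bits image whose requested rectangle lies entirely inside
+ * the image; every other case is delegated to the general code.
+ */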
+static void
+sse2_src_iter_init (pixman_implementation_t *imp,
+ pixman_iter_t *iter,
+ pixman_image_t *image,
+ int x, int y, int width, int height,
+ uint8_t *buffer, iter_flags_t flags)
+{
+#define FLAGS \
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
+
+ if ((flags & ITER_NARROW) &&
+ (image->common.flags & FLAGS) == FLAGS &&
+ x >= 0 && y >= 0 &&
+ x + width <= image->bits.width &&
+ y + height <= image->bits.height)
+ {
+ const fetcher_info_t *f;
+
+ for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+ {
+ if (image->common.extended_format_code == f->format)
+ {
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4;
+
+ iter->bits = b + s * y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ iter->stride = s;
+ iter->width = width;
+ iter->buffer = (uint32_t *)buffer;
+
+ iter->get_scanline = f->get_scanline;
+ return;
+ }
+ }
+ }
+
+ _pixman_implementation_src_iter_init (
+ imp->delegate, iter, image, x, y, width, height, buffer, flags);
+}
+
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+pixman_implementation_t *
+_pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
+{
+ pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths);
+
+ /* SSE2 constants */
+ mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000);
+ mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000);
+ mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0);
+ mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f);
+ mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000);
+ mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00);
+ mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8);
+ mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0);
+ mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000);
+ mask_0080 = create_mask_16_128 (0x0080);
+ mask_00ff = create_mask_16_128 (0x00ff);
+ mask_0101 = create_mask_16_128 (0x0101);
+ mask_ffff = create_mask_16_128 (0xffff);
+ mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
+ mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
+
+ /* Set up function pointers */
+ imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
+ imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
+ imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
+ imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u;
+ imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u;
+ imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u;
+ imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u;
+ imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u;
+ imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u;
+ imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
+
+ imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
+
+ imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
+ imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;
+ imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca;
+ imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca;
+ imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca;
+ imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca;
+ imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca;
+ imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca;
+ imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca;
+ imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca;
+ imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca;
+
+ imp->blt = sse2_blt;
+ imp->fill = sse2_fill;
+
+ imp->src_iter_init = sse2_src_iter_init;
+
+ return imp;
+}
diff --git a/pixman/pixman/pixman-trap.c b/pixman/pixman/pixman-trap.c
index 6e85acd49..a1a07128e 100644
--- a/pixman/pixman/pixman-trap.c
+++ b/pixman/pixman/pixman-trap.c
@@ -138,8 +138,8 @@ _pixman_edge_multi_init (pixman_edge_t * e,
if (ne > 0)
{
- int nx = ne / e->dy;
- ne -= nx * e->dy;
+ pixman_fixed_48_16_t nx = ne / e->dy;
+ ne -= nx * (pixman_fixed_48_16_t)e->dy;
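+	/* Doing the division and the multiply in 48.16 precision keeps
+	 * nx * e->dy from overflowing int for very tall trapezoids. */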
stepx += nx * e->signdx;
}
diff --git a/pixman/pixman/pixman-version.h b/pixman/pixman/pixman-version.h
new file mode 100644
index 000000000..70642e961
--- /dev/null
+++ b/pixman/pixman/pixman-version.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Carl D. Worth <cworth@cworth.org>
+ */
+
+#ifndef PIXMAN_VERSION_H__
+#define PIXMAN_VERSION_H__
+
+#ifndef PIXMAN_H__
+# error pixman-version.h should only be included by pixman.h
+#endif
+
+#define PIXMAN_VERSION_MAJOR 0
+#define PIXMAN_VERSION_MINOR 19
+#define PIXMAN_VERSION_MICRO 1
+
+#define PIXMAN_VERSION_STRING "0.19.1"
+
+#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
+ ((major) * 10000) \
+ + ((minor) * 100) \
+ + ((micro) * 1))
+
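+/* Example: PIXMAN_VERSION_ENCODE (0, 19, 1) yields 1901, so callers can
+ * test for a minimum library version with a single integer comparison
+ * against PIXMAN_VERSION.
+ */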
+#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \
+ PIXMAN_VERSION_MAJOR, \
+ PIXMAN_VERSION_MINOR, \
+ PIXMAN_VERSION_MICRO)
+
+#endif /* PIXMAN_VERSION_H__ */
diff --git a/pixman/test/Makefile.am b/pixman/test/Makefile.am
index 9dc72199e..d785c2c2e 100644
--- a/pixman/test/Makefile.am
+++ b/pixman/test/Makefile.am
@@ -1,48 +1,48 @@
-AM_CFLAGS = @OPENMP_CFLAGS@
-AM_LDFLAGS = @OPENMP_CFLAGS@ @TESTPROGS_EXTRA_LDFLAGS@
-LDADD = $(top_builddir)/pixman/libpixman-1.la -lm
-INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman
-
-TESTPROGRAMS = \
- a1-trap-test \
- pdf-op-test \
- region-test \
- region-translate-test \
- fetch-test \
- oob-test \
- trap-crasher \
- alpha-loop \
- scaling-crash-test \
- scaling-helpers-test \
- gradient-crash-test \
- alphamap \
- stress-test \
- composite-traps-test \
- blitters-test \
- scaling-test \
- affine-test \
- composite
-
-pdf_op_test_SOURCES = pdf-op-test.c utils.c utils.h
-region_test_SOURCES = region-test.c utils.c utils.h
-blitters_test_SOURCES = blitters-test.c utils.c utils.h
-composite_traps_test_SOURCES = composite-traps-test.c utils.c utils.h
-scaling_test_SOURCES = scaling-test.c utils.c utils.h
-affine_test_SOURCES = affine-test.c utils.c utils.h
-alphamap_SOURCES = alphamap.c utils.c utils.h
-alpha_loop_SOURCES = alpha-loop.c utils.c utils.h
-composite_SOURCES = composite.c utils.c utils.h
-gradient_crash_test_SOURCES = gradient-crash-test.c utils.c utils.h
-stress_test_SOURCES = stress-test.c utils.c utils.h
-scaling_helpers_test_SOURCES = scaling-helpers-test.c utils.c utils.h
-
-# Benchmarks
-
-BENCHMARKS = \
- lowlevel-blt-bench
-
-lowlevel_blt_bench_SOURCES = lowlevel-blt-bench.c utils.c utils.h
-
-noinst_PROGRAMS = $(TESTPROGRAMS) $(BENCHMARKS)
-
-TESTS = $(TESTPROGRAMS)
+AM_CFLAGS = @OPENMP_CFLAGS@
+AM_LDFLAGS = @OPENMP_CFLAGS@ @TESTPROGS_EXTRA_LDFLAGS@
+LDADD = $(top_builddir)/pixman/libpixman-1.la -lm
+INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman
+
+TESTPROGRAMS = \
+ a1-trap-test \
+ pdf-op-test \
+ region-test \
+ region-translate-test \
+ fetch-test \
+ oob-test \
+ trap-crasher \
+ alpha-loop \
+ scaling-crash-test \
+ scaling-helpers-test \
+ gradient-crash-test \
+ alphamap \
+ stress-test \
+ composite-traps-test \
+ blitters-test \
+ scaling-test \
+ affine-test \
+ composite
+
+pdf_op_test_SOURCES = pdf-op-test.c utils.c utils.h
+region_test_SOURCES = region-test.c utils.c utils.h
+blitters_test_SOURCES = blitters-test.c utils.c utils.h
+composite_traps_test_SOURCES = composite-traps-test.c utils.c utils.h
+scaling_test_SOURCES = scaling-test.c utils.c utils.h
+affine_test_SOURCES = affine-test.c utils.c utils.h
+alphamap_SOURCES = alphamap.c utils.c utils.h
+alpha_loop_SOURCES = alpha-loop.c utils.c utils.h
+composite_SOURCES = composite.c utils.c utils.h
+gradient_crash_test_SOURCES = gradient-crash-test.c utils.c utils.h
+stress_test_SOURCES = stress-test.c utils.c utils.h
+scaling_helpers_test_SOURCES = scaling-helpers-test.c utils.c utils.h
+
+# Benchmarks
+
+BENCHMARKS = \
+ lowlevel-blt-bench
+
+lowlevel_blt_bench_SOURCES = lowlevel-blt-bench.c utils.c utils.h
+
+noinst_PROGRAMS = $(TESTPROGRAMS) $(BENCHMARKS)
+
+TESTS = $(TESTPROGRAMS)
diff --git a/pixman/test/composite.c b/pixman/test/composite.c
index a86e5ed68..50a3c0644 100644
--- a/pixman/test/composite.c
+++ b/pixman/test/composite.c
@@ -1,907 +1,907 @@
-/*
- * Copyright © 2005 Eric Anholt
- * Copyright © 2009 Chris Wilson
- * Copyright © 2010 Soeren Sandmann
- * Copyright © 2010 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Eric Anholt not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission. Eric Anholt makes no
- * representations about the suitability of this software for any purpose. It
- * is provided "as is" without express or implied warranty.
- *
- * ERIC ANHOLT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL ERIC ANHOLT BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
- * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-#define PIXMAN_USE_INTERNAL_API
-#include <pixman.h>
-#include <stdio.h>
-#include <stdlib.h> /* abort() */
-#include <math.h>
-#include <config.h>
-#include <time.h>
-#include "utils.h"
-
-typedef struct color_t color_t;
-typedef struct format_t format_t;
-typedef struct image_t image_t;
-typedef struct operator_t operator_t;
-
-struct color_t
-{
- double r, g, b, a;
-};
-
-struct format_t
-{
- pixman_format_code_t format;
- const char *name;
-};
-
-static const color_t colors[] =
-{
- { 1.0, 1.0, 1.0, 1.0 },
- { 1.0, 1.0, 1.0, 0.0 },
- { 0.0, 0.0, 0.0, 1.0 },
- { 0.0, 0.0, 0.0, 0.0 },
- { 1.0, 0.0, 0.0, 1.0 },
- { 0.0, 1.0, 0.0, 1.0 },
- { 0.0, 0.0, 1.0, 1.0 },
- { 0.5, 0.0, 0.0, 0.5 },
-};
-
-static uint16_t
-_color_double_to_short (double d)
-{
- uint32_t i;
-
- i = (uint32_t) (d * 65536);
- i -= (i >> 16);
-
- return i;
-}
-
-static void
-compute_pixman_color (const color_t *color,
- pixman_color_t *out)
-{
- out->red = _color_double_to_short (color->r);
- out->green = _color_double_to_short (color->g);
- out->blue = _color_double_to_short (color->b);
- out->alpha = _color_double_to_short (color->a);
-}
-
-#define REPEAT 0x01000000
-#define FLAGS 0xff000000
-
-static const int sizes[] =
-{
- 0,
- 1,
- 1 | REPEAT,
- 10
-};
-
-static const format_t formats[] =
-{
-#define P(x) { PIXMAN_##x, #x }
-
- /* 32 bpp formats */
- P(a8r8g8b8),
- P(x8r8g8b8),
- P(a8b8g8r8),
- P(x8b8g8r8),
- P(b8g8r8a8),
- P(b8g8r8x8),
- P(x2r10g10b10),
- P(x2b10g10r10),
- P(a2r10g10b10),
- P(a2b10g10r10),
-
- /* 24 bpp formats */
- P(r8g8b8),
- P(b8g8r8),
- P(r5g6b5),
- P(b5g6r5),
-
- /* 16 bpp formats */
- P(x1r5g5b5),
- P(x1b5g5r5),
- P(a1r5g5b5),
- P(a1b5g5r5),
- P(a4b4g4r4),
- P(x4b4g4r4),
- P(a4r4g4b4),
- P(x4r4g4b4),
-
- /* 8 bpp formats */
- P(a8),
- P(r3g3b2),
- P(b2g3r3),
- P(a2r2g2b2),
- P(a2b2g2r2),
- P(x4a4),
-
- /* 4 bpp formats */
- P(a4),
- P(r1g2b1),
- P(b1g2r1),
- P(a1r1g1b1),
- P(a1b1g1r1),
-
- /* 1 bpp formats */
- P(a1)
-#undef P
-};
-
-struct image_t
-{
- pixman_image_t *image;
- const format_t *format;
- const color_t *color;
- pixman_repeat_t repeat;
- int size;
-};
-
-struct operator_t
-{
- pixman_op_t op;
- const char *name;
-};
-
-static const operator_t operators[] =
-{
-#define P(x) { PIXMAN_OP_##x, #x }
- P(CLEAR),
- P(SRC),
- P(DST),
- P(OVER),
- P(OVER_REVERSE),
- P(IN),
- P(IN_REVERSE),
- P(OUT),
- P(OUT_REVERSE),
- P(ATOP),
- P(ATOP_REVERSE),
- P(XOR),
- P(ADD),
- P(SATURATE),
-
- P(DISJOINT_CLEAR),
- P(DISJOINT_SRC),
- P(DISJOINT_DST),
- P(DISJOINT_OVER),
- P(DISJOINT_OVER_REVERSE),
- P(DISJOINT_IN),
- P(DISJOINT_IN_REVERSE),
- P(DISJOINT_OUT),
- P(DISJOINT_OUT_REVERSE),
- P(DISJOINT_ATOP),
- P(DISJOINT_ATOP_REVERSE),
- P(DISJOINT_XOR),
-
- P(CONJOINT_CLEAR),
- P(CONJOINT_SRC),
- P(CONJOINT_DST),
- P(CONJOINT_OVER),
- P(CONJOINT_OVER_REVERSE),
- P(CONJOINT_IN),
- P(CONJOINT_IN_REVERSE),
- P(CONJOINT_OUT),
- P(CONJOINT_OUT_REVERSE),
- P(CONJOINT_ATOP),
- P(CONJOINT_ATOP_REVERSE),
- P(CONJOINT_XOR),
-#undef P
-};
-
-static double
-calc_op (pixman_op_t op, double src, double dst, double srca, double dsta)
-{
-#define mult_chan(src, dst, Fa, Fb) MIN ((src) * (Fa) + (dst) * (Fb), 1.0)
-
- double Fa, Fb;
-
- switch (op)
- {
- case PIXMAN_OP_CLEAR:
- case PIXMAN_OP_DISJOINT_CLEAR:
- case PIXMAN_OP_CONJOINT_CLEAR:
- return mult_chan (src, dst, 0.0, 0.0);
-
- case PIXMAN_OP_SRC:
- case PIXMAN_OP_DISJOINT_SRC:
- case PIXMAN_OP_CONJOINT_SRC:
- return mult_chan (src, dst, 1.0, 0.0);
-
- case PIXMAN_OP_DST:
- case PIXMAN_OP_DISJOINT_DST:
- case PIXMAN_OP_CONJOINT_DST:
- return mult_chan (src, dst, 0.0, 1.0);
-
- case PIXMAN_OP_OVER:
- return mult_chan (src, dst, 1.0, 1.0 - srca);
-
- case PIXMAN_OP_OVER_REVERSE:
- return mult_chan (src, dst, 1.0 - dsta, 1.0);
-
- case PIXMAN_OP_IN:
- return mult_chan (src, dst, dsta, 0.0);
-
- case PIXMAN_OP_IN_REVERSE:
- return mult_chan (src, dst, 0.0, srca);
-
- case PIXMAN_OP_OUT:
- return mult_chan (src, dst, 1.0 - dsta, 0.0);
-
- case PIXMAN_OP_OUT_REVERSE:
- return mult_chan (src, dst, 0.0, 1.0 - srca);
-
- case PIXMAN_OP_ATOP:
- return mult_chan (src, dst, dsta, 1.0 - srca);
-
- case PIXMAN_OP_ATOP_REVERSE:
- return mult_chan (src, dst, 1.0 - dsta, srca);
-
- case PIXMAN_OP_XOR:
- return mult_chan (src, dst, 1.0 - dsta, 1.0 - srca);
-
- case PIXMAN_OP_ADD:
- return mult_chan (src, dst, 1.0, 1.0);
-
- case PIXMAN_OP_SATURATE:
- case PIXMAN_OP_DISJOINT_OVER_REVERSE:
- if (srca == 0.0)
- Fa = 1.0;
- else
- Fa = MIN (1.0, (1.0 - dsta) / srca);
- return mult_chan (src, dst, Fa, 1.0);
-
- case PIXMAN_OP_DISJOINT_OVER:
- if (dsta == 0.0)
- Fb = 1.0;
- else
- Fb = MIN (1.0, (1.0 - srca) / dsta);
- return mult_chan (src, dst, 1.0, Fb);
-
- case PIXMAN_OP_DISJOINT_IN:
- if (srca == 0.0)
- Fa = 0.0;
- else
- Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca);
- return mult_chan (src, dst, Fa, 0.0);
-
- case PIXMAN_OP_DISJOINT_IN_REVERSE:
- if (dsta == 0.0)
- Fb = 0.0;
- else
- Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta);
- return mult_chan (src, dst, 0.0, Fb);
-
- case PIXMAN_OP_DISJOINT_OUT:
- if (srca == 0.0)
- Fa = 1.0;
- else
- Fa = MIN (1.0, (1.0 - dsta) / srca);
- return mult_chan (src, dst, Fa, 0.0);
-
- case PIXMAN_OP_DISJOINT_OUT_REVERSE:
- if (dsta == 0.0)
- Fb = 1.0;
- else
- Fb = MIN (1.0, (1.0 - srca) / dsta);
- return mult_chan (src, dst, 0.0, Fb);
-
- case PIXMAN_OP_DISJOINT_ATOP:
- if (srca == 0.0)
- Fa = 0.0;
- else
- Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca);
- if (dsta == 0.0)
- Fb = 1.0;
- else
- Fb = MIN (1.0, (1.0 - srca) / dsta);
- return mult_chan (src, dst, Fa, Fb);
-
- case PIXMAN_OP_DISJOINT_ATOP_REVERSE:
- if (srca == 0.0)
- Fa = 1.0;
- else
- Fa = MIN (1.0, (1.0 - dsta) / srca);
- if (dsta == 0.0)
- Fb = 0.0;
- else
- Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta);
- return mult_chan (src, dst, Fa, Fb);
-
- case PIXMAN_OP_DISJOINT_XOR:
- if (srca == 0.0)
- Fa = 1.0;
- else
- Fa = MIN (1.0, (1.0 - dsta) / srca);
- if (dsta == 0.0)
- Fb = 1.0;
- else
- Fb = MIN (1.0, (1.0 - srca) / dsta);
- return mult_chan (src, dst, Fa, Fb);
-
- case PIXMAN_OP_CONJOINT_OVER:
- if (dsta == 0.0)
- Fb = 0.0;
- else
- Fb = MAX (0.0, 1.0 - srca / dsta);
- return mult_chan (src, dst, 1.0, Fb);
-
- case PIXMAN_OP_CONJOINT_OVER_REVERSE:
- if (srca == 0.0)
- Fa = 0.0;
- else
- Fa = MAX (0.0, 1.0 - dsta / srca);
- return mult_chan (src, dst, Fa, 1.0);
-
- case PIXMAN_OP_CONJOINT_IN:
- if (srca == 0.0)
- Fa = 1.0;
- else
- Fa = MIN (1.0, dsta / srca);
- return mult_chan (src, dst, Fa, 0.0);
-
- case PIXMAN_OP_CONJOINT_IN_REVERSE:
- if (dsta == 0.0)
- Fb = 1.0;
- else
- Fb = MIN (1.0, srca / dsta);
- return mult_chan (src, dst, 0.0, Fb);
-
- case PIXMAN_OP_CONJOINT_OUT:
- if (srca == 0.0)
- Fa = 0.0;
- else
- Fa = MAX (0.0, 1.0 - dsta / srca);
- return mult_chan (src, dst, Fa, 0.0);
-
- case PIXMAN_OP_CONJOINT_OUT_REVERSE:
- if (dsta == 0.0)
- Fb = 0.0;
- else
- Fb = MAX (0.0, 1.0 - srca / dsta);
- return mult_chan (src, dst, 0.0, Fb);
-
- case PIXMAN_OP_CONJOINT_ATOP:
- if (srca == 0.0)
- Fa = 1.0;
- else
- Fa = MIN (1.0, dsta / srca);
- if (dsta == 0.0)
- Fb = 0.0;
- else
- Fb = MAX (0.0, 1.0 - srca / dsta);
- return mult_chan (src, dst, Fa, Fb);
-
- case PIXMAN_OP_CONJOINT_ATOP_REVERSE:
- if (srca == 0.0)
- Fa = 0.0;
- else
- Fa = MAX (0.0, 1.0 - dsta / srca);
- if (dsta == 0.0)
- Fb = 1.0;
- else
- Fb = MIN (1.0, srca / dsta);
- return mult_chan (src, dst, Fa, Fb);
-
- case PIXMAN_OP_CONJOINT_XOR:
- if (srca == 0.0)
- Fa = 0.0;
- else
- Fa = MAX (0.0, 1.0 - dsta / srca);
- if (dsta == 0.0)
- Fb = 0.0;
- else
- Fb = MAX (0.0, 1.0 - srca / dsta);
- return mult_chan (src, dst, Fa, Fb);
-
- case PIXMAN_OP_MULTIPLY:
- case PIXMAN_OP_SCREEN:
- case PIXMAN_OP_OVERLAY:
- case PIXMAN_OP_DARKEN:
- case PIXMAN_OP_LIGHTEN:
- case PIXMAN_OP_COLOR_DODGE:
- case PIXMAN_OP_COLOR_BURN:
- case PIXMAN_OP_HARD_LIGHT:
- case PIXMAN_OP_SOFT_LIGHT:
- case PIXMAN_OP_DIFFERENCE:
- case PIXMAN_OP_EXCLUSION:
- case PIXMAN_OP_HSL_HUE:
- case PIXMAN_OP_HSL_SATURATION:
- case PIXMAN_OP_HSL_COLOR:
- case PIXMAN_OP_HSL_LUMINOSITY:
- default:
- abort();
- return 0; /* silence MSVC */
- }
-#undef mult_chan
-}
-
-static void
-do_composite (pixman_op_t op,
- const color_t *src,
- const color_t *mask,
- const color_t *dst,
- color_t *result,
- pixman_bool_t component_alpha)
-{
- color_t srcval, srcalpha;
-
- if (mask == NULL)
- {
- srcval = *src;
-
- srcalpha.r = src->a;
- srcalpha.g = src->a;
- srcalpha.b = src->a;
- srcalpha.a = src->a;
- }
- else if (component_alpha)
- {
- srcval.r = src->r * mask->r;
- srcval.g = src->g * mask->g;
- srcval.b = src->b * mask->b;
- srcval.a = src->a * mask->a;
-
- srcalpha.r = src->a * mask->r;
- srcalpha.g = src->a * mask->g;
- srcalpha.b = src->a * mask->b;
- srcalpha.a = src->a * mask->a;
- }
- else
- {
- srcval.r = src->r * mask->a;
- srcval.g = src->g * mask->a;
- srcval.b = src->b * mask->a;
- srcval.a = src->a * mask->a;
-
- srcalpha.r = src->a * mask->a;
- srcalpha.g = src->a * mask->a;
- srcalpha.b = src->a * mask->a;
- srcalpha.a = src->a * mask->a;
- }
-
- result->r = calc_op (op, srcval.r, dst->r, srcalpha.r, dst->a);
- result->g = calc_op (op, srcval.g, dst->g, srcalpha.g, dst->a);
- result->b = calc_op (op, srcval.b, dst->b, srcalpha.b, dst->a);
- result->a = calc_op (op, srcval.a, dst->a, srcalpha.a, dst->a);
-}
-
-static void
-color_correct (pixman_format_code_t format,
- color_t *color)
-{
-#define MASK(x) ((1 << (x)) - 1)
-#define round_pix(pix, m) \
- ((int)((pix) * (MASK(m)) + .5) / (double) (MASK(m)))
-
- if (PIXMAN_FORMAT_R (format) == 0)
- {
- color->r = 0.0;
- color->g = 0.0;
- color->b = 0.0;
- }
- else
- {
- color->r = round_pix (color->r, PIXMAN_FORMAT_R (format));
- color->g = round_pix (color->g, PIXMAN_FORMAT_G (format));
- color->b = round_pix (color->b, PIXMAN_FORMAT_B (format));
- }
-
- if (PIXMAN_FORMAT_A (format) == 0)
- color->a = 1.0;
- else
- color->a = round_pix (color->a, PIXMAN_FORMAT_A (format));
-
-#undef round_pix
-#undef MASK
-}
-
-static void
-get_pixel (pixman_image_t *image,
- pixman_format_code_t format,
- color_t *color)
-{
-#define MASK(N) ((1UL << (N))-1)
-
- unsigned long rs, gs, bs, as;
- int a, r, g, b;
- unsigned long val;
-
- val = *(unsigned long *) pixman_image_get_data (image);
-#ifdef WORDS_BIGENDIAN
- val >>= 8 * sizeof(val) - PIXMAN_FORMAT_BPP (format);
-#endif
-
- /* Number of bits in each channel */
- a = PIXMAN_FORMAT_A (format);
- r = PIXMAN_FORMAT_R (format);
- g = PIXMAN_FORMAT_G (format);
- b = PIXMAN_FORMAT_B (format);
-
- switch (PIXMAN_FORMAT_TYPE (format))
- {
- case PIXMAN_TYPE_ARGB:
- bs = 0;
- gs = b + bs;
- rs = g + gs;
- as = r + rs;
- break;
-
- case PIXMAN_TYPE_ABGR:
- rs = 0;
- gs = r + rs;
- bs = g + gs;
- as = b + bs;
- break;
-
- case PIXMAN_TYPE_BGRA:
- as = 0;
- rs = PIXMAN_FORMAT_BPP (format) - (b + g + r);
- gs = r + rs;
- bs = g + gs;
- break;
-
- case PIXMAN_TYPE_A:
- as = 0;
- rs = 0;
- gs = 0;
- bs = 0;
- break;
-
- case PIXMAN_TYPE_OTHER:
- case PIXMAN_TYPE_COLOR:
- case PIXMAN_TYPE_GRAY:
- case PIXMAN_TYPE_YUY2:
- case PIXMAN_TYPE_YV12:
- default:
- abort ();
- as = 0;
- rs = 0;
- gs = 0;
- bs = 0;
- break;
- }
-
- if (MASK (a) != 0)
- color->a = ((val >> as) & MASK (a)) / (double) MASK (a);
- else
- color->a = 1.0;
-
- if (MASK (r) != 0)
- {
- color->r = ((val >> rs) & MASK (r)) / (double) MASK (r);
- color->g = ((val >> gs) & MASK (g)) / (double) MASK (g);
- color->b = ((val >> bs) & MASK (b)) / (double) MASK (b);
- }
- else
- {
- color->r = 0.0;
- color->g = 0.0;
- color->b = 0.0;
- }
-
-#undef MASK
-}
-
-static double
-eval_diff (color_t *expected, color_t *test, pixman_format_code_t format)
-{
- double rscale, gscale, bscale, ascale;
- double rdiff, gdiff, bdiff, adiff;
-
- rscale = 1.0 * ((1 << PIXMAN_FORMAT_R (format)) - 1);
- gscale = 1.0 * ((1 << PIXMAN_FORMAT_G (format)) - 1);
- bscale = 1.0 * ((1 << PIXMAN_FORMAT_B (format)) - 1);
- ascale = 1.0 * ((1 << PIXMAN_FORMAT_A (format)) - 1);
-
- rdiff = fabs (test->r - expected->r) * rscale;
- bdiff = fabs (test->g - expected->g) * gscale;
- gdiff = fabs (test->b - expected->b) * bscale;
- adiff = fabs (test->a - expected->a) * ascale;
-
- return MAX (MAX (MAX (rdiff, gdiff), bdiff), adiff);
-}
-
-static char *
-describe_image (image_t *info, char *buf)
-{
- if (info->size)
- {
- sprintf (buf, "%s %dx%d%s",
- info->format->name,
- info->size, info->size,
- info->repeat ? "R" :"");
- }
- else
- {
- sprintf (buf, "solid");
- }
-
- return buf;
-}
-
-/* Test a composite of a given operation, source, mask, and destination
- * picture.
- * Fills the window, and samples from the 0,0 pixel corner.
- */
-static pixman_bool_t
-composite_test (image_t *dst,
- const operator_t *op,
- image_t *src,
- image_t *mask,
- pixman_bool_t component_alpha)
-{
- pixman_color_t fill;
- pixman_rectangle16_t rect;
- color_t expected, result, tdst, tsrc, tmsk;
- double diff;
- pixman_bool_t success = TRUE;
-
- compute_pixman_color (dst->color, &fill);
- rect.x = rect.y = 0;
- rect.width = rect.height = dst->size;
- pixman_image_fill_rectangles (PIXMAN_OP_SRC, dst->image,
- &fill, 1, &rect);
-
- if (mask != NULL)
- {
- pixman_image_set_component_alpha (mask->image, component_alpha);
- pixman_image_composite (op->op, src->image, mask->image, dst->image,
- 0, 0,
- 0, 0,
- 0, 0,
- dst->size, dst->size);
-
- tmsk = *mask->color;
- if (mask->size)
- {
- color_correct (mask->format->format, &tmsk);
-
- if (component_alpha &&
- PIXMAN_FORMAT_R (mask->format->format) == 0)
- {
- /* Ax component-alpha masks expand alpha into
- * all color channels.
- */
- tmsk.r = tmsk.g = tmsk.b = tmsk.a;
- }
- }
- }
- else
- {
- pixman_image_composite (op->op, src->image, NULL, dst->image,
- 0, 0,
- 0, 0,
- 0, 0,
- dst->size, dst->size);
- }
- get_pixel (dst->image, dst->format->format, &result);
-
- tdst = *dst->color;
- color_correct (dst->format->format, &tdst);
- tsrc = *src->color;
- if (src->size)
- color_correct (src->format->format, &tsrc);
- do_composite (op->op, &tsrc, mask ? &tmsk : NULL, &tdst,
- &expected, component_alpha);
- color_correct (dst->format->format, &expected);
-
- diff = eval_diff (&expected, &result, dst->format->format);
-
- /* FIXME: We should find out what deviation is acceptable. 3.0
- * is clearly absurd for 2 bit formats for example. On the other
- * hand currently 1.0 does not work.
- */
- if (diff > 3.0)
- {
- char buf[40];
-
- sprintf (buf, "%s %scomposite",
- op->name,
- component_alpha ? "CA " : "");
-
- printf ("%s test error of %.4f --\n"
- " R G B A\n"
- "got: %.2f %.2f %.2f %.2f [%08lx]\n"
- "expected: %.2f %.2f %.2f %.2f\n",
- buf, diff,
- result.r, result.g, result.b, result.a,
- *(unsigned long *) pixman_image_get_data (dst->image),
- expected.r, expected.g, expected.b, expected.a);
-
- if (mask != NULL)
- {
- printf ("src color: %.2f %.2f %.2f %.2f\n"
- "msk color: %.2f %.2f %.2f %.2f\n"
- "dst color: %.2f %.2f %.2f %.2f\n",
- src->color->r, src->color->g,
- src->color->b, src->color->a,
- mask->color->r, mask->color->g,
- mask->color->b, mask->color->a,
- dst->color->r, dst->color->g,
- dst->color->b, dst->color->a);
- printf ("src: %s, ", describe_image (src, buf));
- printf ("mask: %s, ", describe_image (mask, buf));
- printf ("dst: %s\n\n", describe_image (dst, buf));
- }
- else
- {
- printf ("src color: %.2f %.2f %.2f %.2f\n"
- "dst color: %.2f %.2f %.2f %.2f\n",
- src->color->r, src->color->g,
- src->color->b, src->color->a,
- dst->color->r, dst->color->g,
- dst->color->b, dst->color->a);
- printf ("src: %s, ", describe_image (src, buf));
- printf ("dst: %s\n\n", describe_image (dst, buf));
- }
-
- success = FALSE;
- }
-
- return success;
-}
-
-static void
-image_init (image_t *info,
- int color,
- int format,
- int size)
-{
- pixman_color_t fill;
-
- info->color = &colors[color];
- compute_pixman_color (info->color, &fill);
-
- info->format = &formats[format];
- info->size = sizes[size] & ~FLAGS;
- info->repeat = PIXMAN_REPEAT_NONE;
-
- if (info->size)
- {
- pixman_rectangle16_t rect;
-
- info->image = pixman_image_create_bits (info->format->format,
- info->size, info->size,
- NULL, 0);
-
- rect.x = rect.y = 0;
- rect.width = rect.height = info->size;
- pixman_image_fill_rectangles (PIXMAN_OP_SRC, info->image, &fill,
- 1, &rect);
-
- if (size & REPEAT)
- {
- pixman_image_set_repeat (info->image, PIXMAN_REPEAT_NORMAL);
- info->repeat = PIXMAN_REPEAT_NORMAL;
- }
- }
- else
- {
- info->image = pixman_image_create_solid_fill (&fill);
- }
-}
-
-static void
-image_fini (image_t *info)
-{
- pixman_image_unref (info->image);
-}
-
-static int
-random_size (void)
-{
- return lcg_rand_n (ARRAY_LENGTH (sizes));
-}
-
-static int
-random_color (void)
-{
- return lcg_rand_n (ARRAY_LENGTH (colors));
-}
-
-static int
-random_format (void)
-{
- return lcg_rand_n (ARRAY_LENGTH (formats));
-}
-
-static pixman_bool_t
-run_test (uint32_t seed)
-{
- image_t src, mask, dst;
- const operator_t *op;
- int ca;
- int ok;
-
- lcg_srand (seed);
-
- image_init (&dst, random_color(), random_format(), 1);
- image_init (&src, random_color(), random_format(), random_size());
- image_init (&mask, random_color(), random_format(), random_size());
-
- op = &(operators [lcg_rand_n (ARRAY_LENGTH (operators))]);
-
- ca = lcg_rand_n (3);
-
- switch (ca)
- {
- case 0:
- ok = composite_test (&dst, op, &src, NULL, FALSE);
- break;
- case 1:
- ok = composite_test (&dst, op, &src, &mask, FALSE);
- break;
- case 2:
- ok = composite_test (&dst, op, &src, &mask,
- mask.size? TRUE : FALSE);
- break;
- default:
- ok = FALSE;
- break;
- }
-
- image_fini (&src);
- image_fini (&mask);
- image_fini (&dst);
-
- return ok;
-}
-
-int
-main (int argc, char **argv)
-{
-#define N_TESTS (8 * 1024 * 1024)
- int result = 0;
- int i;
-
- if (argc > 1)
- {
- char *end;
-
- i = strtol (argv[1], &end, 0);
-
- if (end != argv[1])
- {
- if (!run_test (i))
- return 1;
- else
- return 0;
- }
- else
- {
- printf ("Usage:\n\n %s <number>\n\n", argv[0]);
- return -1;
- }
- }
-
-#ifdef USE_OPENMP
-# pragma omp parallel for default(none) shared(result) shared(argv)
-#endif
- for (i = 1; i <= N_TESTS; ++i)
- {
- if (!result && !run_test (i))
- {
- printf ("Test %d failed.\n", i);
-
- result = i;
- }
- }
-
- return result;
-}
+/*
+ * Copyright © 2005 Eric Anholt
+ * Copyright © 2009 Chris Wilson
+ * Copyright © 2010 Soeren Sandmann
+ * Copyright © 2010 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Eric Anholt not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Eric Anholt makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * ERIC ANHOLT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL ERIC ANHOLT BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+#define PIXMAN_USE_INTERNAL_API
+#include <pixman.h>
+#include <stdio.h>
+#include <stdlib.h> /* abort() */
+#include <math.h>
+#include <config.h>
+#include <time.h>
+#include "utils.h"
+
+typedef struct color_t color_t;
+typedef struct format_t format_t;
+typedef struct image_t image_t;
+typedef struct operator_t operator_t;
+
+struct color_t
+{
+ double r, g, b, a;
+};
+
+struct format_t
+{
+ pixman_format_code_t format;
+ const char *name;
+};
+
+static const color_t colors[] =
+{
+ { 1.0, 1.0, 1.0, 1.0 },
+ { 1.0, 1.0, 1.0, 0.0 },
+ { 0.0, 0.0, 0.0, 1.0 },
+ { 0.0, 0.0, 0.0, 0.0 },
+ { 1.0, 0.0, 0.0, 1.0 },
+ { 0.0, 1.0, 0.0, 1.0 },
+ { 0.0, 0.0, 1.0, 1.0 },
+ { 0.5, 0.0, 0.0, 0.5 },
+};
+
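+/* Map a double in [0.0, 1.0] onto [0, 65535]: scale by 65536, then
+ * subtract the carry bit so that exactly 1.0 becomes 0xffff rather than
+ * overflowing to 0x10000.
+ */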
+static uint16_t
+_color_double_to_short (double d)
+{
+ uint32_t i;
+
+ i = (uint32_t) (d * 65536);
+ i -= (i >> 16);
+
+ return i;
+}
+
+static void
+compute_pixman_color (const color_t *color,
+ pixman_color_t *out)
+{
+ out->red = _color_double_to_short (color->r);
+ out->green = _color_double_to_short (color->g);
+ out->blue = _color_double_to_short (color->b);
+ out->alpha = _color_double_to_short (color->a);
+}
+
+#define REPEAT 0x01000000
+#define FLAGS 0xff000000
+
+static const int sizes[] =
+{
+ 0,
+ 1,
+ 1 | REPEAT,
+ 10
+};
+
+static const format_t formats[] =
+{
+#define P(x) { PIXMAN_##x, #x }
+
+ /* 32 bpp formats */
+ P(a8r8g8b8),
+ P(x8r8g8b8),
+ P(a8b8g8r8),
+ P(x8b8g8r8),
+ P(b8g8r8a8),
+ P(b8g8r8x8),
+ P(x2r10g10b10),
+ P(x2b10g10r10),
+ P(a2r10g10b10),
+ P(a2b10g10r10),
+
+ /* 24 bpp formats */
+ P(r8g8b8),
+ P(b8g8r8),
+ P(r5g6b5),
+ P(b5g6r5),
+
+ /* 16 bpp formats */
+ P(x1r5g5b5),
+ P(x1b5g5r5),
+ P(a1r5g5b5),
+ P(a1b5g5r5),
+ P(a4b4g4r4),
+ P(x4b4g4r4),
+ P(a4r4g4b4),
+ P(x4r4g4b4),
+
+ /* 8 bpp formats */
+ P(a8),
+ P(r3g3b2),
+ P(b2g3r3),
+ P(a2r2g2b2),
+ P(a2b2g2r2),
+ P(x4a4),
+
+ /* 4 bpp formats */
+ P(a4),
+ P(r1g2b1),
+ P(b1g2r1),
+ P(a1r1g1b1),
+ P(a1b1g1r1),
+
+ /* 1 bpp formats */
+ P(a1)
+#undef P
+};
+
+struct image_t
+{
+ pixman_image_t *image;
+ const format_t *format;
+ const color_t *color;
+ pixman_repeat_t repeat;
+ int size;
+};
+
+struct operator_t
+{
+ pixman_op_t op;
+ const char *name;
+};
+
+static const operator_t operators[] =
+{
+#define P(x) { PIXMAN_OP_##x, #x }
+ P(CLEAR),
+ P(SRC),
+ P(DST),
+ P(OVER),
+ P(OVER_REVERSE),
+ P(IN),
+ P(IN_REVERSE),
+ P(OUT),
+ P(OUT_REVERSE),
+ P(ATOP),
+ P(ATOP_REVERSE),
+ P(XOR),
+ P(ADD),
+ P(SATURATE),
+
+ P(DISJOINT_CLEAR),
+ P(DISJOINT_SRC),
+ P(DISJOINT_DST),
+ P(DISJOINT_OVER),
+ P(DISJOINT_OVER_REVERSE),
+ P(DISJOINT_IN),
+ P(DISJOINT_IN_REVERSE),
+ P(DISJOINT_OUT),
+ P(DISJOINT_OUT_REVERSE),
+ P(DISJOINT_ATOP),
+ P(DISJOINT_ATOP_REVERSE),
+ P(DISJOINT_XOR),
+
+ P(CONJOINT_CLEAR),
+ P(CONJOINT_SRC),
+ P(CONJOINT_DST),
+ P(CONJOINT_OVER),
+ P(CONJOINT_OVER_REVERSE),
+ P(CONJOINT_IN),
+ P(CONJOINT_IN_REVERSE),
+ P(CONJOINT_OUT),
+ P(CONJOINT_OUT_REVERSE),
+ P(CONJOINT_ATOP),
+ P(CONJOINT_ATOP_REVERSE),
+ P(CONJOINT_XOR),
+#undef P
+};
+
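+/* Evaluate one channel of an operator in floating point.  Every operator
+ * reduces to MIN (src * Fa + dst * Fb, 1.0) for per-operator factors Fa
+ * and Fb; the disjoint and conjoint variants derive their factors from
+ * the source and destination alphas.
+ */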
+static double
+calc_op (pixman_op_t op, double src, double dst, double srca, double dsta)
+{
+#define mult_chan(src, dst, Fa, Fb) MIN ((src) * (Fa) + (dst) * (Fb), 1.0)
+
+ double Fa, Fb;
+
+ switch (op)
+ {
+ case PIXMAN_OP_CLEAR:
+ case PIXMAN_OP_DISJOINT_CLEAR:
+ case PIXMAN_OP_CONJOINT_CLEAR:
+ return mult_chan (src, dst, 0.0, 0.0);
+
+ case PIXMAN_OP_SRC:
+ case PIXMAN_OP_DISJOINT_SRC:
+ case PIXMAN_OP_CONJOINT_SRC:
+ return mult_chan (src, dst, 1.0, 0.0);
+
+ case PIXMAN_OP_DST:
+ case PIXMAN_OP_DISJOINT_DST:
+ case PIXMAN_OP_CONJOINT_DST:
+ return mult_chan (src, dst, 0.0, 1.0);
+
+ case PIXMAN_OP_OVER:
+ return mult_chan (src, dst, 1.0, 1.0 - srca);
+
+ case PIXMAN_OP_OVER_REVERSE:
+ return mult_chan (src, dst, 1.0 - dsta, 1.0);
+
+ case PIXMAN_OP_IN:
+ return mult_chan (src, dst, dsta, 0.0);
+
+ case PIXMAN_OP_IN_REVERSE:
+ return mult_chan (src, dst, 0.0, srca);
+
+ case PIXMAN_OP_OUT:
+ return mult_chan (src, dst, 1.0 - dsta, 0.0);
+
+ case PIXMAN_OP_OUT_REVERSE:
+ return mult_chan (src, dst, 0.0, 1.0 - srca);
+
+ case PIXMAN_OP_ATOP:
+ return mult_chan (src, dst, dsta, 1.0 - srca);
+
+ case PIXMAN_OP_ATOP_REVERSE:
+ return mult_chan (src, dst, 1.0 - dsta, srca);
+
+ case PIXMAN_OP_XOR:
+ return mult_chan (src, dst, 1.0 - dsta, 1.0 - srca);
+
+ case PIXMAN_OP_ADD:
+ return mult_chan (src, dst, 1.0, 1.0);
+
+ case PIXMAN_OP_SATURATE:
+ case PIXMAN_OP_DISJOINT_OVER_REVERSE:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = MIN (1.0, (1.0 - dsta) / srca);
+ return mult_chan (src, dst, Fa, 1.0);
+
+ case PIXMAN_OP_DISJOINT_OVER:
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = MIN (1.0, (1.0 - srca) / dsta);
+ return mult_chan (src, dst, 1.0, Fb);
+
+ case PIXMAN_OP_DISJOINT_IN:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca);
+ return mult_chan (src, dst, Fa, 0.0);
+
+ case PIXMAN_OP_DISJOINT_IN_REVERSE:
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta);
+ return mult_chan (src, dst, 0.0, Fb);
+
+ case PIXMAN_OP_DISJOINT_OUT:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = MIN (1.0, (1.0 - dsta) / srca);
+ return mult_chan (src, dst, Fa, 0.0);
+
+ case PIXMAN_OP_DISJOINT_OUT_REVERSE:
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = MIN (1.0, (1.0 - srca) / dsta);
+ return mult_chan (src, dst, 0.0, Fb);
+
+ case PIXMAN_OP_DISJOINT_ATOP:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca);
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = MIN (1.0, (1.0 - srca) / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_DISJOINT_ATOP_REVERSE:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = MIN (1.0, (1.0 - dsta) / srca);
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_DISJOINT_XOR:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = MIN (1.0, (1.0 - dsta) / srca);
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = MIN (1.0, (1.0 - srca) / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_CONJOINT_OVER:
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = MAX (0.0, 1.0 - srca / dsta);
+ return mult_chan (src, dst, 1.0, Fb);
+
+ case PIXMAN_OP_CONJOINT_OVER_REVERSE:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = MAX (0.0, 1.0 - dsta / srca);
+ return mult_chan (src, dst, Fa, 1.0);
+
+ case PIXMAN_OP_CONJOINT_IN:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = MIN (1.0, dsta / srca);
+ return mult_chan (src, dst, Fa, 0.0);
+
+ case PIXMAN_OP_CONJOINT_IN_REVERSE:
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = MIN (1.0, srca / dsta);
+ return mult_chan (src, dst, 0.0, Fb);
+
+ case PIXMAN_OP_CONJOINT_OUT:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = MAX (0.0, 1.0 - dsta / srca);
+ return mult_chan (src, dst, Fa, 0.0);
+
+ case PIXMAN_OP_CONJOINT_OUT_REVERSE:
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = MAX (0.0, 1.0 - srca / dsta);
+ return mult_chan (src, dst, 0.0, Fb);
+
+ case PIXMAN_OP_CONJOINT_ATOP:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = MIN (1.0, dsta / srca);
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = MAX (0.0, 1.0 - srca / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_CONJOINT_ATOP_REVERSE:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = MAX (0.0, 1.0 - dsta / srca);
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = MIN (1.0, srca / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_CONJOINT_XOR:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = MAX (0.0, 1.0 - dsta / srca);
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = MAX (0.0, 1.0 - srca / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_MULTIPLY:
+ case PIXMAN_OP_SCREEN:
+ case PIXMAN_OP_OVERLAY:
+ case PIXMAN_OP_DARKEN:
+ case PIXMAN_OP_LIGHTEN:
+ case PIXMAN_OP_COLOR_DODGE:
+ case PIXMAN_OP_COLOR_BURN:
+ case PIXMAN_OP_HARD_LIGHT:
+ case PIXMAN_OP_SOFT_LIGHT:
+ case PIXMAN_OP_DIFFERENCE:
+ case PIXMAN_OP_EXCLUSION:
+ case PIXMAN_OP_HSL_HUE:
+ case PIXMAN_OP_HSL_SATURATION:
+ case PIXMAN_OP_HSL_COLOR:
+ case PIXMAN_OP_HSL_LUMINOSITY:
+ default:
+ abort();
+ return 0; /* silence MSVC */
+ }
+#undef mult_chan
+}
+
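+/* Compute the reference result in double precision.  With a unified mask
+ * the source is scaled by the mask alpha alone; with component alpha each
+ * color channel is scaled by the matching mask channel, which also makes
+ * the effective source alpha per-channel.
+ */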
+static void
+do_composite (pixman_op_t op,
+ const color_t *src,
+ const color_t *mask,
+ const color_t *dst,
+ color_t *result,
+ pixman_bool_t component_alpha)
+{
+ color_t srcval, srcalpha;
+
+ if (mask == NULL)
+ {
+ srcval = *src;
+
+ srcalpha.r = src->a;
+ srcalpha.g = src->a;
+ srcalpha.b = src->a;
+ srcalpha.a = src->a;
+ }
+ else if (component_alpha)
+ {
+ srcval.r = src->r * mask->r;
+ srcval.g = src->g * mask->g;
+ srcval.b = src->b * mask->b;
+ srcval.a = src->a * mask->a;
+
+ srcalpha.r = src->a * mask->r;
+ srcalpha.g = src->a * mask->g;
+ srcalpha.b = src->a * mask->b;
+ srcalpha.a = src->a * mask->a;
+ }
+ else
+ {
+ srcval.r = src->r * mask->a;
+ srcval.g = src->g * mask->a;
+ srcval.b = src->b * mask->a;
+ srcval.a = src->a * mask->a;
+
+ srcalpha.r = src->a * mask->a;
+ srcalpha.g = src->a * mask->a;
+ srcalpha.b = src->a * mask->a;
+ srcalpha.a = src->a * mask->a;
+ }
+
+ result->r = calc_op (op, srcval.r, dst->r, srcalpha.r, dst->a);
+ result->g = calc_op (op, srcval.g, dst->g, srcalpha.g, dst->a);
+ result->b = calc_op (op, srcval.b, dst->b, srcalpha.b, dst->a);
+ result->a = calc_op (op, srcval.a, dst->a, srcalpha.a, dst->a);
+}
+
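+/* Round each channel to the nearest value representable in the given
+ * format, so the reference color matches what an exact implementation
+ * could actually store.  Missing color channels become 0.0 and a missing
+ * alpha channel becomes 1.0.
+ */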
+static void
+color_correct (pixman_format_code_t format,
+ color_t *color)
+{
+#define MASK(x) ((1 << (x)) - 1)
+#define round_pix(pix, m) \
+ ((int)((pix) * (MASK(m)) + .5) / (double) (MASK(m)))
+
+ if (PIXMAN_FORMAT_R (format) == 0)
+ {
+ color->r = 0.0;
+ color->g = 0.0;
+ color->b = 0.0;
+ }
+ else
+ {
+ color->r = round_pix (color->r, PIXMAN_FORMAT_R (format));
+ color->g = round_pix (color->g, PIXMAN_FORMAT_G (format));
+ color->b = round_pix (color->b, PIXMAN_FORMAT_B (format));
+ }
+
+ if (PIXMAN_FORMAT_A (format) == 0)
+ color->a = 1.0;
+ else
+ color->a = round_pix (color->a, PIXMAN_FORMAT_A (format));
+
+#undef round_pix
+#undef MASK
+}
+
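+/* Read back the top-left pixel and decode it into doubles.  The channel
+ * shifts follow from the format type: ARGB, for instance, packs blue in
+ * the low bits with green, red and alpha stacked above it.
+ */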
+static void
+get_pixel (pixman_image_t *image,
+ pixman_format_code_t format,
+ color_t *color)
+{
+#define MASK(N) ((1UL << (N))-1)
+
+ unsigned long rs, gs, bs, as;
+ int a, r, g, b;
+ unsigned long val;
+
+ val = *(unsigned long *) pixman_image_get_data (image);
+#ifdef WORDS_BIGENDIAN
+ val >>= 8 * sizeof(val) - PIXMAN_FORMAT_BPP (format);
+#endif
+
+ /* Number of bits in each channel */
+ a = PIXMAN_FORMAT_A (format);
+ r = PIXMAN_FORMAT_R (format);
+ g = PIXMAN_FORMAT_G (format);
+ b = PIXMAN_FORMAT_B (format);
+
+ switch (PIXMAN_FORMAT_TYPE (format))
+ {
+ case PIXMAN_TYPE_ARGB:
+ bs = 0;
+ gs = b + bs;
+ rs = g + gs;
+ as = r + rs;
+ break;
+
+ case PIXMAN_TYPE_ABGR:
+ rs = 0;
+ gs = r + rs;
+ bs = g + gs;
+ as = b + bs;
+ break;
+
+ case PIXMAN_TYPE_BGRA:
+ as = 0;
+ rs = PIXMAN_FORMAT_BPP (format) - (b + g + r);
+ gs = r + rs;
+ bs = g + gs;
+ break;
+
+ case PIXMAN_TYPE_A:
+ as = 0;
+ rs = 0;
+ gs = 0;
+ bs = 0;
+ break;
+
+ case PIXMAN_TYPE_OTHER:
+ case PIXMAN_TYPE_COLOR:
+ case PIXMAN_TYPE_GRAY:
+ case PIXMAN_TYPE_YUY2:
+ case PIXMAN_TYPE_YV12:
+ default:
+ abort ();
+ as = 0;
+ rs = 0;
+ gs = 0;
+ bs = 0;
+ break;
+ }
+
+ if (MASK (a) != 0)
+ color->a = ((val >> as) & MASK (a)) / (double) MASK (a);
+ else
+ color->a = 1.0;
+
+ if (MASK (r) != 0)
+ {
+ color->r = ((val >> rs) & MASK (r)) / (double) MASK (r);
+ color->g = ((val >> gs) & MASK (g)) / (double) MASK (g);
+ color->b = ((val >> bs) & MASK (b)) / (double) MASK (b);
+ }
+ else
+ {
+ color->r = 0.0;
+ color->g = 0.0;
+ color->b = 0.0;
+ }
+
+#undef MASK
+}
+
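+/* Return the largest per-channel error, scaled into units of the
+ * destination format's quantization step, so a difference of 1.0 means
+ * the result is one representable level off regardless of channel depth.
+ */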
+static double
+eval_diff (color_t *expected, color_t *test, pixman_format_code_t format)
+{
+ double rscale, gscale, bscale, ascale;
+ double rdiff, gdiff, bdiff, adiff;
+
+ rscale = 1.0 * ((1 << PIXMAN_FORMAT_R (format)) - 1);
+ gscale = 1.0 * ((1 << PIXMAN_FORMAT_G (format)) - 1);
+ bscale = 1.0 * ((1 << PIXMAN_FORMAT_B (format)) - 1);
+ ascale = 1.0 * ((1 << PIXMAN_FORMAT_A (format)) - 1);
+
+ rdiff = fabs (test->r - expected->r) * rscale;
+    gdiff = fabs (test->g - expected->g) * gscale;
+    bdiff = fabs (test->b - expected->b) * bscale;
+ adiff = fabs (test->a - expected->a) * ascale;
+
+ return MAX (MAX (MAX (rdiff, gdiff), bdiff), adiff);
+}
+
+static char *
+describe_image (image_t *info, char *buf)
+{
+ if (info->size)
+ {
+ sprintf (buf, "%s %dx%d%s",
+ info->format->name,
+ info->size, info->size,
+ info->repeat ? "R" :"");
+ }
+ else
+ {
+ sprintf (buf, "solid");
+ }
+
+ return buf;
+}
+
+/* Test a composite of a given operation, source, mask, and destination
+ * picture.  Fills the destination image with the destination color,
+ * composites once, and samples the pixel at (0, 0).
+ */
+static pixman_bool_t
+composite_test (image_t *dst,
+ const operator_t *op,
+ image_t *src,
+ image_t *mask,
+ pixman_bool_t component_alpha)
+{
+ pixman_color_t fill;
+ pixman_rectangle16_t rect;
+ color_t expected, result, tdst, tsrc, tmsk;
+ double diff;
+ pixman_bool_t success = TRUE;
+
+ compute_pixman_color (dst->color, &fill);
+ rect.x = rect.y = 0;
+ rect.width = rect.height = dst->size;
+ pixman_image_fill_rectangles (PIXMAN_OP_SRC, dst->image,
+ &fill, 1, &rect);
+
+ if (mask != NULL)
+ {
+ pixman_image_set_component_alpha (mask->image, component_alpha);
+ pixman_image_composite (op->op, src->image, mask->image, dst->image,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ dst->size, dst->size);
+
+ tmsk = *mask->color;
+ if (mask->size)
+ {
+ color_correct (mask->format->format, &tmsk);
+
+ if (component_alpha &&
+ PIXMAN_FORMAT_R (mask->format->format) == 0)
+ {
+ /* Ax component-alpha masks expand alpha into
+ * all color channels.
+ */
+ tmsk.r = tmsk.g = tmsk.b = tmsk.a;
+ }
+ }
+ }
+ else
+ {
+ pixman_image_composite (op->op, src->image, NULL, dst->image,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ dst->size, dst->size);
+ }
+ get_pixel (dst->image, dst->format->format, &result);
+
+ tdst = *dst->color;
+ color_correct (dst->format->format, &tdst);
+ tsrc = *src->color;
+ if (src->size)
+ color_correct (src->format->format, &tsrc);
+ do_composite (op->op, &tsrc, mask ? &tmsk : NULL, &tdst,
+ &expected, component_alpha);
+ color_correct (dst->format->format, &expected);
+
+ diff = eval_diff (&expected, &result, dst->format->format);
+
+ /* FIXME: We should find out what deviation is acceptable. 3.0
+ * is clearly absurd for 2 bit formats for example. On the other
+ * hand currently 1.0 does not work.
+ */
+ if (diff > 3.0)
+ {
+ char buf[40];
+
+ sprintf (buf, "%s %scomposite",
+ op->name,
+ component_alpha ? "CA " : "");
+
+ printf ("%s test error of %.4f --\n"
+ " R G B A\n"
+ "got: %.2f %.2f %.2f %.2f [%08lx]\n"
+ "expected: %.2f %.2f %.2f %.2f\n",
+ buf, diff,
+ result.r, result.g, result.b, result.a,
+ *(unsigned long *) pixman_image_get_data (dst->image),
+ expected.r, expected.g, expected.b, expected.a);
+
+ if (mask != NULL)
+ {
+ printf ("src color: %.2f %.2f %.2f %.2f\n"
+ "msk color: %.2f %.2f %.2f %.2f\n"
+ "dst color: %.2f %.2f %.2f %.2f\n",
+ src->color->r, src->color->g,
+ src->color->b, src->color->a,
+ mask->color->r, mask->color->g,
+ mask->color->b, mask->color->a,
+ dst->color->r, dst->color->g,
+ dst->color->b, dst->color->a);
+ printf ("src: %s, ", describe_image (src, buf));
+ printf ("mask: %s, ", describe_image (mask, buf));
+ printf ("dst: %s\n\n", describe_image (dst, buf));
+ }
+ else
+ {
+ printf ("src color: %.2f %.2f %.2f %.2f\n"
+ "dst color: %.2f %.2f %.2f %.2f\n",
+ src->color->r, src->color->g,
+ src->color->b, src->color->a,
+ dst->color->r, dst->color->g,
+ dst->color->b, dst->color->a);
+ printf ("src: %s, ", describe_image (src, buf));
+ printf ("dst: %s\n\n", describe_image (dst, buf));
+ }
+
+ success = FALSE;
+ }
+
+ return success;
+}
+
+static void
+image_init (image_t *info,
+ int color,
+ int format,
+ int size)
+{
+ pixman_color_t fill;
+
+ info->color = &colors[color];
+ compute_pixman_color (info->color, &fill);
+
+ info->format = &formats[format];
+ info->size = sizes[size] & ~FLAGS;
+ info->repeat = PIXMAN_REPEAT_NONE;
+
+ if (info->size)
+ {
+ pixman_rectangle16_t rect;
+
+ info->image = pixman_image_create_bits (info->format->format,
+ info->size, info->size,
+ NULL, 0);
+
+ rect.x = rect.y = 0;
+ rect.width = rect.height = info->size;
+ pixman_image_fill_rectangles (PIXMAN_OP_SRC, info->image, &fill,
+ 1, &rect);
+
+ if (size & REPEAT)
+ {
+ pixman_image_set_repeat (info->image, PIXMAN_REPEAT_NORMAL);
+ info->repeat = PIXMAN_REPEAT_NORMAL;
+ }
+ }
+ else
+ {
+ info->image = pixman_image_create_solid_fill (&fill);
+ }
+}
+
+static void
+image_fini (image_t *info)
+{
+ pixman_image_unref (info->image);
+}
+
+static int
+random_size (void)
+{
+ return lcg_rand_n (ARRAY_LENGTH (sizes));
+}
+
+static int
+random_color (void)
+{
+ return lcg_rand_n (ARRAY_LENGTH (colors));
+}
+
+static int
+random_format (void)
+{
+ return lcg_rand_n (ARRAY_LENGTH (formats));
+}
+
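+/* Run one fully seed-determined test: build random solid or small
+ * (possibly repeating) images, pick a random operator, and randomly
+ * exercise one of the no-mask, unified-mask and component-alpha cases.
+ */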
+static pixman_bool_t
+run_test (uint32_t seed)
+{
+ image_t src, mask, dst;
+ const operator_t *op;
+ int ca;
+ int ok;
+
+ lcg_srand (seed);
+
+ image_init (&dst, random_color(), random_format(), 1);
+ image_init (&src, random_color(), random_format(), random_size());
+ image_init (&mask, random_color(), random_format(), random_size());
+
+ op = &(operators [lcg_rand_n (ARRAY_LENGTH (operators))]);
+
+ ca = lcg_rand_n (3);
+
+ switch (ca)
+ {
+ case 0:
+ ok = composite_test (&dst, op, &src, NULL, FALSE);
+ break;
+ case 1:
+ ok = composite_test (&dst, op, &src, &mask, FALSE);
+ break;
+ case 2:
+ ok = composite_test (&dst, op, &src, &mask,
+			     mask.size ? TRUE : FALSE);
+ break;
+ default:
+ ok = FALSE;
+ break;
+ }
+
+ image_fini (&src);
+ image_fini (&mask);
+ image_fini (&dst);
+
+ return ok;
+}
+
+int
+main (int argc, char **argv)
+{
+#define N_TESTS (8 * 1024 * 1024)
+ int result = 0;
+ int i;
+
+ if (argc > 1)
+ {
+ char *end;
+
+ i = strtol (argv[1], &end, 0);
+
+ if (end != argv[1])
+ {
+ if (!run_test (i))
+ return 1;
+ else
+ return 0;
+ }
+ else
+ {
+ printf ("Usage:\n\n %s <number>\n\n", argv[0]);
+ return -1;
+ }
+ }
+
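+    /* Each iteration is independent and fully determined by its seed, so
+     * the loop parallelizes trivially; a failing seed is recorded so the
+     * run can be reproduced by passing that number on the command line.
+     */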
+#ifdef USE_OPENMP
+# pragma omp parallel for default(none) shared(result) shared(argv)
+#endif
+ for (i = 1; i <= N_TESTS; ++i)
+ {
+ if (!result && !run_test (i))
+ {
+ printf ("Test %d failed.\n", i);
+
+ result = i;
+ }
+ }
+
+ return result;
+}
diff --git a/pixman/test/fetch-test.c b/pixman/test/fetch-test.c
index 60bc765f6..4554f9f9b 100644
--- a/pixman/test/fetch-test.c
+++ b/pixman/test/fetch-test.c
@@ -1,196 +1,196 @@
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "pixman.h"
-#include <config.h>
-
-#define SIZE 1024
-
-static pixman_indexed_t mono_palette =
-{
- 0, { 0x00000000, 0x00ffffff },
-};
-
-
-typedef struct {
- pixman_format_code_t format;
- int width, height;
- int stride;
- uint32_t src[SIZE];
- uint32_t dst[SIZE];
- pixman_indexed_t *indexed;
-} testcase_t;
-
-static testcase_t testcases[] =
-{
- {
- PIXMAN_a8r8g8b8,
- 2, 2,
- 8,
- { 0x00112233, 0x44556677,
- 0x8899aabb, 0xccddeeff },
- { 0x00112233, 0x44556677,
- 0x8899aabb, 0xccddeeff },
- NULL,
- },
- {
- PIXMAN_g1,
- 8, 2,
- 4,
-#ifdef WORDS_BIGENDIAN
- {
- 0xaa000000,
- 0x55000000
- },
-#else
- {
- 0x00000055,
- 0x000000aa
- },
-#endif
- {
- 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000,
- 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff
- },
- &mono_palette,
- },
-#if 0
- {
- PIXMAN_g8,
- 4, 2,
- 4,
- { 0x01234567,
- 0x89abcdef },
- { 0x00010101, 0x00232323, 0x00454545, 0x00676767,
- 0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
- },
-#endif
- /* FIXME: make this work on big endian */
- {
- PIXMAN_yv12,
- 8, 2,
- 8,
-#ifdef WORDS_BIGENDIAN
- {
- 0x00ff00ff, 0x00ff00ff,
- 0xff00ff00, 0xff00ff00,
- 0x80ff8000,
- 0x800080ff
- },
-#else
- {
- 0xff00ff00, 0xff00ff00,
- 0x00ff00ff, 0x00ff00ff,
- 0x0080ff80,
- 0xff800080
- },
-#endif
- {
- 0xff000000, 0xffffffff, 0xffb80000, 0xffffe113,
- 0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff,
- 0xffffffff, 0xff000000, 0xffffe113, 0xffb80000,
- 0xffffffff, 0xff000000, 0xff4affff, 0xff0023ee,
- },
- },
-};
-
-int n_test_cases = sizeof(testcases)/sizeof(testcases[0]);
-
-
-static uint32_t
-reader (const void *src, int size)
-{
- switch (size)
- {
- case 1:
- return *(uint8_t *)src;
- case 2:
- return *(uint16_t *)src;
- case 4:
- return *(uint32_t *)src;
- default:
- assert(0);
- return 0; /* silence MSVC */
- }
-}
-
-
-static void
-writer (void *src, uint32_t value, int size)
-{
- switch (size)
- {
- case 1:
- *(uint8_t *)src = value;
- break;
- case 2:
- *(uint16_t *)src = value;
- break;
- case 4:
- *(uint32_t *)src = value;
- break;
- default:
- assert(0);
- }
-}
-
-
-int
-main (int argc, char **argv)
-{
- uint32_t dst[SIZE];
- pixman_image_t *src_img;
- pixman_image_t *dst_img;
- int i, j, x, y;
- int ret = 0;
-
- for (i = 0; i < n_test_cases; ++i)
- {
- for (j = 0; j < 2; ++j)
- {
- src_img = pixman_image_create_bits (testcases[i].format,
- testcases[i].width,
- testcases[i].height,
- testcases[i].src,
- testcases[i].stride);
- pixman_image_set_indexed(src_img, testcases[i].indexed);
-
- dst_img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
- testcases[i].width,
- testcases[i].height,
- dst,
- testcases[i].width*4);
-
- if (j)
- {
- pixman_image_set_accessors (src_img, reader, writer);
- pixman_image_set_accessors (dst_img, reader, writer);
- }
-
- pixman_image_composite (PIXMAN_OP_SRC, src_img, NULL, dst_img,
- 0, 0, 0, 0, 0, 0, testcases[i].width, testcases[i].height);
-
- pixman_image_unref (src_img);
- pixman_image_unref (dst_img);
-
- for (y = 0; y < testcases[i].height; ++y)
- {
- for (x = 0; x < testcases[i].width; ++x)
- {
- int offset = y * testcases[i].width + x;
-
- if (dst[offset] != testcases[i].dst[offset])
- {
- printf ("test %i%c: pixel mismatch at (x=%d,y=%d): %08x expected, %08x obtained\n",
- i + 1, 'a' + j,
- x, y,
- testcases[i].dst[offset], dst[offset]);
- ret = 1;
- }
- }
- }
- }
- }
-
- return ret;
-}
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "pixman.h"
+#include <config.h>
+
+#define SIZE 1024
+
+static pixman_indexed_t mono_palette =
+{
+ 0, { 0x00000000, 0x00ffffff },
+};
+
+
+typedef struct {
+ pixman_format_code_t format;
+ int width, height;
+ int stride;
+ uint32_t src[SIZE];
+ uint32_t dst[SIZE];
+ pixman_indexed_t *indexed;
+} testcase_t;
+
+static testcase_t testcases[] =
+{
+ {
+ PIXMAN_a8r8g8b8,
+ 2, 2,
+ 8,
+ { 0x00112233, 0x44556677,
+ 0x8899aabb, 0xccddeeff },
+ { 0x00112233, 0x44556677,
+ 0x8899aabb, 0xccddeeff },
+ NULL,
+ },
+ {
+ PIXMAN_g1,
+ 8, 2,
+ 4,
+#ifdef WORDS_BIGENDIAN
+ {
+ 0xaa000000,
+ 0x55000000
+ },
+#else
+ {
+ 0x00000055,
+ 0x000000aa
+ },
+#endif
+ {
+ 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000,
+ 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff
+ },
+ &mono_palette,
+ },
+#if 0
+ {
+ PIXMAN_g8,
+ 4, 2,
+ 4,
+ { 0x01234567,
+ 0x89abcdef },
+ { 0x00010101, 0x00232323, 0x00454545, 0x00676767,
+ 0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
+ },
+#endif
+ /* FIXME: make this work on big endian */
+ {
+ PIXMAN_yv12,
+ 8, 2,
+ 8,
+#ifdef WORDS_BIGENDIAN
+ {
+ 0x00ff00ff, 0x00ff00ff,
+ 0xff00ff00, 0xff00ff00,
+ 0x80ff8000,
+ 0x800080ff
+ },
+#else
+ {
+ 0xff00ff00, 0xff00ff00,
+ 0x00ff00ff, 0x00ff00ff,
+ 0x0080ff80,
+ 0xff800080
+ },
+#endif
+ {
+ 0xff000000, 0xffffffff, 0xffb80000, 0xffffe113,
+ 0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff,
+ 0xffffffff, 0xff000000, 0xffffe113, 0xffb80000,
+ 0xffffffff, 0xff000000, 0xff4affff, 0xff0023ee,
+ },
+ },
+};
+
+int n_test_cases = sizeof(testcases)/sizeof(testcases[0]);
+
+
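+/* Trivial accessors equivalent to pixman's defaults; installing them
+ * forces the composite through the accessor code path. */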
+static uint32_t
+reader (const void *src, int size)
+{
+ switch (size)
+ {
+ case 1:
+ return *(uint8_t *)src;
+ case 2:
+ return *(uint16_t *)src;
+ case 4:
+ return *(uint32_t *)src;
+ default:
+ assert(0);
+ return 0; /* silence MSVC */
+ }
+}
+
+
+static void
+writer (void *src, uint32_t value, int size)
+{
+ switch (size)
+ {
+ case 1:
+ *(uint8_t *)src = value;
+ break;
+ case 2:
+ *(uint16_t *)src = value;
+ break;
+ case 4:
+ *(uint32_t *)src = value;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+int
+main (int argc, char **argv)
+{
+ uint32_t dst[SIZE];
+ pixman_image_t *src_img;
+ pixman_image_t *dst_img;
+ int i, j, x, y;
+ int ret = 0;
+
+ for (i = 0; i < n_test_cases; ++i)
+ {
+ for (j = 0; j < 2; ++j)
+ {
+ src_img = pixman_image_create_bits (testcases[i].format,
+ testcases[i].width,
+ testcases[i].height,
+ testcases[i].src,
+ testcases[i].stride);
+ pixman_image_set_indexed (src_img, testcases[i].indexed);
+
+ dst_img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
+ testcases[i].width,
+ testcases[i].height,
+ dst,
+ testcases[i].width*4);
+
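+ /* Second pass (j == 1): run the same composite through the
+ * user-supplied memory accessors installed below. */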
+ if (j)
+ {
+ pixman_image_set_accessors (src_img, reader, writer);
+ pixman_image_set_accessors (dst_img, reader, writer);
+ }
+
+ pixman_image_composite (PIXMAN_OP_SRC, src_img, NULL, dst_img,
+ 0, 0, 0, 0, 0, 0, testcases[i].width, testcases[i].height);
+
+ pixman_image_unref (src_img);
+ pixman_image_unref (dst_img);
+
+ for (y = 0; y < testcases[i].height; ++y)
+ {
+ for (x = 0; x < testcases[i].width; ++x)
+ {
+ int offset = y * testcases[i].width + x;
+
+ if (dst[offset] != testcases[i].dst[offset])
+ {
+ printf ("test %i%c: pixel mismatch at (x=%d,y=%d): %08x expected, %08x obtained\n",
+ i + 1, 'a' + j,
+ x, y,
+ testcases[i].dst[offset], dst[offset]);
+ ret = 1;
+ }
+ }
+ }
+ }
+ }
+
+ return ret;
+}
diff --git a/pixman/test/stress-test.c b/pixman/test/stress-test.c
index 166dc6d2c..a51737ea0 100644
--- a/pixman/test/stress-test.c
+++ b/pixman/test/stress-test.c
@@ -1,855 +1,855 @@
-#include "utils.h"
-
-#if 0
-#define fence_malloc malloc
-#define fence_free free
-#define make_random_bytes malloc
-#endif
-
-static const pixman_format_code_t image_formats[] =
-{
- PIXMAN_a8r8g8b8,
- PIXMAN_x8r8g8b8,
- PIXMAN_r5g6b5,
- PIXMAN_r3g3b2,
- PIXMAN_a8,
- PIXMAN_a8b8g8r8,
- PIXMAN_x8b8g8r8,
- PIXMAN_b8g8r8a8,
- PIXMAN_b8g8r8x8,
- PIXMAN_x14r6g6b6,
- PIXMAN_r8g8b8,
- PIXMAN_b8g8r8,
- PIXMAN_r5g6b5,
- PIXMAN_b5g6r5,
- PIXMAN_x2r10g10b10,
- PIXMAN_a2r10g10b10,
- PIXMAN_x2b10g10r10,
- PIXMAN_a2b10g10r10,
- PIXMAN_a1r5g5b5,
- PIXMAN_x1r5g5b5,
- PIXMAN_a1b5g5r5,
- PIXMAN_x1b5g5r5,
- PIXMAN_a4r4g4b4,
- PIXMAN_x4r4g4b4,
- PIXMAN_a4b4g4r4,
- PIXMAN_x4b4g4r4,
- PIXMAN_a8,
- PIXMAN_r3g3b2,
- PIXMAN_b2g3r3,
- PIXMAN_a2r2g2b2,
- PIXMAN_a2b2g2r2,
- PIXMAN_c8,
- PIXMAN_g8,
- PIXMAN_x4c4,
- PIXMAN_x4g4,
- PIXMAN_c4,
- PIXMAN_g4,
- PIXMAN_g1,
- PIXMAN_x4a4,
- PIXMAN_a4,
- PIXMAN_r1g2b1,
- PIXMAN_b1g2r1,
- PIXMAN_a1r1g1b1,
- PIXMAN_a1b1g1r1,
- PIXMAN_a1
-};
-
-static pixman_filter_t filters[] =
-{
- PIXMAN_FILTER_NEAREST,
- PIXMAN_FILTER_BILINEAR,
- PIXMAN_FILTER_FAST,
- PIXMAN_FILTER_GOOD,
- PIXMAN_FILTER_BEST,
- PIXMAN_FILTER_CONVOLUTION
-};
-
-static int
-get_size (void)
-{
- switch (lcg_rand_n (28))
- {
- case 0:
- return 1;
-
- case 1:
- return 2;
-
- default:
- case 2:
- return lcg_rand_n (200);
-
- case 4:
- return lcg_rand_n (2000) + 1000;
-
- case 5:
- return 65535;
-
- case 6:
- return 65536;
-
- case 7:
- return lcg_rand_N (64000) + 63000;
- }
-}
-
-static void
-destroy (pixman_image_t *image, void *data)
-{
- if (image->type == BITS && image->bits.free_me != image->bits.bits)
- {
- uint32_t *bits;
-
- if (image->bits.bits != (void *)0x01)
- {
- bits = image->bits.bits;
-
- if (image->bits.rowstride < 0)
- bits -= (- image->bits.rowstride * (image->bits.height - 1));
-
- fence_free (bits);
- }
- }
-
- free (data);
-}
-
-static uint32_t
-real_reader (const void *src, int size)
-{
- switch (size)
- {
- case 1:
- return *(uint8_t *)src;
- case 2:
- return *(uint16_t *)src;
- case 4:
- return *(uint32_t *)src;
- default:
- assert (0);
- return 0; /* silence MSVC */
- }
-}
-
-static void
-real_writer (void *src, uint32_t value, int size)
-{
- switch (size)
- {
- case 1:
- *(uint8_t *)src = value;
- break;
-
- case 2:
- *(uint16_t *)src = value;
- break;
-
- case 4:
- *(uint32_t *)src = value;
- break;
-
- default:
- assert (0);
- break;
- }
-}
-
-static uint32_t
-fake_reader (const void *src, int size)
-{
- uint32_t r = lcg_rand_u32 ();
-
- assert (size == 1 || size == 2 || size == 4);
- return r & ((1 << (size * 8)) - 1);
-}
-
-static void
-fake_writer (void *src, uint32_t value, int size)
-{
- assert (size == 1 || size == 2 || size == 4);
-}
-
-static int32_t
-log_rand (void)
-{
- uint32_t mask;
-
- mask = (1 << lcg_rand_n (31)) - 1;
-
- return (lcg_rand () & mask) - (mask >> 1);
-}
-
-static pixman_image_t *
-create_random_bits_image (void)
-{
- pixman_format_code_t format;
- pixman_indexed_t *indexed;
- pixman_image_t *image;
- int width, height, stride;
- uint32_t *bits;
- pixman_read_memory_func_t read_func = NULL;
- pixman_write_memory_func_t write_func = NULL;
- pixman_filter_t filter;
- pixman_fixed_t *coefficients = NULL;
- int n_coefficients = 0;
-
- /* format */
- format = image_formats[lcg_rand_n (ARRAY_LENGTH (image_formats))];
-
- indexed = NULL;
- if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
- {
- indexed = malloc (sizeof (pixman_indexed_t));
-
- initialize_palette (indexed, PIXMAN_FORMAT_BPP (format), TRUE);
- }
- else if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY)
- {
- indexed = malloc (sizeof (pixman_indexed_t));
-
- initialize_palette (indexed, PIXMAN_FORMAT_BPP (format), FALSE);
- }
- else
- {
- indexed = NULL;
- }
-
- /* size */
- width = get_size ();
- height = get_size ();
-
- if ((uint64_t)width * height > 200000)
- {
- if (lcg_rand_n(2) == 0)
- height = 200000 / width;
- else
- width = 200000 / height;
- }
-
- if (height == 0)
- height = 1;
- if (width == 0)
- width = 1;
-
- /* bits */
- switch (lcg_rand_n (7))
- {
- default:
- case 0:
- stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
- stride = (stride + 3) & (~3);
- bits = (uint32_t *)make_random_bytes (height * stride);
- break;
-
- case 1:
- stride = 0;
- bits = NULL;
- break;
-
- case 2: /* Zero-filled */
- stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
- stride = (stride + 3) & (~3);
- bits = fence_malloc (height * stride);
- if (!bits)
- return NULL;
- memset (bits, 0, height * stride);
- break;
-
- case 3: /* Filled with 0xFF */
- stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
- stride = (stride + 3) & (~3);
- bits = fence_malloc (height * stride);
- if (!bits)
- return NULL;
- memset (bits, 0xff, height * stride);
- break;
-
- case 4: /* bits is a bad pointer, has read/write functions */
- stride = 232;
- bits = (void *)0x01;
- read_func = fake_reader;
- write_func = fake_writer;
- break;
-
- case 5: /* bits is a real pointer, has read/write functions */
- stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
- stride = (stride + 3) & (~3);
- bits = fence_malloc (height * stride);
- if (!bits)
- return NULL;
- memset (bits, 0xff, height * stride);
- read_func = real_reader;
- write_func = real_writer;
- break;
-
- case 6: /* bits is a real pointer, stride is negative */
- stride = (width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17));
- stride = (stride + 3) & (~3);
- bits = (uint32_t *)make_random_bytes (height * stride);
- if (!bits)
- return NULL;
- bits += ((height - 1) * stride) / 4;
- stride = - stride;
- break;
- }
-
- /* Filter */
- filter = filters[lcg_rand_n (ARRAY_LENGTH (filters))];
- if (filter == PIXMAN_FILTER_CONVOLUTION)
- {
- int width = lcg_rand_n (17);
- int height = lcg_rand_n (19);
-
- n_coefficients = width * height + 2;
- coefficients = malloc (n_coefficients * sizeof (pixman_fixed_t));
-
- if (coefficients)
- {
- int i;
-
- for (i = 0; i < width * height; ++i)
- coefficients[i + 2] = lcg_rand_u32();
-
- coefficients[0] = width << 16;
- coefficients[1] = height << 16;
- }
- else
- {
- filter = PIXMAN_FILTER_BEST;
- }
- }
-
- /* Finally create the image */
- image = pixman_image_create_bits (format, width, height, bits, stride);
- if (!image)
- return NULL;
-
- pixman_image_set_indexed (image, indexed);
- pixman_image_set_destroy_function (image, destroy, indexed);
- pixman_image_set_accessors (image, read_func, write_func);
- pixman_image_set_filter (image, filter, coefficients, n_coefficients);
-
- return image;
-}
-
-static pixman_repeat_t repeats[] =
-{
- PIXMAN_REPEAT_NONE,
- PIXMAN_REPEAT_NORMAL,
- PIXMAN_REPEAT_REFLECT,
- PIXMAN_REPEAT_PAD
-};
-
-static uint32_t
-absolute (int32_t i)
-{
- return i < 0? -i : i;
-}
-
-static void
-set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map)
-{
- pixman_repeat_t repeat;
-
- /* Set properties that are generic to all images */
-
- /* Repeat */
- repeat = repeats[lcg_rand_n (ARRAY_LENGTH (repeats))];
- pixman_image_set_repeat (image, repeat);
-
- /* Alpha map */
- if (allow_alpha_map && lcg_rand_n (3) == 0)
- {
- pixman_image_t *alpha_map;
- int16_t x, y;
-
- alpha_map = create_random_bits_image ();
-
- if (alpha_map)
- {
- set_general_properties (alpha_map, FALSE);
-
- x = lcg_rand_N (100000) - 65536;
- y = lcg_rand_N (100000) - 65536;
-
- pixman_image_set_alpha_map (image, alpha_map, x, y);
-
- pixman_image_unref (alpha_map);
- }
- }
-
- /* Component alpha */
- pixman_image_set_component_alpha (image, lcg_rand_n (3) == 0);
-
- /* Clip region */
- if (lcg_rand_n (8) != 0)
- {
- pixman_region32_t region;
- int i, n_rects;
-
- pixman_region32_init (&region);
-
- switch (lcg_rand_n (10))
- {
- case 0:
- n_rects = 0;
- break;
-
- case 1: case 2: case 3:
- n_rects = 1;
- break;
-
- case 4: case 5:
- n_rects = 2;
- break;
-
- case 6: case 7:
- n_rects = 3;
-
- default:
- n_rects = lcg_rand_n (100);
- break;
- }
-
- for (i = 0; i < n_rects; ++i)
- {
- uint32_t width, height;
- int x, y;
-
- x = log_rand();
- y = log_rand();
- width = absolute (log_rand ()) + 1;
- height = absolute (log_rand ()) + 1;
-
- pixman_region32_union_rect (
- &region, &region, x, y, width, height);
- }
-
- pixman_image_set_clip_region32 (image, &region);
-
- pixman_region32_fini (&region);
- }
-
- /* Whether source clipping is enabled */
- pixman_image_set_source_clipping (image, !!lcg_rand_n (2));
-
- /* Client clip */
- pixman_image_set_has_client_clip (image, !!lcg_rand_n (2));
-
- /* Transform */
- if (lcg_rand_n (5) < 2)
- {
- pixman_transform_t xform;
- int i, j, k;
- uint32_t tx, ty, sx, sy;
- uint32_t c, s;
-
- memset (&xform, 0, sizeof xform);
- xform.matrix[0][0] = pixman_fixed_1;
- xform.matrix[1][1] = pixman_fixed_1;
- xform.matrix[2][2] = pixman_fixed_1;
-
- for (k = 0; k < 3; ++k)
- {
- switch (lcg_rand_n (4))
- {
- case 0:
- /* rotation */
- c = lcg_rand_N (2 * 65536) - 65536;
- s = lcg_rand_N (2 * 65536) - 65536;
- pixman_transform_rotate (&xform, NULL, c, s);
- break;
-
- case 1:
- /* translation */
- tx = lcg_rand_u32();
- ty = lcg_rand_u32();
- pixman_transform_translate (&xform, NULL, tx, ty);
- break;
-
- case 2:
- /* scale */
- sx = lcg_rand_u32();
- sy = lcg_rand_u32();
- pixman_transform_scale (&xform, NULL, sx, sy);
- break;
-
- case 3:
- if (lcg_rand_n (16) == 0)
- {
- /* random */
- for (i = 0; i < 3; ++i)
- for (j = 0; j < 3; ++j)
- xform.matrix[i][j] = lcg_rand_u32();
- break;
- }
- else if (lcg_rand_n (16) == 0)
- {
- /* zero */
- memset (&xform, 0, sizeof xform);
- }
- break;
- }
- }
-
- pixman_image_set_transform (image, &xform);
- }
-}
-
-static pixman_color_t
-random_color (void)
-{
- pixman_color_t color =
- {
- lcg_rand() & 0xffff,
- lcg_rand() & 0xffff,
- lcg_rand() & 0xffff,
- lcg_rand() & 0xffff,
- };
-
- return color;
-}
-
-
-static pixman_image_t *
-create_random_solid_image (void)
-{
- pixman_color_t color = random_color();
- pixman_image_t *image = pixman_image_create_solid_fill (&color);
-
- return image;
-}
-
-static pixman_gradient_stop_t *
-create_random_stops (int *n_stops)
-{
- pixman_fixed_t step;
- pixman_fixed_t s;
- int i;
- pixman_gradient_stop_t *stops;
-
- *n_stops = lcg_rand_n (50) + 1;
-
- step = pixman_fixed_1 / *n_stops;
-
- stops = malloc (*n_stops * sizeof (pixman_gradient_stop_t));
-
- s = 0;
- for (i = 0; i < (*n_stops) - 1; ++i)
- {
- stops[i].x = s;
- stops[i].color = random_color();
-
- s += step;
- }
-
- stops[*n_stops - 1].x = pixman_fixed_1;
- stops[*n_stops - 1].color = random_color();
-
- return stops;
-}
-
-static pixman_point_fixed_t
-create_random_point (void)
-{
- pixman_point_fixed_t p;
-
- p.x = log_rand ();
- p.y = log_rand ();
-
- return p;
-}
-
-static pixman_image_t *
-create_random_linear_image (void)
-{
- int n_stops;
- pixman_gradient_stop_t *stops;
- pixman_point_fixed_t p1, p2;
- pixman_image_t *result;
-
- stops = create_random_stops (&n_stops);
- if (!stops)
- return NULL;
-
- p1 = create_random_point ();
- p2 = create_random_point ();
-
- result = pixman_image_create_linear_gradient (&p1, &p2, stops, n_stops);
-
- free (stops);
-
- return result;
-}
-
-static pixman_image_t *
-create_random_radial_image (void)
-{
- int n_stops;
- pixman_gradient_stop_t *stops;
- pixman_point_fixed_t inner_c, outer_c;
- pixman_fixed_t inner_r, outer_r;
- pixman_image_t *result;
-
- inner_c = create_random_point();
- outer_c = create_random_point();
- inner_r = lcg_rand();
- outer_r = lcg_rand();
-
- stops = create_random_stops (&n_stops);
-
- if (!stops)
- return NULL;
-
- result = pixman_image_create_radial_gradient (
- &inner_c, &outer_c, inner_r, outer_r, stops, n_stops);
-
- free (stops);
-
- return result;
-}
-
-static pixman_image_t *
-create_random_conical_image (void)
-{
- pixman_gradient_stop_t *stops;
- int n_stops;
- pixman_point_fixed_t c;
- pixman_fixed_t angle;
- pixman_image_t *result;
-
- c = create_random_point();
- angle = lcg_rand();
-
- stops = create_random_stops (&n_stops);
-
- if (!stops)
- return NULL;
-
- result = pixman_image_create_conical_gradient (&c, angle, stops, n_stops);
-
- free (stops);
-
- return result;
-}
-
-static pixman_image_t *
-create_random_image (void)
-{
- pixman_image_t *result;
-
- switch (lcg_rand_n (5))
- {
- default:
- case 0:
- result = create_random_bits_image ();
- break;
-
- case 1:
- result = create_random_solid_image ();
- break;
-
- case 2:
- result = create_random_linear_image ();
- break;
-
- case 3:
- result = create_random_radial_image ();
- break;
-
- case 4:
- result = create_random_conical_image ();
- break;
- }
-
- if (result)
- set_general_properties (result, TRUE);
-
- return result;
-}
-
-static const pixman_op_t op_list[] =
-{
- PIXMAN_OP_SRC,
- PIXMAN_OP_OVER,
- PIXMAN_OP_ADD,
- PIXMAN_OP_CLEAR,
- PIXMAN_OP_SRC,
- PIXMAN_OP_DST,
- PIXMAN_OP_OVER,
- PIXMAN_OP_OVER_REVERSE,
- PIXMAN_OP_IN,
- PIXMAN_OP_IN_REVERSE,
- PIXMAN_OP_OUT,
- PIXMAN_OP_OUT_REVERSE,
- PIXMAN_OP_ATOP,
- PIXMAN_OP_ATOP_REVERSE,
- PIXMAN_OP_XOR,
- PIXMAN_OP_ADD,
- PIXMAN_OP_SATURATE,
- PIXMAN_OP_DISJOINT_CLEAR,
- PIXMAN_OP_DISJOINT_SRC,
- PIXMAN_OP_DISJOINT_DST,
- PIXMAN_OP_DISJOINT_OVER,
- PIXMAN_OP_DISJOINT_OVER_REVERSE,
- PIXMAN_OP_DISJOINT_IN,
- PIXMAN_OP_DISJOINT_IN_REVERSE,
- PIXMAN_OP_DISJOINT_OUT,
- PIXMAN_OP_DISJOINT_OUT_REVERSE,
- PIXMAN_OP_DISJOINT_ATOP,
- PIXMAN_OP_DISJOINT_ATOP_REVERSE,
- PIXMAN_OP_DISJOINT_XOR,
- PIXMAN_OP_CONJOINT_CLEAR,
- PIXMAN_OP_CONJOINT_SRC,
- PIXMAN_OP_CONJOINT_DST,
- PIXMAN_OP_CONJOINT_OVER,
- PIXMAN_OP_CONJOINT_OVER_REVERSE,
- PIXMAN_OP_CONJOINT_IN,
- PIXMAN_OP_CONJOINT_IN_REVERSE,
- PIXMAN_OP_CONJOINT_OUT,
- PIXMAN_OP_CONJOINT_OUT_REVERSE,
- PIXMAN_OP_CONJOINT_ATOP,
- PIXMAN_OP_CONJOINT_ATOP_REVERSE,
- PIXMAN_OP_CONJOINT_XOR,
- PIXMAN_OP_MULTIPLY,
- PIXMAN_OP_SCREEN,
- PIXMAN_OP_OVERLAY,
- PIXMAN_OP_DARKEN,
- PIXMAN_OP_LIGHTEN,
- PIXMAN_OP_COLOR_DODGE,
- PIXMAN_OP_COLOR_BURN,
- PIXMAN_OP_HARD_LIGHT,
- PIXMAN_OP_DIFFERENCE,
- PIXMAN_OP_EXCLUSION,
- PIXMAN_OP_SOFT_LIGHT,
- PIXMAN_OP_HSL_HUE,
- PIXMAN_OP_HSL_SATURATION,
- PIXMAN_OP_HSL_COLOR,
- PIXMAN_OP_HSL_LUMINOSITY,
-};
-
-static void
-run_test (uint32_t seed)
-{
- pixman_image_t *source, *mask, *dest;
- pixman_op_t op;
-
- lcg_srand (seed);
-
- source = create_random_image ();
- mask = create_random_image ();
- dest = create_random_bits_image ();
-
- if (source && mask && dest)
- {
- set_general_properties (dest, TRUE);
-
- op = op_list [lcg_rand_n (ARRAY_LENGTH (op_list))];
-
- pixman_image_composite32 (op,
- source, mask, dest,
- log_rand(), log_rand(),
- log_rand(), log_rand(),
- log_rand(), log_rand(),
- absolute (log_rand()),
- absolute (log_rand()));
- }
- if (source)
- pixman_image_unref (source);
- if (mask)
- pixman_image_unref (mask);
- if (dest)
- pixman_image_unref (dest);
-}
-
-static pixman_bool_t
-get_int (char *s, uint32_t *i)
-{
- char *end;
- int p;
-
- p = strtol (s, &end, 0);
-
- if (end != s && *end == 0)
- {
- *i = p;
- return TRUE;
- }
-
- return FALSE;
-}
-
-int
-main (int argc, char **argv)
-{
- int verbose = FALSE;
- uint32_t seed = 1;
- uint32_t n_tests = 0xffffffff;
- uint32_t mod = 0;
- uint32_t i;
-
- pixman_disable_out_of_bounds_workaround ();
-
- enable_fp_exceptions();
-
- if (getenv ("VERBOSE") != NULL)
- verbose = TRUE;
-
- for (i = 1; i < argc; ++i)
- {
- if (strcmp (argv[i], "-v") == 0)
- {
- verbose = TRUE;
-
- if (i + 1 < argc)
- {
- get_int (argv[i + 1], &mod);
- i++;
- }
- }
- else if (strcmp (argv[i], "-s") == 0 && i + 1 < argc)
- {
- get_int (argv[i + 1], &seed);
- i++;
- }
- else if (strcmp (argv[i], "-n") == 0 && i + 1 < argc)
- {
- get_int (argv[i + 1], &n_tests);
- i++;
- }
- else
- {
- if (strcmp (argv[i], "-h") != 0)
- printf ("Unknown option '%s'\n\n", argv[i]);
-
- printf ("Options:\n\n"
- "-n <number> Number of tests to run\n"
- "-s <seed> Seed of first test\n"
- "-v Print out seeds\n"
- "-v <n> Print out every n'th seed\n\n");
-
- exit (-1);
- }
- }
-
- if (n_tests == 0xffffffff)
- n_tests = 8000;
-
- /* FIXME: seed 2005763 fails in set_lum() with divide by zero */
-#ifdef USE_OPENMP
-# pragma omp parallel for default(none) shared(verbose, n_tests, mod, seed)
-#endif
- for (i = seed; i < seed + n_tests; ++i)
- {
- if (verbose)
- {
- if (mod == 0 || (i % mod) == 0)
- printf ("Seed %d\n", i);
- }
-
- run_test (i);
- }
-
- return 0;
-}
+#include "utils.h"
+
+#if 0
+#define fence_malloc malloc
+#define fence_free free
+#define make_random_bytes malloc
+#endif
+
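+/* Pixel formats from which random bits images are drawn. */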
+static const pixman_format_code_t image_formats[] =
+{
+ PIXMAN_a8r8g8b8,
+ PIXMAN_x8r8g8b8,
+ PIXMAN_r5g6b5,
+ PIXMAN_r3g3b2,
+ PIXMAN_a8,
+ PIXMAN_a8b8g8r8,
+ PIXMAN_x8b8g8r8,
+ PIXMAN_b8g8r8a8,
+ PIXMAN_b8g8r8x8,
+ PIXMAN_x14r6g6b6,
+ PIXMAN_r8g8b8,
+ PIXMAN_b8g8r8,
+ PIXMAN_r5g6b5,
+ PIXMAN_b5g6r5,
+ PIXMAN_x2r10g10b10,
+ PIXMAN_a2r10g10b10,
+ PIXMAN_x2b10g10r10,
+ PIXMAN_a2b10g10r10,
+ PIXMAN_a1r5g5b5,
+ PIXMAN_x1r5g5b5,
+ PIXMAN_a1b5g5r5,
+ PIXMAN_x1b5g5r5,
+ PIXMAN_a4r4g4b4,
+ PIXMAN_x4r4g4b4,
+ PIXMAN_a4b4g4r4,
+ PIXMAN_x4b4g4r4,
+ PIXMAN_a8,
+ PIXMAN_r3g3b2,
+ PIXMAN_b2g3r3,
+ PIXMAN_a2r2g2b2,
+ PIXMAN_a2b2g2r2,
+ PIXMAN_c8,
+ PIXMAN_g8,
+ PIXMAN_x4c4,
+ PIXMAN_x4g4,
+ PIXMAN_c4,
+ PIXMAN_g4,
+ PIXMAN_g1,
+ PIXMAN_x4a4,
+ PIXMAN_a4,
+ PIXMAN_r1g2b1,
+ PIXMAN_b1g2r1,
+ PIXMAN_a1r1g1b1,
+ PIXMAN_a1b1g1r1,
+ PIXMAN_a1
+};
+
+static pixman_filter_t filters[] =
+{
+ PIXMAN_FILTER_NEAREST,
+ PIXMAN_FILTER_BILINEAR,
+ PIXMAN_FILTER_FAST,
+ PIXMAN_FILTER_GOOD,
+ PIXMAN_FILTER_BEST,
+ PIXMAN_FILTER_CONVOLUTION
+};
+
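+/* Returns a random image dimension, heavily biased toward small values
+ * but occasionally around 2^16 to probe integer-overflow paths. */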
+static int
+get_size (void)
+{
+ switch (lcg_rand_n (28))
+ {
+ case 0:
+ return 1;
+
+ case 1:
+ return 2;
+
+ default:
+ case 2:
+ return lcg_rand_n (200);
+
+ case 4:
+ return lcg_rand_n (2000) + 1000;
+
+ case 5:
+ return 65535;
+
+ case 6:
+ return 65536;
+
+ case 7:
+ return lcg_rand_N (64000) + 63000;
+ }
+}
+
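+/* Destroy callback: frees the pixel buffer (undoing the pointer offset
+ * applied for negative strides) and the indexed palette in 'data'. */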
+static void
+destroy (pixman_image_t *image, void *data)
+{
+ if (image->type == BITS && image->bits.free_me != image->bits.bits)
+ {
+ uint32_t *bits;
+
+ if (image->bits.bits != (void *)0x01)
+ {
+ bits = image->bits.bits;
+
+ if (image->bits.rowstride < 0)
+ bits -= (- image->bits.rowstride * (image->bits.height - 1));
+
+ fence_free (bits);
+ }
+ }
+
+ free (data);
+}
+
+static uint32_t
+real_reader (const void *src, int size)
+{
+ switch (size)
+ {
+ case 1:
+ return *(uint8_t *)src;
+ case 2:
+ return *(uint16_t *)src;
+ case 4:
+ return *(uint32_t *)src;
+ default:
+ assert (0);
+ return 0; /* silence MSVC */
+ }
+}
+
+static void
+real_writer (void *src, uint32_t value, int size)
+{
+ switch (size)
+ {
+ case 1:
+ *(uint8_t *)src = value;
+ break;
+
+ case 2:
+ *(uint16_t *)src = value;
+ break;
+
+ case 4:
+ *(uint32_t *)src = value;
+ break;
+
+ default:
+ assert (0);
+ break;
+ }
+}
+
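+/* Accessors paired with the bad (0x01) bits pointer: reads return
+ * random data and writes are discarded, so the pointer is never
+ * dereferenced. */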
+static uint32_t
+fake_reader (const void *src, int size)
+{
+ uint32_t r = lcg_rand_u32 ();
+
+ assert (size == 1 || size == 2 || size == 4);
+ /* Mask to 'size' bytes without the undefined 32-bit shift that
+ * (1 << (size * 8)) would perform when size == 4 */
+ return r & (0xffffffffu >> (32 - size * 8));
+}
+
+static void
+fake_writer (void *src, uint32_t value, int size)
+{
+ assert (size == 1 || size == 2 || size == 4);
+}
+
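+/* Random number whose magnitude is roughly logarithmically
+ * distributed, centered around zero. */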
+static int32_t
+log_rand (void)
+{
+ uint32_t mask;
+
+ mask = (1 << lcg_rand_n (31)) - 1;
+
+ return (lcg_rand () & mask) - (mask >> 1);
+}
+
+static pixman_image_t *
+create_random_bits_image (void)
+{
+ pixman_format_code_t format;
+ pixman_indexed_t *indexed;
+ pixman_image_t *image;
+ int width, height, stride;
+ uint32_t *bits;
+ pixman_read_memory_func_t read_func = NULL;
+ pixman_write_memory_func_t write_func = NULL;
+ pixman_filter_t filter;
+ pixman_fixed_t *coefficients = NULL;
+ int n_coefficients = 0;
+
+ /* format */
+ format = image_formats[lcg_rand_n (ARRAY_LENGTH (image_formats))];
+
+ indexed = NULL;
+ if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
+ {
+ indexed = malloc (sizeof (pixman_indexed_t));
+
+ initialize_palette (indexed, PIXMAN_FORMAT_BPP (format), TRUE);
+ }
+ else if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY)
+ {
+ indexed = malloc (sizeof (pixman_indexed_t));
+
+ initialize_palette (indexed, PIXMAN_FORMAT_BPP (format), FALSE);
+ }
+ else
+ {
+ indexed = NULL;
+ }
+
+ /* size */
+ width = get_size ();
+ height = get_size ();
+
+ if ((uint64_t)width * height > 200000)
+ {
+ if (lcg_rand_n(2) == 0)
+ height = 200000 / width;
+ else
+ width = 200000 / height;
+ }
+
+ if (height == 0)
+ height = 1;
+ if (width == 0)
+ width = 1;
+
+ /* bits */
+ switch (lcg_rand_n (7))
+ {
+ default:
+ case 0:
+ stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
+ stride = (stride + 3) & (~3);
+ bits = (uint32_t *)make_random_bytes (height * stride);
+ break;
+
+ case 1:
+ stride = 0;
+ bits = NULL;
+ break;
+
+ case 2: /* Zero-filled */
+ stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
+ stride = (stride + 3) & (~3);
+ bits = fence_malloc (height * stride);
+ if (!bits)
+ return NULL;
+ memset (bits, 0, height * stride);
+ break;
+
+ case 3: /* Filled with 0xFF */
+ stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
+ stride = (stride + 3) & (~3);
+ bits = fence_malloc (height * stride);
+ if (!bits)
+ return NULL;
+ memset (bits, 0xff, height * stride);
+ break;
+
+ case 4: /* bits is a bad pointer, has read/write functions */
+ stride = 232;
+ bits = (void *)0x01;
+ read_func = fake_reader;
+ write_func = fake_writer;
+ break;
+
+ case 5: /* bits is a real pointer, has read/write functions */
+ stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
+ stride = (stride + 3) & (~3);
+ bits = fence_malloc (height * stride);
+ if (!bits)
+ return NULL;
+ memset (bits, 0xff, height * stride);
+ read_func = real_reader;
+ write_func = real_writer;
+ break;
+
+ case 6: /* bits is a real pointer, stride is negative */
+ stride = (width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17));
+ stride = (stride + 3) & (~3);
+ bits = (uint32_t *)make_random_bytes (height * stride);
+ if (!bits)
+ return NULL;
+ bits += ((height - 1) * stride) / 4;
+ stride = - stride;
+ break;
+ }
+
+ /* Filter */
+ filter = filters[lcg_rand_n (ARRAY_LENGTH (filters))];
+ if (filter == PIXMAN_FILTER_CONVOLUTION)
+ {
+ int width = lcg_rand_n (17);
+ int height = lcg_rand_n (19);
+
+ n_coefficients = width * height + 2;
+ coefficients = malloc (n_coefficients * sizeof (pixman_fixed_t));
+
+ if (coefficients)
+ {
+ int i;
+
+ for (i = 0; i < width * height; ++i)
+ coefficients[i + 2] = lcg_rand_u32();
+
+ coefficients[0] = width << 16;
+ coefficients[1] = height << 16;
+ }
+ else
+ {
+ filter = PIXMAN_FILTER_BEST;
+ }
+ }
+
+ /* Finally create the image */
+ image = pixman_image_create_bits (format, width, height, bits, stride);
+ if (!image)
+ return NULL;
+
+ pixman_image_set_indexed (image, indexed);
+ pixman_image_set_destroy_function (image, destroy, indexed);
+ pixman_image_set_accessors (image, read_func, write_func);
+ pixman_image_set_filter (image, filter, coefficients, n_coefficients);
+
+ return image;
+}
+
+static pixman_repeat_t repeats[] =
+{
+ PIXMAN_REPEAT_NONE,
+ PIXMAN_REPEAT_NORMAL,
+ PIXMAN_REPEAT_REFLECT,
+ PIXMAN_REPEAT_PAD
+};
+
+static uint32_t
+absolute (int32_t i)
+{
+ return i < 0? -i : i;
+}
+
+static void
+set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map)
+{
+ pixman_repeat_t repeat;
+
+ /* Set properties that are generic to all images */
+
+ /* Repeat */
+ repeat = repeats[lcg_rand_n (ARRAY_LENGTH (repeats))];
+ pixman_image_set_repeat (image, repeat);
+
+ /* Alpha map */
+ if (allow_alpha_map && lcg_rand_n (3) == 0)
+ {
+ pixman_image_t *alpha_map;
+ int16_t x, y;
+
+ alpha_map = create_random_bits_image ();
+
+ if (alpha_map)
+ {
+ set_general_properties (alpha_map, FALSE);
+
+ x = lcg_rand_N (100000) - 65536;
+ y = lcg_rand_N (100000) - 65536;
+
+ pixman_image_set_alpha_map (image, alpha_map, x, y);
+
+ pixman_image_unref (alpha_map);
+ }
+ }
+
+ /* Component alpha */
+ pixman_image_set_component_alpha (image, lcg_rand_n (3) == 0);
+
+ /* Clip region */
+ if (lcg_rand_n (8) != 0)
+ {
+ pixman_region32_t region;
+ int i, n_rects;
+
+ pixman_region32_init (&region);
+
+ switch (lcg_rand_n (10))
+ {
+ case 0:
+ n_rects = 0;
+ break;
+
+ case 1: case 2: case 3:
+ n_rects = 1;
+ break;
+
+ case 4: case 5:
+ n_rects = 2;
+ break;
+
+ case 6: case 7:
+ n_rects = 3;
+ break;
+
+ default:
+ n_rects = lcg_rand_n (100);
+ break;
+ }
+
+ for (i = 0; i < n_rects; ++i)
+ {
+ uint32_t width, height;
+ int x, y;
+
+ x = log_rand();
+ y = log_rand();
+ width = absolute (log_rand ()) + 1;
+ height = absolute (log_rand ()) + 1;
+
+ pixman_region32_union_rect (
+ &region, &region, x, y, width, height);
+ }
+
+ pixman_image_set_clip_region32 (image, &region);
+
+ pixman_region32_fini (&region);
+ }
+
+ /* Whether source clipping is enabled */
+ pixman_image_set_source_clipping (image, !!lcg_rand_n (2));
+
+ /* Client clip */
+ pixman_image_set_has_client_clip (image, !!lcg_rand_n (2));
+
+ /* Transform */
+ if (lcg_rand_n (5) < 2)
+ {
+ pixman_transform_t xform;
+ int i, j, k;
+ uint32_t tx, ty, sx, sy;
+ uint32_t c, s;
+
+ memset (&xform, 0, sizeof xform);
+ xform.matrix[0][0] = pixman_fixed_1;
+ xform.matrix[1][1] = pixman_fixed_1;
+ xform.matrix[2][2] = pixman_fixed_1;
+
+ for (k = 0; k < 3; ++k)
+ {
+ switch (lcg_rand_n (4))
+ {
+ case 0:
+ /* rotation */
+ c = lcg_rand_N (2 * 65536) - 65536;
+ s = lcg_rand_N (2 * 65536) - 65536;
+ pixman_transform_rotate (&xform, NULL, c, s);
+ break;
+
+ case 1:
+ /* translation */
+ tx = lcg_rand_u32();
+ ty = lcg_rand_u32();
+ pixman_transform_translate (&xform, NULL, tx, ty);
+ break;
+
+ case 2:
+ /* scale */
+ sx = lcg_rand_u32();
+ sy = lcg_rand_u32();
+ pixman_transform_scale (&xform, NULL, sx, sy);
+ break;
+
+ case 3:
+ if (lcg_rand_n (16) == 0)
+ {
+ /* random */
+ for (i = 0; i < 3; ++i)
+ for (j = 0; j < 3; ++j)
+ xform.matrix[i][j] = lcg_rand_u32();
+ break;
+ }
+ else if (lcg_rand_n (16) == 0)
+ {
+ /* zero */
+ memset (&xform, 0, sizeof xform);
+ }
+ break;
+ }
+ }
+
+ pixman_image_set_transform (image, &xform);
+ }
+}
+
+static pixman_color_t
+random_color (void)
+{
+ pixman_color_t color =
+ {
+ lcg_rand() & 0xffff,
+ lcg_rand() & 0xffff,
+ lcg_rand() & 0xffff,
+ lcg_rand() & 0xffff,
+ };
+
+ return color;
+}
+
+
+static pixman_image_t *
+create_random_solid_image (void)
+{
+ pixman_color_t color = random_color();
+ pixman_image_t *image = pixman_image_create_solid_fill (&color);
+
+ return image;
+}
+
+static pixman_gradient_stop_t *
+create_random_stops (int *n_stops)
+{
+ pixman_fixed_t step;
+ pixman_fixed_t s;
+ int i;
+ pixman_gradient_stop_t *stops;
+
+ *n_stops = lcg_rand_n (50) + 1;
+
+ step = pixman_fixed_1 / *n_stops;
+
+ stops = malloc (*n_stops * sizeof (pixman_gradient_stop_t));
+ if (!stops)
+ return NULL;
+
+ s = 0;
+ for (i = 0; i < (*n_stops) - 1; ++i)
+ {
+ stops[i].x = s;
+ stops[i].color = random_color();
+
+ s += step;
+ }
+
+ stops[*n_stops - 1].x = pixman_fixed_1;
+ stops[*n_stops - 1].color = random_color();
+
+ return stops;
+}
+
+static pixman_point_fixed_t
+create_random_point (void)
+{
+ pixman_point_fixed_t p;
+
+ p.x = log_rand ();
+ p.y = log_rand ();
+
+ return p;
+}
+
+static pixman_image_t *
+create_random_linear_image (void)
+{
+ int n_stops;
+ pixman_gradient_stop_t *stops;
+ pixman_point_fixed_t p1, p2;
+ pixman_image_t *result;
+
+ stops = create_random_stops (&n_stops);
+ if (!stops)
+ return NULL;
+
+ p1 = create_random_point ();
+ p2 = create_random_point ();
+
+ result = pixman_image_create_linear_gradient (&p1, &p2, stops, n_stops);
+
+ free (stops);
+
+ return result;
+}
+
+static pixman_image_t *
+create_random_radial_image (void)
+{
+ int n_stops;
+ pixman_gradient_stop_t *stops;
+ pixman_point_fixed_t inner_c, outer_c;
+ pixman_fixed_t inner_r, outer_r;
+ pixman_image_t *result;
+
+ inner_c = create_random_point();
+ outer_c = create_random_point();
+ inner_r = lcg_rand();
+ outer_r = lcg_rand();
+
+ stops = create_random_stops (&n_stops);
+
+ if (!stops)
+ return NULL;
+
+ result = pixman_image_create_radial_gradient (
+ &inner_c, &outer_c, inner_r, outer_r, stops, n_stops);
+
+ free (stops);
+
+ return result;
+}
+
+static pixman_image_t *
+create_random_conical_image (void)
+{
+ pixman_gradient_stop_t *stops;
+ int n_stops;
+ pixman_point_fixed_t c;
+ pixman_fixed_t angle;
+ pixman_image_t *result;
+
+ c = create_random_point();
+ angle = lcg_rand();
+
+ stops = create_random_stops (&n_stops);
+
+ if (!stops)
+ return NULL;
+
+ result = pixman_image_create_conical_gradient (&c, angle, stops, n_stops);
+
+ free (stops);
+
+ return result;
+}
+
+static pixman_image_t *
+create_random_image (void)
+{
+ pixman_image_t *result;
+
+ switch (lcg_rand_n (5))
+ {
+ default:
+ case 0:
+ result = create_random_bits_image ();
+ break;
+
+ case 1:
+ result = create_random_solid_image ();
+ break;
+
+ case 2:
+ result = create_random_linear_image ();
+ break;
+
+ case 3:
+ result = create_random_radial_image ();
+ break;
+
+ case 4:
+ result = create_random_conical_image ();
+ break;
+ }
+
+ if (result)
+ set_general_properties (result, TRUE);
+
+ return result;
+}
+
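+/* All pixman compositing operators; SRC, OVER and ADD are listed twice
+ * to weight the random selection toward the common cases. */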
+static const pixman_op_t op_list[] =
+{
+ PIXMAN_OP_SRC,
+ PIXMAN_OP_OVER,
+ PIXMAN_OP_ADD,
+ PIXMAN_OP_CLEAR,
+ PIXMAN_OP_SRC,
+ PIXMAN_OP_DST,
+ PIXMAN_OP_OVER,
+ PIXMAN_OP_OVER_REVERSE,
+ PIXMAN_OP_IN,
+ PIXMAN_OP_IN_REVERSE,
+ PIXMAN_OP_OUT,
+ PIXMAN_OP_OUT_REVERSE,
+ PIXMAN_OP_ATOP,
+ PIXMAN_OP_ATOP_REVERSE,
+ PIXMAN_OP_XOR,
+ PIXMAN_OP_ADD,
+ PIXMAN_OP_SATURATE,
+ PIXMAN_OP_DISJOINT_CLEAR,
+ PIXMAN_OP_DISJOINT_SRC,
+ PIXMAN_OP_DISJOINT_DST,
+ PIXMAN_OP_DISJOINT_OVER,
+ PIXMAN_OP_DISJOINT_OVER_REVERSE,
+ PIXMAN_OP_DISJOINT_IN,
+ PIXMAN_OP_DISJOINT_IN_REVERSE,
+ PIXMAN_OP_DISJOINT_OUT,
+ PIXMAN_OP_DISJOINT_OUT_REVERSE,
+ PIXMAN_OP_DISJOINT_ATOP,
+ PIXMAN_OP_DISJOINT_ATOP_REVERSE,
+ PIXMAN_OP_DISJOINT_XOR,
+ PIXMAN_OP_CONJOINT_CLEAR,
+ PIXMAN_OP_CONJOINT_SRC,
+ PIXMAN_OP_CONJOINT_DST,
+ PIXMAN_OP_CONJOINT_OVER,
+ PIXMAN_OP_CONJOINT_OVER_REVERSE,
+ PIXMAN_OP_CONJOINT_IN,
+ PIXMAN_OP_CONJOINT_IN_REVERSE,
+ PIXMAN_OP_CONJOINT_OUT,
+ PIXMAN_OP_CONJOINT_OUT_REVERSE,
+ PIXMAN_OP_CONJOINT_ATOP,
+ PIXMAN_OP_CONJOINT_ATOP_REVERSE,
+ PIXMAN_OP_CONJOINT_XOR,
+ PIXMAN_OP_MULTIPLY,
+ PIXMAN_OP_SCREEN,
+ PIXMAN_OP_OVERLAY,
+ PIXMAN_OP_DARKEN,
+ PIXMAN_OP_LIGHTEN,
+ PIXMAN_OP_COLOR_DODGE,
+ PIXMAN_OP_COLOR_BURN,
+ PIXMAN_OP_HARD_LIGHT,
+ PIXMAN_OP_DIFFERENCE,
+ PIXMAN_OP_EXCLUSION,
+ PIXMAN_OP_SOFT_LIGHT,
+ PIXMAN_OP_HSL_HUE,
+ PIXMAN_OP_HSL_SATURATION,
+ PIXMAN_OP_HSL_COLOR,
+ PIXMAN_OP_HSL_LUMINOSITY,
+};
+
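+/* One fuzzing iteration: the seed fully determines the images and the
+ * operator, so any failure can be reproduced with '-s <seed>'. */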
+static void
+run_test (uint32_t seed)
+{
+ pixman_image_t *source, *mask, *dest;
+ pixman_op_t op;
+
+ lcg_srand (seed);
+
+ source = create_random_image ();
+ mask = create_random_image ();
+ dest = create_random_bits_image ();
+
+ if (source && mask && dest)
+ {
+ set_general_properties (dest, TRUE);
+
+ op = op_list [lcg_rand_n (ARRAY_LENGTH (op_list))];
+
+ pixman_image_composite32 (op,
+ source, mask, dest,
+ log_rand(), log_rand(),
+ log_rand(), log_rand(),
+ log_rand(), log_rand(),
+ absolute (log_rand()),
+ absolute (log_rand()));
+ }
+ if (source)
+ pixman_image_unref (source);
+ if (mask)
+ pixman_image_unref (mask);
+ if (dest)
+ pixman_image_unref (dest);
+}
+
+static pixman_bool_t
+get_int (char *s, uint32_t *i)
+{
+ char *end;
+ int p;
+
+ p = strtol (s, &end, 0);
+
+ if (end != s && *end == 0)
+ {
+ *i = p;
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+int
+main (int argc, char **argv)
+{
+ int verbose = FALSE;
+ uint32_t seed = 1;
+ uint32_t n_tests = 0xffffffff;
+ uint32_t mod = 0;
+ uint32_t i;
+
+ pixman_disable_out_of_bounds_workaround ();
+
+ enable_fp_exceptions();
+
+ if (getenv ("VERBOSE") != NULL)
+ verbose = TRUE;
+
+ for (i = 1; i < argc; ++i)
+ {
+ if (strcmp (argv[i], "-v") == 0)
+ {
+ verbose = TRUE;
+
+ if (i + 1 < argc)
+ {
+ get_int (argv[i + 1], &mod);
+ i++;
+ }
+ }
+ else if (strcmp (argv[i], "-s") == 0 && i + 1 < argc)
+ {
+ get_int (argv[i + 1], &seed);
+ i++;
+ }
+ else if (strcmp (argv[i], "-n") == 0 && i + 1 < argc)
+ {
+ get_int (argv[i + 1], &n_tests);
+ i++;
+ }
+ else
+ {
+ if (strcmp (argv[i], "-h") != 0)
+ printf ("Unknown option '%s'\n\n", argv[i]);
+
+ printf ("Options:\n\n"
+ "-n <number> Number of tests to run\n"
+ "-s <seed> Seed of first test\n"
+ "-v Print out seeds\n"
+ "-v <n> Print out every n'th seed\n\n");
+
+ exit (-1);
+ }
+ }
+
+ if (n_tests == 0xffffffff)
+ n_tests = 8000;
+
+ /* FIXME: seed 2005763 fails in set_lum() with divide by zero */
+#ifdef USE_OPENMP
+# pragma omp parallel for default(none) shared(verbose, n_tests, mod, seed)
+#endif
+ for (i = seed; i < seed + n_tests; ++i)
+ {
+ if (verbose)
+ {
+ if (mod == 0 || (i % mod) == 0)
+ printf ("Seed %d\n", i);
+ }
+
+ run_test (i);
+ }
+
+ return 0;
+}
diff --git a/pixman/test/trap-crasher.c b/pixman/test/trap-crasher.c
index 7485e62fd..96f3b0bab 100644
--- a/pixman/test/trap-crasher.c
+++ b/pixman/test/trap-crasher.c
@@ -1,27 +1,27 @@
-#include <stdlib.h>
-#include <pixman.h>
-
-int
-main()
-{
- pixman_image_t *dst;
- pixman_trapezoid_t traps[1] = {
- {
- 2147483646,
- 2147483647,
- {
- { 0, 0 },
- { 0, 2147483647 }
- },
- {
- { 65536, 0 },
- { 0, 2147483647 }
- }
- },
- };
-
- dst = pixman_image_create_bits (PIXMAN_a8, 1, 1, NULL, -1);
-
- pixman_add_trapezoids (dst, 0, 0, sizeof (traps)/sizeof (traps[0]), traps);
- return (0);
-}
+#include <stdlib.h>
+#include <pixman.h>
+
+int
+main()
+{
+ pixman_image_t *dst;
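+ /* A trapezoid with coordinates near INT32_MAX, chosen to provoke
+ * fixed-point overflow in pixman_add_trapezoids(). */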
+ pixman_trapezoid_t traps[1] = {
+ {
+ 2147483646,
+ 2147483647,
+ {
+ { 0, 0 },
+ { 0, 2147483647 }
+ },
+ {
+ { 65536, 0 },
+ { 0, 2147483647 }
+ }
+ },
+ };
+
+ dst = pixman_image_create_bits (PIXMAN_a8, 1, 1, NULL, -1);
+
+ pixman_add_trapezoids (dst, 0, 0, sizeof (traps)/sizeof (traps[0]), traps);
+ return (0);
+}