From 01df5d59e56a1b060568f8cad2e89f7eea22fc70 Mon Sep 17 00:00:00 2001 From: marha Date: Mon, 29 Aug 2011 08:51:20 +0200 Subject: xwininfo libX11 libXmu libxcb mesa xserver xkeyboard-config git update 29 aug 2011 --- pixman/pixman/Makefile.am | 258 +-- pixman/pixman/pixman-arm-common.h | 2 +- pixman/pixman/pixman-arm-simd.c | 864 +++++----- pixman/pixman/pixman-bits-image.c | 3044 +++++++++++++++++------------------- pixman/pixman/pixman-fast-path.c | 2 +- pixman/pixman/pixman-fast-path.h | 1189 -------------- pixman/pixman/pixman-inlines.h | 1280 +++++++++++++++ pixman/pixman/pixman-noop.c | 2 +- pixman/pixman/pixman-private.h | 7 +- pixman/pixman/pixman-region.c | 64 +- pixman/pixman/pixman-sse2.c | 2 +- pixman/pixman/pixman-utils.c | 12 +- pixman/test/Makefile.am | 2 + pixman/test/Makefile.win32 | 5 +- pixman/test/lowlevel-blt-bench.c | 7 +- pixman/test/region-contains-test.c | 169 ++ pixman/test/scaling-helpers-test.c | 2 +- pixman/test/utils.c | 11 +- pixman/test/utils.h | 16 +- 19 files changed, 3572 insertions(+), 3366 deletions(-) delete mode 100644 pixman/pixman/pixman-fast-path.h create mode 100644 pixman/pixman/pixman-inlines.h create mode 100644 pixman/test/region-contains-test.c (limited to 'pixman') diff --git a/pixman/pixman/Makefile.am b/pixman/pixman/Makefile.am index acc273145..44e6b17e4 100644 --- a/pixman/pixman/Makefile.am +++ b/pixman/pixman/Makefile.am @@ -1,129 +1,129 @@ -lib_LTLIBRARIES = libpixman-1.la -libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined @PTHREAD_LDFLAGS@ -libpixman_1_la_LIBADD = @PTHREAD_LIBS@ @DEP_LIBS@ -lm -libpixman_1_la_SOURCES = \ - pixman.h \ - pixman-accessor.h \ - pixman-access.c \ - pixman-access-accessors.c \ - pixman-cpu.c \ - pixman-gradient-walker.c \ - pixman-region16.c \ - pixman-region32.c \ - pixman-compiler.h \ - pixman-private.h \ - pixman-image.c \ - pixman-implementation.c \ - pixman-combine32.c \ - pixman-combine32.h \ - pixman-combine64.c \ - pixman-combine64.h \ - pixman-general.c \ - pixman.c \ - pixman-noop.c \ - pixman-fast-path.c \ - pixman-fast-path.h \ - pixman-solid-fill.c \ - pixman-conical-gradient.c \ - pixman-linear-gradient.c \ - pixman-radial-gradient.c \ - pixman-bits-image.c \ - pixman-utils.c \ - pixman-edge.c \ - pixman-edge-accessors.c \ - pixman-edge-imp.h \ - pixman-trap.c \ - pixman-timer.c \ - pixman-matrix.c - -libpixmanincludedir = $(includedir)/pixman-1 -libpixmaninclude_HEADERS = pixman.h pixman-version.h -noinst_LTLIBRARIES = - -BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c - -pixman-combine32.c : pixman-combine.c.template pixman-combine32.h make-combine.pl - $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1) -pixman-combine32.h : pixman-combine.h.template make-combine.pl - $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1) - -pixman-combine64.c : pixman-combine.c.template pixman-combine64.h make-combine.pl - $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1) -pixman-combine64.h : pixman-combine.h.template make-combine.pl - $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1) - -EXTRA_DIST = Makefile.win32 pixman-combine.c.template make-combine.pl pixman-region.c \ - pixman-combine.h.template solaris-hwcap.mapfile -CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h - -# mmx code -if USE_MMX -noinst_LTLIBRARIES += libpixman-mmx.la -libpixman_mmx_la_SOURCES = \ - pixman-mmx.c -libpixman_mmx_la_CFLAGS = $(DEP_CFLAGS) $(MMX_CFLAGS) -libpixman_mmx_la_LIBADD = $(DEP_LIBS) -libpixman_1_la_LDFLAGS += $(MMX_LDFLAGS) -libpixman_1_la_LIBADD += libpixman-mmx.la - -ASM_CFLAGS_mmx=$(MMX_CFLAGS) -endif - -# vmx code -if USE_VMX -noinst_LTLIBRARIES += libpixman-vmx.la -libpixman_vmx_la_SOURCES = \ - pixman-vmx.c \ - pixman-combine32.h -libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS) -libpixman_vmx_la_LIBADD = $(DEP_LIBS) -libpixman_1_la_LIBADD += libpixman-vmx.la - -ASM_CFLAGS_vmx=$(VMX_CFLAGS) -endif - -# sse2 code -if USE_SSE2 -noinst_LTLIBRARIES += libpixman-sse2.la -libpixman_sse2_la_SOURCES = \ - pixman-sse2.c -libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS) -libpixman_sse2_la_LIBADD = $(DEP_LIBS) -libpixman_1_la_LDFLAGS += $(SSE2_LDFLAGS) -libpixman_1_la_LIBADD += libpixman-sse2.la - -ASM_CFLAGS_sse2=$(SSE2_CFLAGS) -endif - -# arm simd code -if USE_ARM_SIMD -noinst_LTLIBRARIES += libpixman-arm-simd.la -libpixman_arm_simd_la_SOURCES = \ - pixman-arm-simd.c \ - pixman-arm-common.h \ - pixman-arm-simd-asm.S -libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) -libpixman_arm_simd_la_LIBADD = $(DEP_LIBS) -libpixman_1_la_LIBADD += libpixman-arm-simd.la - -ASM_CFLAGS_arm_simd= -endif - -# arm neon code -if USE_ARM_NEON -noinst_LTLIBRARIES += libpixman-arm-neon.la -libpixman_arm_neon_la_SOURCES = \ - pixman-arm-neon.c \ - pixman-arm-common.h \ - pixman-arm-neon-asm.S \ - pixman-arm-neon-asm-bilinear.S \ - pixman-arm-neon-asm.h -libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS) -libpixman_arm_neon_la_LIBADD = $(DEP_LIBS) -libpixman_1_la_LIBADD += libpixman-arm-neon.la - -ASM_CFLAGS_arm_neon= -endif - -.c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES) - $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $< +lib_LTLIBRARIES = libpixman-1.la +libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined @PTHREAD_LDFLAGS@ +libpixman_1_la_LIBADD = @PTHREAD_LIBS@ @DEP_LIBS@ -lm +libpixman_1_la_SOURCES = \ + pixman.h \ + pixman-accessor.h \ + pixman-access.c \ + pixman-access-accessors.c \ + pixman-cpu.c \ + pixman-gradient-walker.c \ + pixman-region16.c \ + pixman-region32.c \ + pixman-compiler.h \ + pixman-private.h \ + pixman-image.c \ + pixman-implementation.c \ + pixman-combine32.c \ + pixman-combine32.h \ + pixman-combine64.c \ + pixman-combine64.h \ + pixman-general.c \ + pixman.c \ + pixman-noop.c \ + pixman-fast-path.c \ + pixman-solid-fill.c \ + pixman-conical-gradient.c \ + pixman-linear-gradient.c \ + pixman-radial-gradient.c \ + pixman-bits-image.c \ + pixman-utils.c \ + pixman-edge.c \ + pixman-edge-accessors.c \ + pixman-edge-imp.h \ + pixman-inlines.h \ + pixman-trap.c \ + pixman-timer.c \ + pixman-matrix.c + +libpixmanincludedir = $(includedir)/pixman-1 +libpixmaninclude_HEADERS = pixman.h pixman-version.h +noinst_LTLIBRARIES = + +BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c + +pixman-combine32.c : pixman-combine.c.template pixman-combine32.h make-combine.pl + $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1) +pixman-combine32.h : pixman-combine.h.template make-combine.pl + $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1) + +pixman-combine64.c : pixman-combine.c.template pixman-combine64.h make-combine.pl + $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1) +pixman-combine64.h : pixman-combine.h.template make-combine.pl + $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1) + +EXTRA_DIST = Makefile.win32 pixman-combine.c.template make-combine.pl pixman-region.c \ + pixman-combine.h.template solaris-hwcap.mapfile +CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h + +# mmx code +if USE_MMX +noinst_LTLIBRARIES += libpixman-mmx.la +libpixman_mmx_la_SOURCES = \ + pixman-mmx.c +libpixman_mmx_la_CFLAGS = $(DEP_CFLAGS) $(MMX_CFLAGS) +libpixman_mmx_la_LIBADD = $(DEP_LIBS) +libpixman_1_la_LDFLAGS += $(MMX_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-mmx.la + +ASM_CFLAGS_mmx=$(MMX_CFLAGS) +endif + +# vmx code +if USE_VMX +noinst_LTLIBRARIES += libpixman-vmx.la +libpixman_vmx_la_SOURCES = \ + pixman-vmx.c \ + pixman-combine32.h +libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS) +libpixman_vmx_la_LIBADD = $(DEP_LIBS) +libpixman_1_la_LIBADD += libpixman-vmx.la + +ASM_CFLAGS_vmx=$(VMX_CFLAGS) +endif + +# sse2 code +if USE_SSE2 +noinst_LTLIBRARIES += libpixman-sse2.la +libpixman_sse2_la_SOURCES = \ + pixman-sse2.c +libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS) +libpixman_sse2_la_LIBADD = $(DEP_LIBS) +libpixman_1_la_LDFLAGS += $(SSE2_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-sse2.la + +ASM_CFLAGS_sse2=$(SSE2_CFLAGS) +endif + +# arm simd code +if USE_ARM_SIMD +noinst_LTLIBRARIES += libpixman-arm-simd.la +libpixman_arm_simd_la_SOURCES = \ + pixman-arm-simd.c \ + pixman-arm-common.h \ + pixman-arm-simd-asm.S +libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) +libpixman_arm_simd_la_LIBADD = $(DEP_LIBS) +libpixman_1_la_LIBADD += libpixman-arm-simd.la + +ASM_CFLAGS_arm_simd= +endif + +# arm neon code +if USE_ARM_NEON +noinst_LTLIBRARIES += libpixman-arm-neon.la +libpixman_arm_neon_la_SOURCES = \ + pixman-arm-neon.c \ + pixman-arm-common.h \ + pixman-arm-neon-asm.S \ + pixman-arm-neon-asm-bilinear.S \ + pixman-arm-neon-asm.h +libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS) +libpixman_arm_neon_la_LIBADD = $(DEP_LIBS) +libpixman_1_la_LIBADD += libpixman-arm-neon.la + +ASM_CFLAGS_arm_neon= +endif + +.c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES) + $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $< diff --git a/pixman/pixman/pixman-arm-common.h b/pixman/pixman/pixman-arm-common.h index f1d212c84..f56264e8c 100644 --- a/pixman/pixman/pixman-arm-common.h +++ b/pixman/pixman/pixman-arm-common.h @@ -26,7 +26,7 @@ #ifndef PIXMAN_ARM_COMMON_H #define PIXMAN_ARM_COMMON_H -#include "pixman-fast-path.h" +#include "pixman-inlines.h" /* Define some macros which can expand into proxy functions between * ARM assembly optimized functions and the rest of pixman fast path API. diff --git a/pixman/pixman/pixman-arm-simd.c b/pixman/pixman/pixman-arm-simd.c index 45981a6b0..3d19bfac1 100644 --- a/pixman/pixman/pixman-arm-simd.c +++ b/pixman/pixman/pixman-arm-simd.c @@ -1,432 +1,432 @@ -/* - * Copyright © 2008 Mozilla Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Mozilla Corporation not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Mozilla Corporation makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Jeff Muizelaar (jeff@infidigm.net) - * - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-private.h" -#include "pixman-arm-common.h" -#include "pixman-fast-path.h" - -#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */ - -void -pixman_composite_add_8_8_asm_armv6 (int32_t width, - int32_t height, - uint8_t *dst_line, - int32_t dst_stride, - uint8_t *src_line, - int32_t src_stride) -{ - uint8_t *dst, *src; - int32_t w; - uint8_t s, d; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - /* ensure both src and dst are properly aligned before doing 32 bit reads - * we'll stay in this loop if src and dst have differing alignments - */ - while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3))) - { - s = *src; - d = *dst; - asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); - *dst = d; - - dst++; - src++; - w--; - } - - while (w >= 4) - { - asm ("uqadd8 %0, %1, %2" - : "=r" (*(uint32_t*)dst) - : "r" (*(uint32_t*)src), "r" (*(uint32_t*)dst)); - dst += 4; - src += 4; - w -= 4; - } - - while (w) - { - s = *src; - d = *dst; - asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); - *dst = d; - - dst++; - src++; - w--; - } - } - -} - -void -pixman_composite_over_8888_8888_asm_armv6 (int32_t width, - int32_t height, - uint32_t *dst_line, - int32_t dst_stride, - uint32_t *src_line, - int32_t src_stride) -{ - uint32_t *dst; - uint32_t *src; - int32_t w; - uint32_t component_half = 0x800080; - uint32_t upper_component_mask = 0xff00ff00; - uint32_t alpha_mask = 0xff; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - -/* #define inner_branch */ - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load src */ - "ldr r5, [%[src]], #4\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that - */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - - /* = 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" - - "ldr r4, [%[dest]] \n\t" - -#else - "ldr r4, [%[dest]] \n\t" - - /* = 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" -#endif - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* multiply by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - /* recombine the 0xff00ff00 bytes of r6 and r7 */ - "and r7, r7, %[upper_component_mask]\n\t" - "uxtab16 r6, r7, r6, ror #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) - : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), - [alpha_mask] "r" (alpha_mask) - : "r4", "r5", "r6", "r7", "r8", "cc", "memory" - ); - } -} - -void -pixman_composite_over_8888_n_8888_asm_armv6 (int32_t width, - int32_t height, - uint32_t *dst_line, - int32_t dst_stride, - uint32_t *src_line, - int32_t src_stride, - uint32_t mask) -{ - uint32_t *dst; - uint32_t *src; - int32_t w; - uint32_t component_half = 0x800080; - uint32_t alpha_mask = 0xff; - - mask = (mask) >> 24; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - -/* #define inner_branch */ - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load src */ - "ldr r5, [%[src]], #4\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that - */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - -#endif - "ldr r4, [%[dest]] \n\t" - - "uxtb16 r6, r5\n\t" - "uxtb16 r7, r5, ror #8\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, %[mask_alpha], %[component_half]\n\t" - "mla r7, r7, %[mask_alpha], %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r5, r6, r7, lsl #8\n\t" - - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r6, r6, r7, lsl #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) - : [component_half] "r" (component_half), [mask_alpha] "r" (mask), - [alpha_mask] "r" (alpha_mask) - : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" - ); - } -} - -void -pixman_composite_over_n_8_8888_asm_armv6 (int32_t width, - int32_t height, - uint32_t *dst_line, - int32_t dst_stride, - uint32_t src, - int32_t unused, - uint8_t *mask_line, - int32_t mask_stride) -{ - uint32_t srca; - uint32_t *dst; - uint8_t *mask; - int32_t w; - - srca = src >> 24; - - uint32_t component_mask = 0xff00ff; - uint32_t component_half = 0x800080; - - uint32_t src_hi = (src >> 8) & component_mask; - uint32_t src_lo = src & component_mask; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - -/* #define inner_branch */ - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load mask */ - "ldrb r5, [%[mask]], #1\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that - */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - -#endif - "ldr r4, [%[dest]] \n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, %[src_lo], r5, %[component_half]\n\t" - "mla r7, %[src_hi], r5, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r5, r6, r7, lsl #8\n\t" - - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* we could simplify this to use 'sub' if we were - * willing to give up a register for alpha_mask - */ - "mvn r8, r5\n\t" - "mov r8, r8, lsr #24\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r6, r6, r7, lsl #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) - : [component_half] "r" (component_half), - [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) - : "r4", "r5", "r6", "r7", "r8", "cc", "memory"); - } -} - -#endif - -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, - uint8_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, - uint16_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC, - uint32_t, uint32_t) - -static const pixman_fast_path_t arm_simd_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888), - - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8), - - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), - - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888), - PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888), - - { PIXMAN_OP_NONE }, -}; - -pixman_implementation_t * -_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths); - - return imp; -} +/* + * Copyright © 2008 Mozilla Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "pixman-private.h" +#include "pixman-arm-common.h" +#include "pixman-inlines.h" + +#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */ + +void +pixman_composite_add_8_8_asm_armv6 (int32_t width, + int32_t height, + uint8_t *dst_line, + int32_t dst_stride, + uint8_t *src_line, + int32_t src_stride) +{ + uint8_t *dst, *src; + int32_t w; + uint8_t s, d; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + /* ensure both src and dst are properly aligned before doing 32 bit reads + * we'll stay in this loop if src and dst have differing alignments + */ + while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3))) + { + s = *src; + d = *dst; + asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); + *dst = d; + + dst++; + src++; + w--; + } + + while (w >= 4) + { + asm ("uqadd8 %0, %1, %2" + : "=r" (*(uint32_t*)dst) + : "r" (*(uint32_t*)src), "r" (*(uint32_t*)dst)); + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + s = *src; + d = *dst; + asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); + *dst = d; + + dst++; + src++; + w--; + } + } + +} + +void +pixman_composite_over_8888_8888_asm_armv6 (int32_t width, + int32_t height, + uint32_t *dst_line, + int32_t dst_stride, + uint32_t *src_line, + int32_t src_stride) +{ + uint32_t *dst; + uint32_t *src; + int32_t w; + uint32_t component_half = 0x800080; + uint32_t upper_component_mask = 0xff00ff00; + uint32_t alpha_mask = 0xff; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + +/* #define inner_branch */ + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" + + "ldr r4, [%[dest]] \n\t" + +#else + "ldr r4, [%[dest]] \n\t" + + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" +#endif + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* multiply by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + /* recombine the 0xff00ff00 bytes of r6 and r7 */ + "and r7, r7, %[upper_component_mask]\n\t" + "uxtab16 r6, r7, r6, ror #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory" + ); + } +} + +void +pixman_composite_over_8888_n_8888_asm_armv6 (int32_t width, + int32_t height, + uint32_t *dst_line, + int32_t dst_stride, + uint32_t *src_line, + int32_t src_stride, + uint32_t mask) +{ + uint32_t *dst; + uint32_t *src; + int32_t w; + uint32_t component_half = 0x800080; + uint32_t alpha_mask = 0xff; + + mask = (mask) >> 24; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + +/* #define inner_branch */ + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + +#endif + "ldr r4, [%[dest]] \n\t" + + "uxtb16 r6, r5\n\t" + "uxtb16 r7, r5, ror #8\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, %[mask_alpha], %[component_half]\n\t" + "mla r7, r7, %[mask_alpha], %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" + + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [mask_alpha] "r" (mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" + ); + } +} + +void +pixman_composite_over_n_8_8888_asm_armv6 (int32_t width, + int32_t height, + uint32_t *dst_line, + int32_t dst_stride, + uint32_t src, + int32_t unused, + uint8_t *mask_line, + int32_t mask_stride) +{ + uint32_t srca; + uint32_t *dst; + uint8_t *mask; + int32_t w; + + srca = src >> 24; + + uint32_t component_mask = 0xff00ff; + uint32_t component_half = 0x800080; + + uint32_t src_hi = (src >> 8) & component_mask; + uint32_t src_lo = src & component_mask; + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + +/* #define inner_branch */ + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load mask */ + "ldrb r5, [%[mask]], #1\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + +#endif + "ldr r4, [%[dest]] \n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, %[src_lo], r5, %[component_half]\n\t" + "mla r7, %[src_hi], r5, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" + + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* we could simplify this to use 'sub' if we were + * willing to give up a register for alpha_mask + */ + "mvn r8, r5\n\t" + "mov r8, r8, lsr #24\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) + : [component_half] "r" (component_half), + [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory"); + } +} + +#endif + +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, + uint8_t, 1, uint32_t, 1) + +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, + uint16_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC, + uint32_t, uint32_t) + +static const pixman_fast_path_t arm_simd_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888), + + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8), + + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888), + + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths); + + return imp; +} diff --git a/pixman/pixman/pixman-bits-image.c b/pixman/pixman/pixman-bits-image.c index 40ccaa8bd..f540c76e1 100644 --- a/pixman/pixman/pixman-bits-image.c +++ b/pixman/pixman/pixman-bits-image.c @@ -1,1581 +1,1463 @@ -/* - * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. - * 2005 Lars Knoll & Zack Rusin, Trolltech - * 2008 Aaron Plattner, NVIDIA Corporation - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007, 2009 Red Hat, Inc. - * Copyright © 2008 André Tupinambá - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include -#include -#include "pixman-private.h" -#include "pixman-combine32.h" - -/* - * By default, just evaluate the image at 32bpp and expand. Individual image - * types can plug in a better scanline getter if they want to. For example - * we could produce smoother gradients by evaluating them at higher color - * depth, but that's a project for the future. - */ -static void -_pixman_image_get_scanline_generic_64 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t * mask) -{ - uint32_t *mask8 = NULL; - - /* Contract the mask image, if one exists, so that the 32-bit fetch - * function can use it. - */ - if (mask) - { - mask8 = pixman_malloc_ab (width, sizeof(uint32_t)); - if (!mask8) - return; - - pixman_contract (mask8, (uint64_t *)mask, width); - } - - /* Fetch the source image into the first half of buffer. */ - image->bits.get_scanline_32 (image, x, y, width, (uint32_t*)buffer, mask8); - - /* Expand from 32bpp to 64bpp in place. */ - pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, width); - - free (mask8); -} - -/* Fetch functions */ - -static force_inline uint32_t -fetch_pixel_no_alpha (bits_image_t *image, - int x, int y, pixman_bool_t check_bounds) -{ - if (check_bounds && - (x < 0 || x >= image->width || y < 0 || y >= image->height)) - { - return 0; - } - - return image->fetch_pixel_32 (image, x, y); -} - -typedef uint32_t (* get_pixel_t) (bits_image_t *image, - int x, int y, pixman_bool_t check_bounds); - -static force_inline void -repeat (pixman_repeat_t repeat, int size, int *coord) -{ - switch (repeat) - { - case PIXMAN_REPEAT_NORMAL: - *coord = MOD (*coord, size); - break; - - case PIXMAN_REPEAT_PAD: - *coord = CLIP (*coord, 0, size - 1); - break; - - case PIXMAN_REPEAT_REFLECT: - *coord = MOD (*coord, size * 2); - - if (*coord >= size) - *coord = size * 2 - *coord - 1; - break; - - case PIXMAN_REPEAT_NONE: - break; - - default: - break; - } -} - -static force_inline uint32_t -bits_image_fetch_pixel_nearest (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel) -{ - int x0 = pixman_fixed_to_int (x - pixman_fixed_e); - int y0 = pixman_fixed_to_int (y - pixman_fixed_e); - - if (image->common.repeat != PIXMAN_REPEAT_NONE) - { - repeat (image->common.repeat, image->width, &x0); - repeat (image->common.repeat, image->height, &y0); - - return get_pixel (image, x0, y0, FALSE); - } - else - { - return get_pixel (image, x0, y0, TRUE); - } -} - -#if SIZEOF_LONG > 4 - -static force_inline uint32_t -bilinear_interpolation (uint32_t tl, uint32_t tr, - uint32_t bl, uint32_t br, - int distx, int disty) -{ - uint64_t distxy, distxiy, distixy, distixiy; - uint64_t tl64, tr64, bl64, br64; - uint64_t f, r; - - distxy = distx * disty; - distxiy = distx * (256 - disty); - distixy = (256 - distx) * disty; - distixiy = (256 - distx) * (256 - disty); - - /* Alpha and Blue */ - tl64 = tl & 0xff0000ff; - tr64 = tr & 0xff0000ff; - bl64 = bl & 0xff0000ff; - br64 = br & 0xff0000ff; - - f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; - r = f & 0x0000ff0000ff0000ull; - - /* Red and Green */ - tl64 = tl; - tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); - - tr64 = tr; - tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); - - bl64 = bl; - bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); - - br64 = br; - br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); - - f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; - r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); - - return (uint32_t)(r >> 16); -} - -#else - -static force_inline uint32_t -bilinear_interpolation (uint32_t tl, uint32_t tr, - uint32_t bl, uint32_t br, - int distx, int disty) -{ - int distxy, distxiy, distixy, distixiy; - uint32_t f, r; - - distxy = distx * disty; - distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ - distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ - distixiy = - 256 * 256 - (disty << 8) - - (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ - - /* Blue */ - r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy - + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; - - /* Green */ - f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy - + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; - r |= f & 0xff000000; - - tl >>= 16; - tr >>= 16; - bl >>= 16; - br >>= 16; - r >>= 16; - - /* Red */ - f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy - + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; - r |= f & 0x00ff0000; - - /* Alpha */ - f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy - + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; - r |= f & 0xff000000; - - return r; -} - -#endif - -static force_inline uint32_t -bits_image_fetch_pixel_bilinear (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel) -{ - pixman_repeat_t repeat_mode = image->common.repeat; - int width = image->width; - int height = image->height; - int x1, y1, x2, y2; - uint32_t tl, tr, bl, br; - int32_t distx, disty; - - x1 = x - pixman_fixed_1 / 2; - y1 = y - pixman_fixed_1 / 2; - - distx = (x1 >> 8) & 0xff; - disty = (y1 >> 8) & 0xff; - - x1 = pixman_fixed_to_int (x1); - y1 = pixman_fixed_to_int (y1); - x2 = x1 + 1; - y2 = y1 + 1; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, width, &x1); - repeat (repeat_mode, height, &y1); - repeat (repeat_mode, width, &x2); - repeat (repeat_mode, height, &y2); - - tl = get_pixel (image, x1, y1, FALSE); - bl = get_pixel (image, x1, y2, FALSE); - tr = get_pixel (image, x2, y1, FALSE); - br = get_pixel (image, x2, y2, FALSE); - } - else - { - tl = get_pixel (image, x1, y1, TRUE); - tr = get_pixel (image, x2, y1, TRUE); - bl = get_pixel (image, x1, y2, TRUE); - br = get_pixel (image, x2, y2, TRUE); - } - - return bilinear_interpolation (tl, tr, bl, br, distx, disty); -} - -static void -bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask) -{ - bits_image_t *bits = &ima->bits; - pixman_fixed_t x_top, x_bottom, x; - pixman_fixed_t ux_top, ux_bottom, ux; - pixman_vector_t v; - uint32_t top_mask, bottom_mask; - uint32_t *top_row; - uint32_t *bottom_row; - uint32_t *end; - uint32_t zero[2] = { 0, 0 }; - uint32_t one = 1; - int y, y1, y2; - int disty; - int mask_inc; - int w; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (bits->common.transform, &v)) - return; - - ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0]; - x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; - - y = v.vector[1] - pixman_fixed_1/2; - disty = (y >> 8) & 0xff; - - /* Load the pointers to the first and second lines from the source - * image that bilinear code must read. - * - * The main trick in this code is about the check if any line are - * outside of the image; - * - * When I realize that a line (any one) is outside, I change - * the pointer to a dummy area with zeros. Once I change this, I - * must be sure the pointer will not change, so I set the - * variables to each pointer increments inside the loop. - */ - y1 = pixman_fixed_to_int (y); - y2 = y1 + 1; - - if (y1 < 0 || y1 >= bits->height) - { - top_row = zero; - x_top = 0; - ux_top = 0; - } - else - { - top_row = bits->bits + y1 * bits->rowstride; - x_top = x; - ux_top = ux; - } - - if (y2 < 0 || y2 >= bits->height) - { - bottom_row = zero; - x_bottom = 0; - ux_bottom = 0; - } - else - { - bottom_row = bits->bits + y2 * bits->rowstride; - x_bottom = x; - ux_bottom = ux; - } - - /* Instead of checking whether the operation uses the mast in - * each loop iteration, verify this only once and prepare the - * variables to make the code smaller inside the loop. - */ - if (!mask) - { - mask_inc = 0; - mask = &one; - } - else - { - /* If have a mask, prepare the variables to check it */ - mask_inc = 1; - } - - /* If both are zero, then the whole thing is zero */ - if (top_row == zero && bottom_row == zero) - { - memset (buffer, 0, width * sizeof (uint32_t)); - return; - } - else if (bits->format == PIXMAN_x8r8g8b8) - { - if (top_row == zero) - { - top_mask = 0; - bottom_mask = 0xff000000; - } - else if (bottom_row == zero) - { - top_mask = 0xff000000; - bottom_mask = 0; - } - else - { - top_mask = 0xff000000; - bottom_mask = 0xff000000; - } - } - else - { - top_mask = 0; - bottom_mask = 0; - } - - end = buffer + width; - - /* Zero fill to the left of the image */ - while (buffer < end && x < pixman_fixed_minus_1) - { - *buffer++ = 0; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Left edge - */ - while (buffer < end && x < 0) - { - uint32_t tr, br; - int32_t distx; - - tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask; - br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - - distx = (x >> 8) & 0xff; - - *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty); - - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Main part */ - w = pixman_int_to_fixed (bits->width - 1); - - while (buffer < end && x < w) - { - if (*mask) - { - uint32_t tl, tr, bl, br; - int32_t distx; - - tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; - tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask; - bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; - br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - - distx = (x >> 8) & 0xff; - - *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty); - } - - buffer++; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Right Edge */ - w = pixman_int_to_fixed (bits->width); - while (buffer < end && x < w) - { - if (*mask) - { - uint32_t tl, bl; - int32_t distx; - - tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; - bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; - - distx = (x >> 8) & 0xff; - - *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty); - } - - buffer++; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Zero fill to the left of the image */ - while (buffer < end) - *buffer++ = 0; -} - -static force_inline uint32_t -bits_image_fetch_pixel_convolution (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel) -{ - pixman_fixed_t *params = image->common.filter_params; - int x_off = (params[0] - pixman_fixed_1) >> 1; - int y_off = (params[1] - pixman_fixed_1) >> 1; - int32_t cwidth = pixman_fixed_to_int (params[0]); - int32_t cheight = pixman_fixed_to_int (params[1]); - int32_t srtot, sgtot, sbtot, satot; - int32_t i, j, x1, x2, y1, y2; - pixman_repeat_t repeat_mode = image->common.repeat; - int width = image->width; - int height = image->height; - - params += 2; - - x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); - y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); - x2 = x1 + cwidth; - y2 = y1 + cheight; - - srtot = sgtot = sbtot = satot = 0; - - for (i = y1; i < y2; ++i) - { - for (j = x1; j < x2; ++j) - { - int rx = j; - int ry = i; - - pixman_fixed_t f = *params; - - if (f) - { - uint32_t pixel; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, width, &rx); - repeat (repeat_mode, height, &ry); - - pixel = get_pixel (image, rx, ry, FALSE); - } - else - { - pixel = get_pixel (image, rx, ry, TRUE); - } - - srtot += RED_8 (pixel) * f; - sgtot += GREEN_8 (pixel) * f; - sbtot += BLUE_8 (pixel) * f; - satot += ALPHA_8 (pixel) * f; - } - - params++; - } - } - - satot >>= 16; - srtot >>= 16; - sgtot >>= 16; - sbtot >>= 16; - - satot = CLIP (satot, 0, 0xff); - srtot = CLIP (srtot, 0, 0xff); - sgtot = CLIP (sgtot, 0, 0xff); - sbtot = CLIP (sbtot, 0, 0xff); - - return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); -} - -static force_inline uint32_t -bits_image_fetch_pixel_filtered (bits_image_t *image, - pixman_fixed_t x, - pixman_fixed_t y, - get_pixel_t get_pixel) -{ - switch (image->common.filter) - { - case PIXMAN_FILTER_NEAREST: - case PIXMAN_FILTER_FAST: - return bits_image_fetch_pixel_nearest (image, x, y, get_pixel); - break; - - case PIXMAN_FILTER_BILINEAR: - case PIXMAN_FILTER_GOOD: - case PIXMAN_FILTER_BEST: - return bits_image_fetch_pixel_bilinear (image, x, y, get_pixel); - break; - - case PIXMAN_FILTER_CONVOLUTION: - return bits_image_fetch_pixel_convolution (image, x, y, get_pixel); - break; - - default: - break; - } - - return 0; -} - -static void -bits_image_fetch_affine_no_alpha (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask) -{ - pixman_fixed_t x, y; - pixman_fixed_t ux, uy; - pixman_vector_t v; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (image->common.transform) - { - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - } - else - { - ux = pixman_fixed_1; - uy = 0; - } - - x = v.vector[0]; - y = v.vector[1]; - - for (i = 0; i < width; ++i) - { - if (!mask || mask[i]) - { - buffer[i] = bits_image_fetch_pixel_filtered ( - &image->bits, x, y, fetch_pixel_no_alpha); - } - - x += ux; - y += uy; - } -} - -/* General fetcher */ -static force_inline uint32_t -fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_bounds) -{ - uint32_t pixel; - - if (check_bounds && - (x < 0 || x >= image->width || y < 0 || y >= image->height)) - { - return 0; - } - - pixel = image->fetch_pixel_32 (image, x, y); - - if (image->common.alpha_map) - { - uint32_t pixel_a; - - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - if (x < 0 || x >= image->common.alpha_map->width || - y < 0 || y >= image->common.alpha_map->height) - { - pixel_a = 0; - } - else - { - pixel_a = image->common.alpha_map->fetch_pixel_32 ( - image->common.alpha_map, x, y); - - pixel_a = ALPHA_8 (pixel_a); - } - - pixel &= 0x00ffffff; - pixel |= (pixel_a << 24); - } - - return pixel; -} - -static void -bits_image_fetch_general (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask) -{ - pixman_fixed_t x, y, w; - pixman_fixed_t ux, uy, uw; - pixman_vector_t v; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (image->common.transform) - { - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - uw = image->common.transform->matrix[2][0]; - } - else - { - ux = pixman_fixed_1; - uy = 0; - uw = 0; - } - - x = v.vector[0]; - y = v.vector[1]; - w = v.vector[2]; - - for (i = 0; i < width; ++i) - { - pixman_fixed_t x0, y0; - - if (!mask || mask[i]) - { - if (w != 0) - { - x0 = ((pixman_fixed_48_16_t)x << 16) / w; - y0 = ((pixman_fixed_48_16_t)y << 16) / w; - } - else - { - x0 = 0; - y0 = 0; - } - - buffer[i] = bits_image_fetch_pixel_filtered ( - &image->bits, x0, y0, fetch_pixel_general); - } - - x += ux; - y += uy; - w += uw; - } -} - -static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - -typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); - -static force_inline void -bits_image_fetch_bilinear_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - pixman_fixed_t x, y; - pixman_fixed_t ux, uy; - pixman_vector_t v; - bits_image_t *bits = &image->bits; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - - x = v.vector[0]; - y = v.vector[1]; - - for (i = 0; i < width; ++i) - { - int x1, y1, x2, y2; - uint32_t tl, tr, bl, br; - int32_t distx, disty; - int width = image->bits.width; - int height = image->bits.height; - const uint8_t *row1; - const uint8_t *row2; - - if (mask && !mask[i]) - goto next; - - x1 = x - pixman_fixed_1 / 2; - y1 = y - pixman_fixed_1 / 2; - - distx = (x1 >> 8) & 0xff; - disty = (y1 >> 8) & 0xff; - - y1 = pixman_fixed_to_int (y1); - y2 = y1 + 1; - x1 = pixman_fixed_to_int (x1); - x2 = x1 + 1; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - uint32_t mask; - - mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - - repeat (repeat_mode, width, &x1); - repeat (repeat_mode, height, &y1); - repeat (repeat_mode, width, &x2); - repeat (repeat_mode, height, &y2); - - row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; - row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; - - tl = convert_pixel (row1, x1) | mask; - tr = convert_pixel (row1, x2) | mask; - bl = convert_pixel (row2, x1) | mask; - br = convert_pixel (row2, x2) | mask; - } - else - { - uint32_t mask1, mask2; - int bpp; - - /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value, - * which means if you use it in expressions, those - * expressions become unsigned themselves. Since - * the variables below can be negative in some cases, - * that will lead to crashes on 64 bit architectures. - * - * So this line makes sure bpp is signed - */ - bpp = PIXMAN_FORMAT_BPP (format); - - if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0) - { - buffer[i] = 0; - goto next; - } - - if (y2 == 0) - { - row1 = zero; - mask1 = 0; - } - else - { - row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; - row1 += bpp / 8 * x1; - - mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - } - - if (y1 == height - 1) - { - row2 = zero; - mask2 = 0; - } - else - { - row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; - row2 += bpp / 8 * x1; - - mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - } - - if (x2 == 0) - { - tl = 0; - bl = 0; - } - else - { - tl = convert_pixel (row1, 0) | mask1; - bl = convert_pixel (row2, 0) | mask2; - } - - if (x1 == width - 1) - { - tr = 0; - br = 0; - } - else - { - tr = convert_pixel (row1, 1) | mask1; - br = convert_pixel (row2, 1) | mask2; - } - } - - buffer[i] = bilinear_interpolation ( - tl, tr, bl, br, distx, disty); - - next: - x += ux; - y += uy; - } -} - -static force_inline void -bits_image_fetch_nearest_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - pixman_fixed_t x, y; - pixman_fixed_t ux, uy; - pixman_vector_t v; - bits_image_t *bits = &image->bits; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - - x = v.vector[0]; - y = v.vector[1]; - - for (i = 0; i < width; ++i) - { - int width, height, x0, y0; - const uint8_t *row; - - if (mask && !mask[i]) - goto next; - - width = image->bits.width; - height = image->bits.height; - x0 = pixman_fixed_to_int (x - pixman_fixed_e); - y0 = pixman_fixed_to_int (y - pixman_fixed_e); - - if (repeat_mode == PIXMAN_REPEAT_NONE && - (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width)) - { - buffer[i] = 0; - } - else - { - uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, width, &x0); - repeat (repeat_mode, height, &y0); - } - - row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0; - - buffer[i] = convert_pixel (row, x0) | mask; - } - - next: - x += ux; - y += uy; - } -} - -static force_inline uint32_t -convert_a8r8g8b8 (const uint8_t *row, int x) -{ - return *(((uint32_t *)row) + x); -} - -static force_inline uint32_t -convert_x8r8g8b8 (const uint8_t *row, int x) -{ - return *(((uint32_t *)row) + x); -} - -static force_inline uint32_t -convert_a8 (const uint8_t *row, int x) -{ - return *(row + x) << 24; -} - -static force_inline uint32_t -convert_r5g6b5 (const uint8_t *row, int x) -{ - return CONVERT_0565_TO_0888 (*((uint16_t *)row + x)); -} - -#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \ - static void \ - bits_image_fetch_bilinear_affine_ ## name (pixman_image_t *image, \ - int offset, \ - int line, \ - int width, \ - uint32_t * buffer, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_bilinear_affine (image, offset, line, \ - width, buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - } - -#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \ - static void \ - bits_image_fetch_nearest_affine_ ## name (pixman_image_t *image, \ - int offset, \ - int line, \ - int width, \ - uint32_t * buffer, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_nearest_affine (image, offset, line, \ - width, buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - } - -#define MAKE_FETCHERS(name, format, repeat_mode) \ - MAKE_NEAREST_FETCHER (name, format, repeat_mode) \ - MAKE_BILINEAR_FETCHER (name, format, repeat_mode) - -MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL) - -static void -bits_image_fetch_solid_32 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t * mask) -{ - uint32_t color; - uint32_t *end; - - color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); - - end = buffer + width; - while (buffer < end) - *(buffer++) = color; -} - -static void -bits_image_fetch_solid_64 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t * unused) -{ - uint64_t color; - uint64_t *buffer = (uint64_t *)b; - uint64_t *end; - - color = image->bits.fetch_pixel_64 (&image->bits, 0, 0); - - end = buffer + width; - while (buffer < end) - *(buffer++) = color; -} - -static void -bits_image_fetch_untransformed_repeat_none (bits_image_t *image, - pixman_bool_t wide, - int x, - int y, - int width, - uint32_t * buffer) -{ - uint32_t w; - - if (y < 0 || y >= image->height) - { - memset (buffer, 0, width * (wide? 8 : 4)); - return; - } - - if (x < 0) - { - w = MIN (width, -x); - - memset (buffer, 0, w * (wide ? 8 : 4)); - - width -= w; - buffer += w * (wide? 2 : 1); - x += w; - } - - if (x < image->width) - { - w = MIN (width, image->width - x); - - if (wide) - image->fetch_scanline_64 ((pixman_image_t *)image, x, y, w, buffer, NULL); - else - image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL); - - width -= w; - buffer += w * (wide? 2 : 1); - x += w; - } - - memset (buffer, 0, width * (wide ? 8 : 4)); -} - -static void -bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, - pixman_bool_t wide, - int x, - int y, - int width, - uint32_t * buffer) -{ - uint32_t w; - - while (y < 0) - y += image->height; - - while (y >= image->height) - y -= image->height; - - while (width) - { - while (x < 0) - x += image->width; - while (x >= image->width) - x -= image->width; - - w = MIN (width, image->width - x); - - if (wide) - image->fetch_scanline_64 ((pixman_image_t *)image, x, y, w, buffer, NULL); - else - image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL); - - buffer += w * (wide? 2 : 1); - x += w; - width -= w; - } -} - -static void -bits_image_fetch_untransformed_32 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t * mask) -{ - if (image->common.repeat == PIXMAN_REPEAT_NONE) - { - bits_image_fetch_untransformed_repeat_none ( - &image->bits, FALSE, x, y, width, buffer); - } - else - { - bits_image_fetch_untransformed_repeat_normal ( - &image->bits, FALSE, x, y, width, buffer); - } -} - -static void -bits_image_fetch_untransformed_64 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t * unused) -{ - if (image->common.repeat == PIXMAN_REPEAT_NONE) - { - bits_image_fetch_untransformed_repeat_none ( - &image->bits, TRUE, x, y, width, buffer); - } - else - { - bits_image_fetch_untransformed_repeat_normal ( - &image->bits, TRUE, x, y, width, buffer); - } -} - -typedef struct -{ - pixman_format_code_t format; - uint32_t flags; - fetch_scanline_t fetch_32; - fetch_scanline_t fetch_64; -} fetcher_info_t; - -static const fetcher_info_t fetcher_info[] = -{ - { PIXMAN_solid, - FAST_PATH_NO_ALPHA_MAP, - bits_image_fetch_solid_32, - bits_image_fetch_solid_64 - }, - - { PIXMAN_any, - (FAST_PATH_NO_ALPHA_MAP | - FAST_PATH_ID_TRANSFORM | - FAST_PATH_NO_CONVOLUTION_FILTER | - FAST_PATH_NO_PAD_REPEAT | - FAST_PATH_NO_REFLECT_REPEAT), - bits_image_fetch_untransformed_32, - bits_image_fetch_untransformed_64 - }, - -#define FAST_BILINEAR_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_X_UNIT_POSITIVE | \ - FAST_PATH_Y_UNIT_ZERO | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_BILINEAR_FILTER) - - { PIXMAN_a8r8g8b8, - FAST_BILINEAR_FLAGS, - bits_image_fetch_bilinear_no_repeat_8888, - _pixman_image_get_scanline_generic_64 - }, - - { PIXMAN_x8r8g8b8, - FAST_BILINEAR_FLAGS, - bits_image_fetch_bilinear_no_repeat_8888, - _pixman_image_get_scanline_generic_64 - }, - -#define GENERAL_BILINEAR_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_BILINEAR_FILTER) - -#define GENERAL_NEAREST_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_NEAREST_FILTER) - -#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - bits_image_fetch_bilinear_affine_ ## name, \ - _pixman_image_get_scanline_generic_64 \ - }, - -#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - bits_image_fetch_nearest_affine_ ## name, \ - _pixman_image_get_scanline_generic_64 \ - }, - -#define AFFINE_FAST_PATHS(name, format, repeat) \ - BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ - NEAREST_AFFINE_FAST_PATH(name, format, repeat) - - AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) - AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) - AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT) - AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL) - AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD) - AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE) - AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT) - AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL) - AFFINE_FAST_PATHS (pad_a8, a8, PAD) - AFFINE_FAST_PATHS (none_a8, a8, NONE) - AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT) - AFFINE_FAST_PATHS (normal_a8, a8, NORMAL) - AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD) - AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE) - AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT) - AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL) - - /* Affine, no alpha */ - { PIXMAN_any, - (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM), - bits_image_fetch_affine_no_alpha, - _pixman_image_get_scanline_generic_64 - }, - - /* General */ - { PIXMAN_any, 0, bits_image_fetch_general, _pixman_image_get_scanline_generic_64 }, - - { PIXMAN_null }, -}; - -static void -bits_image_property_changed (pixman_image_t *image) -{ - uint32_t flags = image->common.flags; - pixman_format_code_t format = image->common.extended_format_code; - const fetcher_info_t *info; - - _pixman_bits_image_setup_accessors (&image->bits); - - info = fetcher_info; - while (info->format != PIXMAN_null) - { - if ((info->format == format || info->format == PIXMAN_any) && - (info->flags & flags) == info->flags) - { - image->bits.get_scanline_32 = info->fetch_32; - image->bits.get_scanline_64 = info->fetch_64; - break; - } - - info++; - } -} - -static uint32_t * -src_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) -{ - iter->image->bits.get_scanline_32 ( - iter->image, iter->x, iter->y++, iter->width, iter->buffer, mask); - - return iter->buffer; -} - -static uint32_t * -src_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) -{ - iter->image->bits.get_scanline_64 ( - iter->image, iter->x, iter->y++, iter->width, iter->buffer, mask); - - return iter->buffer; -} - -void -_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter) -{ - if (iter->flags & ITER_NARROW) - iter->get_scanline = src_get_scanline_narrow; - else - iter->get_scanline = src_get_scanline_wide; -} - -static uint32_t * -dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) -{ - pixman_image_t *image = iter->image; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask); - if (image->common.alpha_map) - { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - image->common.alpha_map->fetch_scanline_32 ( - (pixman_image_t *)image->common.alpha_map, - x, y, width, buffer, mask); - } - - return iter->buffer; -} - -static uint32_t * -dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) -{ - bits_image_t * image = &iter->image->bits; - int x = iter->x; - int y = iter->y; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - image->fetch_scanline_64 ( - (pixman_image_t *)image, x, y, width, buffer, mask); - if (image->common.alpha_map) - { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - image->common.alpha_map->fetch_scanline_64 ( - (pixman_image_t *)image->common.alpha_map, x, y, width, buffer, mask); - } - - return iter->buffer; -} - -static void -dest_write_back_narrow (pixman_iter_t *iter) -{ - bits_image_t * image = &iter->image->bits; - int x = iter->x; - int y = iter->y; - int width = iter->width; - const uint32_t *buffer = iter->buffer; - - image->store_scanline_32 (image, x, y, width, buffer); - - if (image->common.alpha_map) - { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - image->common.alpha_map->store_scanline_32 ( - image->common.alpha_map, x, y, width, buffer); - } - - iter->y++; -} - -static void -dest_write_back_wide (pixman_iter_t *iter) -{ - bits_image_t * image = &iter->image->bits; - int x = iter->x; - int y = iter->y; - int width = iter->width; - const uint32_t *buffer = iter->buffer; - - image->store_scanline_64 (image, x, y, width, buffer); - - if (image->common.alpha_map) - { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; - - image->common.alpha_map->store_scanline_64 ( - image->common.alpha_map, x, y, width, buffer); - } - - iter->y++; -} - -void -_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter) -{ - if (iter->flags & ITER_NARROW) - { - if ((iter->flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == - (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) - { - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else - { - iter->get_scanline = dest_get_scanline_narrow; - } - - iter->write_back = dest_write_back_narrow; - } - else - { - iter->get_scanline = dest_get_scanline_wide; - iter->write_back = dest_write_back_wide; - } -} - -static uint32_t * -create_bits (pixman_format_code_t format, - int width, - int height, - int * rowstride_bytes) -{ - int stride; - int buf_size; - int bpp; - - /* what follows is a long-winded way, avoiding any possibility of integer - * overflows, of saying: - * stride = ((width * bpp + 0x1f) >> 5) * sizeof (uint32_t); - */ - - bpp = PIXMAN_FORMAT_BPP (format); - if (pixman_multiply_overflows_int (width, bpp)) - return NULL; - - stride = width * bpp; - if (pixman_addition_overflows_int (stride, 0x1f)) - return NULL; - - stride += 0x1f; - stride >>= 5; - - stride *= sizeof (uint32_t); - - if (pixman_multiply_overflows_int (height, stride)) - return NULL; - - buf_size = height * stride; - - if (rowstride_bytes) - *rowstride_bytes = stride; - - return calloc (buf_size, 1); -} - -PIXMAN_EXPORT pixman_image_t * -pixman_image_create_bits (pixman_format_code_t format, - int width, - int height, - uint32_t * bits, - int rowstride_bytes) -{ - pixman_image_t *image; - uint32_t *free_me = NULL; - - /* must be a whole number of uint32_t's - */ - return_val_if_fail ( - bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); - - return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL); - - if (!bits && width && height) - { - free_me = bits = create_bits (format, width, height, &rowstride_bytes); - if (!bits) - return NULL; - } - - image = _pixman_image_allocate (); - - if (!image) - { - if (free_me) - free (free_me); - - return NULL; - } - - image->type = BITS; - image->bits.format = format; - image->bits.width = width; - image->bits.height = height; - image->bits.bits = bits; - image->bits.free_me = free_me; - image->bits.read_func = NULL; - image->bits.write_func = NULL; - - /* The rowstride is stored in number of uint32_t */ - image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t); - - image->bits.indexed = NULL; - - image->common.property_changed = bits_image_property_changed; - - _pixman_image_reset_clip_region (image); - - return image; -} +/* + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. + * 2005 Lars Knoll & Zack Rusin, Trolltech + * 2008 Aaron Plattner, NVIDIA Corporation + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007, 2009 Red Hat, Inc. + * Copyright © 2008 André Tupinambá + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif +#include +#include +#include +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-inlines.h" + +/* + * By default, just evaluate the image at 32bpp and expand. Individual image + * types can plug in a better scanline getter if they want to. For example + * we could produce smoother gradients by evaluating them at higher color + * depth, but that's a project for the future. + */ +static void +_pixman_image_get_scanline_generic_64 (pixman_image_t * image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t * mask) +{ + uint32_t *mask8 = NULL; + + /* Contract the mask image, if one exists, so that the 32-bit fetch + * function can use it. + */ + if (mask) + { + mask8 = pixman_malloc_ab (width, sizeof(uint32_t)); + if (!mask8) + return; + + pixman_contract (mask8, (uint64_t *)mask, width); + } + + /* Fetch the source image into the first half of buffer. */ + image->bits.get_scanline_32 (image, x, y, width, (uint32_t*)buffer, mask8); + + /* Expand from 32bpp to 64bpp in place. */ + pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, width); + + free (mask8); +} + +/* Fetch functions */ + +static force_inline uint32_t +fetch_pixel_no_alpha (bits_image_t *image, + int x, int y, pixman_bool_t check_bounds) +{ + if (check_bounds && + (x < 0 || x >= image->width || y < 0 || y >= image->height)) + { + return 0; + } + + return image->fetch_pixel_32 (image, x, y); +} + +typedef uint32_t (* get_pixel_t) (bits_image_t *image, + int x, int y, pixman_bool_t check_bounds); + +static force_inline uint32_t +bits_image_fetch_pixel_nearest (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + int x0 = pixman_fixed_to_int (x - pixman_fixed_e); + int y0 = pixman_fixed_to_int (y - pixman_fixed_e); + + if (image->common.repeat != PIXMAN_REPEAT_NONE) + { + repeat (image->common.repeat, &x0, image->width); + repeat (image->common.repeat, &y0, image->height); + + return get_pixel (image, x0, y0, FALSE); + } + else + { + return get_pixel (image, x0, y0, TRUE); + } +} + +static force_inline uint32_t +bits_image_fetch_pixel_bilinear (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + pixman_repeat_t repeat_mode = image->common.repeat; + int width = image->width; + int height = image->height; + int x1, y1, x2, y2; + uint32_t tl, tr, bl, br; + int32_t distx, disty; + + x1 = x - pixman_fixed_1 / 2; + y1 = y - pixman_fixed_1 / 2; + + distx = (x1 >> 8) & 0xff; + disty = (y1 >> 8) & 0xff; + + x1 = pixman_fixed_to_int (x1); + y1 = pixman_fixed_to_int (y1); + x2 = x1 + 1; + y2 = y1 + 1; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); + + tl = get_pixel (image, x1, y1, FALSE); + bl = get_pixel (image, x1, y2, FALSE); + tr = get_pixel (image, x2, y1, FALSE); + br = get_pixel (image, x2, y2, FALSE); + } + else + { + tl = get_pixel (image, x1, y1, TRUE); + tr = get_pixel (image, x2, y1, TRUE); + bl = get_pixel (image, x1, y2, TRUE); + br = get_pixel (image, x2, y2, TRUE); + } + + return bilinear_interpolation (tl, tr, bl, br, distx, disty); +} + +static void +bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask) +{ + bits_image_t *bits = &ima->bits; + pixman_fixed_t x_top, x_bottom, x; + pixman_fixed_t ux_top, ux_bottom, ux; + pixman_vector_t v; + uint32_t top_mask, bottom_mask; + uint32_t *top_row; + uint32_t *bottom_row; + uint32_t *end; + uint32_t zero[2] = { 0, 0 }; + uint32_t one = 1; + int y, y1, y2; + int disty; + int mask_inc; + int w; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (bits->common.transform, &v)) + return; + + ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0]; + x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; + + y = v.vector[1] - pixman_fixed_1/2; + disty = (y >> 8) & 0xff; + + /* Load the pointers to the first and second lines from the source + * image that bilinear code must read. + * + * The main trick in this code is about the check if any line are + * outside of the image; + * + * When I realize that a line (any one) is outside, I change + * the pointer to a dummy area with zeros. Once I change this, I + * must be sure the pointer will not change, so I set the + * variables to each pointer increments inside the loop. + */ + y1 = pixman_fixed_to_int (y); + y2 = y1 + 1; + + if (y1 < 0 || y1 >= bits->height) + { + top_row = zero; + x_top = 0; + ux_top = 0; + } + else + { + top_row = bits->bits + y1 * bits->rowstride; + x_top = x; + ux_top = ux; + } + + if (y2 < 0 || y2 >= bits->height) + { + bottom_row = zero; + x_bottom = 0; + ux_bottom = 0; + } + else + { + bottom_row = bits->bits + y2 * bits->rowstride; + x_bottom = x; + ux_bottom = ux; + } + + /* Instead of checking whether the operation uses the mast in + * each loop iteration, verify this only once and prepare the + * variables to make the code smaller inside the loop. + */ + if (!mask) + { + mask_inc = 0; + mask = &one; + } + else + { + /* If have a mask, prepare the variables to check it */ + mask_inc = 1; + } + + /* If both are zero, then the whole thing is zero */ + if (top_row == zero && bottom_row == zero) + { + memset (buffer, 0, width * sizeof (uint32_t)); + return; + } + else if (bits->format == PIXMAN_x8r8g8b8) + { + if (top_row == zero) + { + top_mask = 0; + bottom_mask = 0xff000000; + } + else if (bottom_row == zero) + { + top_mask = 0xff000000; + bottom_mask = 0; + } + else + { + top_mask = 0xff000000; + bottom_mask = 0xff000000; + } + } + else + { + top_mask = 0; + bottom_mask = 0; + } + + end = buffer + width; + + /* Zero fill to the left of the image */ + while (buffer < end && x < pixman_fixed_minus_1) + { + *buffer++ = 0; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Left edge + */ + while (buffer < end && x < 0) + { + uint32_t tr, br; + int32_t distx; + + tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask; + br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; + + distx = (x >> 8) & 0xff; + + *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty); + + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Main part */ + w = pixman_int_to_fixed (bits->width - 1); + + while (buffer < end && x < w) + { + if (*mask) + { + uint32_t tl, tr, bl, br; + int32_t distx; + + tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; + tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask; + bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; + br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; + + distx = (x >> 8) & 0xff; + + *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty); + } + + buffer++; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Right Edge */ + w = pixman_int_to_fixed (bits->width); + while (buffer < end && x < w) + { + if (*mask) + { + uint32_t tl, bl; + int32_t distx; + + tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; + bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; + + distx = (x >> 8) & 0xff; + + *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty); + } + + buffer++; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Zero fill to the left of the image */ + while (buffer < end) + *buffer++ = 0; +} + +static force_inline uint32_t +bits_image_fetch_pixel_convolution (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + pixman_fixed_t *params = image->common.filter_params; + int x_off = (params[0] - pixman_fixed_1) >> 1; + int y_off = (params[1] - pixman_fixed_1) >> 1; + int32_t cwidth = pixman_fixed_to_int (params[0]); + int32_t cheight = pixman_fixed_to_int (params[1]); + int32_t srtot, sgtot, sbtot, satot; + int32_t i, j, x1, x2, y1, y2; + pixman_repeat_t repeat_mode = image->common.repeat; + int width = image->width; + int height = image->height; + + params += 2; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; + + srtot = sgtot = sbtot = satot = 0; + + for (i = y1; i < y2; ++i) + { + for (j = x1; j < x2; ++j) + { + int rx = j; + int ry = i; + + pixman_fixed_t f = *params; + + if (f) + { + uint32_t pixel; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &rx, width); + repeat (repeat_mode, &ry, height); + + pixel = get_pixel (image, rx, ry, FALSE); + } + else + { + pixel = get_pixel (image, rx, ry, TRUE); + } + + srtot += RED_8 (pixel) * f; + sgtot += GREEN_8 (pixel) * f; + sbtot += BLUE_8 (pixel) * f; + satot += ALPHA_8 (pixel) * f; + } + + params++; + } + } + + satot >>= 16; + srtot >>= 16; + sgtot >>= 16; + sbtot >>= 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); +} + +static force_inline uint32_t +bits_image_fetch_pixel_filtered (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + switch (image->common.filter) + { + case PIXMAN_FILTER_NEAREST: + case PIXMAN_FILTER_FAST: + return bits_image_fetch_pixel_nearest (image, x, y, get_pixel); + break; + + case PIXMAN_FILTER_BILINEAR: + case PIXMAN_FILTER_GOOD: + case PIXMAN_FILTER_BEST: + return bits_image_fetch_pixel_bilinear (image, x, y, get_pixel); + break; + + case PIXMAN_FILTER_CONVOLUTION: + return bits_image_fetch_pixel_convolution (image, x, y, get_pixel); + break; + + default: + break; + } + + return 0; +} + +static void +bits_image_fetch_affine_no_alpha (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask) +{ + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (image->common.transform) + { + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + } + else + { + ux = pixman_fixed_1; + uy = 0; + } + + x = v.vector[0]; + y = v.vector[1]; + + for (i = 0; i < width; ++i) + { + if (!mask || mask[i]) + { + buffer[i] = bits_image_fetch_pixel_filtered ( + &image->bits, x, y, fetch_pixel_no_alpha); + } + + x += ux; + y += uy; + } +} + +/* General fetcher */ +static force_inline uint32_t +fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_bounds) +{ + uint32_t pixel; + + if (check_bounds && + (x < 0 || x >= image->width || y < 0 || y >= image->height)) + { + return 0; + } + + pixel = image->fetch_pixel_32 (image, x, y); + + if (image->common.alpha_map) + { + uint32_t pixel_a; + + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + if (x < 0 || x >= image->common.alpha_map->width || + y < 0 || y >= image->common.alpha_map->height) + { + pixel_a = 0; + } + else + { + pixel_a = image->common.alpha_map->fetch_pixel_32 ( + image->common.alpha_map, x, y); + + pixel_a = ALPHA_8 (pixel_a); + } + + pixel &= 0x00ffffff; + pixel |= (pixel_a << 24); + } + + return pixel; +} + +static void +bits_image_fetch_general (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask) +{ + pixman_fixed_t x, y, w; + pixman_fixed_t ux, uy, uw; + pixman_vector_t v; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (image->common.transform) + { + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + uw = image->common.transform->matrix[2][0]; + } + else + { + ux = pixman_fixed_1; + uy = 0; + uw = 0; + } + + x = v.vector[0]; + y = v.vector[1]; + w = v.vector[2]; + + for (i = 0; i < width; ++i) + { + pixman_fixed_t x0, y0; + + if (!mask || mask[i]) + { + if (w != 0) + { + x0 = ((pixman_fixed_48_16_t)x << 16) / w; + y0 = ((pixman_fixed_48_16_t)y << 16) / w; + } + else + { + x0 = 0; + y0 = 0; + } + + buffer[i] = bits_image_fetch_pixel_filtered ( + &image->bits, x0, y0, fetch_pixel_general); + } + + x += ux; + y += uy; + w += uw; + } +} + +static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); + +static force_inline void +bits_image_fetch_bilinear_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + bits_image_t *bits = &image->bits; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + x = v.vector[0]; + y = v.vector[1]; + + for (i = 0; i < width; ++i) + { + int x1, y1, x2, y2; + uint32_t tl, tr, bl, br; + int32_t distx, disty; + int width = image->bits.width; + int height = image->bits.height; + const uint8_t *row1; + const uint8_t *row2; + + if (mask && !mask[i]) + goto next; + + x1 = x - pixman_fixed_1 / 2; + y1 = y - pixman_fixed_1 / 2; + + distx = (x1 >> 8) & 0xff; + disty = (y1 >> 8) & 0xff; + + y1 = pixman_fixed_to_int (y1); + y2 = y1 + 1; + x1 = pixman_fixed_to_int (x1); + x2 = x1 + 1; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + uint32_t mask; + + mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); + + row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; + row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; + + tl = convert_pixel (row1, x1) | mask; + tr = convert_pixel (row1, x2) | mask; + bl = convert_pixel (row2, x1) | mask; + br = convert_pixel (row2, x2) | mask; + } + else + { + uint32_t mask1, mask2; + int bpp; + + /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value, + * which means if you use it in expressions, those + * expressions become unsigned themselves. Since + * the variables below can be negative in some cases, + * that will lead to crashes on 64 bit architectures. + * + * So this line makes sure bpp is signed + */ + bpp = PIXMAN_FORMAT_BPP (format); + + if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0) + { + buffer[i] = 0; + goto next; + } + + if (y2 == 0) + { + row1 = zero; + mask1 = 0; + } + else + { + row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; + row1 += bpp / 8 * x1; + + mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + } + + if (y1 == height - 1) + { + row2 = zero; + mask2 = 0; + } + else + { + row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; + row2 += bpp / 8 * x1; + + mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + } + + if (x2 == 0) + { + tl = 0; + bl = 0; + } + else + { + tl = convert_pixel (row1, 0) | mask1; + bl = convert_pixel (row2, 0) | mask2; + } + + if (x1 == width - 1) + { + tr = 0; + br = 0; + } + else + { + tr = convert_pixel (row1, 1) | mask1; + br = convert_pixel (row2, 1) | mask2; + } + } + + buffer[i] = bilinear_interpolation ( + tl, tr, bl, br, distx, disty); + + next: + x += ux; + y += uy; + } +} + +static force_inline void +bits_image_fetch_nearest_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + bits_image_t *bits = &image->bits; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + x = v.vector[0]; + y = v.vector[1]; + + for (i = 0; i < width; ++i) + { + int width, height, x0, y0; + const uint8_t *row; + + if (mask && !mask[i]) + goto next; + + width = image->bits.width; + height = image->bits.height; + x0 = pixman_fixed_to_int (x - pixman_fixed_e); + y0 = pixman_fixed_to_int (y - pixman_fixed_e); + + if (repeat_mode == PIXMAN_REPEAT_NONE && + (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width)) + { + buffer[i] = 0; + } + else + { + uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &x0, width); + repeat (repeat_mode, &y0, height); + } + + row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0; + + buffer[i] = convert_pixel (row, x0) | mask; + } + + next: + x += ux; + y += uy; + } +} + +static force_inline uint32_t +convert_a8r8g8b8 (const uint8_t *row, int x) +{ + return *(((uint32_t *)row) + x); +} + +static force_inline uint32_t +convert_x8r8g8b8 (const uint8_t *row, int x) +{ + return *(((uint32_t *)row) + x); +} + +static force_inline uint32_t +convert_a8 (const uint8_t *row, int x) +{ + return *(row + x) << 24; +} + +static force_inline uint32_t +convert_r5g6b5 (const uint8_t *row, int x) +{ + return CONVERT_0565_TO_0888 (*((uint16_t *)row + x)); +} + +#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \ + static void \ + bits_image_fetch_bilinear_affine_ ## name (pixman_image_t *image, \ + int offset, \ + int line, \ + int width, \ + uint32_t * buffer, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_bilinear_affine (image, offset, line, \ + width, buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + } + +#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \ + static void \ + bits_image_fetch_nearest_affine_ ## name (pixman_image_t *image, \ + int offset, \ + int line, \ + int width, \ + uint32_t * buffer, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_nearest_affine (image, offset, line, \ + width, buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + } + +#define MAKE_FETCHERS(name, format, repeat_mode) \ + MAKE_NEAREST_FETCHER (name, format, repeat_mode) \ + MAKE_BILINEAR_FETCHER (name, format, repeat_mode) + +MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL) + +static void +bits_image_fetch_solid_32 (pixman_image_t * image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t * mask) +{ + uint32_t color; + uint32_t *end; + + color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); + + end = buffer + width; + while (buffer < end) + *(buffer++) = color; +} + +static void +bits_image_fetch_solid_64 (pixman_image_t * image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t * unused) +{ + uint64_t color; + uint64_t *buffer = (uint64_t *)b; + uint64_t *end; + + color = image->bits.fetch_pixel_64 (&image->bits, 0, 0); + + end = buffer + width; + while (buffer < end) + *(buffer++) = color; +} + +static void +bits_image_fetch_untransformed_repeat_none (bits_image_t *image, + pixman_bool_t wide, + int x, + int y, + int width, + uint32_t * buffer) +{ + uint32_t w; + + if (y < 0 || y >= image->height) + { + memset (buffer, 0, width * (wide? 8 : 4)); + return; + } + + if (x < 0) + { + w = MIN (width, -x); + + memset (buffer, 0, w * (wide ? 8 : 4)); + + width -= w; + buffer += w * (wide? 2 : 1); + x += w; + } + + if (x < image->width) + { + w = MIN (width, image->width - x); + + if (wide) + image->fetch_scanline_64 ((pixman_image_t *)image, x, y, w, buffer, NULL); + else + image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL); + + width -= w; + buffer += w * (wide? 2 : 1); + x += w; + } + + memset (buffer, 0, width * (wide ? 8 : 4)); +} + +static void +bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, + pixman_bool_t wide, + int x, + int y, + int width, + uint32_t * buffer) +{ + uint32_t w; + + while (y < 0) + y += image->height; + + while (y >= image->height) + y -= image->height; + + while (width) + { + while (x < 0) + x += image->width; + while (x >= image->width) + x -= image->width; + + w = MIN (width, image->width - x); + + if (wide) + image->fetch_scanline_64 ((pixman_image_t *)image, x, y, w, buffer, NULL); + else + image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL); + + buffer += w * (wide? 2 : 1); + x += w; + width -= w; + } +} + +static void +bits_image_fetch_untransformed_32 (pixman_image_t * image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t * mask) +{ + if (image->common.repeat == PIXMAN_REPEAT_NONE) + { + bits_image_fetch_untransformed_repeat_none ( + &image->bits, FALSE, x, y, width, buffer); + } + else + { + bits_image_fetch_untransformed_repeat_normal ( + &image->bits, FALSE, x, y, width, buffer); + } +} + +static void +bits_image_fetch_untransformed_64 (pixman_image_t * image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t * unused) +{ + if (image->common.repeat == PIXMAN_REPEAT_NONE) + { + bits_image_fetch_untransformed_repeat_none ( + &image->bits, TRUE, x, y, width, buffer); + } + else + { + bits_image_fetch_untransformed_repeat_normal ( + &image->bits, TRUE, x, y, width, buffer); + } +} + +typedef struct +{ + pixman_format_code_t format; + uint32_t flags; + fetch_scanline_t fetch_32; + fetch_scanline_t fetch_64; +} fetcher_info_t; + +static const fetcher_info_t fetcher_info[] = +{ + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, + bits_image_fetch_solid_32, + bits_image_fetch_solid_64 + }, + + { PIXMAN_any, + (FAST_PATH_NO_ALPHA_MAP | + FAST_PATH_ID_TRANSFORM | + FAST_PATH_NO_CONVOLUTION_FILTER | + FAST_PATH_NO_PAD_REPEAT | + FAST_PATH_NO_REFLECT_REPEAT), + bits_image_fetch_untransformed_32, + bits_image_fetch_untransformed_64 + }, + +#define FAST_BILINEAR_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_X_UNIT_POSITIVE | \ + FAST_PATH_Y_UNIT_ZERO | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_BILINEAR_FILTER) + + { PIXMAN_a8r8g8b8, + FAST_BILINEAR_FLAGS, + bits_image_fetch_bilinear_no_repeat_8888, + _pixman_image_get_scanline_generic_64 + }, + + { PIXMAN_x8r8g8b8, + FAST_BILINEAR_FLAGS, + bits_image_fetch_bilinear_no_repeat_8888, + _pixman_image_get_scanline_generic_64 + }, + +#define GENERAL_BILINEAR_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_BILINEAR_FILTER) + +#define GENERAL_NEAREST_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_NEAREST_FILTER) + +#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + bits_image_fetch_bilinear_affine_ ## name, \ + _pixman_image_get_scanline_generic_64 \ + }, + +#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + bits_image_fetch_nearest_affine_ ## name, \ + _pixman_image_get_scanline_generic_64 \ + }, + +#define AFFINE_FAST_PATHS(name, format, repeat) \ + BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ + NEAREST_AFFINE_FAST_PATH(name, format, repeat) + + AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) + AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) + AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT) + AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL) + AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD) + AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE) + AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT) + AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL) + AFFINE_FAST_PATHS (pad_a8, a8, PAD) + AFFINE_FAST_PATHS (none_a8, a8, NONE) + AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT) + AFFINE_FAST_PATHS (normal_a8, a8, NORMAL) + AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD) + AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE) + AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT) + AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL) + + /* Affine, no alpha */ + { PIXMAN_any, + (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM), + bits_image_fetch_affine_no_alpha, + _pixman_image_get_scanline_generic_64 + }, + + /* General */ + { PIXMAN_any, 0, bits_image_fetch_general, _pixman_image_get_scanline_generic_64 }, + + { PIXMAN_null }, +}; + +static void +bits_image_property_changed (pixman_image_t *image) +{ + uint32_t flags = image->common.flags; + pixman_format_code_t format = image->common.extended_format_code; + const fetcher_info_t *info; + + _pixman_bits_image_setup_accessors (&image->bits); + + info = fetcher_info; + while (info->format != PIXMAN_null) + { + if ((info->format == format || info->format == PIXMAN_any) && + (info->flags & flags) == info->flags) + { + image->bits.get_scanline_32 = info->fetch_32; + image->bits.get_scanline_64 = info->fetch_64; + break; + } + + info++; + } +} + +static uint32_t * +src_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) +{ + iter->image->bits.get_scanline_32 ( + iter->image, iter->x, iter->y++, iter->width, iter->buffer, mask); + + return iter->buffer; +} + +static uint32_t * +src_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) +{ + iter->image->bits.get_scanline_64 ( + iter->image, iter->x, iter->y++, iter->width, iter->buffer, mask); + + return iter->buffer; +} + +void +_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + if (iter->flags & ITER_NARROW) + iter->get_scanline = src_get_scanline_narrow; + else + iter->get_scanline = src_get_scanline_wide; +} + +static uint32_t * +dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) +{ + pixman_image_t *image = iter->image; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask); + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->fetch_scanline_32 ( + (pixman_image_t *)image->common.alpha_map, + x, y, width, buffer, mask); + } + + return iter->buffer; +} + +static uint32_t * +dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) +{ + bits_image_t * image = &iter->image->bits; + int x = iter->x; + int y = iter->y; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + image->fetch_scanline_64 ( + (pixman_image_t *)image, x, y, width, buffer, mask); + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->fetch_scanline_64 ( + (pixman_image_t *)image->common.alpha_map, x, y, width, buffer, mask); + } + + return iter->buffer; +} + +static void +dest_write_back_narrow (pixman_iter_t *iter) +{ + bits_image_t * image = &iter->image->bits; + int x = iter->x; + int y = iter->y; + int width = iter->width; + const uint32_t *buffer = iter->buffer; + + image->store_scanline_32 (image, x, y, width, buffer); + + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->store_scanline_32 ( + image->common.alpha_map, x, y, width, buffer); + } + + iter->y++; +} + +static void +dest_write_back_wide (pixman_iter_t *iter) +{ + bits_image_t * image = &iter->image->bits; + int x = iter->x; + int y = iter->y; + int width = iter->width; + const uint32_t *buffer = iter->buffer; + + image->store_scanline_64 (image, x, y, width, buffer); + + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + image->common.alpha_map->store_scanline_64 ( + image->common.alpha_map, x, y, width, buffer); + } + + iter->y++; +} + +void +_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter) +{ + if (iter->flags & ITER_NARROW) + { + if ((iter->flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) + { + iter->get_scanline = _pixman_iter_get_scanline_noop; + } + else + { + iter->get_scanline = dest_get_scanline_narrow; + } + + iter->write_back = dest_write_back_narrow; + } + else + { + iter->get_scanline = dest_get_scanline_wide; + iter->write_back = dest_write_back_wide; + } +} + +static uint32_t * +create_bits (pixman_format_code_t format, + int width, + int height, + int * rowstride_bytes) +{ + int stride; + size_t buf_size; + int bpp; + + /* what follows is a long-winded way, avoiding any possibility of integer + * overflows, of saying: + * stride = ((width * bpp + 0x1f) >> 5) * sizeof (uint32_t); + */ + + bpp = PIXMAN_FORMAT_BPP (format); + if (_pixman_multiply_overflows_int (width, bpp)) + return NULL; + + stride = width * bpp; + if (_pixman_addition_overflows_int (stride, 0x1f)) + return NULL; + + stride += 0x1f; + stride >>= 5; + + stride *= sizeof (uint32_t); + + if (_pixman_multiply_overflows_size (height, stride)) + return NULL; + + buf_size = height * stride; + + if (rowstride_bytes) + *rowstride_bytes = stride; + + return calloc (buf_size, 1); +} + +PIXMAN_EXPORT pixman_image_t * +pixman_image_create_bits (pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride_bytes) +{ + pixman_image_t *image; + uint32_t *free_me = NULL; + + /* must be a whole number of uint32_t's + */ + return_val_if_fail ( + bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); + + return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL); + + if (!bits && width && height) + { + free_me = bits = create_bits (format, width, height, &rowstride_bytes); + if (!bits) + return NULL; + } + + image = _pixman_image_allocate (); + + if (!image) + { + if (free_me) + free (free_me); + + return NULL; + } + + image->type = BITS; + image->bits.format = format; + image->bits.width = width; + image->bits.height = height; + image->bits.bits = bits; + image->bits.free_me = free_me; + image->bits.read_func = NULL; + image->bits.write_func = NULL; + + /* The rowstride is stored in number of uint32_t */ + image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t); + + image->bits.indexed = NULL; + + image->common.property_changed = bits_image_property_changed; + + _pixman_image_reset_clip_region (image); + + return image; +} diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index 9a6919e85..bbdc8e8b0 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -30,7 +30,7 @@ #include #include "pixman-private.h" #include "pixman-combine32.h" -#include "pixman-fast-path.h" +#include "pixman-inlines.h" static force_inline uint32_t fetch_24 (uint8_t *a) diff --git a/pixman/pixman/pixman-fast-path.h b/pixman/pixman/pixman-fast-path.h deleted file mode 100644 index 2b55c2e74..000000000 --- a/pixman/pixman/pixman-fast-path.h +++ /dev/null @@ -1,1189 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Keith Packard, SuSE, Inc. - */ - -#ifndef PIXMAN_FAST_PATH_H__ -#define PIXMAN_FAST_PATH_H__ - -#include "pixman-private.h" - -#define PIXMAN_REPEAT_COVER -1 - -/* Flags describing input parameters to fast path macro template. - * Turning on some flag values may indicate that - * "some property X is available so template can use this" or - * "some property X should be handled by template". - * - * FLAG_HAVE_SOLID_MASK - * Input mask is solid so template should handle this. - * - * FLAG_HAVE_NON_SOLID_MASK - * Input mask is bits mask so template should handle this. - * - * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually - * exclusive. (It's not allowed to turn both flags on) - */ -#define FLAG_NONE (0) -#define FLAG_HAVE_SOLID_MASK (1 << 1) -#define FLAG_HAVE_NON_SOLID_MASK (1 << 2) - -/* To avoid too short repeated scanline function calls, extend source - * scanlines having width less than below constant value. - */ -#define REPEAT_NORMAL_MIN_WIDTH 64 - -static force_inline pixman_bool_t -repeat (pixman_repeat_t repeat, int *c, int size) -{ - if (repeat == PIXMAN_REPEAT_NONE) - { - if (*c < 0 || *c >= size) - return FALSE; - } - else if (repeat == PIXMAN_REPEAT_NORMAL) - { - while (*c >= size) - *c -= size; - while (*c < 0) - *c += size; - } - else if (repeat == PIXMAN_REPEAT_PAD) - { - *c = CLIP (*c, 0, size - 1); - } - else /* REFLECT */ - { - *c = MOD (*c, size * 2); - if (*c >= size) - *c = size * 2 - *c - 1; - } - return TRUE; -} - -/* - * For each scanline fetched from source image with PAD repeat: - * - calculate how many pixels need to be padded on the left side - * - calculate how many pixels need to be padded on the right side - * - update width to only count pixels which are fetched from the image - * All this information is returned via 'width', 'left_pad', 'right_pad' - * arguments. The code is assuming that 'unit_x' is positive. - * - * Note: 64-bit math is used in order to avoid potential overflows, which - * is probably excessive in many cases. This particular function - * may need its own correctness test and performance tuning. - */ -static force_inline void -pad_repeat_get_scanline_bounds (int32_t source_image_width, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - int32_t * width, - int32_t * left_pad, - int32_t * right_pad) -{ - int64_t max_vx = (int64_t) source_image_width << 16; - int64_t tmp; - if (vx < 0) - { - tmp = ((int64_t) unit_x - 1 - vx) / unit_x; - if (tmp > *width) - { - *left_pad = *width; - *width = 0; - } - else - { - *left_pad = (int32_t) tmp; - *width -= (int32_t) tmp; - } - } - else - { - *left_pad = 0; - } - tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; - if (tmp < 0) - { - *right_pad = *width; - *width = 0; - } - else if (tmp >= *width) - { - *right_pad = 0; - } - else - { - *right_pad = *width - (int32_t) tmp; - *width = (int32_t) tmp; - } -} - -/* A macroified version of specialized nearest scalers for some - * common 8888 and 565 formats. It supports SRC and OVER ops. - * - * There are two repeat versions, one that handles repeat normal, - * and one without repeat handling that only works if the src region - * used is completely covered by the pre-repeated source samples. - * - * The loops are unrolled to process two pixels per iteration for better - * performance on most CPU architectures (superscalar processors - * can issue several operations simultaneously, other processors can hide - * instructions latencies by pipelining operations). Unrolling more - * does not make much sense because the compiler will start running out - * of spare registers soon. - */ - -#define GET_8888_ALPHA(s) ((s) >> 24) - /* This is not actually used since we don't have an OVER with - 565 source, but it is needed to build. */ -#define GET_0565_ALPHA(s) 0xff -#define GET_x888_ALPHA(s) 0xff - -#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ -static force_inline void \ -scanline_func_name (dst_type_t *dst, \ - const src_type_t *src, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t fully_transparent_src) \ -{ \ - uint32_t d; \ - src_type_t s1, s2; \ - uint8_t a1, a2; \ - int x1, x2; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \ - return; \ - \ - if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ - abort(); \ - \ - while ((w -= 2) >= 0) \ - { \ - x1 = vx >> 16; \ - vx += unit_x; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* This works because we know that unit_x is positive */ \ - while (vx >= max_vx) \ - vx -= max_vx; \ - } \ - s1 = src[x1]; \ - \ - x2 = vx >> 16; \ - vx += unit_x; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* This works because we know that unit_x is positive */ \ - while (vx >= max_vx) \ - vx -= max_vx; \ - } \ - s2 = src[x2]; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ - { \ - a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ - a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ - \ - if (a1 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - else if (s1) \ - { \ - d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \ - s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ - a1 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - \ - if (a2 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ - } \ - else if (s2) \ - { \ - d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ - s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \ - a2 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - } \ - else /* PIXMAN_OP_SRC */ \ - { \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ - } \ - } \ - \ - if (w & 1) \ - { \ - x1 = vx >> 16; \ - s1 = src[x1]; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ - { \ - a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ - \ - if (a1 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - else if (s1) \ - { \ - d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ - s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ - a1 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - } \ - else /* PIXMAN_OP_SRC */ \ - { \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - } \ -} - -#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) \ -static void \ -fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type_t *dst_line; \ - mask_type_t *mask_line; \ - src_type_t *src_first_line; \ - int y; \ - pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ - pixman_fixed_t max_vy; \ - pixman_vector_t v; \ - pixman_fixed_t vx, vy; \ - pixman_fixed_t unit_x, unit_y; \ - int32_t left_pad, right_pad; \ - \ - src_type_t *src; \ - dst_type_t *dst; \ - mask_type_t solid_mask; \ - const mask_type_t *mask = &solid_mask; \ - int src_stride, mask_stride, dst_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ - if (have_mask) \ - { \ - if (mask_is_solid) \ - solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ - else \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ - mask_stride, mask_line, 1); \ - } \ - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ - * transformed from destination space to source space */ \ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ - \ - /* reference point is the center of the pixel */ \ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ - v.vector[2] = pixman_fixed_1; \ - \ - if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ - return; \ - \ - unit_x = src_image->common.transform->matrix[0][0]; \ - unit_y = src_image->common.transform->matrix[1][1]; \ - \ - /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ - v.vector[0] -= pixman_fixed_e; \ - v.vector[1] -= pixman_fixed_e; \ - \ - vx = v.vector[0]; \ - vy = v.vector[1]; \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* Clamp repeating positions inside the actual samples */ \ - max_vx = src_image->bits.width << 16; \ - max_vy = src_image->bits.height << 16; \ - \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - } \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ - PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ - &width, &left_pad, &right_pad); \ - vx += left_pad * unit_x; \ - } \ - \ - while (--height >= 0) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - if (have_mask && !mask_is_solid) \ - { \ - mask = mask_line; \ - mask_line += mask_stride; \ - } \ - \ - y = vy >> 16; \ - vy += unit_y; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ - src = src_first_line + src_stride * y; \ - if (left_pad > 0) \ - { \ - scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \ - } \ - if (width > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ - dst + left_pad, src, width, vx, unit_x, 0, FALSE); \ - } \ - if (right_pad > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ - dst + left_pad + width, src + src_image->bits.width - 1, \ - right_pad, 0, 0, 0, FALSE); \ - } \ - } \ - else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - static const src_type_t zero[1] = { 0 }; \ - if (y < 0 || y >= src_image->bits.height) \ - { \ - scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \ - continue; \ - } \ - src = src_first_line + src_stride * y; \ - if (left_pad > 0) \ - { \ - scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \ - } \ - if (width > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ - dst + left_pad, src, width, vx, unit_x, 0, FALSE); \ - } \ - if (right_pad > 0) \ - { \ - scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ - dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \ - } \ - } \ - else \ - { \ - src = src_first_line + src_stride * y; \ - scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \ - } \ - } \ -} - -/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ -#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) \ - FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, have_mask, mask_is_solid) - -#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ - static force_inline void \ - scanline_func##scale_func_name##_wrapper ( \ - const uint8_t *mask, \ - dst_type_t *dst, \ - const src_type_t *src, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx, \ - pixman_bool_t fully_transparent_src) \ - { \ - scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \ - } \ - FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \ - src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE) - -#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ - FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \ - dst_type_t, repeat_mode) - -#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ - FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ - SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ - OP, repeat_mode) \ - FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \ - scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ - src_type_t, dst_type_t, repeat_mode) - - -#define SCALED_NEAREST_FLAGS \ - (FAST_PATH_SCALE_TRANSFORM | \ - FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NEAREST_FILTER | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NARROW_FORMAT) - -#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -/* Prefer the use of 'cover' variant, because it is faster */ -#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) - -#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) - -#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) - -/*****************************************************************************/ - -/* - * Identify 5 zones in each scanline for bilinear scaling. Depending on - * whether 2 pixels to be interpolated are fetched from the image itself, - * from the padding area around it or from both image and padding area. - */ -static force_inline void -bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - int32_t * left_pad, - int32_t * left_tz, - int32_t * width, - int32_t * right_tz, - int32_t * right_pad) -{ - int width1 = *width, left_pad1, right_pad1; - int width2 = *width, left_pad2, right_pad2; - - pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x, - &width1, &left_pad1, &right_pad1); - pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1, - unit_x, &width2, &left_pad2, &right_pad2); - - *left_pad = left_pad2; - *left_tz = left_pad1 - left_pad2; - *right_tz = right_pad2 - right_pad1; - *right_pad = right_pad1; - *width -= *left_pad + *left_tz + *right_tz + *right_pad; -} - -/* - * Main loop template for single pass bilinear scaling. It needs to be - * provided with 'scanline_func' which should do the compositing operation. - * The needed function has the following prototype: - * - * scanline_func (dst_type_t * dst, - * const mask_type_ * mask, - * const src_type_t * src_top, - * const src_type_t * src_bottom, - * int32_t width, - * int weight_top, - * int weight_bottom, - * pixman_fixed_t vx, - * pixman_fixed_t unit_x, - * pixman_fixed_t max_vx, - * pixman_bool_t zero_src) - * - * Where: - * dst - destination scanline buffer for storing results - * mask - mask buffer (or single value for solid mask) - * src_top, src_bottom - two source scanlines - * width - number of pixels to process - * weight_top - weight of the top row for interpolation - * weight_bottom - weight of the bottom row for interpolation - * vx - initial position for fetching the first pair of - * pixels from the source buffer - * unit_x - position increment needed to move to the next pair - * of pixels - * max_vx - image size as a fixed point value, can be used for - * implementing NORMAL repeat (when it is supported) - * zero_src - boolean hint variable, which is set to TRUE when - * all source pixels are fetched from zero padding - * zone for NONE repeat - * - * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256, - * but sometimes it may be less than that for NONE repeat when handling - * fuzzy antialiased top or bottom image edges. Also both top and - * bottom weight variables are guaranteed to have value in 0-255 - * range and can fit into unsigned byte or be used with 8-bit SIMD - * multiplication instructions. - */ -#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, flags) \ -static void \ -fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ - pixman_composite_info_t *info) \ -{ \ - PIXMAN_COMPOSITE_ARGS (info); \ - dst_type_t *dst_line; \ - mask_type_t *mask_line; \ - src_type_t *src_first_line; \ - int y1, y2; \ - pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ - pixman_vector_t v; \ - pixman_fixed_t vx, vy; \ - pixman_fixed_t unit_x, unit_y; \ - int32_t left_pad, left_tz, right_tz, right_pad; \ - \ - dst_type_t *dst; \ - mask_type_t solid_mask; \ - const mask_type_t *mask = &solid_mask; \ - int src_stride, mask_stride, dst_stride; \ - \ - int src_width; \ - pixman_fixed_t src_width_fixed; \ - int max_x; \ - pixman_bool_t need_src_extension; \ - \ - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ - if (flags & FLAG_HAVE_SOLID_MASK) \ - { \ - solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ - mask_stride = 0; \ - } \ - else if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - { \ - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ - mask_stride, mask_line, 1); \ - } \ - \ - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ - * transformed from destination space to source space */ \ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ - \ - /* reference point is the center of the pixel */ \ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ - v.vector[2] = pixman_fixed_1; \ - \ - if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ - return; \ - \ - unit_x = src_image->common.transform->matrix[0][0]; \ - unit_y = src_image->common.transform->matrix[1][1]; \ - \ - v.vector[0] -= pixman_fixed_1 / 2; \ - v.vector[1] -= pixman_fixed_1 / 2; \ - \ - vy = v.vector[1]; \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ - PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \ - &left_pad, &left_tz, &width, &right_tz, &right_pad); \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - /* PAD repeat does not need special handling for 'transition zones' and */ \ - /* they can be combined with 'padding zones' safely */ \ - left_pad += left_tz; \ - right_pad += right_tz; \ - left_tz = right_tz = 0; \ - } \ - v.vector[0] += left_pad * unit_x; \ - } \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - vx = v.vector[0]; \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \ - max_x = pixman_fixed_to_int (vx + (width - 1) * unit_x) + 1; \ - \ - if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \ - { \ - src_width = 0; \ - \ - while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \ - src_width += src_image->bits.width; \ - \ - need_src_extension = TRUE; \ - } \ - else \ - { \ - src_width = src_image->bits.width; \ - need_src_extension = FALSE; \ - } \ - \ - src_width_fixed = pixman_int_to_fixed (src_width); \ - } \ - \ - while (--height >= 0) \ - { \ - int weight1, weight2; \ - dst = dst_line; \ - dst_line += dst_stride; \ - vx = v.vector[0]; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - { \ - mask = mask_line; \ - mask_line += mask_stride; \ - } \ - \ - y1 = pixman_fixed_to_int (vy); \ - weight2 = (vy >> 8) & 0xff; \ - if (weight2) \ - { \ - /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \ - y2 = y1 + 1; \ - weight1 = 256 - weight2; \ - } \ - else \ - { \ - /* set both top and bottom row to the same scanline, and weights to 128+128 */ \ - y2 = y1; \ - weight1 = weight2 = 128; \ - } \ - vy += unit_y; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - src_type_t *src1, *src2; \ - src_type_t buf1[2]; \ - src_type_t buf2[2]; \ - repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \ - repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \ - src1 = src_first_line + src_stride * y1; \ - src2 = src_first_line + src_stride * y2; \ - \ - if (left_pad > 0) \ - { \ - buf1[0] = buf1[1] = src1[0]; \ - buf2[0] = buf2[1] = src2[0]; \ - scanline_func (dst, mask, \ - buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \ - dst += left_pad; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += left_pad; \ - } \ - if (width > 0) \ - { \ - scanline_func (dst, mask, \ - src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ - dst += width; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += width; \ - } \ - if (right_pad > 0) \ - { \ - buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \ - buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \ - scanline_func (dst, mask, \ - buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \ - } \ - } \ - else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - src_type_t *src1, *src2; \ - src_type_t buf1[2]; \ - src_type_t buf2[2]; \ - /* handle top/bottom zero padding by just setting weights to 0 if needed */ \ - if (y1 < 0) \ - { \ - weight1 = 0; \ - y1 = 0; \ - } \ - if (y1 >= src_image->bits.height) \ - { \ - weight1 = 0; \ - y1 = src_image->bits.height - 1; \ - } \ - if (y2 < 0) \ - { \ - weight2 = 0; \ - y2 = 0; \ - } \ - if (y2 >= src_image->bits.height) \ - { \ - weight2 = 0; \ - y2 = src_image->bits.height - 1; \ - } \ - src1 = src_first_line + src_stride * y1; \ - src2 = src_first_line + src_stride * y2; \ - \ - if (left_pad > 0) \ - { \ - buf1[0] = buf1[1] = 0; \ - buf2[0] = buf2[1] = 0; \ - scanline_func (dst, mask, \ - buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \ - dst += left_pad; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += left_pad; \ - } \ - if (left_tz > 0) \ - { \ - buf1[0] = 0; \ - buf1[1] = src1[0]; \ - buf2[0] = 0; \ - buf2[1] = src2[0]; \ - scanline_func (dst, mask, \ - buf1, buf2, left_tz, weight1, weight2, \ - pixman_fixed_frac (vx), unit_x, 0, FALSE); \ - dst += left_tz; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += left_tz; \ - vx += left_tz * unit_x; \ - } \ - if (width > 0) \ - { \ - scanline_func (dst, mask, \ - src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ - dst += width; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += width; \ - vx += width * unit_x; \ - } \ - if (right_tz > 0) \ - { \ - buf1[0] = src1[src_image->bits.width - 1]; \ - buf1[1] = 0; \ - buf2[0] = src2[src_image->bits.width - 1]; \ - buf2[1] = 0; \ - scanline_func (dst, mask, \ - buf1, buf2, right_tz, weight1, weight2, \ - pixman_fixed_frac (vx), unit_x, 0, FALSE); \ - dst += right_tz; \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += right_tz; \ - } \ - if (right_pad > 0) \ - { \ - buf1[0] = buf1[1] = 0; \ - buf2[0] = buf2[1] = 0; \ - scanline_func (dst, mask, \ - buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \ - } \ - } \ - else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - int32_t num_pixels; \ - int32_t width_remain; \ - src_type_t * src_line_top; \ - src_type_t * src_line_bottom; \ - src_type_t buf1[2]; \ - src_type_t buf2[2]; \ - src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \ - src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \ - int i, j; \ - \ - repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \ - repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \ - src_line_top = src_first_line + src_stride * y1; \ - src_line_bottom = src_first_line + src_stride * y2; \ - \ - if (need_src_extension) \ - { \ - for (i=0; ibits.width; j++, i++) \ - { \ - extended_src_line0[i] = src_line_top[j]; \ - extended_src_line1[i] = src_line_bottom[j]; \ - } \ - } \ - \ - src_line_top = &extended_src_line0[0]; \ - src_line_bottom = &extended_src_line1[0]; \ - } \ - \ - /* Top & Bottom wrap around buffer */ \ - buf1[0] = src_line_top[src_width - 1]; \ - buf1[1] = src_line_top[0]; \ - buf2[0] = src_line_bottom[src_width - 1]; \ - buf2[1] = src_line_bottom[0]; \ - \ - width_remain = width; \ - \ - while (width_remain > 0) \ - { \ - /* We use src_width_fixed because it can make vx in original source range */ \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ - \ - /* Wrap around part */ \ - if (pixman_fixed_to_int (vx) == src_width - 1) \ - { \ - /* for positive unit_x \ - * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \ - * \ - * vx is in range [0, src_width_fixed - pixman_fixed_e] \ - * So we are safe from overflow. \ - */ \ - num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \ - \ - if (num_pixels > width_remain) \ - num_pixels = width_remain; \ - \ - scanline_func (dst, mask, buf1, buf2, num_pixels, \ - weight1, weight2, pixman_fixed_frac(vx), \ - unit_x, src_width_fixed, FALSE); \ - \ - width_remain -= num_pixels; \ - vx += num_pixels * unit_x; \ - dst += num_pixels; \ - \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += num_pixels; \ - \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ - } \ - \ - /* Normal scanline composite */ \ - if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \ - { \ - /* for positive unit_x \ - * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \ - * \ - * vx is in range [0, src_width_fixed - pixman_fixed_e] \ - * So we are safe from overflow here. \ - */ \ - num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \ - / unit_x) + 1; \ - \ - if (num_pixels > width_remain) \ - num_pixels = width_remain; \ - \ - scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \ - weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \ - \ - width_remain -= num_pixels; \ - vx += num_pixels * unit_x; \ - dst += num_pixels; \ - \ - if (flags & FLAG_HAVE_NON_SOLID_MASK) \ - mask += num_pixels; \ - } \ - } \ - } \ - else \ - { \ - scanline_func (dst, mask, src_first_line + src_stride * y1, \ - src_first_line + src_stride * y2, width, \ - weight1, weight2, vx, unit_x, max_vx, FALSE); \ - } \ - } \ -} - -/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ -#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ - dst_type_t, repeat_mode, flags) \ - FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\ - dst_type_t, repeat_mode, flags) - -#define SCALED_BILINEAR_FLAGS \ - (FAST_PATH_SCALE_TRANSFORM | \ - FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_BILINEAR_FILTER | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NARROW_FORMAT) - -#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ - } - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_BILINEAR_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ - } - -/* Prefer the use of 'cover' variant, because it is faster */ -#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \ - SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func) - -#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func) - -#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func) - -#endif diff --git a/pixman/pixman/pixman-inlines.h b/pixman/pixman/pixman-inlines.h new file mode 100644 index 000000000..f1e0cbd77 --- /dev/null +++ b/pixman/pixman/pixman-inlines.h @@ -0,0 +1,1280 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. + */ + +#ifndef PIXMAN_FAST_PATH_H__ +#define PIXMAN_FAST_PATH_H__ + +#include "pixman-private.h" + +#define PIXMAN_REPEAT_COVER -1 + +/* Flags describing input parameters to fast path macro template. + * Turning on some flag values may indicate that + * "some property X is available so template can use this" or + * "some property X should be handled by template". + * + * FLAG_HAVE_SOLID_MASK + * Input mask is solid so template should handle this. + * + * FLAG_HAVE_NON_SOLID_MASK + * Input mask is bits mask so template should handle this. + * + * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually + * exclusive. (It's not allowed to turn both flags on) + */ +#define FLAG_NONE (0) +#define FLAG_HAVE_SOLID_MASK (1 << 1) +#define FLAG_HAVE_NON_SOLID_MASK (1 << 2) + +/* To avoid too short repeated scanline function calls, extend source + * scanlines having width less than below constant value. + */ +#define REPEAT_NORMAL_MIN_WIDTH 64 + +static force_inline pixman_bool_t +repeat (pixman_repeat_t repeat, int *c, int size) +{ + if (repeat == PIXMAN_REPEAT_NONE) + { + if (*c < 0 || *c >= size) + return FALSE; + } + else if (repeat == PIXMAN_REPEAT_NORMAL) + { + while (*c >= size) + *c -= size; + while (*c < 0) + *c += size; + } + else if (repeat == PIXMAN_REPEAT_PAD) + { + *c = CLIP (*c, 0, size - 1); + } + else /* REFLECT */ + { + *c = MOD (*c, size * 2); + if (*c >= size) + *c = size * 2 - *c - 1; + } + return TRUE; +} + +#if SIZEOF_LONG > 4 + +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + uint64_t distxy, distxiy, distixy, distixiy; + uint64_t tl64, tr64, bl64, br64; + uint64_t f, r; + + distxy = distx * disty; + distxiy = distx * (256 - disty); + distixy = (256 - distx) * disty; + distixiy = (256 - distx) * (256 - disty); + + /* Alpha and Blue */ + tl64 = tl & 0xff0000ff; + tr64 = tr & 0xff0000ff; + bl64 = bl & 0xff0000ff; + br64 = br & 0xff0000ff; + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r = f & 0x0000ff0000ff0000ull; + + /* Red and Green */ + tl64 = tl; + tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); + + tr64 = tr; + tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); + + bl64 = bl; + bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); + + br64 = br; + br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); + + return (uint32_t)(r >> 16); +} + +#else + +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t f, r; + + distxy = distx * disty; + distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ + distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ + distixiy = + 256 * 256 - (disty << 8) - + (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ + + /* Blue */ + r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; + + /* Green */ + f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; + r |= f & 0xff000000; + + tl >>= 16; + tr >>= 16; + bl >>= 16; + br >>= 16; + r >>= 16; + + /* Red */ + f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; + r |= f & 0x00ff0000; + + /* Alpha */ + f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; + r |= f & 0xff000000; + + return r; +} + +#endif + +/* + * For each scanline fetched from source image with PAD repeat: + * - calculate how many pixels need to be padded on the left side + * - calculate how many pixels need to be padded on the right side + * - update width to only count pixels which are fetched from the image + * All this information is returned via 'width', 'left_pad', 'right_pad' + * arguments. The code is assuming that 'unit_x' is positive. + * + * Note: 64-bit math is used in order to avoid potential overflows, which + * is probably excessive in many cases. This particular function + * may need its own correctness test and performance tuning. + */ +static force_inline void +pad_repeat_get_scanline_bounds (int32_t source_image_width, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + int32_t * width, + int32_t * left_pad, + int32_t * right_pad) +{ + int64_t max_vx = (int64_t) source_image_width << 16; + int64_t tmp; + if (vx < 0) + { + tmp = ((int64_t) unit_x - 1 - vx) / unit_x; + if (tmp > *width) + { + *left_pad = *width; + *width = 0; + } + else + { + *left_pad = (int32_t) tmp; + *width -= (int32_t) tmp; + } + } + else + { + *left_pad = 0; + } + tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; + if (tmp < 0) + { + *right_pad = *width; + *width = 0; + } + else if (tmp >= *width) + { + *right_pad = 0; + } + else + { + *right_pad = *width - (int32_t) tmp; + *width = (int32_t) tmp; + } +} + +/* A macroified version of specialized nearest scalers for some + * common 8888 and 565 formats. It supports SRC and OVER ops. + * + * There are two repeat versions, one that handles repeat normal, + * and one without repeat handling that only works if the src region + * used is completely covered by the pre-repeated source samples. + * + * The loops are unrolled to process two pixels per iteration for better + * performance on most CPU architectures (superscalar processors + * can issue several operations simultaneously, other processors can hide + * instructions latencies by pipelining operations). Unrolling more + * does not make much sense because the compiler will start running out + * of spare registers soon. + */ + +#define GET_8888_ALPHA(s) ((s) >> 24) + /* This is not actually used since we don't have an OVER with + 565 source, but it is needed to build. */ +#define GET_0565_ALPHA(s) 0xff +#define GET_x888_ALPHA(s) 0xff + +#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ +static force_inline void \ +scanline_func_name (dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t fully_transparent_src) \ +{ \ + uint32_t d; \ + src_type_t s1, s2; \ + uint8_t a1, a2; \ + int x1, x2; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \ + return; \ + \ + if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ + abort(); \ + \ + while ((w -= 2) >= 0) \ + { \ + x1 = vx >> 16; \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= max_vx) \ + vx -= max_vx; \ + } \ + s1 = src[x1]; \ + \ + x2 = vx >> 16; \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= max_vx) \ + vx -= max_vx; \ + } \ + s2 = src[x2]; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ + \ + if (a1 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \ + s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + } \ + dst++; \ + \ + if (a2 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ + } \ + else if (s2) \ + { \ + d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ + s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \ + a2 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ + *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + } \ + dst++; \ + } \ + else /* PIXMAN_OP_SRC */ \ + { \ + *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ + } \ + } \ + \ + if (w & 1) \ + { \ + x1 = vx >> 16; \ + s1 = src[x1]; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + \ + if (a1 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ + s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + } \ + dst++; \ + } \ + else /* PIXMAN_OP_SRC */ \ + { \ + *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + } \ + } \ +} + +#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) \ +static void \ +fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type_t *dst_line; \ + mask_type_t *mask_line; \ + src_type_t *src_first_line; \ + int y; \ + pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ + pixman_fixed_t max_vy; \ + pixman_vector_t v; \ + pixman_fixed_t vx, vy; \ + pixman_fixed_t unit_x, unit_y; \ + int32_t left_pad, right_pad; \ + \ + src_type_t *src; \ + dst_type_t *dst; \ + mask_type_t solid_mask; \ + const mask_type_t *mask = &solid_mask; \ + int src_stride, mask_stride, dst_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ + if (have_mask) \ + { \ + if (mask_is_solid) \ + solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ + else \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ + mask_stride, mask_line, 1); \ + } \ + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ + * transformed from destination space to source space */ \ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ + \ + /* reference point is the center of the pixel */ \ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ + v.vector[2] = pixman_fixed_1; \ + \ + if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ + return; \ + \ + unit_x = src_image->common.transform->matrix[0][0]; \ + unit_y = src_image->common.transform->matrix[1][1]; \ + \ + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ + v.vector[0] -= pixman_fixed_e; \ + v.vector[1] -= pixman_fixed_e; \ + \ + vx = v.vector[0]; \ + vy = v.vector[1]; \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* Clamp repeating positions inside the actual samples */ \ + max_vx = src_image->bits.width << 16; \ + max_vy = src_image->bits.height << 16; \ + \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + } \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ + PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ + &width, &left_pad, &right_pad); \ + vx += left_pad * unit_x; \ + } \ + \ + while (--height >= 0) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + if (have_mask && !mask_is_solid) \ + { \ + mask = mask_line; \ + mask_line += mask_stride; \ + } \ + \ + y = vy >> 16; \ + vy += unit_y; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \ + } \ + if (width > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ + dst + left_pad, src, width, vx, unit_x, 0, FALSE); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ + dst + left_pad + width, src + src_image->bits.width - 1, \ + right_pad, 0, 0, 0, FALSE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + static const src_type_t zero[1] = { 0 }; \ + if (y < 0 || y >= src_image->bits.height) \ + { \ + scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \ + continue; \ + } \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \ + } \ + if (width > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ + dst + left_pad, src, width, vx, unit_x, 0, FALSE); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ + dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \ + } \ + } \ + else \ + { \ + src = src_first_line + src_stride * y; \ + scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \ + } \ + } \ +} + +/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ +#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) \ + FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) + +#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ + static force_inline void \ + scanline_func##scale_func_name##_wrapper ( \ + const uint8_t *mask, \ + dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t fully_transparent_src) \ + { \ + scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \ + } \ + FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \ + src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE) + +#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ + FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \ + dst_type_t, repeat_mode) + +#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ + FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ + OP, repeat_mode) \ + FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \ + scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + src_type_t, dst_type_t, repeat_mode) + + +#define SCALED_NEAREST_FLAGS \ + (FAST_PATH_SCALE_TRANSFORM | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NARROW_FORMAT) + +#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +/* Prefer the use of 'cover' variant, because it is faster */ +#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) + +/*****************************************************************************/ + +/* + * Identify 5 zones in each scanline for bilinear scaling. Depending on + * whether 2 pixels to be interpolated are fetched from the image itself, + * from the padding area around it or from both image and padding area. + */ +static force_inline void +bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + int32_t * left_pad, + int32_t * left_tz, + int32_t * width, + int32_t * right_tz, + int32_t * right_pad) +{ + int width1 = *width, left_pad1, right_pad1; + int width2 = *width, left_pad2, right_pad2; + + pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x, + &width1, &left_pad1, &right_pad1); + pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1, + unit_x, &width2, &left_pad2, &right_pad2); + + *left_pad = left_pad2; + *left_tz = left_pad1 - left_pad2; + *right_tz = right_pad2 - right_pad1; + *right_pad = right_pad1; + *width -= *left_pad + *left_tz + *right_tz + *right_pad; +} + +/* + * Main loop template for single pass bilinear scaling. It needs to be + * provided with 'scanline_func' which should do the compositing operation. + * The needed function has the following prototype: + * + * scanline_func (dst_type_t * dst, + * const mask_type_ * mask, + * const src_type_t * src_top, + * const src_type_t * src_bottom, + * int32_t width, + * int weight_top, + * int weight_bottom, + * pixman_fixed_t vx, + * pixman_fixed_t unit_x, + * pixman_fixed_t max_vx, + * pixman_bool_t zero_src) + * + * Where: + * dst - destination scanline buffer for storing results + * mask - mask buffer (or single value for solid mask) + * src_top, src_bottom - two source scanlines + * width - number of pixels to process + * weight_top - weight of the top row for interpolation + * weight_bottom - weight of the bottom row for interpolation + * vx - initial position for fetching the first pair of + * pixels from the source buffer + * unit_x - position increment needed to move to the next pair + * of pixels + * max_vx - image size as a fixed point value, can be used for + * implementing NORMAL repeat (when it is supported) + * zero_src - boolean hint variable, which is set to TRUE when + * all source pixels are fetched from zero padding + * zone for NONE repeat + * + * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256, + * but sometimes it may be less than that for NONE repeat when handling + * fuzzy antialiased top or bottom image edges. Also both top and + * bottom weight variables are guaranteed to have value in 0-255 + * range and can fit into unsigned byte or be used with 8-bit SIMD + * multiplication instructions. + */ +#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, flags) \ +static void \ +fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type_t *dst_line; \ + mask_type_t *mask_line; \ + src_type_t *src_first_line; \ + int y1, y2; \ + pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ + pixman_vector_t v; \ + pixman_fixed_t vx, vy; \ + pixman_fixed_t unit_x, unit_y; \ + int32_t left_pad, left_tz, right_tz, right_pad; \ + \ + dst_type_t *dst; \ + mask_type_t solid_mask; \ + const mask_type_t *mask = &solid_mask; \ + int src_stride, mask_stride, dst_stride; \ + \ + int src_width; \ + pixman_fixed_t src_width_fixed; \ + int max_x; \ + pixman_bool_t need_src_extension; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ + if (flags & FLAG_HAVE_SOLID_MASK) \ + { \ + solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ + mask_stride = 0; \ + } \ + else if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + { \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ + mask_stride, mask_line, 1); \ + } \ + \ + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ + * transformed from destination space to source space */ \ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ + \ + /* reference point is the center of the pixel */ \ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ + v.vector[2] = pixman_fixed_1; \ + \ + if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ + return; \ + \ + unit_x = src_image->common.transform->matrix[0][0]; \ + unit_y = src_image->common.transform->matrix[1][1]; \ + \ + v.vector[0] -= pixman_fixed_1 / 2; \ + v.vector[1] -= pixman_fixed_1 / 2; \ + \ + vy = v.vector[1]; \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ + PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \ + &left_pad, &left_tz, &width, &right_tz, &right_pad); \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + /* PAD repeat does not need special handling for 'transition zones' and */ \ + /* they can be combined with 'padding zones' safely */ \ + left_pad += left_tz; \ + right_pad += right_tz; \ + left_tz = right_tz = 0; \ + } \ + v.vector[0] += left_pad * unit_x; \ + } \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + vx = v.vector[0]; \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \ + max_x = pixman_fixed_to_int (vx + (width - 1) * unit_x) + 1; \ + \ + if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \ + { \ + src_width = 0; \ + \ + while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \ + src_width += src_image->bits.width; \ + \ + need_src_extension = TRUE; \ + } \ + else \ + { \ + src_width = src_image->bits.width; \ + need_src_extension = FALSE; \ + } \ + \ + src_width_fixed = pixman_int_to_fixed (src_width); \ + } \ + \ + while (--height >= 0) \ + { \ + int weight1, weight2; \ + dst = dst_line; \ + dst_line += dst_stride; \ + vx = v.vector[0]; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + { \ + mask = mask_line; \ + mask_line += mask_stride; \ + } \ + \ + y1 = pixman_fixed_to_int (vy); \ + weight2 = (vy >> 8) & 0xff; \ + if (weight2) \ + { \ + /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \ + y2 = y1 + 1; \ + weight1 = 256 - weight2; \ + } \ + else \ + { \ + /* set both top and bottom row to the same scanline, and weights to 128+128 */ \ + y2 = y1; \ + weight1 = weight2 = 128; \ + } \ + vy += unit_y; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + src_type_t *src1, *src2; \ + src_type_t buf1[2]; \ + src_type_t buf2[2]; \ + repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \ + repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \ + src1 = src_first_line + src_stride * y1; \ + src2 = src_first_line + src_stride * y2; \ + \ + if (left_pad > 0) \ + { \ + buf1[0] = buf1[1] = src1[0]; \ + buf2[0] = buf2[1] = src2[0]; \ + scanline_func (dst, mask, \ + buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \ + dst += left_pad; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += left_pad; \ + } \ + if (width > 0) \ + { \ + scanline_func (dst, mask, \ + src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ + dst += width; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += width; \ + } \ + if (right_pad > 0) \ + { \ + buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \ + buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \ + scanline_func (dst, mask, \ + buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + src_type_t *src1, *src2; \ + src_type_t buf1[2]; \ + src_type_t buf2[2]; \ + /* handle top/bottom zero padding by just setting weights to 0 if needed */ \ + if (y1 < 0) \ + { \ + weight1 = 0; \ + y1 = 0; \ + } \ + if (y1 >= src_image->bits.height) \ + { \ + weight1 = 0; \ + y1 = src_image->bits.height - 1; \ + } \ + if (y2 < 0) \ + { \ + weight2 = 0; \ + y2 = 0; \ + } \ + if (y2 >= src_image->bits.height) \ + { \ + weight2 = 0; \ + y2 = src_image->bits.height - 1; \ + } \ + src1 = src_first_line + src_stride * y1; \ + src2 = src_first_line + src_stride * y2; \ + \ + if (left_pad > 0) \ + { \ + buf1[0] = buf1[1] = 0; \ + buf2[0] = buf2[1] = 0; \ + scanline_func (dst, mask, \ + buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \ + dst += left_pad; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += left_pad; \ + } \ + if (left_tz > 0) \ + { \ + buf1[0] = 0; \ + buf1[1] = src1[0]; \ + buf2[0] = 0; \ + buf2[1] = src2[0]; \ + scanline_func (dst, mask, \ + buf1, buf2, left_tz, weight1, weight2, \ + pixman_fixed_frac (vx), unit_x, 0, FALSE); \ + dst += left_tz; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += left_tz; \ + vx += left_tz * unit_x; \ + } \ + if (width > 0) \ + { \ + scanline_func (dst, mask, \ + src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ + dst += width; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += width; \ + vx += width * unit_x; \ + } \ + if (right_tz > 0) \ + { \ + buf1[0] = src1[src_image->bits.width - 1]; \ + buf1[1] = 0; \ + buf2[0] = src2[src_image->bits.width - 1]; \ + buf2[1] = 0; \ + scanline_func (dst, mask, \ + buf1, buf2, right_tz, weight1, weight2, \ + pixman_fixed_frac (vx), unit_x, 0, FALSE); \ + dst += right_tz; \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += right_tz; \ + } \ + if (right_pad > 0) \ + { \ + buf1[0] = buf1[1] = 0; \ + buf2[0] = buf2[1] = 0; \ + scanline_func (dst, mask, \ + buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + int32_t num_pixels; \ + int32_t width_remain; \ + src_type_t * src_line_top; \ + src_type_t * src_line_bottom; \ + src_type_t buf1[2]; \ + src_type_t buf2[2]; \ + src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \ + src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \ + int i, j; \ + \ + repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \ + repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \ + src_line_top = src_first_line + src_stride * y1; \ + src_line_bottom = src_first_line + src_stride * y2; \ + \ + if (need_src_extension) \ + { \ + for (i=0; ibits.width; j++, i++) \ + { \ + extended_src_line0[i] = src_line_top[j]; \ + extended_src_line1[i] = src_line_bottom[j]; \ + } \ + } \ + \ + src_line_top = &extended_src_line0[0]; \ + src_line_bottom = &extended_src_line1[0]; \ + } \ + \ + /* Top & Bottom wrap around buffer */ \ + buf1[0] = src_line_top[src_width - 1]; \ + buf1[1] = src_line_top[0]; \ + buf2[0] = src_line_bottom[src_width - 1]; \ + buf2[1] = src_line_bottom[0]; \ + \ + width_remain = width; \ + \ + while (width_remain > 0) \ + { \ + /* We use src_width_fixed because it can make vx in original source range */ \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ + \ + /* Wrap around part */ \ + if (pixman_fixed_to_int (vx) == src_width - 1) \ + { \ + /* for positive unit_x \ + * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \ + * \ + * vx is in range [0, src_width_fixed - pixman_fixed_e] \ + * So we are safe from overflow. \ + */ \ + num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \ + \ + if (num_pixels > width_remain) \ + num_pixels = width_remain; \ + \ + scanline_func (dst, mask, buf1, buf2, num_pixels, \ + weight1, weight2, pixman_fixed_frac(vx), \ + unit_x, src_width_fixed, FALSE); \ + \ + width_remain -= num_pixels; \ + vx += num_pixels * unit_x; \ + dst += num_pixels; \ + \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += num_pixels; \ + \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ + } \ + \ + /* Normal scanline composite */ \ + if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \ + { \ + /* for positive unit_x \ + * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \ + * \ + * vx is in range [0, src_width_fixed - pixman_fixed_e] \ + * So we are safe from overflow here. \ + */ \ + num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \ + / unit_x) + 1; \ + \ + if (num_pixels > width_remain) \ + num_pixels = width_remain; \ + \ + scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \ + weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \ + \ + width_remain -= num_pixels; \ + vx += num_pixels * unit_x; \ + dst += num_pixels; \ + \ + if (flags & FLAG_HAVE_NON_SOLID_MASK) \ + mask += num_pixels; \ + } \ + } \ + } \ + else \ + { \ + scanline_func (dst, mask, src_first_line + src_stride * y1, \ + src_first_line + src_stride * y2, width, \ + weight1, weight2, vx, unit_x, max_vx, FALSE); \ + } \ + } \ +} + +/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ +#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, flags) \ + FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\ + dst_type_t, repeat_mode, flags) + +#define SCALED_BILINEAR_FLAGS \ + (FAST_PATH_SCALE_TRANSFORM | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_BILINEAR_FILTER | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NARROW_FORMAT) + +#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_BILINEAR_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ + } + +/* Prefer the use of 'cover' variant, because it is faster */ +#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \ + SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func) + +#endif diff --git a/pixman/pixman/pixman-noop.c b/pixman/pixman/pixman-noop.c index 4f8c3a18e..906a491f5 100644 --- a/pixman/pixman/pixman-noop.c +++ b/pixman/pixman/pixman-noop.c @@ -28,7 +28,7 @@ #include #include "pixman-private.h" #include "pixman-combine32.h" -#include "pixman-fast-path.h" +#include "pixman-inlines.h" static void noop_composite (pixman_implementation_t *imp, diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h index 6a3935e4d..a25897d77 100644 --- a/pixman/pixman/pixman-private.h +++ b/pixman/pixman/pixman-private.h @@ -691,10 +691,13 @@ void * pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c); pixman_bool_t -pixman_multiply_overflows_int (unsigned int a, unsigned int b); +_pixman_multiply_overflows_size (size_t a, size_t b); pixman_bool_t -pixman_addition_overflows_int (unsigned int a, unsigned int b); +_pixman_multiply_overflows_int (unsigned int a, unsigned int b); + +pixman_bool_t +_pixman_addition_overflows_int (unsigned int a, unsigned int b); /* Compositing utilities */ void diff --git a/pixman/pixman/pixman-region.c b/pixman/pixman/pixman-region.c index 142493b77..47beb5238 100644 --- a/pixman/pixman/pixman-region.c +++ b/pixman/pixman/pixman-region.c @@ -102,7 +102,11 @@ static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 }; static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 }; +#if defined (__llvm__) && !defined (__clang__) +static const volatile region_data_type_t PREFIX (_broken_data_) = { 0, 0 }; +#else static const region_data_type_t PREFIX (_broken_data_) = { 0, 0 }; +#endif static box_type_t *pixman_region_empty_box = (box_type_t *)&PREFIX (_empty_box_); @@ -2086,6 +2090,40 @@ PIXMAN_EXPORT PREFIX (_inverse) (region_type_t *new_reg, /* Destination region return TRUE; } +/* In time O(log n), locate the first box whose y2 is greater than y. + * Return @end if no such box exists. + */ +static box_type_t * +find_box_for_y (box_type_t *begin, box_type_t *end, int y) +{ + box_type_t *mid; + + if (end == begin) + return end; + + if (end - begin == 1) + { + if (begin->y2 > y) + return begin; + else + return end; + } + + mid = begin + (end - begin) / 2; + if (mid->y2 > y) + { + /* If no box is found in [begin, mid], the function + * will return @mid, which is then known to be the + * correct answer. + */ + return find_box_for_y (begin, mid, y); + } + else + { + return find_box_for_y (mid, end, y); + } +} + /* * rect_in(region, rect) * This routine takes a pointer to a region and a pointer to a box @@ -2102,7 +2140,6 @@ PIXMAN_EXPORT PREFIX (_inverse) (region_type_t *new_reg, /* Destination region * partially in the region) or is outside the region (we reached a band * that doesn't overlap the box at all and part_in is false) */ - pixman_region_overlap_t PIXMAN_EXPORT PREFIX (_contains_rectangle) (region_type_t * region, box_type_t * prect) @@ -2139,12 +2176,15 @@ PIXMAN_EXPORT PREFIX (_contains_rectangle) (region_type_t * region, /* can stop when both part_out and part_in are TRUE, or we reach prect->y2 */ for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects; - pbox != pbox_end; - pbox++) + pbox != pbox_end; + pbox++) { - - if (pbox->y2 <= y) - continue; /* getting up to speed or skipping remainder of band */ + /* getting up to speed or skipping remainder of band */ + if (pbox->y2 <= y) + { + if ((pbox = find_box_for_y (pbox, pbox_end, y)) == pbox_end) + break; + } if (pbox->y1 > y) { @@ -2342,13 +2382,13 @@ PREFIX (_contains_point) (region_type_t * region, return(TRUE); } - for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects; - pbox != pbox_end; - pbox++) - { - if (y >= pbox->y2) - continue; /* not there yet */ + pbox = PIXREGION_BOXPTR (region); + pbox_end = pbox + numRects; + pbox = find_box_for_y (pbox, pbox_end, y); + + for (;pbox != pbox_end; pbox++) + { if ((y < pbox->y1) || (x < pbox->x1)) break; /* missed it */ diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index 3d51c2fda..6689c53a2 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -34,7 +34,7 @@ #include /* for SSE2 intrinsics */ #include "pixman-private.h" #include "pixman-combine32.h" -#include "pixman-fast-path.h" +#include "pixman-inlines.h" static __m128i mask_0080; static __m128i mask_00ff; diff --git a/pixman/pixman/pixman-utils.c b/pixman/pixman/pixman-utils.c index cb4e62199..49e3488aa 100644 --- a/pixman/pixman/pixman-utils.c +++ b/pixman/pixman/pixman-utils.c @@ -31,15 +31,19 @@ #include "pixman-private.h" pixman_bool_t -pixman_multiply_overflows_int (unsigned int a, - unsigned int b) +_pixman_multiply_overflows_size (size_t a, size_t b) +{ + return a >= SIZE_MAX / b; +} + +pixman_bool_t +_pixman_multiply_overflows_int (unsigned int a, unsigned int b) { return a >= INT32_MAX / b; } pixman_bool_t -pixman_addition_overflows_int (unsigned int a, - unsigned int b) +_pixman_addition_overflows_int (unsigned int a, unsigned int b) { return a > INT32_MAX - b; } diff --git a/pixman/test/Makefile.am b/pixman/test/Makefile.am index 9dc72199e..9f61fc9e4 100644 --- a/pixman/test/Makefile.am +++ b/pixman/test/Makefile.am @@ -15,6 +15,7 @@ TESTPROGRAMS = \ scaling-crash-test \ scaling-helpers-test \ gradient-crash-test \ + region-contains-test \ alphamap \ stress-test \ composite-traps-test \ @@ -26,6 +27,7 @@ TESTPROGRAMS = \ pdf_op_test_SOURCES = pdf-op-test.c utils.c utils.h region_test_SOURCES = region-test.c utils.c utils.h blitters_test_SOURCES = blitters-test.c utils.c utils.h +region_contains_test_SOURCES = region-contains-test.c utils.c utils.h composite_traps_test_SOURCES = composite-traps-test.c utils.c utils.h scaling_test_SOURCES = scaling-test.c utils.c utils.h affine_test_SOURCES = affine-test.c utils.c utils.h diff --git a/pixman/test/Makefile.win32 b/pixman/test/Makefile.win32 index c71afe187..a62b6fc35 100644 --- a/pixman/test/Makefile.win32 +++ b/pixman/test/Makefile.win32 @@ -35,6 +35,7 @@ SOURCES = \ scaling-test.c \ affine-test.c \ composite.c \ + lowlevel-blt-bench.c \ utils.c TESTS = \ @@ -56,6 +57,8 @@ TESTS = \ $(CFG_VAR)/affine-test.exe \ $(CFG_VAR)/composite.exe +BENCHMARKS = \ + $(CFG_VAR)/lowlevel-blt-bench.exe OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(SOURCES)) @@ -66,7 +69,7 @@ $(CFG_VAR)/%.obj: %.c $(CFG_VAR)/%.exe: $(CFG_VAR)/%.obj $(LINK) /NOLOGO /OUT:$@ $< $(CFG_VAR)/utils.obj $(TEST_LDADD) -all: $(OBJECTS) $(TESTS) +all: $(OBJECTS) $(TESTS) $(BENCHMARKS) @exit 0 clean: diff --git a/pixman/test/lowlevel-blt-bench.c b/pixman/test/lowlevel-blt-bench.c index d58587d51..099e434f0 100644 --- a/pixman/test/lowlevel-blt-bench.c +++ b/pixman/test/lowlevel-blt-bench.c @@ -22,16 +22,13 @@ * DEALINGS IN THE SOFTWARE. */ -#include #include #include #include -#ifdef HAVE_CONFIG_H -#include -#endif +#define PIXMAN_USE_INTERNAL_API +#include -#include "pixman-private.h" #include "utils.h" #define SOLID_FLAG 1 diff --git a/pixman/test/region-contains-test.c b/pixman/test/region-contains-test.c new file mode 100644 index 000000000..d761c4bdf --- /dev/null +++ b/pixman/test/region-contains-test.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include "utils.h" + +static void +make_random_region (pixman_region32_t *region) +{ + int n_boxes; + + pixman_region32_init (region); + + n_boxes = lcg_rand_n (64); + while (n_boxes--) + { + int32_t x1, y1, x2, y2; + + x1 = (int32_t)lcg_rand_u32(); + y1 = (int32_t)lcg_rand_u32(); + x2 = (int32_t)lcg_rand_u32(); + y2 = (int32_t)lcg_rand_u32(); + + pixman_region32_union_rect (region, region, x1, y1, x2, y2); + } +} + +static void +print_box (pixman_box32_t *box) +{ + printf (" %d %d %d %d\n", box->x1, box->y1, box->x2, box->y2); +} + +static int32_t +random_coord (pixman_region32_t *region, pixman_bool_t x) +{ + pixman_box32_t *b, *bb; + int n_boxes; + int begin, end; + + if (lcg_rand_n (14)) + { + bb = pixman_region32_rectangles (region, &n_boxes); + if (n_boxes == 0) + goto use_extent; + b = bb + lcg_rand_n (n_boxes); + } + else + { + use_extent: + b = pixman_region32_extents (region); + n_boxes = 1; + } + + if (x) + { + begin = b->x1; + end = b->x2; + } + else + { + begin = b->y1; + end = b->y2; + } + + switch (lcg_rand_n (5)) + { + case 0: + return begin - lcg_rand_u32(); + case 1: + return end + lcg_rand_u32 (); + case 2: + return end; + case 3: + return begin; + default: + return (begin + end) / 2; + } + return 0; +} + +static uint32_t +compute_crc32_u32 (uint32_t crc32, uint32_t v) +{ + if (!is_little_endian()) + { + v = ((v & 0xff000000) >> 24) | + ((v & 0x00ff0000) >> 8) | + ((v & 0x0000ff00) << 8) | + ((v & 0x000000ff) << 24); + } + + return compute_crc32 (crc32, &v, sizeof (int32_t)); +} + +static uint32_t +crc32_box32 (uint32_t crc32, pixman_box32_t *box) +{ + crc32 = compute_crc32_u32 (crc32, box->x1); + crc32 = compute_crc32_u32 (crc32, box->y1); + crc32 = compute_crc32_u32 (crc32, box->x2); + crc32 = compute_crc32_u32 (crc32, box->y2); + + return crc32; +} + +static uint32_t +test_region_contains_rectangle (int i, int verbose) +{ + pixman_box32_t box; + pixman_box32_t rbox = { 0, 0, 0, 0 }; + pixman_region32_t region; + uint32_t r, r1, r2, r3, r4, crc32; + + lcg_srand (i); + + make_random_region (®ion); + + box.x1 = random_coord (®ion, TRUE); + box.x2 = box.x1 + lcg_rand_u32 (); + box.y1 = random_coord (®ion, FALSE); + box.y2 = box.y1 + lcg_rand_u32 (); + + if (verbose) + { + int n_rects; + pixman_box32_t *boxes; + + boxes = pixman_region32_rectangles (®ion, &n_rects); + + printf ("region:\n"); + while (n_rects--) + print_box (boxes++); + printf ("box:\n"); + print_box (&box); + } + + crc32 = 0; + + r1 = pixman_region32_contains_point (®ion, box.x1, box.y1, &rbox); + crc32 = crc32_box32 (crc32, &rbox); + r2 = pixman_region32_contains_point (®ion, box.x1, box.y2, &rbox); + crc32 = crc32_box32 (crc32, &rbox); + r3 = pixman_region32_contains_point (®ion, box.x2, box.y1, &rbox); + crc32 = crc32_box32 (crc32, &rbox); + r4 = pixman_region32_contains_point (®ion, box.x2, box.y2, &rbox); + crc32 = crc32_box32 (crc32, &rbox); + + r = pixman_region32_contains_rectangle (®ion, &box); + r = (i << 8) | (r << 4) | (r1 << 3) | (r2 << 2) | (r3 << 1) | (r4 << 0); + + crc32 = compute_crc32_u32 (crc32, r); + + if (verbose) + printf ("results: %d %d %d %d %d\n", (r & 0xf0) >> 4, r1, r2, r3, r4); + + pixman_region32_fini (®ion); + + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main ("region_contains", + 1000000, + 0x86311506, + test_region_contains_rectangle, + argc, argv); +} diff --git a/pixman/test/scaling-helpers-test.c b/pixman/test/scaling-helpers-test.c index c1861389b..a38cac544 100644 --- a/pixman/test/scaling-helpers-test.c +++ b/pixman/test/scaling-helpers-test.c @@ -4,7 +4,7 @@ #include #include #include "utils.h" -#include "pixman-fast-path.h" +#include "pixman-inlines.h" /* A trivial reference implementation for * 'bilinear_pad_repeat_get_scanline_bounds' diff --git a/pixman/test/utils.c b/pixman/test/utils.c index 402560227..44188ea90 100644 --- a/pixman/test/utils.c +++ b/pixman/test/utils.c @@ -130,6 +130,14 @@ compute_crc32 (uint32_t in_crc32, return (crc32 ^ 0xFFFFFFFF); } +pixman_bool_t +is_little_endian (void) +{ + volatile uint16_t endian_check_var = 0x1234; + + return (*(volatile uint8_t *)&endian_check_var == 0x34); +} + /* perform endian conversion of pixel data */ void @@ -142,8 +150,7 @@ image_endian_swap (pixman_image_t *img) int i, j; /* swap bytes only on big endian systems */ - volatile uint16_t endian_check_var = 0x1234; - if (*(volatile uint8_t *)&endian_check_var != 0x12) + if (is_little_endian()) return; if (bpp == 8) diff --git a/pixman/test/utils.h b/pixman/test/utils.h index 615ad7841..f0c9c300c 100644 --- a/pixman/test/utils.h +++ b/pixman/test/utils.h @@ -44,10 +44,14 @@ lcg_rand_N (int max) static inline uint32_t lcg_rand_u32 (void) { - uint32_t lo = lcg_rand(); - uint32_t hi = lcg_rand(); - - return (hi << 16) | lo; + /* This uses the 10/11 most significant bits from the 3 lcg results + * (and mixes them with the low from the adjacent one). + */ + uint32_t lo = lcg_rand() >> -(32 - 15 - 11 * 2); + uint32_t mid = lcg_rand() << (32 - 15 - 11 * 1); + uint32_t hi = lcg_rand() << (32 - 15 - 11 * 0); + + return (hi ^ mid ^ lo); } /* CRC 32 computation @@ -57,6 +61,10 @@ compute_crc32 (uint32_t in_crc32, const void *buf, size_t buf_len); +/* Returns TRUE if running on a little endian system */ +pixman_bool_t +is_little_endian (void); + /* perform endian conversion of pixel data */ void -- cgit v1.2.3