18 files changed, 7502 insertions, 7033 deletions
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 6552f1270..e2f73dc06 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 21)
-m4_define([pixman_micro], 3)
+m4_define([pixman_micro], 5)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 
diff --git a/pixman/pixman/pixman-arm-common.h b/pixman/pixman/pixman-arm-common.h
index ede63a629..372e9f9a8 100644
--- a/pixman/pixman/pixman-arm-common.h
+++ b/pixman/pixman/pixman-arm-common.h
@@ -1,316 +1,319 @@
-/*
- * Copyright © 2010 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-#ifndef PIXMAN_ARM_COMMON_H
-#define PIXMAN_ARM_COMMON_H
-
-#include "pixman-fast-path.h"
-
-/* Define some macros which can expand into proxy functions between
- * ARM assembly optimized functions and the rest of pixman fast path API.
- *
- * All the low level ARM assembly functions have to use ARM EABI
- * calling convention and take up to 8 arguments:
- *    width, height, dst, dst_stride, src, src_stride, mask, mask_stride
- *
- * The arguments are ordered with the most important coming first (the
- * first 4 arguments are passed to function in registers, the rest are
- * on stack). The last arguments are optional, for example if the
- * function is not using mask, then 'mask' and 'mask_stride' can be
- * omitted when doing a function call.
- *
- * Arguments 'src' and 'mask' contain either a pointer to the top left
- * pixel of the composited rectangle or a pixel color value depending
- * on the function type. In the case of just a color value (solid source
- * or mask), the corresponding stride argument is unused.
- */
-
-#define SKIP_ZERO_SRC  1
-#define SKIP_ZERO_MASK 2
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name,                \
-                                          src_type, src_cnt,            \
-                                          dst_type, dst_cnt)            \
-void                                                                    \
-pixman_composite_##name##_asm_##cputype (int32_t   w,                   \
-                                         int32_t   h,                   \
-                                         dst_type *dst,                 \
-                                         int32_t   dst_stride,          \
-                                         src_type *src,                 \
-                                         int32_t   src_stride);         \
-                                                                        \
-static void                                                             \
-cputype##_composite_##name (pixman_implementation_t *imp,               \
-                            pixman_op_t              op,                \
-                            pixman_image_t *         src_image,         \
-                            pixman_image_t *         mask_image,        \
-                            pixman_image_t *         dst_image,         \
-                            int32_t                  src_x,             \
-                            int32_t                  src_y,             \
-                            int32_t                  mask_x,            \
-                            int32_t                  mask_y,            \
-                            int32_t                  dest_x,            \
-                            int32_t                  dest_y,            \
-                            int32_t                  width,             \
-                            int32_t                  height)            \
-{                                                                       \
-    dst_type *dst_line;                                                 \
-    src_type *src_line;                                                 \
-    int32_t dst_stride, src_stride;                                     \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-                                                                        \
-    pixman_composite_##name##_asm_##cputype (width, height,             \
-                                             dst_line, dst_stride,      \
-                                             src_line, src_stride);     \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name,           \
-                                        dst_type, dst_cnt)              \
-void                                                                    \
-pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
-                                         int32_t    h,                  \
-                                         dst_type  *dst,                \
-                                         int32_t    dst_stride,         \
-                                         uint32_t   src);               \
-                                                                        \
-static void                                                             \
-cputype##_composite_##name (pixman_implementation_t *imp,               \
-                            pixman_op_t              op,                \
-                            pixman_image_t *         src_image,         \
-                            pixman_image_t *         mask_image,        \
-                            pixman_image_t *         dst_image,         \
-                            int32_t                  src_x,             \
-                            int32_t                  src_y,             \
-                            int32_t                  mask_x,            \
-                            int32_t                  mask_y,            \
-                            int32_t                  dest_x,            \
-                            int32_t                  dest_y,            \
-                            int32_t                  width,             \
-                            int32_t                  height)            \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    int32_t    dst_stride;                                              \
-    uint32_t   src;                                                     \
-                                                                        \
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
-                                                                        \
-    if ((flags & SKIP_ZERO_SRC) && src == 0)                            \
-	return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-                                                                        \
-    pixman_composite_##name##_asm_##cputype (width, height,             \
-                                             dst_line, dst_stride,      \
-                                             src);                      \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name,      \
-                                             mask_type, mask_cnt,       \
-                                             dst_type, dst_cnt)         \
-void                                                                    \
-pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
-                                         int32_t    h,                  \
-                                         dst_type  *dst,                \
-                                         int32_t    dst_stride,         \
-                                         uint32_t   src,                \
-                                         int32_t    unused,             \
-                                         mask_type *mask,               \
-                                         int32_t    mask_stride);       \
-                                                                        \
-static void                                                             \
-cputype##_composite_##name (pixman_implementation_t *imp,               \
-                            pixman_op_t              op,                \
-                            pixman_image_t *         src_image,         \
-                            pixman_image_t *         mask_image,        \
-                            pixman_image_t *         dst_image,         \
-                            int32_t                  src_x,             \
-                            int32_t                  src_y,             \
-                            int32_t                  mask_x,            \
-                            int32_t                  mask_y,            \
-                            int32_t                  dest_x,            \
-                            int32_t                  dest_y,            \
-                            int32_t                  width,             \
-                            int32_t                  height)            \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    mask_type *mask_line;                                               \
-    int32_t    dst_stride, mask_stride;                                 \
-    uint32_t   src;                                                     \
-                                                                        \
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
-                                                                        \
-    if ((flags & SKIP_ZERO_SRC) && src == 0)                            \
-	return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
-                           mask_stride, mask_line, mask_cnt);           \
-                                                                        \
-    pixman_composite_##name##_asm_##cputype (width, height,             \
-                                             dst_line, dst_stride,      \
-                                             src, 0,                    \
-                                             mask_line, mask_stride);   \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name,       \
-                                            src_type, src_cnt,          \
-                                            dst_type, dst_cnt)          \
-void                                                                    \
-pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
-                                         int32_t    h,                  \
-                                         dst_type  *dst,                \
-                                         int32_t    dst_stride,         \
-                                         src_type  *src,                \
-                                         int32_t    src_stride,         \
-                                         uint32_t   mask);              \
-                                                                        \
-static void                                                             \
-cputype##_composite_##name (pixman_implementation_t *imp,               \
-                            pixman_op_t              op,                \
-                            pixman_image_t *         src_image,         \
-                            pixman_image_t *         mask_image,        \
-                            pixman_image_t *         dst_image,         \
-                            int32_t                  src_x,             \
-                            int32_t                  src_y,             \
-                            int32_t                  mask_x,            \
-                            int32_t                  mask_y,            \
-                            int32_t                  dest_x,            \
-                            int32_t                  dest_y,            \
-                            int32_t                  width,             \
-                            int32_t                  height)            \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    src_type  *src_line;                                                \
-    int32_t    dst_stride, src_stride;                                  \
-    uint32_t   mask;                                                    \
-                                                                        \
-    mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
-                                                                        \
-    if ((flags & SKIP_ZERO_MASK) && mask == 0)                          \
-	return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-                                                                        \
-    pixman_composite_##name##_asm_##cputype (width, height,             \
-                                             dst_line, dst_stride,      \
-                                             src_line, src_stride,      \
-                                             mask);                     \
-}
-
-#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name,           \
-                                               src_type, src_cnt,       \
-                                               mask_type, mask_cnt,     \
-                                               dst_type, dst_cnt)       \
-void                                                                    \
-pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
-                                         int32_t    h,                  \
-                                         dst_type  *dst,                \
-                                         int32_t    dst_stride,         \
-                                         src_type  *src,                \
-                                         int32_t    src_stride,         \
-                                         mask_type *mask,               \
-                                         int32_t    mask_stride);       \
-                                                                        \
-static void                                                             \
-cputype##_composite_##name (pixman_implementation_t *imp,               \
-                            pixman_op_t              op,                \
-                            pixman_image_t *         src_image,         \
-                            pixman_image_t *         mask_image,        \
-                            pixman_image_t *         dst_image,         \
-                            int32_t                  src_x,             \
-                            int32_t                  src_y,             \
-                            int32_t                  mask_x,            \
-                            int32_t                  mask_y,            \
-                            int32_t                  dest_x,            \
-                            int32_t                  dest_y,            \
-                            int32_t                  width,             \
-                            int32_t                  height)            \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    src_type  *src_line;                                                \
-    mask_type *mask_line;                                               \
-    int32_t    dst_stride, src_stride, mask_stride;                     \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
-                           mask_stride, mask_line, mask_cnt);           \
-                                                                        \
-    pixman_composite_##name##_asm_##cputype (width, height,             \
-                                             dst_line, dst_stride,      \
-                                             src_line, src_stride,      \
-                                             mask_line, mask_stride);   \
-}
-
-#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op,             \
-                                               src_type, dst_type)            \
-void                                                                          \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (                \
-                                                       int32_t        w,      \
-                                                       dst_type *     dst,    \
-                                                       src_type *     src,    \
-                                                       pixman_fixed_t vx,     \
-                                                       pixman_fixed_t unit_x);\
-                                                                              \
-static force_inline void                                                      \
-scaled_nearest_scanline_##cputype##_##name##_##op (dst_type *       pd,       \
-                                                   src_type *       ps,       \
-                                                   int32_t          w,        \
-                                                   pixman_fixed_t   vx,       \
-                                                   pixman_fixed_t   unit_x,   \
-                                                   pixman_fixed_t   max_vx)   \
-{                                                                             \
-    pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps,  \
-                                                                  vx, unit_x);\
-}                                                                             \
-                                                                              \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op,                         \
-                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
-                       src_type, dst_type, COVER)                             \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op,                          \
-                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
-                       src_type, dst_type, NONE)                              \
-FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op,                           \
-                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
-                       src_type, dst_type, PAD)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                      \
-    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                             \
-    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                              \
-    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
-
-#endif
+/*
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ */
+
+#ifndef PIXMAN_ARM_COMMON_H
+#define PIXMAN_ARM_COMMON_H
+
+#include "pixman-fast-path.h"
+
+/* Define some macros which can expand into proxy functions between
+ * ARM assembly optimized functions and the rest of pixman fast path API.
+ *
+ * All the low level ARM assembly functions have to use ARM EABI
+ * calling convention and take up to 8 arguments:
+ *    width, height, dst, dst_stride, src, src_stride, mask, mask_stride
+ *
+ * The arguments are ordered with the most important coming first (the
+ * first 4 arguments are passed to the function in registers, the rest
+ * are on the stack). The last arguments are optional, for example if the
+ * function is not using mask, then 'mask' and 'mask_stride' can be
+ * omitted when doing a function call.
+ *
+ * Arguments 'src' and 'mask' contain either a pointer to the top left
+ * pixel of the composited rectangle or a pixel color value depending
+ * on the function type. In the case of just a color value (solid source
+ * or mask), the corresponding stride argument is unused.
+ */
+
+#define SKIP_ZERO_SRC  1
+#define SKIP_ZERO_MASK 2
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name,                \
+                                          src_type, src_cnt,            \
+                                          dst_type, dst_cnt)            \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t   w,                   \
+                                         int32_t   h,                   \
+                                         dst_type *dst,                 \
+                                         int32_t   dst_stride,          \
+                                         src_type *src,                 \
+                                         int32_t   src_stride);         \
+/* Fast path entry point: forwards the request to the asm routine. */   \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type *dst_line;                                                 \
+    src_type *src_line;                                                 \
+    int32_t dst_stride, src_stride;                                     \
+    /* PIXMAN_IMAGE_GET_LINE sets the line pointer and stride locals. */\
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
+                           src_stride, src_line, src_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+    /* Asm argument order: w, h, dst, dst_stride, src, src_stride. */   \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src_line, src_stride);     \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name,           \
+                                        dst_type, dst_cnt)              \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
+                                         int32_t    h,                  \
+                                         dst_type  *dst,                \
+                                         int32_t    dst_stride,         \
+                                         uint32_t   src);               \
+/* Fast path entry point for solid-source compositing onto dst. */      \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type  *dst_line;                                                \
+    int32_t    dst_stride;                                              \
+    uint32_t   src;                                                     \
+    /* Solid source value, requested with the destination format. */    \
+    src = _pixman_image_get_solid (					\
+	imp, src_image, dst_image->bits.format);			\
+    /* With SKIP_ZERO_SRC set, a zero source value skips the call. */   \
+    if (((flags) & SKIP_ZERO_SRC) && src == 0)                          \
+	return;                                                         \
+                                                                        \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+    /* Asm arguments: w, h, dst, dst_stride, src (a color value). */    \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src);                      \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name,      \
+                                             mask_type, mask_cnt,       \
+                                             dst_type, dst_cnt)         \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
+                                         int32_t    h,                  \
+                                         dst_type  *dst,                \
+                                         int32_t    dst_stride,         \
+                                         uint32_t   src,                \
+                                         int32_t    unused,             \
+                                         mask_type *mask,               \
+                                         int32_t    mask_stride);       \
+/* Fast path entry point for solid source with an image mask. */        \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type  *dst_line;                                                \
+    mask_type *mask_line;                                               \
+    int32_t    dst_stride, mask_stride;                                 \
+    uint32_t   src;                                                     \
+    /* Solid source value, requested with the destination format. */    \
+    src = _pixman_image_get_solid (					\
+	imp, src_image, dst_image->bits.format);			\
+    /* With SKIP_ZERO_SRC set, a zero source value skips the call. */   \
+    if (((flags) & SKIP_ZERO_SRC) && src == 0)                          \
+	return;                                                         \
+                                                                        \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
+                           mask_stride, mask_line, mask_cnt);           \
+    /* The 0 fills the unused stride slot that follows a solid src. */  \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src, 0,                    \
+                                             mask_line, mask_stride);   \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(flags, cputype, name,       \
+                                            src_type, src_cnt,          \
+                                            dst_type, dst_cnt)          \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
+                                         int32_t    h,                  \
+                                         dst_type  *dst,                \
+                                         int32_t    dst_stride,         \
+                                         src_type  *src,                \
+                                         int32_t    src_stride,         \
+                                         uint32_t   mask);              \
+/* C glue: fetch solid mask, locate rows, call the asm code */          \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type  *dst_line;                                                \
+    src_type  *src_line;                                                \
+    int32_t    dst_stride, src_stride;                                  \
+    uint32_t   mask;                                                    \
+    /* Reduce mask_image to a uint32_t via _pixman_image_get_solid */   \
+    mask = _pixman_image_get_solid (					\
+	imp, mask_image, dst_image->bits.format);			\
+    /* (flags) is parenthesized so compound flag arguments work */      \
+    if (((flags) & SKIP_ZERO_MASK) && mask == 0)                        \
+	return;                                                         \
+    /* PIXMAN_IMAGE_GET_LINE sets each line pointer and its stride */   \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
+                           src_stride, src_line, src_cnt);              \
+    /* The solid mask travels by value as the last asm argument */      \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src_line, src_stride,      \
+                                             mask);                     \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name,           \
+                                               src_type, src_cnt,       \
+                                               mask_type, mask_cnt,     \
+                                               dst_type, dst_cnt)       \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
+                                         int32_t    h,                  \
+                                         dst_type  *dst,                \
+                                         int32_t    dst_stride,         \
+                                         src_type  *src,                \
+                                         int32_t    src_stride,         \
+                                         mask_type *mask,               \
+                                         int32_t    mask_stride);       \
+/* C glue: locate dst, src and mask rows, then call the asm code */     \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type  *dst_line;                                                \
+    src_type  *src_line;                                                \
+    mask_type *mask_line;                                               \
+    int32_t    dst_stride, src_stride, mask_stride;                     \
+    /* PIXMAN_IMAGE_GET_LINE sets each line pointer and its stride */   \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
+                           src_stride, src_line, src_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
+                           mask_stride, mask_line, mask_cnt);           \
+    /* imp and op are accepted but not used by this wrapper */          \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src_line, src_stride,      \
+                                             mask_line, mask_stride);   \
+}
+
+#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST(cputype, name, op,             \
+                                               src_type, dst_type)            \
+void                                                                          \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (                \
+                                                       int32_t        w,      \
+                                                       dst_type *     dst,    \
+                                                       src_type *     src,    \
+                                                       pixman_fixed_t vx,     \
+                                                       pixman_fixed_t unit_x);\
+/* Scanline adapter: reorders arguments for the asm code, drops max_vx */     \
+static force_inline void                                                      \
+scaled_nearest_scanline_##cputype##_##name##_##op (dst_type *       pd,       \
+                                                   src_type *       ps,       \
+                                                   int32_t          w,        \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x,   \
+                                                   pixman_fixed_t   max_vx)   \
+{                                                                             \
+    pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps,  \
+                                                                  vx, unit_x);\
+}                                                                             \
+/* Instantiate FAST_NEAREST_MAINLOOP for COVER, NONE and PAD */               \
+FAST_NEAREST_MAINLOOP (cputype##_##name##_cover_##op,                         \
+                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
+                       src_type, dst_type, COVER)                             \
+FAST_NEAREST_MAINLOOP (cputype##_##name##_none_##op,                          \
+                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
+                       src_type, dst_type, NONE)                              \
+FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op,                           \
+                       scaled_nearest_scanline_##cputype##_##name##_##op,     \
+                       src_type, dst_type, PAD)
+
+/* Expands to three fast path table entries: the COVER, NONE and PAD variants */
+#define PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                      \
+    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                             \
+    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                              \
+    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
+
+#endif
diff --git a/pixman/pixman/pixman-bits-image.c b/pixman/pixman/pixman-bits-image.c
index c453e0ee6..a865d719a 100644
--- a/pixman/pixman/pixman-bits-image.c
+++ b/pixman/pixman/pixman-bits-image.c
@@ -35,43 +35,41 @@
 #include "pixman-private.h"
 #include "pixman-combine32.h"
 
-/* Store functions */
-void
-_pixman_image_store_scanline_32 (bits_image_t *  image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 const uint32_t *buffer)
+/*
+ * By default, just evaluate the image at 32bpp and expand.  Individual image
+ * types can plug in a better scanline getter if they want to.  For example,
+ * we could produce smoother gradients by evaluating them at higher color
+ * depth, but that's a project for the future.
+ */
+static void
+_pixman_image_get_scanline_generic_64 (pixman_image_t * image,
+                                       int              x,
+                                       int              y,
+                                       int              width,
+                                       uint32_t *       buffer,
+                                       const uint32_t * mask)
 {
-    image->store_scanline_32 (image, x, y, width, buffer);
+    uint32_t *mask8 = NULL;
 
-    if (image->common.alpha_map)
+    /* Contract the mask image, if one exists, so that the 32-bit fetch
+     * function can use it.
+     */
+    if (mask)
     {
-	x -= image->common.alpha_origin_x;
-	y -= image->common.alpha_origin_y;
+	mask8 = pixman_malloc_ab (width, sizeof(uint32_t));
+	if (!mask8)
+	    return; /* allocation failed: nothing is written to buffer */
 
-	image->common.alpha_map->store_scanline_32 (
-	    image->common.alpha_map, x, y, width, buffer);
+	pixman_contract (mask8, (uint64_t *)mask, width);
     }
-}
 
-void
-_pixman_image_store_scanline_64 (bits_image_t *  image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 const uint32_t *buffer)
-{
-    image->store_scanline_64 (image, x, y, width, buffer);
+    /* Fetch the source image into the first half of buffer. */
+    image->bits.get_scanline_32 (image, x, y, width, (uint32_t*)buffer, mask8);
 
-    if (image->common.alpha_map)
-    {
-	x -= image->common.alpha_origin_x;
-	y -= image->common.alpha_origin_y;
+    /* Expand from 32bpp to 64bpp in place. */
+    pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, width);
 
-	image->common.alpha_map->store_scanline_64 (
-	    image->common.alpha_map, x, y, width, buffer);
-    }
+    free (mask8);
 }
 
 /* Fetch functions */
@@ -297,6 +295,7 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima,
     uint32_t *bottom_row;
     uint32_t *end;
     uint32_t zero[2] = { 0, 0 };
+    uint32_t one = 1;
     int y, y1, y2;
     int disty;
     int mask_inc;
@@ -362,10 +361,8 @@ bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima,
      */
     if (!mask)
     {
-	uint32_t mask_bits = 1;
-
         mask_inc = 0;
-        mask = &mask_bits;
+        mask = &one;
     }
     else
     {
@@ -1337,8 +1334,8 @@ bits_image_property_changed (pixman_image_t *image)
 	if ((info->format == format || info->format == PIXMAN_any)	&&
 	    (info->flags & flags) == info->flags)
 	{
-	    image->common.get_scanline_32 = info->fetch_32;
-	    image->common.get_scanline_64 = info->fetch_64;
+	    image->bits.get_scanline_32 = info->fetch_32;
+	    image->bits.get_scanline_64 = info->fetch_64;
 	    break;
 	}
 
@@ -1347,6 +1344,176 @@ bits_image_property_changed (pixman_image_t *image)
 }
 
 static uint32_t *
+src_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+{
+    pixman_image_t *source = iter->image; /* row iter->y is fetched, then y++ */
+    source->bits.get_scanline_32 (
+	source, iter->x, iter->y++, iter->width, iter->buffer, mask);
+    return iter->buffer;
+}
+
+static uint32_t *
+src_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+{
+    pixman_image_t *source = iter->image; /* row iter->y is fetched, then y++ */
+    source->bits.get_scanline_64 (
+	source, iter->x, iter->y++, iter->width, iter->buffer, mask);
+    return iter->buffer;
+}
+
+void
+_pixman_bits_image_src_iter_init (pixman_image_t *image,
+				  pixman_iter_t *iter,
+				  int x, int y, int width, int height,
+				  uint8_t *buffer, iter_flags_t flags)
+{
+    /* Only the fetch callback is chosen here; ITER_NARROW selects narrow. */
+    iter->get_scanline = (flags & ITER_NARROW)
+	? src_get_scanline_narrow
+	: src_get_scanline_wide;
+}
+
+static uint32_t *
+dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+{
+    pixman_image_t *image  = iter->image;
+    int             x      = iter->x;
+    int             y      = iter->y;
+    int             width  = iter->width;
+    uint32_t *	    buffer = iter->buffer;
+    /* Fetch the current row into iter->buffer; iter->y is not advanced here. */
+    image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask);
+    if (image->common.alpha_map)
+    {
+	x -= image->common.alpha_origin_x;
+	y -= image->common.alpha_origin_y;
+	/* NOTE(review): same 'buffer' as the fetch above; confirm no overwrite. */
+	image->common.alpha_map->fetch_scanline_32 (
+	    (pixman_image_t *)image->common.alpha_map,
+	    x, y, width, buffer, mask);
+    }
+
+    return iter->buffer;
+}
+
+static uint32_t *
+dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+{
+    bits_image_t *  image  = &iter->image->bits;
+    int             x      = iter->x;
+    int             y      = iter->y;
+    int             width  = iter->width;
+    uint32_t *	    buffer = iter->buffer;
+    /* Fetch the current row into iter->buffer; iter->y is not advanced here. */
+    image->fetch_scanline_64 (
+	(pixman_image_t *)image, x, y, width, buffer, mask);
+    if (image->common.alpha_map)
+    {
+	x -= image->common.alpha_origin_x;
+	y -= image->common.alpha_origin_y;
+	/* NOTE(review): same 'buffer' as the fetch above; confirm no overwrite. */
+	image->common.alpha_map->fetch_scanline_64 (
+	    (pixman_image_t *)image->common.alpha_map, x, y, width, buffer, mask);
+    }
+
+    return iter->buffer;
+}
+
+static void
+dest_write_back_narrow (pixman_iter_t *iter)
+{
+    bits_image_t *dest = &iter->image->bits;
+
+    /* Commit the buffered row to the destination itself. */
+    dest->store_scanline_32 (
+	dest, iter->x, iter->y, iter->width, iter->buffer);
+
+    /* An alpha map, if present, gets the row at origin-relative coords. */
+    if (dest->common.alpha_map)
+    {
+	dest->common.alpha_map->store_scanline_32 (
+	    dest->common.alpha_map,
+	    iter->x - dest->common.alpha_origin_x,
+	    iter->y - dest->common.alpha_origin_y,
+	    iter->width, iter->buffer);
+    }
+
+    /* Step to the next row. */
+    iter->y++;
+}
+
+static void
+dest_write_back_wide (pixman_iter_t *iter)
+{
+    bits_image_t *dest = &iter->image->bits;
+
+    /* Commit the buffered row to the destination itself. */
+    dest->store_scanline_64 (
+	dest, iter->x, iter->y, iter->width, iter->buffer);
+
+    /* An alpha map, if present, gets the row at origin-relative coords. */
+    if (dest->common.alpha_map)
+    {
+	dest->common.alpha_map->store_scanline_64 (
+	    dest->common.alpha_map,
+	    iter->x - dest->common.alpha_origin_x,
+	    iter->y - dest->common.alpha_origin_y,
+	    iter->width, iter->buffer);
+    }
+
+    /* Step to the next row. */
+    iter->y++;
+}
+
+static void
+dest_write_back_direct (pixman_iter_t *iter)
+{
+    iter->buffer += iter->image->bits.rowstride; /* step to the next row */
+}
+
+void
+_pixman_bits_image_dest_iter_init (pixman_image_t *image,
+				   pixman_iter_t *iter,
+				   int x, int y, int width, int height,
+				   uint8_t *buffer, iter_flags_t flags)
+{
+    if (flags & ITER_NARROW)
+    {
+	if (((image->common.flags &
+	      (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) ==
+	     (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) &&
+	    (image->bits.format == PIXMAN_a8r8g8b8	||
+	     (image->bits.format == PIXMAN_x8r8g8b8	&&
+	      (flags & ITER_LOCALIZED_ALPHA))))
+	{   /* direct path: the iterator buffer points into the image bits */
+	    iter->buffer = image->bits.bits + y * image->bits.rowstride + x;
+	    /* Nothing to fetch or store; write-back only moves the pointer. */
+	    iter->get_scanline = _pixman_iter_get_scanline_noop;
+	    iter->write_back = dest_write_back_direct;
+	}
+	else
+	{
+	    if ((flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
+		(ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
+	    {   /* both RGB and alpha are ignored: skip the fetch */
+		iter->get_scanline = _pixman_iter_get_scanline_noop;
+	    }
+	    else
+	    {
+		iter->get_scanline = dest_get_scanline_narrow;
+	    }
+	    /* Either way the row is stored back by dest_write_back_narrow. */
+	    iter->write_back = dest_write_back_narrow;
+	}
+    }
+    else
+    {   /* not ITER_NARROW: always use the wide fetch and write-back */
+	iter->get_scanline = dest_get_scanline_wide;
+	iter->write_back = dest_write_back_wide;
+    }
+}
+
+static uint32_t *
 create_bits (pixman_format_code_t format,
              int                  width,
              int                  height,
diff --git a/pixman/pixman/pixman-conical-gradient.c b/pixman/pixman/pixman-conical-gradient.c
index 769b984d1..9d7d2e8b5 100644
--- a/pixman/pixman/pixman-conical-gradient.c
+++ b/pixman/pixman/pixman-conical-gradient.c
@@ -1,197 +1,214 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdlib.h>
-#include <math.h>
-#include "pixman-private.h"
-
-static force_inline double
-coordinates_to_parameter (double x, double y, double angle)
-{
-    double t;
-
-    t = atan2 (y, x) + angle;
-
-    while (t < 0)
-	t += 2 * M_PI;
-
-    while (t >= 2 * M_PI)
-	t -= 2 * M_PI;
-
-    return 1 - t * (1 / (2 * M_PI)); /* Scale t to [0, 1] and
-				      * make rotation CCW
-				      */
-}
-
-static void
-conical_gradient_get_scanline_32 (pixman_image_t *image,
-                                  int             x,
-                                  int             y,
-                                  int             width,
-                                  uint32_t *      buffer,
-                                  const uint32_t *mask)
-{
-    gradient_t *gradient = (gradient_t *)image;
-    conical_gradient_t *conical = (conical_gradient_t *)image;
-    uint32_t       *end = buffer + width;
-    pixman_gradient_walker_t walker;
-    pixman_bool_t affine = TRUE;
-    double cx = 1.;
-    double cy = 0.;
-    double cz = 0.;
-    double rx = x + 0.5;
-    double ry = y + 0.5;
-    double rz = 1.;
-
-    _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
-
-    if (image->common.transform)
-    {
-	pixman_vector_t v;
-
-	/* reference point is the center of the pixel */
-	v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
-	v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
-	v.vector[2] = pixman_fixed_1;
-
-	if (!pixman_transform_point_3d (image->common.transform, &v))
-	    return;
-
-	cx = image->common.transform->matrix[0][0] / 65536.;
-	cy = image->common.transform->matrix[1][0] / 65536.;
-	cz = image->common.transform->matrix[2][0] / 65536.;
-
-	rx = v.vector[0] / 65536.;
-	ry = v.vector[1] / 65536.;
-	rz = v.vector[2] / 65536.;
-
-	affine =
-	    image->common.transform->matrix[2][0] == 0 &&
-	    v.vector[2] == pixman_fixed_1;
-    }
-
-    if (affine)
-    {
-	rx -= conical->center.x / 65536.;
-	ry -= conical->center.y / 65536.;
-
-	while (buffer < end)
-	{
-	    if (!mask || *mask++)
-	    {
-		double t = coordinates_to_parameter (rx, ry, conical->angle);
-
-		*buffer = _pixman_gradient_walker_pixel (
-		    &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
-	    }
-
-	    ++buffer;
-
-	    rx += cx;
-	    ry += cy;
-	}
-    }
-    else
-    {
-	while (buffer < end)
-	{
-	    double x, y;
-
-	    if (!mask || *mask++)
-	    {
-		double t;
-
-		if (rz != 0)
-		{
-		    x = rx / rz;
-		    y = ry / rz;
-		}
-		else
-		{
-		    x = y = 0.;
-		}
-
-		x -= conical->center.x / 65536.;
-		y -= conical->center.y / 65536.;
-
-		t = coordinates_to_parameter (x, y, conical->angle);
-
-		*buffer = _pixman_gradient_walker_pixel (
-		    &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
-	    }
-
-	    ++buffer;
-
-	    rx += cx;
-	    ry += cy;
-	    rz += cz;
-	}
-    }
-}
-
-static void
-conical_gradient_property_changed (pixman_image_t *image)
-{
-    image->common.get_scanline_32 = conical_gradient_get_scanline_32;
-    image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64;
-}
-
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_conical_gradient (pixman_point_fixed_t *        center,
-                                      pixman_fixed_t                angle,
-                                      const pixman_gradient_stop_t *stops,
-                                      int                           n_stops)
-{
-    pixman_image_t *image = _pixman_image_allocate ();
-    conical_gradient_t *conical;
-
-    if (!image)
-	return NULL;
-
-    conical = &image->conical;
-
-    if (!_pixman_init_gradient (&conical->common, stops, n_stops))
-    {
-	free (image);
-	return NULL;
-    }
-
-    angle = MOD (angle, pixman_int_to_fixed (360));
-
-    image->type = CONICAL;
-
-    conical->center = *center;
-    conical->angle = (pixman_fixed_to_double (angle) / 180.0) * M_PI;
-
-    image->common.property_changed = conical_gradient_property_changed;
-
-    return image;
-}
-
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <math.h>
+#include "pixman-private.h"
+
+static force_inline double
+coordinates_to_parameter (double x, double y, double angle)
+{
+    double t;
+    /* Angle of (x, y) about the origin, offset by 'angle' (radians). */
+    t = atan2 (y, x) + angle;
+    /* Wrap t into [0, 2*pi). */
+    while (t < 0)
+	t += 2 * M_PI;
+
+    while (t >= 2 * M_PI)
+	t -= 2 * M_PI;
+
+    return 1 - t * (1 / (2 * M_PI)); /* Scale t to [0, 1] and
+				      * make rotation CCW
+				      */
+}
+
+static uint32_t *
+conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+{
+    pixman_image_t *image = iter->image;
+    int x = iter->x;
+    int y = iter->y;
+    int width = iter->width;
+    uint32_t *buffer = iter->buffer;
+    /* Render row iter->y into iter->buffer; zero-mask pixels are skipped. */
+    gradient_t *gradient = (gradient_t *)image;
+    conical_gradient_t *conical = (conical_gradient_t *)image;
+    uint32_t       *end = buffer + width;
+    pixman_gradient_walker_t walker;
+    pixman_bool_t affine = TRUE;
+    double cx = 1.;
+    double cy = 0.;
+    double cz = 0.;
+    double rx = x + 0.5;
+    double ry = y + 0.5;
+    double rz = 1.;
+    /* (rx, ry, rz) is the sample point, (cx, cy, cz) its step per pixel. */
+    _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
+
+    if (image->common.transform)
+    {
+	pixman_vector_t v;
+
+	/* reference point is the center of the pixel */
+	v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
+	v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
+	v.vector[2] = pixman_fixed_1;
+	/* A failed transform leaves the row untouched but still advances y. */
+	if (!pixman_transform_point_3d (image->common.transform, &v))
+	    goto done;
+
+	cx = image->common.transform->matrix[0][0] / 65536.;
+	cy = image->common.transform->matrix[1][0] / 65536.;
+	cz = image->common.transform->matrix[2][0] / 65536.;
+
+	rx = v.vector[0] / 65536.;
+	ry = v.vector[1] / 65536.;
+	rz = v.vector[2] / 65536.;
+
+	affine =
+	    image->common.transform->matrix[2][0] == 0 &&
+	    v.vector[2] == pixman_fixed_1;
+    }
+
+    if (affine)
+    {
+	rx -= conical->center.x / 65536.;
+	ry -= conical->center.y / 65536.;
+
+	while (buffer < end)
+	{
+	    if (!mask || *mask++)
+	    {
+		double t = coordinates_to_parameter (rx, ry, conical->angle);
+
+		*buffer = _pixman_gradient_walker_pixel (
+		    &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
+	    }
+
+	    ++buffer;
+
+	    rx += cx;
+	    ry += cy;
+	}
+    }
+    else
+    {
+	while (buffer < end)
+	{
+	    double x, y;
+
+	    if (!mask || *mask++)
+	    {
+		double t;
+
+		if (rz != 0)
+		{
+		    x = rx / rz;
+		    y = ry / rz;
+		}
+		else
+		{
+		    x = y = 0.;
+		}
+
+		x -= conical->center.x / 65536.;
+		y -= conical->center.y / 65536.;
+
+		t = coordinates_to_parameter (x, y, conical->angle);
+
+		*buffer = _pixman_gradient_walker_pixel (
+		    &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
+	    }
+
+	    ++buffer;
+
+	    rx += cx;
+	    ry += cy;
+	    rz += cz;
+	}
+    }
+done:
+    iter->y++;
+    return iter->buffer;
+}
+
+static uint32_t *
+conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+{
+    /* Render narrow first (the mask is not forwarded), then widen in place. */
+    uint32_t *narrow = conical_get_scanline_narrow (iter, NULL);
+    uint64_t *wide = (uint64_t *)narrow;
+    pixman_expand (wide, narrow, PIXMAN_a8r8g8b8, iter->width);
+    return narrow;
+}
+
+void
+_pixman_conical_gradient_iter_init (pixman_image_t *image,
+				    pixman_iter_t *iter,
+				    int x, int y, int width, int height,
+				    uint8_t *buffer, iter_flags_t flags)
+{
+    /* Only the fetch callback is chosen here; ITER_NARROW selects narrow. */
+    iter->get_scanline = (flags & ITER_NARROW)
+	? conical_get_scanline_narrow
+	: conical_get_scanline_wide;
+}
+
+PIXMAN_EXPORT pixman_image_t *
+pixman_image_create_conical_gradient (pixman_point_fixed_t *        center,
+                                      pixman_fixed_t                angle,
+                                      const pixman_gradient_stop_t *stops,
+                                      int                           n_stops)
+{
+    pixman_image_t *image = _pixman_image_allocate ();
+    conical_gradient_t *conical;
+    /* Returns NULL when allocation or gradient initialization fails. */
+    if (!image)
+	return NULL;
+
+    conical = &image->conical;
+    /* Initialize via _pixman_init_gradient; free the image on failure. */
+    if (!_pixman_init_gradient (&conical->common, stops, n_stops))
+    {
+	free (image);
+	return NULL;
+    }
+    /* Reduce the angle with MOD against 360 in fixed point. */
+    angle = MOD (angle, pixman_int_to_fixed (360));
+
+    image->type = CONICAL;
+    /* 'center' is dereferenced without a NULL check; angle becomes radians. */
+    conical->center = *center;
+    conical->angle = (pixman_fixed_to_double (angle) / 180.0) * M_PI;
+
+    return image;
+}
+
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c
index f103b4cf1..868175f60 100644
--- a/pixman/pixman/pixman-fast-path.c
+++ b/pixman/pixman/pixman-fast-path.c
@@ -188,7 +188,7 @@ fast_composite_in_n_8_8 (pixman_implementation_t *imp,
     int32_t w;
     uint16_t t;
 
-    src = _pixman_image_get_solid (src_image, dest_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
     srca = src >> 24;
 
@@ -312,7 +312,7 @@ fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
     int dst_stride, mask_stride;
     int32_t w;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -370,7 +370,7 @@ fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
     int dst_stride, mask_stride;
     int32_t w;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -427,7 +427,7 @@ fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
     int dst_stride, mask_stride;
     int32_t w;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -494,7 +494,7 @@ fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
     int dst_stride, mask_stride;
     int32_t w;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -559,7 +559,7 @@ fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
     int dst_stride, mask_stride;
     int32_t w;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -626,7 +626,7 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
     int dst_stride, mask_stride;
     int32_t w;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -1034,7 +1034,7 @@ fast_composite_add_n_8_8 (pixman_implementation_t *imp,
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
     sa = (src >> 24);
 
     while (height--)
@@ -1146,7 +1146,7 @@ fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
     if (width <= 0)
 	return;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
     srca = src >> 24;
     if (src == 0)
 	return;
@@ -1240,7 +1240,7 @@ fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
     if (width <= 0)
 	return;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
     srca = src >> 24;
     if (src == 0)
 	return;
@@ -1332,7 +1332,7 @@ fast_composite_solid_fill (pixman_implementation_t *imp,
 {
     uint32_t src;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     if (dst_image->bits.format == PIXMAN_a1)
     {
diff --git a/pixman/pixman/pixman-general.c b/pixman/pixman/pixman-general.c
index 5a639051b..16ea3a457 100644
--- a/pixman/pixman/pixman-general.c
+++ b/pixman/pixman/pixman-general.c
@@ -1,315 +1,313 @@
-/*
- * Copyright © 2009 Red Hat, Inc.
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *             2008 Aaron Plattner, NVIDIA Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Red Hat not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  Red Hat makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-private.h"
-
-#define SCANLINE_BUFFER_LENGTH 8192
-
-static void
-general_composite_rect  (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         pixman_image_t *         src,
-                         pixman_image_t *         mask,
-                         pixman_image_t *         dest,
-                         int32_t                  src_x,
-                         int32_t                  src_y,
-                         int32_t                  mask_x,
-                         int32_t                  mask_y,
-                         int32_t                  dest_x,
-                         int32_t                  dest_y,
-                         int32_t                  width,
-                         int32_t                  height)
-{
-    uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8];
-    uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;
-    uint8_t *src_buffer, *mask_buffer, *dest_buffer;
-    fetch_scanline_t fetch_src = NULL, fetch_mask = NULL, fetch_dest = NULL;
-    pixman_combine_32_func_t compose;
-    store_scanline_t store;
-    source_image_class_t src_class, mask_class;
-    pixman_bool_t component_alpha;
-    uint32_t *bits;
-    int32_t stride;
-    int narrow, Bpp;
-    int i;
-
-    narrow =
-	(src->common.flags & FAST_PATH_NARROW_FORMAT)		&&
-	(!mask || mask->common.flags & FAST_PATH_NARROW_FORMAT)	&&
-	(dest->common.flags & FAST_PATH_NARROW_FORMAT);
-    Bpp = narrow ? 4 : 8;
-
-    if (width * Bpp > SCANLINE_BUFFER_LENGTH)
-    {
-	scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
-
-	if (!scanline_buffer)
-	    return;
-    }
-
-    src_buffer = scanline_buffer;
-    mask_buffer = src_buffer + width * Bpp;
-    dest_buffer = mask_buffer + width * Bpp;
-
-    src_class = _pixman_image_classify (src,
-                                        src_x, src_y,
-                                        width, height);
-
-    mask_class = SOURCE_IMAGE_CLASS_UNKNOWN;
-
-    if (mask)
-    {
-	mask_class = _pixman_image_classify (mask,
-	                                     src_x, src_y,
-	                                     width, height);
-    }
-
-    if (op == PIXMAN_OP_CLEAR)
-	fetch_src = NULL;
-    else if (narrow)
-	fetch_src = _pixman_image_get_scanline_32;
-    else
-	fetch_src = _pixman_image_get_scanline_64;
-
-    if (!mask || op == PIXMAN_OP_CLEAR)
-	fetch_mask = NULL;
-    else if (narrow)
-	fetch_mask = _pixman_image_get_scanline_32;
-    else
-	fetch_mask = _pixman_image_get_scanline_64;
-
-    if (op == PIXMAN_OP_CLEAR || op == PIXMAN_OP_SRC)
-	fetch_dest = NULL;
-    else if (narrow)
-	fetch_dest = _pixman_image_get_scanline_32;
-    else
-	fetch_dest = _pixman_image_get_scanline_64;
-
-    if (narrow)
-	store = _pixman_image_store_scanline_32;
-    else
-	store = _pixman_image_store_scanline_64;
-
-    /* Skip the store step and composite directly into the
-     * destination if the output format of the compose func matches
-     * the destination format.
-     *
-     * If the destination format is a8r8g8b8 then we can always do
-     * this. If it is x8r8g8b8, then we can only do it if the
-     * operator doesn't make use of destination alpha.
-     */
-    if ((dest->bits.format == PIXMAN_a8r8g8b8)	||
-	(dest->bits.format == PIXMAN_x8r8g8b8	&&
-	 (op == PIXMAN_OP_OVER		||
-	  op == PIXMAN_OP_ADD		||
-	  op == PIXMAN_OP_SRC		||
-	  op == PIXMAN_OP_CLEAR		||
-	  op == PIXMAN_OP_IN_REVERSE	||
-	  op == PIXMAN_OP_OUT_REVERSE	||
-	  op == PIXMAN_OP_DST)))
-    {
-	if (narrow &&
-	    !dest->common.alpha_map &&
-	    !dest->bits.write_func)
-	{
-	    store = NULL;
-	}
-    }
-
-    if (!store)
-    {
-	bits = dest->bits.bits;
-	stride = dest->bits.rowstride;
-    }
-    else
-    {
-	bits = NULL;
-	stride = 0;
-    }
-
-    component_alpha =
-        fetch_src                       &&
-        fetch_mask                      &&
-        mask                            &&
-        mask->common.type == BITS       &&
-        mask->common.component_alpha    &&
-        PIXMAN_FORMAT_RGB (mask->bits.format);
-
-    if (narrow)
-    {
-	if (component_alpha)
-	    compose = _pixman_implementation_combine_32_ca;
-	else
-	    compose = _pixman_implementation_combine_32;
-    }
-    else
-    {
-	if (component_alpha)
-	    compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca;
-	else
-	    compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64;
-    }
-
-    if (!compose)
-	return;
-
-    if (!fetch_mask)
-	mask_buffer = NULL;
-
-    for (i = 0; i < height; ++i)
-    {
-	/* fill first half of scanline with source */
-	if (fetch_src)
-	{
-	    if (fetch_mask)
-	    {
-		/* fetch mask before source so that fetching of
-		   source can be optimized */
-		fetch_mask (mask, mask_x, mask_y + i,
-		            width, (void *)mask_buffer, 0);
-
-		if (mask_class == SOURCE_IMAGE_CLASS_HORIZONTAL)
-		    fetch_mask = NULL;
-	    }
-
-	    if (src_class == SOURCE_IMAGE_CLASS_HORIZONTAL)
-	    {
-		fetch_src (src, src_x, src_y + i,
-		           width, (void *)src_buffer, 0);
-		fetch_src = NULL;
-	    }
-	    else
-	    {
-		fetch_src (src, src_x, src_y + i,
-		           width, (void *)src_buffer, (void *)mask_buffer);
-	    }
-	}
-	else if (fetch_mask)
-	{
-	    fetch_mask (mask, mask_x, mask_y + i,
-	                width, (void *)mask_buffer, 0);
-	}
-
-	if (store)
-	{
-	    /* fill dest into second half of scanline */
-	    if (fetch_dest)
-	    {
-		fetch_dest (dest, dest_x, dest_y + i,
-		            width, (void *)dest_buffer, 0);
-	    }
-
-	    /* blend */
-	    compose (imp->toplevel, op,
-		     (void *)dest_buffer,
-		     (void *)src_buffer,
-		     (void *)mask_buffer,
-		     width);
-
-	    /* write back */
-	    store (&(dest->bits), dest_x, dest_y + i, width,
-	           (void *)dest_buffer);
-	}
-	else
-	{
-	    /* blend */
-	    compose (imp->toplevel, op,
-		     bits + (dest_y + i) * stride + dest_x,
-	             (void *)src_buffer, (void *)mask_buffer, width);
-	}
-    }
-
-    if (scanline_buffer != (uint8_t *) stack_scanline_buffer)
-	free (scanline_buffer);
-}
-
-static const pixman_fast_path_t general_fast_path[] =
-{
-    { PIXMAN_OP_any, PIXMAN_any, 0, PIXMAN_any,	0, PIXMAN_any, 0, general_composite_rect },
-    { PIXMAN_OP_NONE }
-};
-
-static pixman_bool_t
-general_blt (pixman_implementation_t *imp,
-             uint32_t *               src_bits,
-             uint32_t *               dst_bits,
-             int                      src_stride,
-             int                      dst_stride,
-             int                      src_bpp,
-             int                      dst_bpp,
-             int                      src_x,
-             int                      src_y,
-             int                      dst_x,
-             int                      dst_y,
-             int                      width,
-             int                      height)
-{
-    /* We can't blit unless we have sse2 or mmx */
-
-    return FALSE;
-}
-
-static pixman_bool_t
-general_fill (pixman_implementation_t *imp,
-              uint32_t *               bits,
-              int                      stride,
-              int                      bpp,
-              int                      x,
-              int                      y,
-              int                      width,
-              int                      height,
-              uint32_t xor)
-{
-    return FALSE;
-}
-
-pixman_implementation_t *
-_pixman_implementation_create_general (void)
-{
-    pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
-
-    _pixman_setup_combiner_functions_32 (imp);
-    _pixman_setup_combiner_functions_64 (imp);
-
-    imp->blt = general_blt;
-    imp->fill = general_fill;
-
-    return imp;
-}
-
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *             2008 Aaron Plattner, NVIDIA Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Red Hat makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+#include "pixman-private.h"
+
+static void
+general_src_iter_init (pixman_implementation_t *imp,
+		       pixman_iter_t *iter,
+		       pixman_image_t *image,
+		       int x, int y, int width, int height,
+		       uint8_t *buffer, iter_flags_t flags)
+{
+    /* Record the common iterator state, then let the initializer that
+     * matches the image type finish the setup.
+     */
+    iter->image = image;
+    iter->x = x;
+    iter->y = y;
+    iter->width = width;
+    iter->buffer = (uint32_t *)buffer;
+
+    switch (image->type)
+    {
+    case SOLID:
+	_pixman_solid_fill_iter_init (
+	    image, iter, x, y, width, height, buffer, flags);
+	break;
+    case LINEAR:
+	_pixman_linear_gradient_iter_init (
+	    image, iter, x, y, width, height, buffer, flags);
+	break;
+    case RADIAL:
+	_pixman_radial_gradient_iter_init (
+	    image, iter, x, y, width, height, buffer, flags);
+	break;
+    case CONICAL:
+	_pixman_conical_gradient_iter_init (
+	    image, iter, x, y, width, height, buffer, flags);
+	break;
+    case BITS:
+	_pixman_bits_image_src_iter_init (
+	    image, iter, x, y, width, height, buffer, flags);
+	break;
+    default:
+	_pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
+	break;
+    }
+}
+
+static void
+general_dest_iter_init (pixman_implementation_t *imp,
+			pixman_iter_t *iter,
+			pixman_image_t *image,
+			int x, int y, int width, int height,
+			uint8_t *buffer, iter_flags_t flags)
+{
+    /* Common iterator state, recorded for every image type. */
+    iter->image = image;
+    iter->x = x;
+    iter->y = y;
+    iter->width = width;
+    iter->buffer = (uint32_t *)buffer;
+
+    /* Only BITS images are handed to a destination initializer; any
+     * other type is reported through _pixman_log_error instead.
+     */
+    if (image->type != BITS)
+	_pixman_log_error (FUNC, "Trying to write to a non-writable image");
+    else
+	_pixman_bits_image_dest_iter_init (
+	    image, iter, x, y, width, height, buffer, flags);
+}
+
+/* Iterator flags for one operator: one value for src, one for dst. */
+typedef struct op_info_t
+{
+    uint8_t src, dst;
+} op_info_t;
+
+#define ITER_IGNORE_BOTH						\
+    (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_LOCALIZED_ALPHA) /* all three flags OR'ed together */
+
+static const op_info_t op_flags[PIXMAN_N_OPERATORS] =
+{
+    /* Src flags             Dst flags             */ /* one row per operator; read as op_flags[op] */
+    { ITER_IGNORE_BOTH,      ITER_IGNORE_BOTH      }, /* CLEAR */
+    { ITER_LOCALIZED_ALPHA,  ITER_IGNORE_BOTH      }, /* SRC */
+    { ITER_IGNORE_BOTH,      ITER_LOCALIZED_ALPHA  }, /* DST */
+    { 0,                     ITER_LOCALIZED_ALPHA  }, /* OVER */
+    { ITER_LOCALIZED_ALPHA,  0                     }, /* OVER_REVERSE */
+    { ITER_LOCALIZED_ALPHA,  ITER_IGNORE_RGB       }, /* IN */
+    { ITER_IGNORE_RGB,       ITER_LOCALIZED_ALPHA  }, /* IN_REVERSE */
+    { ITER_LOCALIZED_ALPHA,  ITER_IGNORE_RGB       }, /* OUT */
+    { ITER_IGNORE_RGB,       ITER_LOCALIZED_ALPHA  }, /* OUT_REVERSE */
+    { 0,                     0                     }, /* ATOP */
+    { 0,                     0                     }, /* ATOP_REVERSE */
+    { 0,                     0                     }, /* XOR */
+    { ITER_LOCALIZED_ALPHA,  ITER_LOCALIZED_ALPHA  }, /* ADD */
+    { 0,                     0                     }, /* SATURATE */
+};
+
+#define SCANLINE_BUFFER_LENGTH 8192
+
+/* Composite a width x height rectangle one scanline at a time: fetch mask,
+ * source and dest through iterators, combine them, then write the row back.
+ */
+static void
+general_composite_rect  (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         pixman_image_t *         src,
+                         pixman_image_t *         mask,
+                         pixman_image_t *         dest,
+                         int32_t                  src_x,
+                         int32_t                  src_y,
+                         int32_t                  mask_x,
+                         int32_t                  mask_y,
+                         int32_t                  dest_x,
+                         int32_t                  dest_y,
+                         int32_t                  width,
+                         int32_t                  height)
+{
+    uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8];
+    uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;
+    uint8_t *src_buffer, *mask_buffer, *dest_buffer;
+    pixman_iter_t src_iter, mask_iter, dest_iter;
+    pixman_combine_32_func_t compose;
+    pixman_bool_t component_alpha;
+    iter_flags_t narrow, src_flags;
+    int Bpp, i;
+
+    if ((src->common.flags & FAST_PATH_NARROW_FORMAT)		&&
+	(!mask || mask->common.flags & FAST_PATH_NARROW_FORMAT)	&&
+	(dest->common.flags & FAST_PATH_NARROW_FORMAT))
+    {
+	narrow = ITER_NARROW;
+	Bpp = 4;
+    }
+    else
+    {
+	narrow = 0;
+	Bpp = 8;
+    }
+
+    if (width * Bpp > SCANLINE_BUFFER_LENGTH)
+    {
+	scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
+
+	if (!scanline_buffer)
+	    return;
+    }
+
+    src_buffer = scanline_buffer;
+    mask_buffer = src_buffer + width * Bpp;
+    dest_buffer = mask_buffer + width * Bpp;
+
+    /* src iter: narrow/wide flag combined with the operator's src flags */
+    src_flags = narrow | op_flags[op].src;
+
+    _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src,
+					  src_x, src_y, width, height,
+					  src_buffer, src_flags);
+
+    /* mask iter: the mask is dropped when src alpha and RGB are both ignored */
+    if ((src_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
+	(ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
+    {
+	/* If it doesn't matter what the source is, then it doesn't matter
+	 * what the mask is */
+	mask = NULL;
+    }
+
+    component_alpha =
+        mask                            &&
+        mask->common.type == BITS       &&
+        mask->common.component_alpha    &&
+        PIXMAN_FORMAT_RGB (mask->bits.format);
+
+    _pixman_implementation_src_iter_init (
+	imp->toplevel, &mask_iter, mask, mask_x, mask_y, width, height,
+	mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB));
+
+    /* dest iter: narrow/wide flag combined with the operator's dst flags */
+    _pixman_implementation_dest_iter_init (imp->toplevel, &dest_iter, dest,
+					   dest_x, dest_y, width, height,
+					   dest_buffer,
+					   narrow | op_flags[op].dst);
+
+    if (narrow)
+    {
+	if (component_alpha)
+	    compose = _pixman_implementation_combine_32_ca;
+	else
+	    compose = _pixman_implementation_combine_32;
+    }
+    else
+    {
+	if (component_alpha)
+	    compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca;
+	else
+	    compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64;
+    }
+
+    /* With no combiner the loop body never runs, but control still
+     * reaches the cleanup below so a heap scanline buffer is freed. */
+    for (i = 0; compose && i < height; ++i)
+    {
+	uint32_t *s, *m, *d;
+
+	m = mask_iter.get_scanline (&mask_iter, NULL);
+	s = src_iter.get_scanline (&src_iter, m);
+	d = dest_iter.get_scanline (&dest_iter, NULL);
+
+	compose (imp->toplevel, op, d, s, m, width);
+
+	dest_iter.write_back (&dest_iter);
+    }
+
+    if (scanline_buffer != (uint8_t *) stack_scanline_buffer)
+	free (scanline_buffer);
+}
+
+static const pixman_fast_path_t general_fast_path[] =
+{
+    { PIXMAN_OP_any, PIXMAN_any, 0, PIXMAN_any,	0, PIXMAN_any, 0, general_composite_rect }, /* presumably matches every op/format -- confirm */
+    { PIXMAN_OP_NONE } /* last entry; presumably the end-of-table marker -- confirm */
+};
+
+static pixman_bool_t
+general_blt (pixman_implementation_t *imp,
+             uint32_t *               src_bits,
+             uint32_t *               dst_bits,
+             int                      src_stride,
+             int                      dst_stride,
+             int                      src_bpp,
+             int                      dst_bpp,
+             int                      src_x,
+             int                      src_y,
+             int                      dst_x,
+             int                      dst_y,
+             int                      width,
+             int                      height)
+{
+    /* We can't blit unless we have sse2 or mmx, so this version
+     * ignores every argument and always returns FALSE. */
+    return FALSE;
+}
+
+static pixman_bool_t
+general_fill (pixman_implementation_t *imp,
+              uint32_t *               bits,
+              int                      stride,
+              int                      bpp,
+              int                      x,
+              int                      y,
+              int                      width,
+              int                      height,
+              uint32_t xor)
+{
+    return FALSE; /* no fill is performed here; bits is never touched */
+}
+
+pixman_implementation_t *
+_pixman_implementation_create_general (void)
+{
+    pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
+    /* NOTE(review): imp is used below without a NULL check -- confirm the create call cannot fail */
+    _pixman_setup_combiner_functions_32 (imp);
+    _pixman_setup_combiner_functions_64 (imp);
+    /* Install this file's blt/fill stubs and its src/dest iterator initializers. */
+    imp->blt = general_blt;
+    imp->fill = general_fill;
+    imp->src_iter_init = general_src_iter_init;
+    imp->dest_iter_init = general_dest_iter_init;
+
+    return imp;
+}
+
diff --git a/pixman/pixman/pixman-image.c b/pixman/pixman/pixman-image.c
index 48faa3a10..a72299b3c 100644
--- a/pixman/pixman/pixman-image.c
+++ b/pixman/pixman/pixman-image.c
@@ -1,815 +1,747 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-
-pixman_bool_t
-_pixman_init_gradient (gradient_t *                  gradient,
-                       const pixman_gradient_stop_t *stops,
-                       int                           n_stops)
-{
-    return_val_if_fail (n_stops > 0, FALSE);
-
-    gradient->stops = pixman_malloc_ab (n_stops, sizeof (pixman_gradient_stop_t));
-    if (!gradient->stops)
-	return FALSE;
-
-    memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t));
-
-    gradient->n_stops = n_stops;
-
-    return TRUE;
-}
-
-/*
- * By default, just evaluate the image at 32bpp and expand.  Individual image
- * types can plug in a better scanline getter if they want to. For example
- * we  could produce smoother gradients by evaluating them at higher color
- * depth, but that's a project for the future.
- */
-void
-_pixman_image_get_scanline_generic_64 (pixman_image_t * image,
-                                       int              x,
-                                       int              y,
-                                       int              width,
-                                       uint32_t *       buffer,
-                                       const uint32_t * mask)
-{
-    uint32_t *mask8 = NULL;
-
-    /* Contract the mask image, if one exists, so that the 32-bit fetch
-     * function can use it.
-     */
-    if (mask)
-    {
-	mask8 = pixman_malloc_ab (width, sizeof(uint32_t));
-	if (!mask8)
-	    return;
-
-	pixman_contract (mask8, (uint64_t *)mask, width);
-    }
-
-    /* Fetch the source image into the first half of buffer. */
-    _pixman_image_get_scanline_32 (image, x, y, width, (uint32_t*)buffer, mask8);
-
-    /* Expand from 32bpp to 64bpp in place. */
-    pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, width);
-
-    free (mask8);
-}
-
-pixman_image_t *
-_pixman_image_allocate (void)
-{
-    pixman_image_t *image = malloc (sizeof (pixman_image_t));
-
-    if (image)
-    {
-	image_common_t *common = &image->common;
-
-	pixman_region32_init (&common->clip_region);
-
-	common->alpha_count = 0;
-	common->have_clip_region = FALSE;
-	common->clip_sources = FALSE;
-	common->transform = NULL;
-	common->repeat = PIXMAN_REPEAT_NONE;
-	common->filter = PIXMAN_FILTER_NEAREST;
-	common->filter_params = NULL;
-	common->n_filter_params = 0;
-	common->alpha_map = NULL;
-	common->component_alpha = FALSE;
-	common->ref_count = 1;
-	common->classify = NULL;
-	common->client_clip = FALSE;
-	common->destroy_func = NULL;
-	common->destroy_data = NULL;
-	common->dirty = TRUE;
-    }
-
-    return image;
-}
-
-source_image_class_t
-_pixman_image_classify (pixman_image_t *image,
-                        int             x,
-                        int             y,
-                        int             width,
-                        int             height)
-{
-    if (image->common.classify)
-	return image->common.classify (image, x, y, width, height);
-    else
-	return SOURCE_IMAGE_CLASS_UNKNOWN;
-}
-
-void
-_pixman_image_get_scanline_32 (pixman_image_t *image,
-                               int             x,
-                               int             y,
-                               int             width,
-                               uint32_t *      buffer,
-                               const uint32_t *mask)
-{
-    image->common.get_scanline_32 (image, x, y, width, buffer, mask);
-}
-
-/* Even thought the type of buffer is uint32_t *, the function actually expects
- * a uint64_t *buffer.
- */
-void
-_pixman_image_get_scanline_64 (pixman_image_t *image,
-                               int             x,
-                               int             y,
-                               int             width,
-                               uint32_t *      buffer,
-                               const uint32_t *unused)
-{
-    image->common.get_scanline_64 (image, x, y, width, buffer, unused);
-}
-
-static void
-image_property_changed (pixman_image_t *image)
-{
-    image->common.dirty = TRUE;
-}
-
-/* Ref Counting */
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_ref (pixman_image_t *image)
-{
-    image->common.ref_count++;
-
-    return image;
-}
-
-/* returns TRUE when the image is freed */
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_unref (pixman_image_t *image)
-{
-    image_common_t *common = (image_common_t *)image;
-
-    common->ref_count--;
-
-    if (common->ref_count == 0)
-    {
-	if (image->common.destroy_func)
-	    image->common.destroy_func (image, image->common.destroy_data);
-
-	pixman_region32_fini (&common->clip_region);
-
-	if (common->transform)
-	    free (common->transform);
-
-	if (common->filter_params)
-	    free (common->filter_params);
-
-	if (common->alpha_map)
-	    pixman_image_unref ((pixman_image_t *)common->alpha_map);
-
-	if (image->type == LINEAR ||
-	    image->type == RADIAL ||
-	    image->type == CONICAL)
-	{
-	    if (image->gradient.stops)
-		free (image->gradient.stops);
-	}
-
-	if (image->type == BITS && image->bits.free_me)
-	    free (image->bits.free_me);
-
-	free (image);
-
-	return TRUE;
-    }
-
-    return FALSE;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_destroy_function (pixman_image_t *            image,
-                                   pixman_image_destroy_func_t func,
-                                   void *                      data)
-{
-    image->common.destroy_func = func;
-    image->common.destroy_data = data;
-}
-
-PIXMAN_EXPORT void *
-pixman_image_get_destroy_data (pixman_image_t *image)
-{
-  return image->common.destroy_data;
-}
-
-void
-_pixman_image_reset_clip_region (pixman_image_t *image)
-{
-    image->common.have_clip_region = FALSE;
-}
-
-/* Executive Summary: This function is a no-op that only exists
- * for historical reasons.
- *
- * There used to be a bug in the X server where it would rely on
- * out-of-bounds accesses when it was asked to composite with a
- * window as the source. It would create a pixman image pointing
- * to some bogus position in memory, but then set a clip region
- * to the position where the actual bits were.
- *
- * Due to a bug in old versions of pixman, where it would not clip
- * against the image bounds when a clip region was set, this would
- * actually work. So when the pixman bug was fixed, a workaround was
- * added to allow certain out-of-bound accesses. This function disabled
- * those workarounds.
- *
- * Since 0.21.2, pixman doesn't do these workarounds anymore, so now
- * this function is a no-op.
- */
-PIXMAN_EXPORT void
-pixman_disable_out_of_bounds_workaround (void)
-{
-}
-
-static void
-compute_image_info (pixman_image_t *image)
-{
-    pixman_format_code_t code;
-    uint32_t flags = 0;
-
-    /* Transform */
-    if (!image->common.transform)
-    {
-	flags |= (FAST_PATH_ID_TRANSFORM	|
-		  FAST_PATH_X_UNIT_POSITIVE	|
-		  FAST_PATH_Y_UNIT_ZERO		|
-		  FAST_PATH_AFFINE_TRANSFORM);
-    }
-    else
-    {
-	flags |= FAST_PATH_HAS_TRANSFORM;
-
-	if (image->common.transform->matrix[2][0] == 0			&&
-	    image->common.transform->matrix[2][1] == 0			&&
-	    image->common.transform->matrix[2][2] == pixman_fixed_1)
-	{
-	    flags |= FAST_PATH_AFFINE_TRANSFORM;
-
-	    if (image->common.transform->matrix[0][1] == 0 &&
-		image->common.transform->matrix[1][0] == 0)
-	    {
-		flags |= FAST_PATH_SCALE_TRANSFORM;
-	    }
-	}
-
-	if (image->common.transform->matrix[0][0] > 0)
-	    flags |= FAST_PATH_X_UNIT_POSITIVE;
-
-	if (image->common.transform->matrix[1][0] == 0)
-	    flags |= FAST_PATH_Y_UNIT_ZERO;
-    }
-
-    /* Filter */
-    switch (image->common.filter)
-    {
-    case PIXMAN_FILTER_NEAREST:
-    case PIXMAN_FILTER_FAST:
-	flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
-	break;
-
-    case PIXMAN_FILTER_BILINEAR:
-    case PIXMAN_FILTER_GOOD:
-    case PIXMAN_FILTER_BEST:
-	flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
-	break;
-
-    case PIXMAN_FILTER_CONVOLUTION:
-	break;
-
-    default:
-	flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
-	break;
-    }
-
-    /* Repeat mode */
-    switch (image->common.repeat)
-    {
-    case PIXMAN_REPEAT_NONE:
-	flags |=
-	    FAST_PATH_NO_REFLECT_REPEAT		|
-	    FAST_PATH_NO_PAD_REPEAT		|
-	    FAST_PATH_NO_NORMAL_REPEAT;
-	break;
-
-    case PIXMAN_REPEAT_REFLECT:
-	flags |=
-	    FAST_PATH_NO_PAD_REPEAT		|
-	    FAST_PATH_NO_NONE_REPEAT		|
-	    FAST_PATH_NO_NORMAL_REPEAT;
-	break;
-
-    case PIXMAN_REPEAT_PAD:
-	flags |=
-	    FAST_PATH_NO_REFLECT_REPEAT		|
-	    FAST_PATH_NO_NONE_REPEAT		|
-	    FAST_PATH_NO_NORMAL_REPEAT;
-	break;
-
-    default:
-	flags |=
-	    FAST_PATH_NO_REFLECT_REPEAT		|
-	    FAST_PATH_NO_PAD_REPEAT		|
-	    FAST_PATH_NO_NONE_REPEAT;
-	break;
-    }
-
-    /* Component alpha */
-    if (image->common.component_alpha)
-	flags |= FAST_PATH_COMPONENT_ALPHA;
-    else
-	flags |= FAST_PATH_UNIFIED_ALPHA;
-
-    flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NARROW_FORMAT);
-
-    /* Type specific checks */
-    switch (image->type)
-    {
-    case SOLID:
-	code = PIXMAN_solid;
-
-	if (image->solid.color.alpha == 0xffff)
-	    flags |= FAST_PATH_IS_OPAQUE;
-	break;
-
-    case BITS:
-	if (image->bits.width == 1	&&
-	    image->bits.height == 1	&&
-	    image->common.repeat != PIXMAN_REPEAT_NONE)
-	{
-	    code = PIXMAN_solid;
-	}
-	else
-	{
-	    code = image->bits.format;
-	}
-
-	if (!PIXMAN_FORMAT_A (image->bits.format)				&&
-	    PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY		&&
-	    PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR)
-	{
-	    flags |= FAST_PATH_SAMPLES_OPAQUE;
-
-	    if (image->common.repeat != PIXMAN_REPEAT_NONE)
-		flags |= FAST_PATH_IS_OPAQUE;
-	}
-
-	if (image->bits.read_func || image->bits.write_func)
-	    flags &= ~FAST_PATH_NO_ACCESSORS;
-
-	if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
-	    flags &= ~FAST_PATH_NARROW_FORMAT;
-	break;
-
-    case RADIAL:
-	code = PIXMAN_unknown;
-
-	/*
-	 * As explained in pixman-radial-gradient.c, every point of
-	 * the plane has a valid associated radius (and thus will be
-	 * colored) if and only if a is negative (i.e. one of the two
-	 * circles contains the other one).
-	 */
-
-        if (image->radial.a >= 0)
-	    break;
-
-	/* Fall through */
-
-    case CONICAL:
-    case LINEAR:
-	code = PIXMAN_unknown;
-
-	if (image->common.repeat != PIXMAN_REPEAT_NONE)
-	{
-	    int i;
-
-	    flags |= FAST_PATH_IS_OPAQUE;
-	    for (i = 0; i < image->gradient.n_stops; ++i)
-	    {
-		if (image->gradient.stops[i].color.alpha != 0xffff)
-		{
-		    flags &= ~FAST_PATH_IS_OPAQUE;
-		    break;
-		}
-	    }
-	}
-	break;
-
-    default:
-	code = PIXMAN_unknown;
-	break;
-    }
-
-    /* Alpha map */
-    if (!image->common.alpha_map)
-    {
-	flags |= FAST_PATH_NO_ALPHA_MAP;
-    }
-    else
-    {
-	if (PIXMAN_FORMAT_IS_WIDE (image->common.alpha_map->format))
-	    flags &= ~FAST_PATH_NARROW_FORMAT;
-    }
-
-    /* Both alpha maps and convolution filters can introduce
-     * non-opaqueness in otherwise opaque images. Also
-     * an image with component alpha turned on is only opaque
-     * if all channels are opaque, so we simply turn it off
-     * unconditionally for those images.
-     */
-    if (image->common.alpha_map					||
-	image->common.filter == PIXMAN_FILTER_CONVOLUTION	||
-	image->common.component_alpha)
-    {
-	flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE);
-    }
-
-    image->common.flags = flags;
-    image->common.extended_format_code = code;
-}
-
-void
-_pixman_image_validate (pixman_image_t *image)
-{
-    if (image->common.dirty)
-    {
-	compute_image_info (image);
-
-	/* It is important that property_changed is
-	 * called *after* compute_image_info() because
-	 * property_changed() can make use of the flags
-	 * to set up accessors etc.
-	 */
-	image->common.property_changed (image);
-
-	image->common.dirty = FALSE;
-    }
-
-    if (image->common.alpha_map)
-	_pixman_image_validate ((pixman_image_t *)image->common.alpha_map);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_clip_region32 (pixman_image_t *   image,
-                                pixman_region32_t *region)
-{
-    image_common_t *common = (image_common_t *)image;
-    pixman_bool_t result;
-
-    if (region)
-    {
-	if ((result = pixman_region32_copy (&common->clip_region, region)))
-	    image->common.have_clip_region = TRUE;
-    }
-    else
-    {
-	_pixman_image_reset_clip_region (image);
-
-	result = TRUE;
-    }
-
-    image_property_changed (image);
-
-    return result;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_clip_region (pixman_image_t *   image,
-                              pixman_region16_t *region)
-{
-    image_common_t *common = (image_common_t *)image;
-    pixman_bool_t result;
-
-    if (region)
-    {
-	if ((result = pixman_region32_copy_from_region16 (&common->clip_region, region)))
-	    image->common.have_clip_region = TRUE;
-    }
-    else
-    {
-	_pixman_image_reset_clip_region (image);
-
-	result = TRUE;
-    }
-
-    image_property_changed (image);
-
-    return result;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_has_client_clip (pixman_image_t *image,
-                                  pixman_bool_t   client_clip)
-{
-    image->common.client_clip = client_clip;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_transform (pixman_image_t *          image,
-                            const pixman_transform_t *transform)
-{
-    static const pixman_transform_t id =
-    {
-	{ { pixman_fixed_1, 0, 0 },
-	  { 0, pixman_fixed_1, 0 },
-	  { 0, 0, pixman_fixed_1 } }
-    };
-
-    image_common_t *common = (image_common_t *)image;
-    pixman_bool_t result;
-
-    if (common->transform == transform)
-	return TRUE;
-
-    if (memcmp (&id, transform, sizeof (pixman_transform_t)) == 0)
-    {
-	free (common->transform);
-	common->transform = NULL;
-	result = TRUE;
-
-	goto out;
-    }
-
-    if (common->transform == NULL)
-	common->transform = malloc (sizeof (pixman_transform_t));
-
-    if (common->transform == NULL)
-    {
-	result = FALSE;
-
-	goto out;
-    }
-
-    memcpy (common->transform, transform, sizeof(pixman_transform_t));
-
-    result = TRUE;
-
-out:
-    image_property_changed (image);
-
-    return result;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_repeat (pixman_image_t *image,
-                         pixman_repeat_t repeat)
-{
-    image->common.repeat = repeat;
-
-    image_property_changed (image);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_filter (pixman_image_t *      image,
-                         pixman_filter_t       filter,
-                         const pixman_fixed_t *params,
-                         int                   n_params)
-{
-    image_common_t *common = (image_common_t *)image;
-    pixman_fixed_t *new_params;
-
-    if (params == common->filter_params && filter == common->filter)
-	return TRUE;
-
-    new_params = NULL;
-    if (params)
-    {
-	new_params = pixman_malloc_ab (n_params, sizeof (pixman_fixed_t));
-	if (!new_params)
-	    return FALSE;
-
-	memcpy (new_params,
-	        params, n_params * sizeof (pixman_fixed_t));
-    }
-
-    common->filter = filter;
-
-    if (common->filter_params)
-	free (common->filter_params);
-
-    common->filter_params = new_params;
-    common->n_filter_params = n_params;
-
-    image_property_changed (image);
-    return TRUE;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_source_clipping (pixman_image_t *image,
-                                  pixman_bool_t   clip_sources)
-{
-    image->common.clip_sources = clip_sources;
-
-    image_property_changed (image);
-}
-
-/* Unlike all the other property setters, this function does not
- * copy the content of indexed. Doing this copying is simply
- * way, way too expensive.
- */
-PIXMAN_EXPORT void
-pixman_image_set_indexed (pixman_image_t *        image,
-                          const pixman_indexed_t *indexed)
-{
-    bits_image_t *bits = (bits_image_t *)image;
-
-    bits->indexed = indexed;
-
-    image_property_changed (image);
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_alpha_map (pixman_image_t *image,
-                            pixman_image_t *alpha_map,
-                            int16_t         x,
-                            int16_t         y)
-{
-    image_common_t *common = (image_common_t *)image;
-
-    return_if_fail (!alpha_map || alpha_map->type == BITS);
-
-    if (alpha_map && common->alpha_count > 0)
-    {
-	/* If this image is being used as an alpha map itself,
-	 * then you can't give it an alpha map of its own.
-	 */
-	return;
-    }
-
-    if (alpha_map && alpha_map->common.alpha_map)
-    {
-	/* If the image has an alpha map of its own,
-	 * then it can't be used as an alpha map itself
-	 */
-	return;
-    }
-
-    if (common->alpha_map != (bits_image_t *)alpha_map)
-    {
-	if (common->alpha_map)
-	{
-	    common->alpha_map->common.alpha_count--;
-
-	    pixman_image_unref ((pixman_image_t *)common->alpha_map);
-	}
-
-	if (alpha_map)
-	{
-	    common->alpha_map = (bits_image_t *)pixman_image_ref (alpha_map);
-
-	    common->alpha_map->common.alpha_count++;
-	}
-	else
-	{
-	    common->alpha_map = NULL;
-	}
-    }
-
-    common->alpha_origin_x = x;
-    common->alpha_origin_y = y;
-
-    image_property_changed (image);
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_component_alpha   (pixman_image_t *image,
-                                    pixman_bool_t   component_alpha)
-{
-    image->common.component_alpha = component_alpha;
-
-    image_property_changed (image);
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_image_get_component_alpha   (pixman_image_t       *image)
-{
-    return image->common.component_alpha;
-}
-
-PIXMAN_EXPORT void
-pixman_image_set_accessors (pixman_image_t *           image,
-                            pixman_read_memory_func_t  read_func,
-                            pixman_write_memory_func_t write_func)
-{
-    return_if_fail (image != NULL);
-
-    if (image->type == BITS)
-    {
-	image->bits.read_func = read_func;
-	image->bits.write_func = write_func;
-
-	image_property_changed (image);
-    }
-}
-
-PIXMAN_EXPORT uint32_t *
-pixman_image_get_data (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return image->bits.bits;
-
-    return NULL;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_width (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return image->bits.width;
-
-    return 0;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_height (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return image->bits.height;
-
-    return 0;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_stride (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return image->bits.rowstride * (int) sizeof (uint32_t);
-
-    return 0;
-}
-
-PIXMAN_EXPORT int
-pixman_image_get_depth (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return PIXMAN_FORMAT_DEPTH (image->bits.format);
-
-    return 0;
-}
-
-PIXMAN_EXPORT pixman_format_code_t
-pixman_image_get_format (pixman_image_t *image)
-{
-    if (image->type == BITS)
-	return image->bits.format;
-
-    return 0;
-}
-
-uint32_t
-_pixman_image_get_solid (pixman_image_t *     image,
-                         pixman_format_code_t format)
-{
-    uint32_t result;
-
-    _pixman_image_get_scanline_32 (image, 0, 0, 1, &result, NULL);
-
-    /* If necessary, convert RGB <--> BGR. */
-    if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB)
-    {
-	result = (((result & 0xff000000) >>  0) |
-	          ((result & 0x00ff0000) >> 16) |
-	          ((result & 0x0000ff00) >>  0) |
-	          ((result & 0x000000ff) << 16));
-    }
-
-    return result;
-}
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  SuSE makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+
+pixman_bool_t
+_pixman_init_gradient (gradient_t *                  gradient,
+                       const pixman_gradient_stop_t *stops,
+                       int                           n_stops)
+{ /* Give gradient its own copy of the n_stops entries in stops; FALSE on failure. */
+    return_val_if_fail (n_stops > 0, FALSE); /* presumably returns FALSE for an empty stop list (macro defined elsewhere) */
+
+    gradient->stops = pixman_malloc_ab (n_stops, sizeof (pixman_gradient_stop_t));
+    if (!gradient->stops)
+	return FALSE; /* allocation failed; gradient->n_stops is left unset */
+
+    memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t)); /* private copy; freed in pixman_image_unref() */
+
+    gradient->n_stops = n_stops;
+
+    return TRUE;
+}
+
+pixman_image_t *
+_pixman_image_allocate (void)
+{ /* Allocate an image and give the fields of its common part their default values. */
+    pixman_image_t *image = malloc (sizeof (pixman_image_t)); /* not zeroed; only the fields below are set here */
+
+    if (image)
+    {
+	image_common_t *common = &image->common;
+
+	pixman_region32_init (&common->clip_region);
+
+	common->alpha_count = 0;
+	common->have_clip_region = FALSE;
+	common->clip_sources = FALSE;
+	common->transform = NULL;
+	common->repeat = PIXMAN_REPEAT_NONE;
+	common->filter = PIXMAN_FILTER_NEAREST;
+	common->filter_params = NULL;
+	common->n_filter_params = 0;
+	common->alpha_map = NULL;
+	common->component_alpha = FALSE;
+	common->ref_count = 1; /* the caller receives the only reference */
+	common->property_changed = NULL; /* _pixman_image_validate() checks for NULL before calling it */
+	common->client_clip = FALSE;
+	common->destroy_func = NULL;
+	common->destroy_data = NULL;
+	common->dirty = TRUE; /* so the first _pixman_image_validate() runs compute_image_info() */
+    }
+
+    return image; /* NULL if malloc failed */
+}
+
+static void
+image_property_changed (pixman_image_t *image)
+{
+    image->common.dirty = TRUE;
+}
+
+/* Ref Counting */
+PIXMAN_EXPORT pixman_image_t *
+pixman_image_ref (pixman_image_t *image)
+{
+    image->common.ref_count++;
+
+    return image;
+}
+
+/* returns TRUE when the image is freed */
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_unref (pixman_image_t *image)
+{ /* Drop one reference; when the count reaches zero the image and what it holds are freed. */
+    image_common_t *common = (image_common_t *)image;
+
+    common->ref_count--;
+
+    if (common->ref_count == 0)
+    {
+	if (image->common.destroy_func) /* user callback runs first, before anything is freed */
+	    image->common.destroy_func (image, image->common.destroy_data);
+
+	pixman_region32_fini (&common->clip_region);
+
+	if (common->transform)
+	    free (common->transform);
+
+	if (common->filter_params)
+	    free (common->filter_params);
+
+	if (common->alpha_map) /* NOTE(review): the map's alpha_count is not decremented here, unlike in pixman_image_set_alpha_map() */
+	    pixman_image_unref ((pixman_image_t *)common->alpha_map);
+
+	if (image->type == LINEAR ||
+	    image->type == RADIAL ||
+	    image->type == CONICAL)
+	{
+	    if (image->gradient.stops) /* the copy made by _pixman_init_gradient() */
+		free (image->gradient.stops);
+	}
+
+	if (image->type == BITS && image->bits.free_me) /* presumably a buffer pixman allocated itself -- confirm */
+	    free (image->bits.free_me);
+
+	free (image);
+
+	return TRUE;
+    }
+
+    return FALSE; /* other references remain */
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_destroy_function (pixman_image_t *            image,
+                                   pixman_image_destroy_func_t func,
+                                   void *                      data)
+{
+    image->common.destroy_func = func;
+    image->common.destroy_data = data;
+}
+
+PIXMAN_EXPORT void *
+pixman_image_get_destroy_data (pixman_image_t *image)
+{
+  return image->common.destroy_data;
+}
+
+void
+_pixman_image_reset_clip_region (pixman_image_t *image)
+{
+    image->common.have_clip_region = FALSE;
+}
+
+/* Executive Summary: This function is a no-op that only exists
+ * for historical reasons.
+ *
+ * There used to be a bug in the X server where it would rely on
+ * out-of-bounds accesses when it was asked to composite with a
+ * window as the source. It would create a pixman image pointing
+ * to some bogus position in memory, but then set a clip region
+ * to the position where the actual bits were.
+ *
+ * Due to a bug in old versions of pixman, where it would not clip
+ * against the image bounds when a clip region was set, this would
+ * actually work. So when the pixman bug was fixed, a workaround was
+ * added to allow certain out-of-bounds accesses. This function disabled
+ * those workarounds.
+ *
+ * Since 0.21.2, pixman doesn't do these workarounds anymore, so now
+ * this function is a no-op.
+ */
+PIXMAN_EXPORT void
+pixman_disable_out_of_bounds_workaround (void)
+{
+}
+
+static void
+compute_image_info (pixman_image_t *image)
+{
+    pixman_format_code_t code;
+    uint32_t flags = 0;
+
+    /* Transform */
+    if (!image->common.transform)
+    {
+	flags |= (FAST_PATH_ID_TRANSFORM	|
+		  FAST_PATH_X_UNIT_POSITIVE	|
+		  FAST_PATH_Y_UNIT_ZERO		|
+		  FAST_PATH_AFFINE_TRANSFORM);
+    }
+    else
+    {
+	flags |= FAST_PATH_HAS_TRANSFORM;
+
+	if (image->common.transform->matrix[2][0] == 0			&&
+	    image->common.transform->matrix[2][1] == 0			&&
+	    image->common.transform->matrix[2][2] == pixman_fixed_1)
+	{
+	    flags |= FAST_PATH_AFFINE_TRANSFORM;
+
+	    if (image->common.transform->matrix[0][1] == 0 &&
+		image->common.transform->matrix[1][0] == 0)
+	    {
+		flags |= FAST_PATH_SCALE_TRANSFORM;
+	    }
+	}
+
+	if (image->common.transform->matrix[0][0] > 0)
+	    flags |= FAST_PATH_X_UNIT_POSITIVE;
+
+	if (image->common.transform->matrix[1][0] == 0)
+	    flags |= FAST_PATH_Y_UNIT_ZERO;
+    }
+
+    /* Filter */
+    switch (image->common.filter)
+    {
+    case PIXMAN_FILTER_NEAREST:
+    case PIXMAN_FILTER_FAST:
+	flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	break;
+
+    case PIXMAN_FILTER_BILINEAR:
+    case PIXMAN_FILTER_GOOD:
+    case PIXMAN_FILTER_BEST:
+	flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+	break;
+
+    case PIXMAN_FILTER_CONVOLUTION:
+	break;
+
+    default:
+	flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
+	break;
+    }
+
+    /* Repeat mode */
+    switch (image->common.repeat)
+    {
+    case PIXMAN_REPEAT_NONE:
+	flags |=
+	    FAST_PATH_NO_REFLECT_REPEAT		|
+	    FAST_PATH_NO_PAD_REPEAT		|
+	    FAST_PATH_NO_NORMAL_REPEAT;
+	break;
+
+    case PIXMAN_REPEAT_REFLECT:
+	flags |=
+	    FAST_PATH_NO_PAD_REPEAT		|
+	    FAST_PATH_NO_NONE_REPEAT		|
+	    FAST_PATH_NO_NORMAL_REPEAT;
+	break;
+
+    case PIXMAN_REPEAT_PAD:
+	flags |=
+	    FAST_PATH_NO_REFLECT_REPEAT		|
+	    FAST_PATH_NO_NONE_REPEAT		|
+	    FAST_PATH_NO_NORMAL_REPEAT;
+	break;
+
+    default:
+	flags |=
+	    FAST_PATH_NO_REFLECT_REPEAT		|
+	    FAST_PATH_NO_PAD_REPEAT		|
+	    FAST_PATH_NO_NONE_REPEAT;
+	break;
+    }
+
+    /* Component alpha */
+    if (image->common.component_alpha)
+	flags |= FAST_PATH_COMPONENT_ALPHA;
+    else
+	flags |= FAST_PATH_UNIFIED_ALPHA;
+
+    flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NARROW_FORMAT);
+
+    /* Type specific checks */
+    switch (image->type)
+    {
+    case SOLID:
+	code = PIXMAN_solid;
+
+	if (image->solid.color.alpha == 0xffff)
+	    flags |= FAST_PATH_IS_OPAQUE;
+	break;
+
+    case BITS:
+	if (image->bits.width == 1	&&
+	    image->bits.height == 1	&&
+	    image->common.repeat != PIXMAN_REPEAT_NONE)
+	{
+	    code = PIXMAN_solid;
+	}
+	else
+	{
+	    code = image->bits.format;
+	}
+
+	if (!PIXMAN_FORMAT_A (image->bits.format)				&&
+	    PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY		&&
+	    PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR)
+	{
+	    flags |= FAST_PATH_SAMPLES_OPAQUE;
+
+	    if (image->common.repeat != PIXMAN_REPEAT_NONE)
+		flags |= FAST_PATH_IS_OPAQUE;
+	}
+
+	if (image->bits.read_func || image->bits.write_func)
+	    flags &= ~FAST_PATH_NO_ACCESSORS;
+
+	if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
+	    flags &= ~FAST_PATH_NARROW_FORMAT;
+	break;
+
+    case RADIAL:
+	code = PIXMAN_unknown;
+
+	/*
+	 * As explained in pixman-radial-gradient.c, every point of
+	 * the plane has a valid associated radius (and thus will be
+	 * colored) if and only if a is negative (i.e. one of the two
+	 * circles contains the other one).
+	 */
+
+        if (image->radial.a >= 0)
+	    break;
+
+	/* Fall through */
+
+    case CONICAL:
+    case LINEAR:
+	code = PIXMAN_unknown;
+
+	if (image->common.repeat != PIXMAN_REPEAT_NONE)
+	{
+	    int i;
+
+	    flags |= FAST_PATH_IS_OPAQUE;
+	    for (i = 0; i < image->gradient.n_stops; ++i)
+	    {
+		if (image->gradient.stops[i].color.alpha != 0xffff)
+		{
+		    flags &= ~FAST_PATH_IS_OPAQUE;
+		    break;
+		}
+	    }
+	}
+	break;
+
+    default:
+	code = PIXMAN_unknown;
+	break;
+    }
+
+    /* Alpha map */
+    if (!image->common.alpha_map)
+    {
+	flags |= FAST_PATH_NO_ALPHA_MAP;
+    }
+    else
+    {
+	if (PIXMAN_FORMAT_IS_WIDE (image->common.alpha_map->format))
+	    flags &= ~FAST_PATH_NARROW_FORMAT;
+    }
+
+    /* Both alpha maps and convolution filters can introduce
+     * non-opaqueness in otherwise opaque images. Also
+     * an image with component alpha turned on is only opaque
+     * if all channels are opaque, so we simply turn it off
+     * unconditionally for those images.
+     */
+    if (image->common.alpha_map					||
+	image->common.filter == PIXMAN_FILTER_CONVOLUTION	||
+	image->common.component_alpha)
+    {
+	flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE);
+    }
+
+    image->common.flags = flags;
+    image->common.extended_format_code = code;
+}
+
+void
+_pixman_image_validate (pixman_image_t *image)
+{ /* Recompute the cached flags of a dirty image, then validate its alpha map. */
+    if (image->common.dirty)
+    {
+	compute_image_info (image);
+
+	/* It is important that property_changed is
+	 * called *after* compute_image_info() because
+	 * property_changed() can make use of the flags
+	 * to set up accessors etc.
+	 */
+	if (image->common.property_changed) /* may be NULL: _pixman_image_allocate() starts it as NULL */
+	    image->common.property_changed (image);
+
+	image->common.dirty = FALSE;
+    }
+
+    if (image->common.alpha_map) /* checked on every call, even when this image was not dirty */
+	_pixman_image_validate ((pixman_image_t *)image->common.alpha_map);
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_set_clip_region32 (pixman_image_t *   image,
+                                pixman_region32_t *region)
+{ /* Copy region into the image as its clip region; a NULL region removes the clip. */
+    image_common_t *common = (image_common_t *)image;
+    pixman_bool_t result;
+
+    if (region)
+    {
+	if ((result = pixman_region32_copy (&common->clip_region, region))) /* on failure have_clip_region keeps its old value */
+	    image->common.have_clip_region = TRUE;
+    }
+    else
+    {
+	_pixman_image_reset_clip_region (image);
+
+	result = TRUE;
+    }
+
+    image_property_changed (image); /* marked dirty whether or not the copy succeeded */
+
+    return result;
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_set_clip_region (pixman_image_t *   image,
+                              pixman_region16_t *region)
+{ /* region16 variant: the clip is converted into the image's region32; NULL removes the clip. */
+    image_common_t *common = (image_common_t *)image;
+    pixman_bool_t result;
+
+    if (region)
+    {
+	if ((result = pixman_region32_copy_from_region16 (&common->clip_region, region))) /* on failure have_clip_region keeps its old value */
+	    image->common.have_clip_region = TRUE;
+    }
+    else
+    {
+	_pixman_image_reset_clip_region (image);
+
+	result = TRUE;
+    }
+
+    image_property_changed (image); /* marked dirty whether or not the conversion succeeded */
+
+    return result;
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_has_client_clip (pixman_image_t *image,
+                                  pixman_bool_t   client_clip)
+{ /* Record the client_clip flag on the image. */
+    image->common.client_clip = client_clip; /* note: this setter does not call image_property_changed() */
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_set_transform (pixman_image_t *          image,
+                            const pixman_transform_t *transform)
+{ /* Store a private copy of transform; NULL or identity clears it. Returns FALSE if malloc fails. */
+    static const pixman_transform_t id =
+    {
+	{ { pixman_fixed_1, 0, 0 },
+	  { 0, pixman_fixed_1, 0 },
+	  { 0, 0, pixman_fixed_1 } }
+    };
+
+    image_common_t *common = (image_common_t *)image;
+    pixman_bool_t result;
+
+    if (common->transform == transform)
+	return TRUE; /* same pointer (including both NULL): nothing to do */
+
+    if (!transform || memcmp (&id, transform, sizeof (pixman_transform_t)) == 0) /* NULL means identity and must not reach memcmp() */
+    {
+	free (common->transform);
+	common->transform = NULL;
+	result = TRUE;
+
+	goto out;
+    }
+
+    if (common->transform == NULL)
+	common->transform = malloc (sizeof (pixman_transform_t));
+
+    if (common->transform == NULL)
+    {
+	result = FALSE;
+
+	goto out;
+    }
+
+    memcpy (common->transform, transform, sizeof(pixman_transform_t));
+
+    result = TRUE;
+
+out:
+    image_property_changed (image); /* sets dirty on every path through here, failure included */
+
+    return result;
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_repeat (pixman_image_t *image,
+                         pixman_repeat_t repeat)
+{
+    image->common.repeat = repeat;
+
+    image_property_changed (image);
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_set_filter (pixman_image_t *      image,
+                         pixman_filter_t       filter,
+                         const pixman_fixed_t *params,
+                         int                   n_params)
+{ /* Set the filter and store a private copy of its n_params parameters; FALSE if allocation fails. */
+    image_common_t *common = (image_common_t *)image;
+    pixman_fixed_t *new_params;
+
+    if (params == common->filter_params && filter == common->filter)
+	return TRUE; /* same filter and same params pointer (contents are not compared) */
+
+    new_params = NULL;
+    if (params)
+    {
+	new_params = pixman_malloc_ab (n_params, sizeof (pixman_fixed_t));
+	if (!new_params)
+	    return FALSE; /* nothing on the image has been modified yet */
+
+	memcpy (new_params,
+	        params, n_params * sizeof (pixman_fixed_t));
+    }
+
+    common->filter = filter;
+
+    if (common->filter_params)
+	free (common->filter_params);
+
+    common->filter_params = new_params;
+    common->n_filter_params = n_params; /* stored as passed, even when params is NULL */
+
+    image_property_changed (image);
+    return TRUE;
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_source_clipping (pixman_image_t *image,
+                                  pixman_bool_t   clip_sources)
+{
+    image->common.clip_sources = clip_sources;
+
+    image_property_changed (image);
+}
+
+/* Unlike all the other property setters, this function does not
+ * copy the content of indexed. Doing this copying is simply
+ * way, way too expensive.
+ */
+PIXMAN_EXPORT void
+pixman_image_set_indexed (pixman_image_t *        image,
+                          const pixman_indexed_t *indexed)
+{ /* Store the indexed pointer itself on the image; the pointed-to data is not copied. */
+    bits_image_t *bits = (bits_image_t *)image; /* NOTE(review): image->type is not checked before this cast */
+
+    bits->indexed = indexed;
+
+    image_property_changed (image);
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_alpha_map (pixman_image_t *image,
+                            pixman_image_t *alpha_map,
+                            int16_t         x,
+                            int16_t         y)
+{ /* Attach alpha_map (or detach with NULL) and record the origin (x, y). */
+    image_common_t *common = (image_common_t *)image;
+
+    return_if_fail (!alpha_map || alpha_map->type == BITS); /* presumably returns early unless the map is a BITS image (macro defined elsewhere) */
+
+    if (alpha_map && common->alpha_count > 0)
+    {
+	/* If this image is being used as an alpha map itself,
+	 * then you can't give it an alpha map of its own.
+	 */
+	return; /* rejected silently: nothing changed, image not marked dirty */
+    }
+
+    if (alpha_map && alpha_map->common.alpha_map)
+    {
+	/* If the image has an alpha map of its own,
+	 * then it can't be used as an alpha map itself
+	 */
+	return; /* rejected silently: nothing changed, image not marked dirty */
+    }
+
+    if (common->alpha_map != (bits_image_t *)alpha_map)
+    {
+	if (common->alpha_map)
+	{
+	    common->alpha_map->common.alpha_count--; /* the old map loses this user */
+
+	    pixman_image_unref ((pixman_image_t *)common->alpha_map);
+	}
+
+	if (alpha_map)
+	{
+	    common->alpha_map = (bits_image_t *)pixman_image_ref (alpha_map); /* this image holds a reference to its map */
+
+	    common->alpha_map->common.alpha_count++;
+	}
+	else
+	{
+	    common->alpha_map = NULL;
+	}
+    }
+
+    common->alpha_origin_x = x; /* origin is updated even when the map itself is unchanged */
+    common->alpha_origin_y = y;
+
+    image_property_changed (image);
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_component_alpha   (pixman_image_t *image,
+                                    pixman_bool_t   component_alpha)
+{
+    image->common.component_alpha = component_alpha;
+
+    image_property_changed (image);
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_get_component_alpha   (pixman_image_t       *image)
+{
+    return image->common.component_alpha;
+}
+
+PIXMAN_EXPORT void
+pixman_image_set_accessors (pixman_image_t *           image,
+                            pixman_read_memory_func_t  read_func,
+                            pixman_write_memory_func_t write_func)
+{ /* Install the read/write accessor callbacks on a BITS image. */
+    return_if_fail (image != NULL);
+
+    if (image->type == BITS) /* any other image type is silently left untouched */
+    {
+	image->bits.read_func = read_func;
+	image->bits.write_func = write_func;
+
+	image_property_changed (image); /* compute_image_info() derives FAST_PATH_NO_ACCESSORS from these */
+    }
+}
+
+PIXMAN_EXPORT uint32_t *
+pixman_image_get_data (pixman_image_t *image)
+{
+    if (image->type == BITS)
+	return image->bits.bits;
+
+    return NULL;
+}
+
+PIXMAN_EXPORT int
+pixman_image_get_width (pixman_image_t *image)
+{
+    if (image->type == BITS)
+	return image->bits.width;
+
+    return 0;
+}
+
+PIXMAN_EXPORT int
+pixman_image_get_height (pixman_image_t *image)
+{
+    if (image->type == BITS)
+	return image->bits.height;
+
+    return 0;
+}
+
+PIXMAN_EXPORT int
+pixman_image_get_stride (pixman_image_t *image)
+{
+    if (image->type == BITS)
+	return image->bits.rowstride * (int) sizeof (uint32_t); /* presumably rowstride is in uint32_t units and the result in bytes -- confirm */
+
+    return 0; /* not a BITS image */
+}
+
+PIXMAN_EXPORT int
+pixman_image_get_depth (pixman_image_t *image)
+{
+    if (image->type == BITS)
+	return PIXMAN_FORMAT_DEPTH (image->bits.format);
+
+    return 0;
+}
+
+PIXMAN_EXPORT pixman_format_code_t
+pixman_image_get_format (pixman_image_t *image)
+{
+    if (image->type == BITS)
+	return image->bits.format;
+
+    return 0;
+}
+
+uint32_t
+_pixman_image_get_solid (pixman_implementation_t *imp,
+			 pixman_image_t *         image,
+                         pixman_format_code_t     format)
+{ /* Fetch the pixel at (0, 0) of image through a source iterator and return it. */
+    uint32_t result;
+    pixman_iter_t iter;
+
+    _pixman_implementation_src_iter_init (
+	imp, &iter, image, 0, 0, 1, 1, /* x = 0, y = 0, width = 1, height = 1 */
+	(uint8_t *)&result, ITER_NARROW); /* result itself is passed as the iterator buffer */
+
+    result = *iter.get_scanline (&iter, NULL); /* NOTE(review): a NULL image selects get_scanline_null(), whose NULL return would be dereferenced here */
+
+    /* If necessary, convert RGB <--> BGR. */
+    if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB) /* every type other than ARGB gets the swap */
+    {
+	result = (((result & 0xff000000) >>  0) | /* bits 24-31 stay in place */
+	          ((result & 0x00ff0000) >> 16) | /* bits 16-23 move down to bits 0-7 */
+	          ((result & 0x0000ff00) >>  0) | /* bits 8-15 stay in place */
+	          ((result & 0x000000ff) << 16)); /* bits 0-7 move up to bits 16-23 */
+    }
+
+    return result;
+}
diff --git a/pixman/pixman/pixman-implementation.c b/pixman/pixman/pixman-implementation.c
index bc3749ef5..adaf9c61e 100644
--- a/pixman/pixman/pixman-implementation.c
+++ b/pixman/pixman/pixman-implementation.c
@@ -111,6 +111,36 @@ delegate_fill (pixman_implementation_t *imp,
 	imp->delegate, bits, stride, bpp, x, y, width, height, xor);
 }
 
+static void
+delegate_src_iter_init (pixman_implementation_t *imp,
+			pixman_iter_t *	         iter,
+			pixman_image_t *         image,
+			int                      x,
+			int                      y,
+			int                      width,
+			int                      height,
+			uint8_t *		 buffer,
+			iter_flags_t             flags)
+{ /* Default src_iter_init installed by _pixman_implementation_create(). */
+    _pixman_implementation_src_iter_init (
+	imp->delegate, iter, image, x, y, width, height, buffer, flags); /* same arguments, dispatched on imp->delegate */
+}
+
+static void
+delegate_dest_iter_init (pixman_implementation_t *imp,
+			 pixman_iter_t *	  iter,
+			 pixman_image_t *         image,
+			 int                      x,
+			 int                      y,
+			 int                      width,
+			 int                      height,
+			 uint8_t *		  buffer,
+			 iter_flags_t             flags)
+{ /* Default dest_iter_init installed by _pixman_implementation_create(). */
+    _pixman_implementation_dest_iter_init (
+	imp->delegate, iter, image, x, y, width, height, buffer, flags); /* same arguments, dispatched on imp->delegate */
+}
+
 pixman_implementation_t *
 _pixman_implementation_create (pixman_implementation_t *delegate,
 			       const pixman_fast_path_t *fast_paths)
@@ -133,6 +163,8 @@ _pixman_implementation_create (pixman_implementation_t *delegate,
      */
     imp->blt = delegate_blt;
     imp->fill = delegate_fill;
+    imp->src_iter_init = delegate_src_iter_init;
+    imp->dest_iter_init = delegate_dest_iter_init;
 
     for (i = 0; i < PIXMAN_N_OPERATORS; ++i)
     {
@@ -143,7 +175,7 @@ _pixman_implementation_create (pixman_implementation_t *delegate,
     }
 
     imp->fast_paths = fast_paths;
-    
+
     return imp;
 }
 
@@ -225,3 +257,50 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
     return (*imp->fill) (imp, bits, stride, bpp, x, y, width, height, xor);
 }
 
+static uint32_t *
+get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
+{ /* Scanline getter installed by src_iter_init when there is no image */
+    return NULL; /* both arguments are ignored */
+}
+
+void
+_pixman_implementation_src_iter_init (pixman_implementation_t	*imp,
+				      pixman_iter_t             *iter,
+				      pixman_image_t		*image,
+				      int			 x,
+				      int			 y,
+				      int			 width,
+				      int			 height,
+				      uint8_t			*buffer,
+				      iter_flags_t		 flags)
+{ /* The first two branches set only iter->get_scanline and never call imp */
+    if (!image) /* no image: every scanline request yields NULL */
+    {
+	iter->get_scanline = get_scanline_null;
+    }
+    else if ((flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
+	     (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) /* both ignore flags set */
+    {
+	iter->get_scanline = _pixman_iter_get_scanline_noop;
+    }
+    else /* general case: dispatch to the implementation's src_iter_init hook */
+    {
+	(*imp->src_iter_init) (
+	    imp, iter, image, x, y, width, height, buffer, flags);
+    }
+}
+
+void
+_pixman_implementation_dest_iter_init (pixman_implementation_t	*imp,
+				       pixman_iter_t            *iter,
+				       pixman_image_t		*image,
+				       int			 x,
+				       int			 y,
+				       int			 width,
+				       int			 height,
+				       uint8_t			*buffer,
+				       iter_flags_t		 flags)
+{ /* Always dispatches to imp->dest_iter_init; no NULL-image or flag shortcut */
+    (*imp->dest_iter_init) (
+	imp, iter, image, x, y, width, height, buffer, flags);
+}
diff --git a/pixman/pixman/pixman-linear-gradient.c b/pixman/pixman/pixman-linear-gradient.c
index 1756b4a0e..07303fc03 100644
--- a/pixman/pixman/pixman-linear-gradient.c
+++ b/pixman/pixman/pixman-linear-gradient.c
@@ -1,260 +1,292 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#include <stdlib.h>
-#include "pixman-private.h"
-
-static source_image_class_t
-linear_gradient_classify (pixman_image_t *image,
-                          int             x,
-                          int             y,
-                          int             width,
-                          int             height)
-{
-    linear_gradient_t *linear = (linear_gradient_t *)image;
-    pixman_vector_t v;
-    pixman_fixed_32_32_t l;
-    pixman_fixed_48_16_t dx, dy;
-    double inc;
-    source_image_class_t class;
-
-    class = SOURCE_IMAGE_CLASS_UNKNOWN;
-
-    if (image->common.transform)
-    {
-	/* projective transformation */
-	if (image->common.transform->matrix[2][0] != 0 ||
-	    image->common.transform->matrix[2][1] != 0 ||
-	    image->common.transform->matrix[2][2] == 0)
-	{
-	    return class;
-	}
-
-	v.vector[0] = image->common.transform->matrix[0][1];
-	v.vector[1] = image->common.transform->matrix[1][1];
-	v.vector[2] = image->common.transform->matrix[2][2];
-    }
-    else
-    {
-	v.vector[0] = 0;
-	v.vector[1] = pixman_fixed_1;
-	v.vector[2] = pixman_fixed_1;
-    }
-
-    dx = linear->p2.x - linear->p1.x;
-    dy = linear->p2.y - linear->p1.y;
-
-    l = dx * dx + dy * dy;
-
-    if (l == 0)
-	return class;	
-
-    /*
-     * compute how much the input of the gradient walked changes
-     * when moving vertically through the whole image
-     */
-    inc = height * (double) pixman_fixed_1 * pixman_fixed_1 *
-	(dx * v.vector[0] + dy * v.vector[1]) /
-	(v.vector[2] * (double) l);
-
-    /* check that casting to integer would result in 0 */
-    if (-1 < inc && inc < 1)
-	class = SOURCE_IMAGE_CLASS_HORIZONTAL;
-
-    return class;
-}
-
-static void
-linear_gradient_get_scanline_32 (pixman_image_t *image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 uint32_t *      buffer,
-                                 const uint32_t *mask)
-{
-    pixman_vector_t v, unit;
-    pixman_fixed_32_32_t l;
-    pixman_fixed_48_16_t dx, dy;
-    gradient_t *gradient = (gradient_t *)image;
-    linear_gradient_t *linear = (linear_gradient_t *)image;
-    uint32_t *end = buffer + width;
-    pixman_gradient_walker_t walker;
-
-    _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    if (image->common.transform)
-    {
-	if (!pixman_transform_point_3d (image->common.transform, &v))
-	    return;
-
-	unit.vector[0] = image->common.transform->matrix[0][0];
-	unit.vector[1] = image->common.transform->matrix[1][0];
-	unit.vector[2] = image->common.transform->matrix[2][0];
-    }
-    else
-    {
-	unit.vector[0] = pixman_fixed_1;
-	unit.vector[1] = 0;
-	unit.vector[2] = 0;
-    }
-
-    dx = linear->p2.x - linear->p1.x;
-    dy = linear->p2.y - linear->p1.y;
-
-    l = dx * dx + dy * dy;
-
-    if (l == 0 || unit.vector[2] == 0)
-    {
-	/* affine transformation only */
-        pixman_fixed_32_32_t t, next_inc;
-	double inc;
-
-	if (l == 0 || v.vector[2] == 0)
-	{
-	    t = 0;
-	    inc = 0;
-	}
-	else
-	{
-	    double invden, v2;
-
-	    invden = pixman_fixed_1 * (double) pixman_fixed_1 /
-		(l * (double) v.vector[2]);
-	    v2 = v.vector[2] * (1. / pixman_fixed_1);
-	    t = ((dx * v.vector[0] + dy * v.vector[1]) - 
-		 (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
-	    inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden;
-	}
-	next_inc = 0;
-
-	if (((pixman_fixed_32_32_t )(inc * width)) == 0)
-	{
-	    register uint32_t color;
-
-	    color = _pixman_gradient_walker_pixel (&walker, t);
-	    while (buffer < end)
-		*buffer++ = color;
-	}
-	else
-	{
-	    int i;
-
-	    i = 0;
-	    while (buffer < end)
-	    {
-		if (!mask || *mask++)
-		{
-		    *buffer = _pixman_gradient_walker_pixel (&walker,
-							     t + next_inc);
-		}
-		i++;
-		next_inc = inc * i;
-		buffer++;
-	    }
-	}
-    }
-    else
-    {
-	/* projective transformation */
-        double t;
-
-	t = 0;
-
-	while (buffer < end)
-	{
-	    if (!mask || *mask++)
-	    {
-	        if (v.vector[2] != 0)
-		{
-		    double invden, v2;
-
-		    invden = pixman_fixed_1 * (double) pixman_fixed_1 /
-			(l * (double) v.vector[2]);
-		    v2 = v.vector[2] * (1. / pixman_fixed_1);
-		    t = ((dx * v.vector[0] + dy * v.vector[1]) - 
-			 (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
-		}
-
-		*buffer = _pixman_gradient_walker_pixel (&walker, t);
-	    }
-
-	    ++buffer;
-
-	    v.vector[0] += unit.vector[0];
-	    v.vector[1] += unit.vector[1];
-	    v.vector[2] += unit.vector[2];
-	}
-    }
-}
-
-static void
-linear_gradient_property_changed (pixman_image_t *image)
-{
-    image->common.get_scanline_32 = linear_gradient_get_scanline_32;
-    image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64;
-}
-
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_linear_gradient (pixman_point_fixed_t *        p1,
-                                     pixman_point_fixed_t *        p2,
-                                     const pixman_gradient_stop_t *stops,
-                                     int                           n_stops)
-{
-    pixman_image_t *image;
-    linear_gradient_t *linear;
-
-    image = _pixman_image_allocate ();
-
-    if (!image)
-	return NULL;
-
-    linear = &image->linear;
-
-    if (!_pixman_init_gradient (&linear->common, stops, n_stops))
-    {
-	free (image);
-	return NULL;
-    }
-
-    linear->p1 = *p1;
-    linear->p2 = *p2;
-
-    image->type = LINEAR;
-    image->common.classify = linear_gradient_classify;
-    image->common.property_changed = linear_gradient_property_changed;
-
-    return image;
-}
-
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <stdlib.h>
+#include "pixman-private.h"
+
+static pixman_bool_t
+linear_gradient_is_horizontal (pixman_image_t *image,
+			       int             x,
+			       int             y,
+			       int             width,
+			       int             height)
+{ /* x, y and width are accepted but unused; only height enters the test */
+    linear_gradient_t *linear = (linear_gradient_t *)image;
+    pixman_vector_t v;
+    pixman_fixed_32_32_t l;
+    pixman_fixed_48_16_t dx, dy;
+    double inc;
+
+    if (image->common.transform)
+    {
+	/* projective or degenerate transform: never reported as horizontal */
+	if (image->common.transform->matrix[2][0] != 0 ||
+	    image->common.transform->matrix[2][1] != 0 ||
+	    image->common.transform->matrix[2][2] == 0)
+	{
+	    return FALSE;
+	}
+
+	v.vector[0] = image->common.transform->matrix[0][1];
+	v.vector[1] = image->common.transform->matrix[1][1];
+	v.vector[2] = image->common.transform->matrix[2][2];
+    }
+    else /* no transform: same values an identity matrix would give */
+    {
+	v.vector[0] = 0;
+	v.vector[1] = pixman_fixed_1;
+	v.vector[2] = pixman_fixed_1;
+    }
+
+    dx = linear->p2.x - linear->p1.x;
+    dy = linear->p2.y - linear->p1.y;
+
+    l = dx * dx + dy * dy;
+
+    if (l == 0) /* p1 == p2: avoid dividing by zero below */
+	return FALSE;
+
+    /*
+     * Compute how much the input of the gradient walker changes when
+     * moving vertically through all `height` rows of the region.
+     */
+    inc = height * (double) pixman_fixed_1 * pixman_fixed_1 *
+	(dx * v.vector[0] + dy * v.vector[1]) /
+	(v.vector[2] * (double) l);
+
+    /* horizontal only if casting that change to integer would result in 0 */
+    if (-1 < inc && inc < 1)
+	return TRUE;
+
+    return FALSE;
+}
+
+static uint32_t *
+linear_get_scanline_narrow (pixman_iter_t  *iter,
+			    const uint32_t *mask)
+{ /* Render row iter->y into iter->buffer (one uint32_t per pixel) */
+    pixman_image_t *image  = iter->image;
+    int             x      = iter->x;
+    int             y      = iter->y;
+    int             width  = iter->width;
+    uint32_t *      buffer = iter->buffer;
+
+    pixman_vector_t v, unit;
+    pixman_fixed_32_32_t l;
+    pixman_fixed_48_16_t dx, dy;
+    gradient_t *gradient = (gradient_t *)image;
+    linear_gradient_t *linear = (linear_gradient_t *)image;
+    uint32_t *end = buffer + width;
+    pixman_gradient_walker_t walker;
+
+    _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
+
+    /* reference point is the center of pixel (x, y), in fixed point */
+    v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
+    v.vector[2] = pixman_fixed_1;
+
+    if (image->common.transform)
+    {
+	if (!pixman_transform_point_3d (image->common.transform, &v))
+	    return iter->buffer; /* nothing written, iter->y not advanced */
+
+	unit.vector[0] = image->common.transform->matrix[0][0];
+	unit.vector[1] = image->common.transform->matrix[1][0];
+	unit.vector[2] = image->common.transform->matrix[2][0];
+    }
+    else /* no transform: one pixel to the right is one unit in x */
+    {
+	unit.vector[0] = pixman_fixed_1;
+	unit.vector[1] = 0;
+	unit.vector[2] = 0;
+    }
+
+    dx = linear->p2.x - linear->p1.x;
+    dy = linear->p2.y - linear->p1.y;
+
+    l = dx * dx + dy * dy;
+
+    if (l == 0 || unit.vector[2] == 0)
+    {
+	/* affine transformation only (or degenerate gradient, l == 0) */
+        pixman_fixed_32_32_t t, next_inc;
+	double inc;
+
+	if (l == 0 || v.vector[2] == 0) /* would divide by zero below */
+	{
+	    t = 0;
+	    inc = 0;
+	}
+	else
+	{
+	    double invden, v2;
+
+	    invden = pixman_fixed_1 * (double) pixman_fixed_1 /
+		(l * (double) v.vector[2]);
+	    v2 = v.vector[2] * (1. / pixman_fixed_1);
+	    t = ((dx * v.vector[0] + dy * v.vector[1]) - 
+		 (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
+	    inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden;
+	}
+	next_inc = 0;
+
+	if (((pixman_fixed_32_32_t )(inc * width)) == 0)
+	{ /* t does not change over the row: solid fill, mask not consulted */
+	    register uint32_t color;
+
+	    color = _pixman_gradient_walker_pixel (&walker, t);
+	    while (buffer < end)
+		*buffer++ = color;
+	}
+	else
+	{ /* pixel i is sampled at t + inc * i */
+	    int i;
+
+	    i = 0;
+	    while (buffer < end)
+	    {
+		if (!mask || *mask++) /* zero mask word: pixel left unwritten */
+		{
+		    *buffer = _pixman_gradient_walker_pixel (&walker,
+							     t + next_inc);
+		}
+		i++;
+		next_inc = inc * i;
+		buffer++;
+	    }
+	}
+    }
+    else
+    {
+	/* projective transformation: t is recomputed for every pixel */
+        double t;
+
+	t = 0;
+
+	while (buffer < end)
+	{
+	    if (!mask || *mask++) /* zero mask word: pixel left unwritten */
+	    {
+	        if (v.vector[2] != 0) /* otherwise the previous t is reused */
+		{
+		    double invden, v2;
+
+		    invden = pixman_fixed_1 * (double) pixman_fixed_1 /
+			(l * (double) v.vector[2]);
+		    v2 = v.vector[2] * (1. / pixman_fixed_1);
+		    t = ((dx * v.vector[0] + dy * v.vector[1]) - 
+			 (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
+		}
+
+		*buffer = _pixman_gradient_walker_pixel (&walker, t);
+	    }
+
+	    ++buffer;
+
+	    v.vector[0] += unit.vector[0];
+	    v.vector[1] += unit.vector[1];
+	    v.vector[2] += unit.vector[2];
+	}
+    }
+
+    iter->y++; /* the next call renders the following row */
+
+    return iter->buffer;
+}
+
+static uint32_t *
+linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+{ /* Narrow fetch, then pixman_expand with the same buffer as source and dest */
+    uint32_t *buffer = linear_get_scanline_narrow (iter, NULL); /* mask dropped */
+
+    pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
+
+    return buffer;
+}
+
+void
+_pixman_linear_gradient_iter_init (pixman_image_t *image,
+				   pixman_iter_t  *iter,
+				   int             x,
+				   int             y,
+				   int             width,
+				   int             height,
+				   uint8_t        *buffer,
+				   iter_flags_t    flags)
+{ /* NOTE(review): assumes caller already filled iter->image/x/y/width/buffer */
+    if (linear_gradient_is_horizontal (image, x, y, width, height))
+    { /* pre-render one row now (this also advances iter->y), then go no-op */
+	if (flags & ITER_NARROW)
+	    linear_get_scanline_narrow (iter, NULL);
+	else
+	    linear_get_scanline_wide (iter, NULL);
+
+	iter->get_scanline = _pixman_iter_get_scanline_noop;
+    }
+    else
+    { /* otherwise the row is recomputed on every get_scanline call */
+	if (flags & ITER_NARROW)
+	    iter->get_scanline = linear_get_scanline_narrow;
+	else
+	    iter->get_scanline = linear_get_scanline_wide;
+    }
+}
+
+PIXMAN_EXPORT pixman_image_t *
+pixman_image_create_linear_gradient (pixman_point_fixed_t *        p1,
+                                     pixman_point_fixed_t *        p2,
+                                     const pixman_gradient_stop_t *stops,
+                                     int                           n_stops)
+{ /* Returns a LINEAR image, or NULL if allocation or gradient init fails */
+    pixman_image_t *image;
+    linear_gradient_t *linear;
+
+    image = _pixman_image_allocate ();
+
+    if (!image)
+	return NULL;
+
+    linear = &image->linear;
+
+    if (!_pixman_init_gradient (&linear->common, stops, n_stops))
+    {
+	free (image); /* released with plain free() on this failure path */
+	return NULL;
+    }
+
+    linear->p1 = *p1; /* endpoints are copied by value; p1/p2 are not kept */
+    linear->p2 = *p2;
+
+    image->type = LINEAR;
+
+    return image;
+}
+
diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c
index d05a185ea..6daa364fb 100644
--- a/pixman/pixman/pixman-mmx.c
+++ b/pixman/pixman/pixman-mmx.c
@@ -1,3378 +1,3378 @@
-/*
- * Copyright © 2004, 2005 Red Hat, Inc.
- * Copyright © 2004 Nicholas Miell
- * Copyright © 2005 Trolltech AS
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Red Hat not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  Red Hat makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Søren Sandmann (sandmann@redhat.com)
- * Minor Improvements: Nicholas Miell (nmiell@gmail.com)
- * MMX code paths for fbcompose.c by Lars Knoll (lars@trolltech.com)
- *
- * Based on work by Owen Taylor
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#ifdef USE_MMX
-
-#include <mmintrin.h>
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-
-#define no_vERBOSE
-
-#ifdef VERBOSE
-#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__)
-#else
-#define CHECKPOINT()
-#endif
-
-/* Notes about writing mmx code
- *
- * give memory operands as the second operand. If you give it as the
- * first, gcc will first load it into a register, then use that
- * register
- *
- *   ie. use
- *
- *         _mm_mullo_pi16 (x, mmx_constant);
- *
- *   not
- *
- *         _mm_mullo_pi16 (mmx_constant, x);
- *
- * Also try to minimize dependencies. i.e. when you need a value, try
- * to calculate it from a value that was calculated as early as
- * possible.
- */
-
-/* --------------- MMX primitives ------------------------------------- */
-
-#ifdef __GNUC__
-typedef uint64_t mmxdatafield;
-#else
-typedef __m64 mmxdatafield;
-/* If __m64 is defined as a struct or union, define M64_MEMBER to be the
-   name of the member used to access the data */
-# ifdef _MSC_VER
-#  define M64_MEMBER m64_u64
-# elif defined(__SUNPRO_C)
-#  define M64_MEMBER l_
-# endif
-#endif
-
-typedef struct
-{
-    mmxdatafield mmx_4x00ff;
-    mmxdatafield mmx_4x0080;
-    mmxdatafield mmx_565_rgb;
-    mmxdatafield mmx_565_unpack_multiplier;
-    mmxdatafield mmx_565_r;
-    mmxdatafield mmx_565_g;
-    mmxdatafield mmx_565_b;
-    mmxdatafield mmx_mask_0;
-    mmxdatafield mmx_mask_1;
-    mmxdatafield mmx_mask_2;
-    mmxdatafield mmx_mask_3;
-    mmxdatafield mmx_full_alpha;
-    mmxdatafield mmx_ffff0000ffff0000;
-    mmxdatafield mmx_0000ffff00000000;
-    mmxdatafield mmx_000000000000ffff;
-} mmx_data_t;
-
-#if defined(_MSC_VER)
-# define MMXDATA_INIT(field, val) { val ## UI64 }
-#elif defined(M64_MEMBER)       /* __m64 is a struct, not an integral type */
-# define MMXDATA_INIT(field, val) field =   { val ## ULL }
-#else                           /* __m64 is an integral type */
-# define MMXDATA_INIT(field, val) field =   val ## ULL
-#endif
-
-static const mmx_data_t c =
-{
-    MMXDATA_INIT (.mmx_4x00ff,                   0x00ff00ff00ff00ff),
-    MMXDATA_INIT (.mmx_4x0080,                   0x0080008000800080),
-    MMXDATA_INIT (.mmx_565_rgb,                  0x000001f0003f001f),
-    MMXDATA_INIT (.mmx_565_unpack_multiplier,    0x0000008404100840),
-    MMXDATA_INIT (.mmx_565_r,                    0x000000f800000000),
-    MMXDATA_INIT (.mmx_565_g,                    0x0000000000fc0000),
-    MMXDATA_INIT (.mmx_565_b,                    0x00000000000000f8),
-    MMXDATA_INIT (.mmx_mask_0,                   0xffffffffffff0000),
-    MMXDATA_INIT (.mmx_mask_1,                   0xffffffff0000ffff),
-    MMXDATA_INIT (.mmx_mask_2,                   0xffff0000ffffffff),
-    MMXDATA_INIT (.mmx_mask_3,                   0x0000ffffffffffff),
-    MMXDATA_INIT (.mmx_full_alpha,               0x00ff000000000000),
-    MMXDATA_INIT (.mmx_ffff0000ffff0000,         0xffff0000ffff0000),
-    MMXDATA_INIT (.mmx_0000ffff00000000,         0x0000ffff00000000),
-    MMXDATA_INIT (.mmx_000000000000ffff,         0x000000000000ffff),
-};
-
-#ifdef __GNUC__
-#    ifdef __ICC
-#        define MC(x) to_m64 (c.mmx_ ## x)
-#    else
-#        define MC(x) ((__m64)c.mmx_ ## x)
-#    endif
-#else
-#    define MC(x) c.mmx_ ## x
-#endif
-
-static force_inline __m64
-to_m64 (uint64_t x)
-{
-#ifdef __ICC
-    return _mm_cvtsi64_m64 (x);
-#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
-    __m64 res;
-
-    res.M64_MEMBER = x;
-    return res;
-#else                           /* __m64 is an integral type */
-    return (__m64)x;
-#endif
-}
-
-static force_inline uint64_t
-to_uint64 (__m64 x)
-{
-#ifdef __ICC
-    return _mm_cvtm64_si64 (x);
-#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
-    uint64_t res = x.M64_MEMBER;
-    return res;
-#else                           /* __m64 is an integral type */
-    return (uint64_t)x;
-#endif
-}
-
-static force_inline __m64
-shift (__m64 v,
-       int   s)
-{
-    if (s > 0)
-	return _mm_slli_si64 (v, s);
-    else if (s < 0)
-	return _mm_srli_si64 (v, -s);
-    else
-	return v;
-}
-
-static force_inline __m64
-negate (__m64 mask)
-{
-    return _mm_xor_si64 (mask, MC (4x00ff));
-}
-
-static force_inline __m64
-pix_multiply (__m64 a, __m64 b)
-{
-    __m64 res;
-
-    res = _mm_mullo_pi16 (a, b);
-    res = _mm_adds_pu16 (res, MC (4x0080));
-    res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
-    res = _mm_srli_pi16 (res, 8);
-
-    return res;
-}
-
-static force_inline __m64
-pix_add (__m64 a, __m64 b)
-{
-    return _mm_adds_pu8 (a, b);
-}
-
-static force_inline __m64
-expand_alpha (__m64 pixel)
-{
-    __m64 t1, t2;
-
-    t1 = shift (pixel, -48);
-    t2 = shift (t1, 16);
-    t1 = _mm_or_si64 (t1, t2);
-    t2 = shift (t1, 32);
-    t1 = _mm_or_si64 (t1, t2);
-
-    return t1;
-}
-
-static force_inline __m64
-expand_alpha_rev (__m64 pixel)
-{
-    __m64 t1, t2;
-
-    /* move alpha to low 16 bits and zero the rest */
-    t1 = shift (pixel,  48);
-    t1 = shift (t1, -48);
-
-    t2 = shift (t1, 16);
-    t1 = _mm_or_si64 (t1, t2);
-    t2 = shift (t1, 32);
-    t1 = _mm_or_si64 (t1, t2);
-
-    return t1;
-}
-
-static force_inline __m64
-invert_colors (__m64 pixel)
-{
-    __m64 x, y, z;
-
-    x = y = z = pixel;
-
-    x = _mm_and_si64 (x, MC (ffff0000ffff0000));
-    y = _mm_and_si64 (y, MC (000000000000ffff));
-    z = _mm_and_si64 (z, MC (0000ffff00000000));
-
-    y = shift (y, 32);
-    z = shift (z, -32);
-
-    x = _mm_or_si64 (x, y);
-    x = _mm_or_si64 (x, z);
-
-    return x;
-}
-
-static force_inline __m64
-over (__m64 src,
-      __m64 srca,
-      __m64 dest)
-{
-    return _mm_adds_pu8 (src, pix_multiply (dest, negate (srca)));
-}
-
-static force_inline __m64
-over_rev_non_pre (__m64 src, __m64 dest)
-{
-    __m64 srca = expand_alpha (src);
-    __m64 srcfaaa = _mm_or_si64 (srca, MC (full_alpha));
-
-    return over (pix_multiply (invert_colors (src), srcfaaa), srca, dest);
-}
-
-static force_inline __m64
-in (__m64 src, __m64 mask)
-{
-    return pix_multiply (src, mask);
-}
-
-static force_inline __m64
-in_over_full_src_alpha (__m64 src, __m64 mask, __m64 dest)
-{
-    src = _mm_or_si64 (src, MC (full_alpha));
-
-    return over (in (src, mask), mask, dest);
-}
-
-#ifndef _MSC_VER
-static force_inline __m64
-in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
-{
-    return over (in (src, mask), pix_multiply (srca, mask), dest);
-}
-
-#else
-
-#define in_over(src, srca, mask, dest)					\
-    over (in (src, mask), pix_multiply (srca, mask), dest)
-
-#endif
-
-static force_inline __m64
-load8888 (uint32_t v)
-{
-    return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ());
-}
-
-static force_inline __m64
-pack8888 (__m64 lo, __m64 hi)
-{
-    return _mm_packs_pu16 (lo, hi);
-}
-
-static force_inline uint32_t
-store8888 (__m64 v)
-{
-    return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()));
-}
-
-/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
- *
- *    00RR00GG00BB
- *
- * --- Expanding 565 in the low word ---
- *
- * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
- * m = m & (01f0003f001f);
- * m = m * (008404100840);
- * m = m >> 8;
- *
- * Note the trick here - the top word is shifted by another nibble to
- * avoid it bumping into the middle word
- */
-static force_inline __m64
-expand565 (__m64 pixel, int pos)
-{
-    __m64 p = pixel;
-    __m64 t1, t2;
-
-    /* move pixel to low 16 bit and zero the rest */
-    p = shift (shift (p, (3 - pos) * 16), -48);
-
-    t1 = shift (p, 36 - 11);
-    t2 = shift (p, 16 - 5);
-
-    p = _mm_or_si64 (t1, p);
-    p = _mm_or_si64 (t2, p);
-    p = _mm_and_si64 (p, MC (565_rgb));
-
-    pixel = _mm_mullo_pi16 (p, MC (565_unpack_multiplier));
-    return _mm_srli_pi16 (pixel, 8);
-}
-
-static force_inline __m64
-expand8888 (__m64 in, int pos)
-{
-    if (pos == 0)
-	return _mm_unpacklo_pi8 (in, _mm_setzero_si64 ());
-    else
-	return _mm_unpackhi_pi8 (in, _mm_setzero_si64 ());
-}
-
-static force_inline __m64
-expandx888 (__m64 in, int pos)
-{
-    return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha));
-}
-
-static force_inline __m64
-pack_565 (__m64 pixel, __m64 target, int pos)
-{
-    __m64 p = pixel;
-    __m64 t = target;
-    __m64 r, g, b;
-
-    r = _mm_and_si64 (p, MC (565_r));
-    g = _mm_and_si64 (p, MC (565_g));
-    b = _mm_and_si64 (p, MC (565_b));
-
-    r = shift (r, -(32 - 8) + pos * 16);
-    g = shift (g, -(16 - 3) + pos * 16);
-    b = shift (b, -(0  + 3) + pos * 16);
-
-    if (pos == 0)
-	t = _mm_and_si64 (t, MC (mask_0));
-    else if (pos == 1)
-	t = _mm_and_si64 (t, MC (mask_1));
-    else if (pos == 2)
-	t = _mm_and_si64 (t, MC (mask_2));
-    else if (pos == 3)
-	t = _mm_and_si64 (t, MC (mask_3));
-
-    p = _mm_or_si64 (r, t);
-    p = _mm_or_si64 (g, p);
-
-    return _mm_or_si64 (b, p);
-}
-
-#ifndef _MSC_VER
-
-static force_inline __m64
-pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
-{
-    x = pix_multiply (x, a);
-    y = pix_multiply (y, b);
-
-    return pix_add (x, y);
-}
-
-#else
-
-#define pix_add_mul(x, a, y, b)	 \
-    ( x = pix_multiply (x, a),	 \
-      y = pix_multiply (y, a),	 \
-      pix_add (x, y) )
-
-#endif
-
-/* --------------- MMX code patch for fbcompose.c --------------------- */
-
-static force_inline uint32_t
-combine (const uint32_t *src, const uint32_t *mask)
-{
-    uint32_t ssrc = *src;
-
-    if (mask)
-    {
-	__m64 m = load8888 (*mask);
-	__m64 s = load8888 (ssrc);
-
-	m = expand_alpha (m);
-	s = pix_multiply (s, m);
-
-	ssrc = store8888 (s);
-    }
-
-    return ssrc;
-}
-
-static void
-mmx_combine_over_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	uint32_t ssrc = combine (src, mask);
-	uint32_t a = ssrc >> 24;
-
-	if (a == 0xff)
-	{
-	    *dest = ssrc;
-	}
-	else if (ssrc)
-	{
-	    __m64 s, sa;
-	    s = load8888 (ssrc);
-	    sa = expand_alpha (s);
-	    *dest = store8888 (over (s, sa, load8888 (*dest)));
-	}
-
-	++dest;
-	++src;
-	if (mask)
-	    ++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_over_reverse_u (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dest,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 d, da;
-	uint32_t s = combine (src, mask);
-
-	d = load8888 (*dest);
-	da = expand_alpha (d);
-	*dest = store8888 (over (d, da, load8888 (s)));
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_in_u (pixman_implementation_t *imp,
-                  pixman_op_t              op,
-                  uint32_t *               dest,
-                  const uint32_t *         src,
-                  const uint32_t *         mask,
-                  int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 x, a;
-
-	x = load8888 (combine (src, mask));
-	a = load8888 (*dest);
-	a = expand_alpha (a);
-	x = pix_multiply (x, a);
-
-	*dest = store8888 (x);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_in_reverse_u (pixman_implementation_t *imp,
-                          pixman_op_t              op,
-                          uint32_t *               dest,
-                          const uint32_t *         src,
-                          const uint32_t *         mask,
-                          int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 x, a;
-
-	x = load8888 (*dest);
-	a = load8888 (combine (src, mask));
-	a = expand_alpha (a);
-	x = pix_multiply (x, a);
-	*dest = store8888 (x);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_out_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 x, a;
-
-	x = load8888 (combine (src, mask));
-	a = load8888 (*dest);
-	a = expand_alpha (a);
-	a = negate (a);
-	x = pix_multiply (x, a);
-	*dest = store8888 (x);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_out_reverse_u (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           uint32_t *               dest,
-                           const uint32_t *         src,
-                           const uint32_t *         mask,
-                           int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 x, a;
-
-	x = load8888 (*dest);
-	a = load8888 (combine (src, mask));
-	a = expand_alpha (a);
-	a = negate (a);
-	x = pix_multiply (x, a);
-
-	*dest = store8888 (x);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_atop_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 s, da, d, sia;
-
-	s = load8888 (combine (src, mask));
-	d = load8888 (*dest);
-	sia = expand_alpha (s);
-	sia = negate (sia);
-	da = expand_alpha (d);
-	s = pix_add_mul (s, da, d, sia);
-	*dest = store8888 (s);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_atop_reverse_u (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dest,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    const uint32_t *end;
-
-    end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 s, dia, d, sa;
-
-	s = load8888 (combine (src, mask));
-	d = load8888 (*dest);
-	sa = expand_alpha (s);
-	dia = expand_alpha (d);
-	dia = negate (dia);
-	s = pix_add_mul (s, dia, d, sa);
-	*dest = store8888 (s);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_xor_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 s, dia, d, sia;
-
-	s = load8888 (combine (src, mask));
-	d = load8888 (*dest);
-	sia = expand_alpha (s);
-	dia = expand_alpha (d);
-	sia = negate (sia);
-	dia = negate (dia);
-	s = pix_add_mul (s, dia, d, sia);
-	*dest = store8888 (s);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_add_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	__m64 s, d;
-
-	s = load8888 (combine (src, mask));
-	d = load8888 (*dest);
-	s = pix_add (s, d);
-	*dest = store8888 (s);
-
-	++dest;
-	++src;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_saturate_u (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        uint32_t *               dest,
-                        const uint32_t *         src,
-                        const uint32_t *         mask,
-                        int                      width)
-{
-    const uint32_t *end = dest + width;
-
-    while (dest < end)
-    {
-	uint32_t s = combine (src, mask);
-	uint32_t d = *dest;
-	__m64 ms = load8888 (s);
-	__m64 md = load8888 (d);
-	uint32_t sa = s >> 24;
-	uint32_t da = ~d >> 24;
-
-	if (sa > da)
-	{
-	    __m64 msa = load8888 (DIV_UN8 (da, sa) << 24);
-	    msa = expand_alpha (msa);
-	    ms = pix_multiply (ms, msa);
-	}
-
-	md = pix_add (md, ms);
-	*dest = store8888 (md);
-
-	++src;
-	++dest;
-	if (mask)
-	    mask++;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_src_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-
-	s = pix_multiply (s, a);
-	*dest = store8888 (s);
-
-	++src;
-	++mask;
-	++dest;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_over_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dest,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-	__m64 d = load8888 (*dest);
-	__m64 sa = expand_alpha (s);
-
-	*dest = store8888 (in_over (s, sa, a, d));
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_over_reverse_ca (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               dest,
-                             const uint32_t *         src,
-                             const uint32_t *         mask,
-                             int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-	__m64 d = load8888 (*dest);
-	__m64 da = expand_alpha (d);
-
-	*dest = store8888 (over (d, da, in (s, a)));
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_in_ca (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dest,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-	__m64 d = load8888 (*dest);
-	__m64 da = expand_alpha (d);
-
-	s = pix_multiply (s, a);
-	s = pix_multiply (s, da);
-	*dest = store8888 (s);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_in_reverse_ca (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           uint32_t *               dest,
-                           const uint32_t *         src,
-                           const uint32_t *         mask,
-                           int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-	__m64 d = load8888 (*dest);
-	__m64 sa = expand_alpha (s);
-
-	a = pix_multiply (a, sa);
-	d = pix_multiply (d, a);
-	*dest = store8888 (d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_out_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-	__m64 d = load8888 (*dest);
-	__m64 da = expand_alpha (d);
-
-	da = negate (da);
-	s = pix_multiply (s, a);
-	s = pix_multiply (s, da);
-	*dest = store8888 (s);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_out_reverse_ca (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dest,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-	__m64 d = load8888 (*dest);
-	__m64 sa = expand_alpha (s);
-
-	a = pix_multiply (a, sa);
-	a = negate (a);
-	d = pix_multiply (d, a);
-	*dest = store8888 (d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_atop_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dest,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-	__m64 d = load8888 (*dest);
-	__m64 da = expand_alpha (d);
-	__m64 sa = expand_alpha (s);
-
-	s = pix_multiply (s, a);
-	a = pix_multiply (a, sa);
-	a = negate (a);
-	d = pix_add_mul (d, a, s, da);
-	*dest = store8888 (d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               dest,
-                             const uint32_t *         src,
-                             const uint32_t *         mask,
-                             int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-	__m64 d = load8888 (*dest);
-	__m64 da = expand_alpha (d);
-	__m64 sa = expand_alpha (s);
-
-	s = pix_multiply (s, a);
-	a = pix_multiply (a, sa);
-	da = negate (da);
-	d = pix_add_mul (d, a, s, da);
-	*dest = store8888 (d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_xor_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-	__m64 d = load8888 (*dest);
-	__m64 da = expand_alpha (d);
-	__m64 sa = expand_alpha (s);
-
-	s = pix_multiply (s, a);
-	a = pix_multiply (a, sa);
-	da = negate (da);
-	a = negate (a);
-	d = pix_add_mul (d, a, s, da);
-	*dest = store8888 (d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_combine_add_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dest,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    const uint32_t *end = src + width;
-
-    while (src < end)
-    {
-	__m64 a = load8888 (*mask);
-	__m64 s = load8888 (*src);
-	__m64 d = load8888 (*dest);
-
-	s = pix_multiply (s, a);
-	d = pix_add (s, d);
-	*dest = store8888 (d);
-
-	++src;
-	++dest;
-	++mask;
-    }
-    _mm_empty ();
-}
-
-/* ------------- MMX code paths called from fbpict.c -------------------- */
-
-static void
-mmx_composite_over_n_8888 (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           pixman_image_t *         src_image,
-                           pixman_image_t *         mask_image,
-                           pixman_image_t *         dst_image,
-                           int32_t                  src_x,
-                           int32_t                  src_y,
-                           int32_t                  mask_x,
-                           int32_t                  mask_y,
-                           int32_t                  dest_x,
-                           int32_t                  dest_y,
-                           int32_t                  width,
-                           int32_t                  height)
-{
-    uint32_t src;
-    uint32_t    *dst_line, *dst;
-    int32_t w;
-    int dst_stride;
-    __m64 vsrc, vsrca;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    vsrc = load8888 (src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
-
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 2)
-	{
-	    __m64 vdest;
-	    __m64 dest0, dest1;
-
-	    vdest = *(__m64 *)dst;
-
-	    dest0 = over (vsrc, vsrca, expand8888 (vdest, 0));
-	    dest1 = over (vsrc, vsrca, expand8888 (vdest, 1));
-
-	    *(__m64 *)dst = pack8888 (dest0, dest1);
-
-	    dst += 2;
-	    w -= 2;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
-
-	    w--;
-	    dst++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_n_0565 (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           pixman_image_t *         src_image,
-                           pixman_image_t *         mask_image,
-                           pixman_image_t *         dst_image,
-                           int32_t                  src_x,
-                           int32_t                  src_y,
-                           int32_t                  mask_x,
-                           int32_t                  mask_y,
-                           int32_t                  dest_x,
-                           int32_t                  dest_y,
-                           int32_t                  width,
-                           int32_t                  height)
-{
-    uint32_t src;
-    uint16_t    *dst_line, *dst;
-    int32_t w;
-    int dst_stride;
-    __m64 vsrc, vsrca;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
-    vsrc = load8888 (src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	}
-
-	while (w >= 4)
-	{
-	    __m64 vdest;
-
-	    vdest = *(__m64 *)dst;
-
-	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 0)), vdest, 0);
-	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 1)), vdest, 1);
-	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 2)), vdest, 2);
-	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 3)), vdest, 3);
-
-	    *(__m64 *)dst = vdest;
-
-	    dst += 4;
-	    w -= 4;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
-                                   pixman_op_t              op,
-                                   pixman_image_t *         src_image,
-                                   pixman_image_t *         mask_image,
-                                   pixman_image_t *         dst_image,
-                                   int32_t                  src_x,
-                                   int32_t                  src_y,
-                                   int32_t                  mask_x,
-                                   int32_t                  mask_y,
-                                   int32_t                  dest_x,
-                                   int32_t                  dest_y,
-                                   int32_t                  width,
-                                   int32_t                  height)
-{
-    uint32_t src, srca;
-    uint32_t    *dst_line;
-    uint32_t    *mask_line;
-    int dst_stride, mask_stride;
-    __m64 vsrc, vsrca;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    vsrc = load8888 (src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	int twidth = width;
-	uint32_t *p = (uint32_t *)mask_line;
-	uint32_t *q = (uint32_t *)dst_line;
-
-	while (twidth && (unsigned long)q & 7)
-	{
-	    uint32_t m = *(uint32_t *)p;
-
-	    if (m)
-	    {
-		__m64 vdest = load8888 (*q);
-		vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
-		*q = store8888 (vdest);
-	    }
-
-	    twidth--;
-	    p++;
-	    q++;
-	}
-
-	while (twidth >= 2)
-	{
-	    uint32_t m0, m1;
-	    m0 = *p;
-	    m1 = *(p + 1);
-
-	    if (m0 | m1)
-	    {
-		__m64 dest0, dest1;
-		__m64 vdest = *(__m64 *)q;
-
-		dest0 = in_over (vsrc, vsrca, load8888 (m0),
-		                 expand8888 (vdest, 0));
-		dest1 = in_over (vsrc, vsrca, load8888 (m1),
-		                 expand8888 (vdest, 1));
-
-		*(__m64 *)q = pack8888 (dest0, dest1);
-	    }
-
-	    p += 2;
-	    q += 2;
-	    twidth -= 2;
-	}
-
-	while (twidth)
-	{
-	    uint32_t m = *(uint32_t *)p;
-
-	    if (m)
-	    {
-		__m64 vdest = load8888 (*q);
-		vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
-		*q = store8888 (vdest);
-	    }
-
-	    twidth--;
-	    p++;
-	    q++;
-	}
-
-	dst_line += dst_stride;
-	mask_line += mask_stride;
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                pixman_image_t *         src_image,
-                                pixman_image_t *         mask_image,
-                                pixman_image_t *         dst_image,
-                                int32_t                  src_x,
-                                int32_t                  src_y,
-                                int32_t                  mask_x,
-                                int32_t                  mask_y,
-                                int32_t                  dest_x,
-                                int32_t                  dest_y,
-                                int32_t                  width,
-                                int32_t                  height)
-{
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    uint32_t mask;
-    __m64 vmask;
-    int dst_stride, src_stride;
-    int32_t w;
-    __m64 srca;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
-    mask &= 0xff000000;
-    mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
-    vmask = load8888 (mask);
-    srca = MC (4x00ff);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    __m64 s = load8888 (*src);
-	    __m64 d = load8888 (*dst);
-
-	    *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-
-	while (w >= 2)
-	{
-	    __m64 vs = *(__m64 *)src;
-	    __m64 vd = *(__m64 *)dst;
-	    __m64 vsrc0 = expand8888 (vs, 0);
-	    __m64 vsrc1 = expand8888 (vs, 1);
-
-	    *(__m64 *)dst = pack8888 (
-	        in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)),
-	        in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1)));
-
-	    w -= 2;
-	    dst += 2;
-	    src += 2;
-	}
-
-	while (w)
-	{
-	    __m64 s = load8888 (*src);
-	    __m64 d = load8888 (*dst);
-
-	    *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                pixman_image_t *         src_image,
-                                pixman_image_t *         mask_image,
-                                pixman_image_t *         dst_image,
-                                int32_t                  src_x,
-                                int32_t                  src_y,
-                                int32_t                  mask_x,
-                                int32_t                  mask_y,
-                                int32_t                  dest_x,
-                                int32_t                  dest_y,
-                                int32_t                  width,
-                                int32_t                  height)
-{
-    uint32_t *dst_line, *dst;
-    uint32_t *src_line, *src;
-    uint32_t mask;
-    __m64 vmask;
-    int dst_stride, src_stride;
-    int32_t w;
-    __m64 srca;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
-
-    mask &= 0xff000000;
-    mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
-    vmask = load8888 (mask);
-    srca = MC (4x00ff);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    __m64 s = load8888 (*src | 0xff000000);
-	    __m64 d = load8888 (*dst);
-
-	    *dst = store8888 (in_over (s, srca, vmask, d));
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-
-	while (w >= 16)
-	{
-	    __m64 vd0 = *(__m64 *)(dst + 0);
-	    __m64 vd1 = *(__m64 *)(dst + 2);
-	    __m64 vd2 = *(__m64 *)(dst + 4);
-	    __m64 vd3 = *(__m64 *)(dst + 6);
-	    __m64 vd4 = *(__m64 *)(dst + 8);
-	    __m64 vd5 = *(__m64 *)(dst + 10);
-	    __m64 vd6 = *(__m64 *)(dst + 12);
-	    __m64 vd7 = *(__m64 *)(dst + 14);
-
-	    __m64 vs0 = *(__m64 *)(src + 0);
-	    __m64 vs1 = *(__m64 *)(src + 2);
-	    __m64 vs2 = *(__m64 *)(src + 4);
-	    __m64 vs3 = *(__m64 *)(src + 6);
-	    __m64 vs4 = *(__m64 *)(src + 8);
-	    __m64 vs5 = *(__m64 *)(src + 10);
-	    __m64 vs6 = *(__m64 *)(src + 12);
-	    __m64 vs7 = *(__m64 *)(src + 14);
-
-	    vd0 = pack8888 (
-	        in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
-	        in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1)));
-
-	    vd1 = pack8888 (
-	        in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)),
-	        in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1)));
-
-	    vd2 = pack8888 (
-	        in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)),
-	        in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1)));
-
-	    vd3 = pack8888 (
-	        in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)),
-	        in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1)));
-
-	    vd4 = pack8888 (
-	        in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)),
-	        in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1)));
-
-	    vd5 = pack8888 (
-	        in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)),
-	        in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1)));
-
-	    vd6 = pack8888 (
-	        in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)),
-	        in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1)));
-
-	    vd7 = pack8888 (
-	        in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)),
-	        in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1)));
-
-	    *(__m64 *)(dst + 0) = vd0;
-	    *(__m64 *)(dst + 2) = vd1;
-	    *(__m64 *)(dst + 4) = vd2;
-	    *(__m64 *)(dst + 6) = vd3;
-	    *(__m64 *)(dst + 8) = vd4;
-	    *(__m64 *)(dst + 10) = vd5;
-	    *(__m64 *)(dst + 12) = vd6;
-	    *(__m64 *)(dst + 14) = vd7;
-
-	    w -= 16;
-	    dst += 16;
-	    src += 16;
-	}
-
-	while (w)
-	{
-	    __m64 s = load8888 (*src | 0xff000000);
-	    __m64 d = load8888 (*dst);
-
-	    *dst = store8888 (in_over (s, srca, vmask, d));
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
-                              pixman_op_t              op,
-                              pixman_image_t *         src_image,
-                              pixman_image_t *         mask_image,
-                              pixman_image_t *         dst_image,
-                              int32_t                  src_x,
-                              int32_t                  src_y,
-                              int32_t                  mask_x,
-                              int32_t                  mask_y,
-                              int32_t                  dest_x,
-                              int32_t                  dest_y,
-                              int32_t                  width,
-                              int32_t                  height)
-{
-    uint32_t *dst_line, *dst;
-    uint32_t *src_line, *src;
-    uint32_t s;
-    int dst_stride, src_stride;
-    uint8_t a;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    s = *src++;
-	    a = s >> 24;
-
-	    if (a == 0xff)
-	    {
-		*dst = s;
-	    }
-	    else if (s)
-	    {
-		__m64 ms, sa;
-		ms = load8888 (s);
-		sa = expand_alpha (ms);
-		*dst = store8888 (over (ms, sa, load8888 (*dst)));
-	    }
-
-	    dst++;
-	}
-    }
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
-                              pixman_op_t              op,
-                              pixman_image_t *         src_image,
-                              pixman_image_t *         mask_image,
-                              pixman_image_t *         dst_image,
-                              int32_t                  src_x,
-                              int32_t                  src_y,
-                              int32_t                  mask_x,
-                              int32_t                  mask_y,
-                              int32_t                  dest_x,
-                              int32_t                  dest_y,
-                              int32_t                  width,
-                              int32_t                  height)
-{
-    uint16_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-#if 0
-    /* FIXME */
-    assert (src_image->drawable == mask_image->drawable);
-#endif
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    __m64 vsrc = load8888 (*src);
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (
-		over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
-
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-
-	CHECKPOINT ();
-
-	while (w >= 4)
-	{
-	    __m64 vsrc0, vsrc1, vsrc2, vsrc3;
-	    __m64 vdest;
-
-	    vsrc0 = load8888 (*(src + 0));
-	    vsrc1 = load8888 (*(src + 1));
-	    vsrc2 = load8888 (*(src + 2));
-	    vsrc3 = load8888 (*(src + 3));
-
-	    vdest = *(__m64 *)dst;
-
-	    vdest = pack_565 (over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)), vdest, 0);
-	    vdest = pack_565 (over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)), vdest, 1);
-	    vdest = pack_565 (over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)), vdest, 2);
-	    vdest = pack_565 (over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)), vdest, 3);
-
-	    *(__m64 *)dst = vdest;
-
-	    w -= 4;
-	    dst += 4;
-	    src += 4;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    __m64 vsrc = load8888 (*src);
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
-
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             pixman_image_t *         src_image,
-                             pixman_image_t *         mask_image,
-                             pixman_image_t *         dst_image,
-                             int32_t                  src_x,
-                             int32_t                  src_y,
-                             int32_t                  mask_x,
-                             int32_t                  mask_y,
-                             int32_t                  dest_x,
-                             int32_t                  dest_y,
-                             int32_t                  width,
-                             int32_t                  height)
-{
-    uint32_t src, srca;
-    uint32_t *dst_line, *dst;
-    uint8_t *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    __m64 vsrc, vsrca;
-    uint64_t srcsrc;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    srcsrc = (uint64_t)src << 32 | src;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    vsrc = load8888 (src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		__m64 vdest = in_over (vsrc, vsrca,
-				       expand_alpha_rev (to_m64 (m)),
-				       load8888 (*dst));
-
-		*dst = store8888 (vdest);
-	    }
-
-	    w--;
-	    mask++;
-	    dst++;
-	}
-
-	CHECKPOINT ();
-
-	while (w >= 2)
-	{
-	    uint64_t m0, m1;
-
-	    m0 = *mask;
-	    m1 = *(mask + 1);
-
-	    if (srca == 0xff && (m0 & m1) == 0xff)
-	    {
-		*(uint64_t *)dst = srcsrc;
-	    }
-	    else if (m0 | m1)
-	    {
-		__m64 vdest;
-		__m64 dest0, dest1;
-
-		vdest = *(__m64 *)dst;
-
-		dest0 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m0)),
-				 expand8888 (vdest, 0));
-		dest1 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m1)),
-				 expand8888 (vdest, 1));
-
-		*(__m64 *)dst = pack8888 (dest0, dest1);
-	    }
-
-	    mask += 2;
-	    dst += 2;
-	    w -= 2;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		__m64 vdest = load8888 (*dst);
-
-		vdest = in_over (
-		    vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest);
-		*dst = store8888 (vdest);
-	    }
-
-	    w--;
-	    mask++;
-	    dst++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-pixman_bool_t
-pixman_fill_mmx (uint32_t *bits,
-                 int       stride,
-                 int       bpp,
-                 int       x,
-                 int       y,
-                 int       width,
-                 int       height,
-                 uint32_t xor)
-{
-    uint64_t fill;
-    __m64 vfill;
-    uint32_t byte_width;
-    uint8_t     *byte_line;
-
-#ifdef __GNUC__
-    __m64 v1, v2, v3, v4, v5, v6, v7;
-#endif
-
-    if (bpp != 16 && bpp != 32 && bpp != 8)
-	return FALSE;
-
-    if (bpp == 8)
-    {
-	stride = stride * (int) sizeof (uint32_t) / 1;
-	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
-	byte_width = width;
-	stride *= 1;
-        xor = (xor & 0xff) * 0x01010101;
-    }
-    else if (bpp == 16)
-    {
-	stride = stride * (int) sizeof (uint32_t) / 2;
-	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
-	byte_width = 2 * width;
-	stride *= 2;
-        xor = (xor & 0xffff) * 0x00010001;
-    }
-    else
-    {
-	stride = stride * (int) sizeof (uint32_t) / 4;
-	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
-	byte_width = 4 * width;
-	stride *= 4;
-    }
-
-    fill = ((uint64_t)xor << 32) | xor;
-    vfill = to_m64 (fill);
-
-#ifdef __GNUC__
-    __asm__ (
-        "movq		%7,	%0\n"
-        "movq		%7,	%1\n"
-        "movq		%7,	%2\n"
-        "movq		%7,	%3\n"
-        "movq		%7,	%4\n"
-        "movq		%7,	%5\n"
-        "movq		%7,	%6\n"
-	: "=&y" (v1), "=&y" (v2), "=&y" (v3),
-	  "=&y" (v4), "=&y" (v5), "=&y" (v6), "=y" (v7)
-	: "y" (vfill));
-#endif
-
-    while (height--)
-    {
-	int w;
-	uint8_t *d = byte_line;
-
-	byte_line += stride;
-	w = byte_width;
-
-	while (w >= 1 && ((unsigned long)d & 1))
-	{
-	    *(uint8_t *)d = (xor & 0xff);
-	    w--;
-	    d++;
-	}
-
-	while (w >= 2 && ((unsigned long)d & 3))
-	{
-	    *(uint16_t *)d = xor;
-	    w -= 2;
-	    d += 2;
-	}
-
-	while (w >= 4 && ((unsigned long)d & 7))
-	{
-	    *(uint32_t *)d = xor;
-
-	    w -= 4;
-	    d += 4;
-	}
-
-	while (w >= 64)
-	{
-#ifdef __GNUC__
-	    __asm__ (
-	        "movq	%1,	  (%0)\n"
-	        "movq	%2,	 8(%0)\n"
-	        "movq	%3,	16(%0)\n"
-	        "movq	%4,	24(%0)\n"
-	        "movq	%5,	32(%0)\n"
-	        "movq	%6,	40(%0)\n"
-	        "movq	%7,	48(%0)\n"
-	        "movq	%8,	56(%0)\n"
-		:
-		: "r" (d),
-		  "y" (vfill), "y" (v1), "y" (v2), "y" (v3),
-		  "y" (v4), "y" (v5), "y" (v6), "y" (v7)
-		: "memory");
-#else
-	    *(__m64*) (d +  0) = vfill;
-	    *(__m64*) (d +  8) = vfill;
-	    *(__m64*) (d + 16) = vfill;
-	    *(__m64*) (d + 24) = vfill;
-	    *(__m64*) (d + 32) = vfill;
-	    *(__m64*) (d + 40) = vfill;
-	    *(__m64*) (d + 48) = vfill;
-	    *(__m64*) (d + 56) = vfill;
-#endif
-	    w -= 64;
-	    d += 64;
-	}
-
-	while (w >= 4)
-	{
-	    *(uint32_t *)d = xor;
-
-	    w -= 4;
-	    d += 4;
-	}
-	while (w >= 2)
-	{
-	    *(uint16_t *)d = xor;
-	    w -= 2;
-	    d += 2;
-	}
-	while (w >= 1)
-	{
-	    *(uint8_t *)d = (xor & 0xff);
-	    w--;
-	    d++;
-	}
-
-    }
-
-    _mm_empty ();
-    return TRUE;
-}
-
-static void
-mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            pixman_image_t *         src_image,
-                            pixman_image_t *         mask_image,
-                            pixman_image_t *         dst_image,
-                            int32_t                  src_x,
-                            int32_t                  src_y,
-                            int32_t                  mask_x,
-                            int32_t                  mask_y,
-                            int32_t                  dest_x,
-                            int32_t                  dest_y,
-                            int32_t                  width,
-                            int32_t                  height)
-{
-    uint32_t src, srca;
-    uint32_t    *dst_line, *dst;
-    uint8_t     *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    __m64 vsrc, vsrca;
-    uint64_t srcsrc;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-    {
-	pixman_fill_mmx (dst_image->bits.bits, dst_image->bits.rowstride,
-			 PIXMAN_FORMAT_BPP (dst_image->bits.format),
-	                 dest_x, dest_y, width, height, 0);
-	return;
-    }
-
-    srcsrc = (uint64_t)src << 32 | src;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    vsrc = load8888 (src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
-
-		*dst = store8888 (vdest);
-	    }
-	    else
-	    {
-		*dst = 0;
-	    }
-
-	    w--;
-	    mask++;
-	    dst++;
-	}
-
-	CHECKPOINT ();
-
-	while (w >= 2)
-	{
-	    uint64_t m0, m1;
-	    m0 = *mask;
-	    m1 = *(mask + 1);
-
-	    if (srca == 0xff && (m0 & m1) == 0xff)
-	    {
-		*(uint64_t *)dst = srcsrc;
-	    }
-	    else if (m0 | m1)
-	    {
-		__m64 vdest;
-		__m64 dest0, dest1;
-
-		vdest = *(__m64 *)dst;
-
-		dest0 = in (vsrc, expand_alpha_rev (to_m64 (m0)));
-		dest1 = in (vsrc, expand_alpha_rev (to_m64 (m1)));
-
-		*(__m64 *)dst = pack8888 (dest0, dest1);
-	    }
-	    else
-	    {
-		*(uint64_t *)dst = 0;
-	    }
-
-	    mask += 2;
-	    dst += 2;
-	    w -= 2;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		__m64 vdest = load8888 (*dst);
-
-		vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
-		*dst = store8888 (vdest);
-	    }
-	    else
-	    {
-		*dst = 0;
-	    }
-
-	    w--;
-	    mask++;
-	    dst++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             pixman_image_t *         src_image,
-                             pixman_image_t *         mask_image,
-                             pixman_image_t *         dst_image,
-                             int32_t                  src_x,
-                             int32_t                  src_y,
-                             int32_t                  mask_x,
-                             int32_t                  mask_y,
-                             int32_t                  dest_x,
-                             int32_t                  dest_y,
-                             int32_t                  width,
-                             int32_t                  height)
-{
-    uint32_t src, srca;
-    uint16_t *dst_line, *dst;
-    uint8_t *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    __m64 vsrc, vsrca, tmp;
-    uint64_t srcsrcsrcsrc, src16;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    vsrc = load8888 (src);
-    vsrca = expand_alpha (vsrc);
-
-    tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0);
-    src16 = to_uint64 (tmp);
-
-    srcsrcsrcsrc =
-	(uint64_t)src16 << 48 | (uint64_t)src16 << 32 |
-	(uint64_t)src16 << 16 | (uint64_t)src16;
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		uint64_t d = *dst;
-		__m64 vd = to_m64 (d);
-		__m64 vdest = in_over (
-		    vsrc, vsrca, expand_alpha_rev (to_m64 (m)), expand565 (vd, 0));
-
-		vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
-		*dst = to_uint64 (vd);
-	    }
-
-	    w--;
-	    mask++;
-	    dst++;
-	}
-
-	CHECKPOINT ();
-
-	while (w >= 4)
-	{
-	    uint64_t m0, m1, m2, m3;
-	    m0 = *mask;
-	    m1 = *(mask + 1);
-	    m2 = *(mask + 2);
-	    m3 = *(mask + 3);
-
-	    if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
-	    {
-		*(uint64_t *)dst = srcsrcsrcsrc;
-	    }
-	    else if (m0 | m1 | m2 | m3)
-	    {
-		__m64 vdest;
-		__m64 vm0, vm1, vm2, vm3;
-
-		vdest = *(__m64 *)dst;
-
-		vm0 = to_m64 (m0);
-		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm0),
-					   expand565 (vdest, 0)), vdest, 0);
-		vm1 = to_m64 (m1);
-		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm1),
-					   expand565 (vdest, 1)), vdest, 1);
-		vm2 = to_m64 (m2);
-		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm2),
-					   expand565 (vdest, 2)), vdest, 2);
-		vm3 = to_m64 (m3);
-		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm3),
-					   expand565 (vdest, 3)), vdest, 3);
-
-		*(__m64 *)dst = vdest;
-	    }
-
-	    w -= 4;
-	    mask += 4;
-	    dst += 4;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		uint64_t d = *dst;
-		__m64 vd = to_m64 (d);
-		__m64 vdest = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m)),
-				       expand565 (vd, 0));
-		vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
-		*dst = to_uint64 (vd);
-	    }
-
-	    w--;
-	    mask++;
-	    dst++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                pixman_image_t *         src_image,
-                                pixman_image_t *         mask_image,
-                                pixman_image_t *         dst_image,
-                                int32_t                  src_x,
-                                int32_t                  src_y,
-                                int32_t                  mask_x,
-                                int32_t                  mask_y,
-                                int32_t                  dest_x,
-                                int32_t                  dest_y,
-                                int32_t                  width,
-                                int32_t                  height)
-{
-    uint16_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-#if 0
-    /* FIXME */
-    assert (src_image->drawable == mask_image->drawable);
-#endif
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	CHECKPOINT ();
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    __m64 vsrc = load8888 (*src);
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0);
-
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-
-	CHECKPOINT ();
-
-	while (w >= 4)
-	{
-	    uint32_t s0, s1, s2, s3;
-	    unsigned char a0, a1, a2, a3;
-
-	    s0 = *src;
-	    s1 = *(src + 1);
-	    s2 = *(src + 2);
-	    s3 = *(src + 3);
-
-	    a0 = (s0 >> 24);
-	    a1 = (s1 >> 24);
-	    a2 = (s2 >> 24);
-	    a3 = (s3 >> 24);
-
-	    if ((a0 & a1 & a2 & a3) == 0xFF)
-	    {
-		__m64 vdest;
-		vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0);
-		vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1);
-		vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2);
-		vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3);
-
-		*(__m64 *)dst = vdest;
-	    }
-	    else if (s0 | s1 | s2 | s3)
-	    {
-		__m64 vdest = *(__m64 *)dst;
-
-		vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0);
-		vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1);
-		vdest = pack_565 (over_rev_non_pre (load8888 (s2), expand565 (vdest, 2)), vdest, 2);
-		vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3);
-
-		*(__m64 *)dst = vdest;
-	    }
-
-	    w -= 4;
-	    dst += 4;
-	    src += 4;
-	}
-
-	CHECKPOINT ();
-
-	while (w)
-	{
-	    __m64 vsrc = load8888 (*src);
-	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (to_m64 (d), 0);
-
-	    vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0);
-
-	    *dst = to_uint64 (vdest);
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                pixman_image_t *         src_image,
-                                pixman_image_t *         mask_image,
-                                pixman_image_t *         dst_image,
-                                int32_t                  src_x,
-                                int32_t                  src_y,
-                                int32_t                  mask_x,
-                                int32_t                  mask_y,
-                                int32_t                  dest_x,
-                                int32_t                  dest_y,
-                                int32_t                  width,
-                                int32_t                  height)
-{
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-#if 0
-    /* FIXME */
-    assert (src_image->drawable == mask_image->drawable);
-#endif
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    __m64 s = load8888 (*src);
-	    __m64 d = load8888 (*dst);
-
-	    *dst = store8888 (over_rev_non_pre (s, d));
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-
-	while (w >= 2)
-	{
-	    uint64_t s0, s1;
-	    unsigned char a0, a1;
-	    __m64 d0, d1;
-
-	    s0 = *src;
-	    s1 = *(src + 1);
-
-	    a0 = (s0 >> 24);
-	    a1 = (s1 >> 24);
-
-	    if ((a0 & a1) == 0xFF)
-	    {
-		d0 = invert_colors (load8888 (s0));
-		d1 = invert_colors (load8888 (s1));
-
-		*(__m64 *)dst = pack8888 (d0, d1);
-	    }
-	    else if (s0 | s1)
-	    {
-		__m64 vdest = *(__m64 *)dst;
-
-		d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0));
-		d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1));
-
-		*(__m64 *)dst = pack8888 (d0, d1);
-	    }
-
-	    w -= 2;
-	    dst += 2;
-	    src += 2;
-	}
-
-	while (w)
-	{
-	    __m64 s = load8888 (*src);
-	    __m64 d = load8888 (*dst);
-
-	    *dst = store8888 (over_rev_non_pre (s, d));
-
-	    w--;
-	    dst++;
-	    src++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
-                                   pixman_op_t              op,
-                                   pixman_image_t *         src_image,
-                                   pixman_image_t *         mask_image,
-                                   pixman_image_t *         dst_image,
-                                   int32_t                  src_x,
-                                   int32_t                  src_y,
-                                   int32_t                  mask_x,
-                                   int32_t                  mask_y,
-                                   int32_t                  dest_x,
-                                   int32_t                  dest_y,
-                                   int32_t                  width,
-                                   int32_t                  height)
-{
-    uint32_t src, srca;
-    uint16_t    *dst_line;
-    uint32_t    *mask_line;
-    int dst_stride, mask_stride;
-    __m64 vsrc, vsrca;
-
-    CHECKPOINT ();
-
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
-    vsrc = load8888 (src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	int twidth = width;
-	uint32_t *p = (uint32_t *)mask_line;
-	uint16_t *q = (uint16_t *)dst_line;
-
-	while (twidth && ((unsigned long)q & 7))
-	{
-	    uint32_t m = *(uint32_t *)p;
-
-	    if (m)
-	    {
-		uint64_t d = *q;
-		__m64 vdest = expand565 (to_m64 (d), 0);
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
-		*q = to_uint64 (vdest);
-	    }
-
-	    twidth--;
-	    p++;
-	    q++;
-	}
-
-	while (twidth >= 4)
-	{
-	    uint32_t m0, m1, m2, m3;
-
-	    m0 = *p;
-	    m1 = *(p + 1);
-	    m2 = *(p + 2);
-	    m3 = *(p + 3);
-
-	    if ((m0 | m1 | m2 | m3))
-	    {
-		__m64 vdest = *(__m64 *)q;
-
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0);
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1);
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2);
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m3), expand565 (vdest, 3)), vdest, 3);
-
-		*(__m64 *)q = vdest;
-	    }
-	    twidth -= 4;
-	    p += 4;
-	    q += 4;
-	}
-
-	while (twidth)
-	{
-	    uint32_t m;
-
-	    m = *(uint32_t *)p;
-	    if (m)
-	    {
-		uint64_t d = *q;
-		__m64 vdest = expand565 (to_m64 (d), 0);
-		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
-		*q = to_uint64 (vdest);
-	    }
-
-	    twidth--;
-	    p++;
-	    q++;
-	}
-
-	mask_line += mask_stride;
-	dst_line += dst_stride;
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
-                        pixman_op_t              op,
-                        pixman_image_t *         src_image,
-                        pixman_image_t *         mask_image,
-                        pixman_image_t *         dst_image,
-                        int32_t                  src_x,
-                        int32_t                  src_y,
-                        int32_t                  mask_x,
-                        int32_t                  mask_y,
-                        int32_t                  dest_x,
-                        int32_t                  dest_y,
-                        int32_t                  width,
-                        int32_t                  height)
-{
-    uint8_t *dst_line, *dst;
-    uint8_t *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    uint32_t src;
-    uint8_t sa;
-    __m64 vsrc, vsrca;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    sa = src >> 24;
-
-    vsrc = load8888 (src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	if ((((unsigned long)dst_image & 3) == 0) &&
-	    (((unsigned long)src_image & 3) == 0))
-	{
-	    while (w >= 4)
-	    {
-		uint32_t m;
-		__m64 vmask;
-		__m64 vdest;
-
-		m = 0;
-
-		vmask = load8888 (*(uint32_t *)mask);
-		vdest = load8888 (*(uint32_t *)dst);
-
-		*(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
-
-		dst += 4;
-		mask += 4;
-		w -= 4;
-	    }
-	}
-
-	while (w--)
-	{
-	    uint16_t tmp;
-	    uint8_t a;
-	    uint32_t m, d;
-
-	    a = *mask++;
-	    d = *dst;
-
-	    m = MUL_UN8 (sa, a, tmp);
-	    d = MUL_UN8 (m, d, tmp);
-
-	    *dst++ = d;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_in_8_8 (pixman_implementation_t *imp,
-                      pixman_op_t              op,
-                      pixman_image_t *         src_image,
-                      pixman_image_t *         mask_image,
-                      pixman_image_t *         dst_image,
-                      int32_t                  src_x,
-                      int32_t                  src_y,
-                      int32_t                  mask_x,
-                      int32_t                  mask_y,
-                      int32_t                  dest_x,
-                      int32_t                  dest_y,
-                      int32_t                  width,
-                      int32_t                  height)
-{
-    uint8_t     *dst_line, *dst;
-    uint8_t     *src_line, *src;
-    int src_stride, dst_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	if ((((unsigned long)dst_image & 3) == 0) &&
-	    (((unsigned long)src_image & 3) == 0))
-	{
-	    while (w >= 4)
-	    {
-		uint32_t *s = (uint32_t *)src;
-		uint32_t *d = (uint32_t *)dst;
-
-		*d = store8888 (in (load8888 (*s), load8888 (*d)));
-
-		w -= 4;
-		dst += 4;
-		src += 4;
-	    }
-	}
-
-	while (w--)
-	{
-	    uint8_t s, d;
-	    uint16_t tmp;
-
-	    s = *src;
-	    d = *dst;
-
-	    *dst = MUL_UN8 (s, d, tmp);
-
-	    src++;
-	    dst++;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
-			 pixman_op_t              op,
-			 pixman_image_t *         src_image,
-			 pixman_image_t *         mask_image,
-			 pixman_image_t *         dst_image,
-			 int32_t                  src_x,
-			 int32_t                  src_y,
-			 int32_t                  mask_x,
-			 int32_t                  mask_y,
-			 int32_t                  dest_x,
-			 int32_t                  dest_y,
-			 int32_t                  width,
-			 int32_t                  height)
-{
-    uint8_t     *dst_line, *dst;
-    uint8_t     *mask_line, *mask;
-    int dst_stride, mask_stride;
-    int32_t w;
-    uint32_t src;
-    uint8_t sa;
-    __m64 vsrc, vsrca;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    sa = src >> 24;
-
-    if (src == 0)
-	return;
-
-    vsrc = load8888 (src);
-    vsrca = expand_alpha (vsrc);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-	if ((((unsigned long)mask_image & 3) == 0) &&
-	    (((unsigned long)dst_image  & 3) == 0))
-	{
-	    while (w >= 4)
-	    {
-		__m64 vmask = load8888 (*(uint32_t *)mask);
-		__m64 vdest = load8888 (*(uint32_t *)dst);
-
-		*(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
-
-		w -= 4;
-		dst += 4;
-		mask += 4;
-	    }
-	}
-
-	while (w--)
-	{
-	    uint16_t tmp;
-	    uint16_t a;
-	    uint32_t m, d;
-	    uint32_t r;
-
-	    a = *mask++;
-	    d = *dst;
-
-	    m = MUL_UN8 (sa, a, tmp);
-	    r = ADD_UN8 (m, d, tmp);
-
-	    *dst++ = r;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_add_8_8 (pixman_implementation_t *imp,
-		       pixman_op_t              op,
-		       pixman_image_t *         src_image,
-		       pixman_image_t *         mask_image,
-		       pixman_image_t *         dst_image,
-		       int32_t                  src_x,
-		       int32_t                  src_y,
-		       int32_t                  mask_x,
-		       int32_t                  mask_y,
-		       int32_t                  dest_x,
-		       int32_t                  dest_y,
-		       int32_t                  width,
-		       int32_t                  height)
-{
-    uint8_t *dst_line, *dst;
-    uint8_t *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-    uint8_t s, d;
-    uint16_t t;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    s = *src;
-	    d = *dst;
-	    t = d + s;
-	    s = t | (0 - (t >> 8));
-	    *dst = s;
-
-	    dst++;
-	    src++;
-	    w--;
-	}
-
-	while (w >= 8)
-	{
-	    *(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
-	    dst += 8;
-	    src += 8;
-	    w -= 8;
-	}
-
-	while (w)
-	{
-	    s = *src;
-	    d = *dst;
-	    t = d + s;
-	    s = t | (0 - (t >> 8));
-	    *dst = s;
-
-	    dst++;
-	    src++;
-	    w--;
-	}
-    }
-
-    _mm_empty ();
-}
-
-static void
-mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             pixman_image_t *         src_image,
-                             pixman_image_t *         mask_image,
-                             pixman_image_t *         dst_image,
-                             int32_t                  src_x,
-                             int32_t                  src_y,
-                             int32_t                  mask_x,
-                             int32_t                  mask_y,
-                             int32_t                  dest_x,
-                             int32_t                  dest_y,
-                             int32_t                  width,
-                             int32_t                  height)
-{
-    __m64 dst64;
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    CHECKPOINT ();
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w && (unsigned long)dst & 7)
-	{
-	    *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
-	                                           _mm_cvtsi32_si64 (*dst)));
-	    dst++;
-	    src++;
-	    w--;
-	}
-
-	while (w >= 2)
-	{
-	    dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
-	    *(uint64_t*)dst = to_uint64 (dst64);
-	    dst += 2;
-	    src += 2;
-	    w -= 2;
-	}
-
-	if (w)
-	{
-	    *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
-	                                           _mm_cvtsi32_si64 (*dst)));
-
-	}
-    }
-
-    _mm_empty ();
-}
-
-static pixman_bool_t
-pixman_blt_mmx (uint32_t *src_bits,
-                uint32_t *dst_bits,
-                int       src_stride,
-                int       dst_stride,
-                int       src_bpp,
-                int       dst_bpp,
-                int       src_x,
-                int       src_y,
-                int       dst_x,
-                int       dst_y,
-                int       width,
-                int       height)
-{
-    uint8_t *   src_bytes;
-    uint8_t *   dst_bytes;
-    int byte_width;
-
-    if (src_bpp != dst_bpp)
-	return FALSE;
-
-    if (src_bpp == 16)
-    {
-	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
-	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
-	src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
-	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
-	byte_width = 2 * width;
-	src_stride *= 2;
-	dst_stride *= 2;
-    }
-    else if (src_bpp == 32)
-    {
-	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
-	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
-	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
-	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
-	byte_width = 4 * width;
-	src_stride *= 4;
-	dst_stride *= 4;
-    }
-    else
-    {
-	return FALSE;
-    }
-
-    while (height--)
-    {
-	int w;
-	uint8_t *s = src_bytes;
-	uint8_t *d = dst_bytes;
-	src_bytes += src_stride;
-	dst_bytes += dst_stride;
-	w = byte_width;
-
-	while (w >= 2 && ((unsigned long)d & 3))
-	{
-	    *(uint16_t *)d = *(uint16_t *)s;
-	    w -= 2;
-	    s += 2;
-	    d += 2;
-	}
-
-	while (w >= 4 && ((unsigned long)d & 7))
-	{
-	    *(uint32_t *)d = *(uint32_t *)s;
-
-	    w -= 4;
-	    s += 4;
-	    d += 4;
-	}
-
-	while (w >= 64)
-	{
-#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
-	    __asm__ (
-	        "movq	  (%1),	  %%mm0\n"
-	        "movq	 8(%1),	  %%mm1\n"
-	        "movq	16(%1),	  %%mm2\n"
-	        "movq	24(%1),	  %%mm3\n"
-	        "movq	32(%1),	  %%mm4\n"
-	        "movq	40(%1),	  %%mm5\n"
-	        "movq	48(%1),	  %%mm6\n"
-	        "movq	56(%1),	  %%mm7\n"
-
-	        "movq	%%mm0,	  (%0)\n"
-	        "movq	%%mm1,	 8(%0)\n"
-	        "movq	%%mm2,	16(%0)\n"
-	        "movq	%%mm3,	24(%0)\n"
-	        "movq	%%mm4,	32(%0)\n"
-	        "movq	%%mm5,	40(%0)\n"
-	        "movq	%%mm6,	48(%0)\n"
-	        "movq	%%mm7,	56(%0)\n"
-		:
-		: "r" (d), "r" (s)
-		: "memory",
-		  "%mm0", "%mm1", "%mm2", "%mm3",
-		  "%mm4", "%mm5", "%mm6", "%mm7");
-#else
-	    __m64 v0 = *(__m64 *)(s + 0);
-	    __m64 v1 = *(__m64 *)(s + 8);
-	    __m64 v2 = *(__m64 *)(s + 16);
-	    __m64 v3 = *(__m64 *)(s + 24);
-	    __m64 v4 = *(__m64 *)(s + 32);
-	    __m64 v5 = *(__m64 *)(s + 40);
-	    __m64 v6 = *(__m64 *)(s + 48);
-	    __m64 v7 = *(__m64 *)(s + 56);
-	    *(__m64 *)(d + 0)  = v0;
-	    *(__m64 *)(d + 8)  = v1;
-	    *(__m64 *)(d + 16) = v2;
-	    *(__m64 *)(d + 24) = v3;
-	    *(__m64 *)(d + 32) = v4;
-	    *(__m64 *)(d + 40) = v5;
-	    *(__m64 *)(d + 48) = v6;
-	    *(__m64 *)(d + 56) = v7;
-#endif
-
-	    w -= 64;
-	    s += 64;
-	    d += 64;
-	}
-	while (w >= 4)
-	{
-	    *(uint32_t *)d = *(uint32_t *)s;
-
-	    w -= 4;
-	    s += 4;
-	    d += 4;
-	}
-	if (w >= 2)
-	{
-	    *(uint16_t *)d = *(uint16_t *)s;
-	    w -= 2;
-	    s += 2;
-	    d += 2;
-	}
-    }
-
-    _mm_empty ();
-
-    return TRUE;
-}
-
-static void
-mmx_composite_copy_area (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         pixman_image_t *         src_image,
-                         pixman_image_t *         mask_image,
-                         pixman_image_t *         dst_image,
-                         int32_t                  src_x,
-                         int32_t                  src_y,
-                         int32_t                  mask_x,
-                         int32_t                  mask_y,
-                         int32_t                  dest_x,
-                         int32_t                  dest_y,
-                         int32_t                  width,
-                         int32_t                  height)
-{
-    pixman_blt_mmx (src_image->bits.bits,
-                    dst_image->bits.bits,
-                    src_image->bits.rowstride,
-                    dst_image->bits.rowstride,
-                    PIXMAN_FORMAT_BPP (src_image->bits.format),
-                    PIXMAN_FORMAT_BPP (dst_image->bits.format),
-                    src_x, src_y, dest_x, dest_y, width, height);
-}
-
-#if 0
-static void
-mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                pixman_image_t *         src_image,
-                                pixman_image_t *         mask_image,
-                                pixman_image_t *         dst_image,
-                                int32_t                  src_x,
-                                int32_t                  src_y,
-                                int32_t                  mask_x,
-                                int32_t                  mask_y,
-                                int32_t                  dest_x,
-                                int32_t                  dest_y,
-                                int32_t                  width,
-                                int32_t                  height)
-{
-    uint32_t  *src, *src_line;
-    uint32_t  *dst, *dst_line;
-    uint8_t  *mask, *mask_line;
-    int src_stride, mask_stride, dst_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    while (height--)
-    {
-	src = src_line;
-	src_line += src_stride;
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-
-	w = width;
-
-	while (w--)
-	{
-	    uint64_t m = *mask;
-
-	    if (m)
-	    {
-		__m64 s = load8888 (*src | 0xff000000);
-
-		if (m == 0xff)
-		{
-		    *dst = store8888 (s);
-		}
-		else
-		{
-		    __m64 sa = expand_alpha (s);
-		    __m64 vm = expand_alpha_rev (to_m64 (m));
-		    __m64 vdest = in_over (s, sa, vm, load8888 (*dst));
-
-		    *dst = store8888 (vdest);
-		}
-	    }
-
-	    mask++;
-	    dst++;
-	    src++;
-	}
-    }
-
-    _mm_empty ();
-}
-#endif
-
-static const pixman_fast_path_t mmx_fast_paths[] =
-{
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       r5g6b5,   mmx_composite_over_n_8_0565       ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       b5g6r5,   mmx_composite_over_n_8_0565       ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       a8r8g8b8, mmx_composite_over_n_8_8888       ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       x8r8g8b8, mmx_composite_over_n_8_8888       ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       a8b8g8r8, mmx_composite_over_n_8_8888       ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       x8b8g8r8, mmx_composite_over_n_8_8888       ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8r8g8b8, a8r8g8b8, mmx_composite_over_n_8888_8888_ca ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8r8g8b8, x8r8g8b8, mmx_composite_over_n_8888_8888_ca ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8r8g8b8, r5g6b5,   mmx_composite_over_n_8888_0565_ca ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8b8g8r8, a8b8g8r8, mmx_composite_over_n_8888_8888_ca ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8b8g8r8, x8b8g8r8, mmx_composite_over_n_8888_8888_ca ),
-    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8b8g8r8, b5g6r5,   mmx_composite_over_n_8888_0565_ca ),
-    PIXMAN_STD_FAST_PATH    (OVER, pixbuf,   pixbuf,   a8r8g8b8, mmx_composite_over_pixbuf_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, pixbuf,   pixbuf,   x8r8g8b8, mmx_composite_over_pixbuf_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, pixbuf,   pixbuf,   r5g6b5,   mmx_composite_over_pixbuf_0565    ),
-    PIXMAN_STD_FAST_PATH    (OVER, rpixbuf,  rpixbuf,  a8b8g8r8, mmx_composite_over_pixbuf_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, rpixbuf,  rpixbuf,  x8b8g8r8, mmx_composite_over_pixbuf_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, rpixbuf,  rpixbuf,  b5g6r5,   mmx_composite_over_pixbuf_0565    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, solid,    a8r8g8b8, mmx_composite_over_x888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, solid,    x8r8g8b8, mmx_composite_over_x888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, solid,    a8b8g8r8, mmx_composite_over_x888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, solid,    x8b8g8r8, mmx_composite_over_x888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, solid,    a8r8g8b8, mmx_composite_over_8888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, solid,    x8r8g8b8, mmx_composite_over_8888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, solid,    a8b8g8r8, mmx_composite_over_8888_n_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, solid,    x8b8g8r8, mmx_composite_over_8888_n_8888    ),
-#if 0
-    /* FIXME: This code is commented out since it's apparently
-     * not actually faster than the generic code.
-     */
-    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, a8,       x8r8g8b8, mmx_composite_over_x888_8_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, a8,       a8r8g8b8, mmx_composite_over_x888_8_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8b8r8g8, a8,       x8b8g8r8, mmx_composite_over_x888_8_8888    ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8b8r8g8, a8,       a8r8g8b8, mmx_composite_over_x888_8_8888    ),
-#endif
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     a8r8g8b8, mmx_composite_over_n_8888         ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     x8r8g8b8, mmx_composite_over_n_8888         ),
-    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     r5g6b5,   mmx_composite_over_n_0565         ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
-
-    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     a8r8g8b8, mmx_composite_over_8888_8888      ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     x8r8g8b8, mmx_composite_over_8888_8888      ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     r5g6b5,   mmx_composite_over_8888_0565      ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     a8b8g8r8, mmx_composite_over_8888_8888      ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     x8b8g8r8, mmx_composite_over_8888_8888      ),
-    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     b5g6r5,   mmx_composite_over_8888_0565      ),
-
-    PIXMAN_STD_FAST_PATH    (ADD,  a8r8g8b8, null,     a8r8g8b8, mmx_composite_add_8888_8888       ),
-    PIXMAN_STD_FAST_PATH    (ADD,  a8b8g8r8, null,     a8b8g8r8, mmx_composite_add_8888_8888       ),
-    PIXMAN_STD_FAST_PATH    (ADD,  a8,       null,     a8,       mmx_composite_add_8_8		   ),
-    PIXMAN_STD_FAST_PATH    (ADD,  solid,    a8,       a8,       mmx_composite_add_n_8_8           ),
-
-    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       a8r8g8b8, mmx_composite_src_n_8_8888        ),
-    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       x8r8g8b8, mmx_composite_src_n_8_8888        ),
-    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       a8b8g8r8, mmx_composite_src_n_8_8888        ),
-    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       x8b8g8r8, mmx_composite_src_n_8_8888        ),
-    PIXMAN_STD_FAST_PATH    (SRC,  a8r8g8b8, null,     a8r8g8b8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  a8b8g8r8, null,     a8b8g8r8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  a8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  a8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  x8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  x8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  r5g6b5,   null,     r5g6b5,   mmx_composite_copy_area           ),
-    PIXMAN_STD_FAST_PATH    (SRC,  b5g6r5,   null,     b5g6r5,   mmx_composite_copy_area           ),
-
-    PIXMAN_STD_FAST_PATH    (IN,   a8,       null,     a8,       mmx_composite_in_8_8              ),
-    PIXMAN_STD_FAST_PATH    (IN,   solid,    a8,       a8,       mmx_composite_in_n_8_8            ),
-
-    { PIXMAN_OP_NONE },
-};
-
-static pixman_bool_t
-mmx_blt (pixman_implementation_t *imp,
-         uint32_t *               src_bits,
-         uint32_t *               dst_bits,
-         int                      src_stride,
-         int                      dst_stride,
-         int                      src_bpp,
-         int                      dst_bpp,
-         int                      src_x,
-         int                      src_y,
-         int                      dst_x,
-         int                      dst_y,
-         int                      width,
-         int                      height)
-{
-    if (!pixman_blt_mmx (
-            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-            src_x, src_y, dst_x, dst_y, width, height))
-
-    {
-	return _pixman_implementation_blt (
-	    imp->delegate,
-	    src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-	    src_x, src_y, dst_x, dst_y, width, height);
-    }
-
-    return TRUE;
-}
-
-static pixman_bool_t
-mmx_fill (pixman_implementation_t *imp,
-          uint32_t *               bits,
-          int                      stride,
-          int                      bpp,
-          int                      x,
-          int                      y,
-          int                      width,
-          int                      height,
-          uint32_t xor)
-{
-    if (!pixman_fill_mmx (bits, stride, bpp, x, y, width, height, xor))
-    {
-	return _pixman_implementation_fill (
-	    imp->delegate, bits, stride, bpp, x, y, width, height, xor);
-    }
-
-    return TRUE;
-}
-
-pixman_implementation_t *
-_pixman_implementation_create_mmx (void)
-{
-    pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
-    pixman_implementation_t *imp = _pixman_implementation_create (general, mmx_fast_paths);
-
-    imp->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u;
-    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u;
-    imp->combine_32[PIXMAN_OP_IN] = mmx_combine_in_u;
-    imp->combine_32[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_u;
-    imp->combine_32[PIXMAN_OP_OUT] = mmx_combine_out_u;
-    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_u;
-    imp->combine_32[PIXMAN_OP_ATOP] = mmx_combine_atop_u;
-    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_u;
-    imp->combine_32[PIXMAN_OP_XOR] = mmx_combine_xor_u;
-    imp->combine_32[PIXMAN_OP_ADD] = mmx_combine_add_u;
-    imp->combine_32[PIXMAN_OP_SATURATE] = mmx_combine_saturate_u;
-
-    imp->combine_32_ca[PIXMAN_OP_SRC] = mmx_combine_src_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER] = mmx_combine_over_ca;
-    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN] = mmx_combine_in_ca;
-    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT] = mmx_combine_out_ca;
-    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP] = mmx_combine_atop_ca;
-    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_ca;
-    imp->combine_32_ca[PIXMAN_OP_XOR] = mmx_combine_xor_ca;
-    imp->combine_32_ca[PIXMAN_OP_ADD] = mmx_combine_add_ca;
-
-    imp->blt = mmx_blt;
-    imp->fill = mmx_fill;
-
-    return imp;
-}
-
-#endif /* USE_MMX */
+/*
+ * Copyright © 2004, 2005 Red Hat, Inc.
+ * Copyright © 2004 Nicholas Miell
+ * Copyright © 2005 Trolltech AS
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Red Hat makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Søren Sandmann (sandmann@redhat.com)
+ * Minor Improvements: Nicholas Miell (nmiell@gmail.com)
+ * MMX code paths for fbcompose.c by Lars Knoll (lars@trolltech.com)
+ *
+ * Based on work by Owen Taylor
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef USE_MMX
+
+#include <mmintrin.h>
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+
+#define no_vERBOSE
+
+#ifdef VERBOSE
+#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__)
+#else
+#define CHECKPOINT()
+#endif
+
+/* Notes about writing mmx code
+ *
+ * Give memory operands as the second operand. If you give one as the
+ * first, gcc will first load it into a register, then use that
+ * register.
+ *
+ *   i.e. use
+ *
+ *         _mm_mullo_pi16 (x, mmx_constant);
+ *
+ *   not
+ *
+ *         _mm_mullo_pi16 (mmx_constant, x);
+ *
+ * Also try to minimize dependencies. i.e. when you need a value, try
+ * to calculate it from a value that was calculated as early as
+ * possible.
+ */
+
+/* --------------- MMX primitives ------------------------------------- */
+
+#ifdef __GNUC__
+typedef uint64_t mmxdatafield;
+#else
+typedef __m64 mmxdatafield;
+/* If __m64 is defined as a struct or union, define M64_MEMBER to be the
+   name of the member used to access the data */
+# ifdef _MSC_VER
+#  define M64_MEMBER m64_u64
+# elif defined(__SUNPRO_C)
+#  define M64_MEMBER l_
+# endif
+#endif
+
+typedef struct
+{
+    mmxdatafield mmx_4x00ff;
+    mmxdatafield mmx_4x0080;
+    mmxdatafield mmx_565_rgb;
+    mmxdatafield mmx_565_unpack_multiplier;
+    mmxdatafield mmx_565_r;
+    mmxdatafield mmx_565_g;
+    mmxdatafield mmx_565_b;
+    mmxdatafield mmx_mask_0;
+    mmxdatafield mmx_mask_1;
+    mmxdatafield mmx_mask_2;
+    mmxdatafield mmx_mask_3;
+    mmxdatafield mmx_full_alpha;
+    mmxdatafield mmx_ffff0000ffff0000;
+    mmxdatafield mmx_0000ffff00000000;
+    mmxdatafield mmx_000000000000ffff;
+} mmx_data_t;
+
+#if defined(_MSC_VER)
+# define MMXDATA_INIT(field, val) { val ## UI64 }
+#elif defined(M64_MEMBER)       /* __m64 is a struct, not an integral type */
+# define MMXDATA_INIT(field, val) field =   { val ## ULL }
+#else                           /* __m64 is an integral type */
+# define MMXDATA_INIT(field, val) field =   val ## ULL
+#endif
+
+static const mmx_data_t c =
+{
+    MMXDATA_INIT (.mmx_4x00ff,                   0x00ff00ff00ff00ff),
+    MMXDATA_INIT (.mmx_4x0080,                   0x0080008000800080),
+    MMXDATA_INIT (.mmx_565_rgb,                  0x000001f0003f001f),
+    MMXDATA_INIT (.mmx_565_unpack_multiplier,    0x0000008404100840),
+    MMXDATA_INIT (.mmx_565_r,                    0x000000f800000000),
+    MMXDATA_INIT (.mmx_565_g,                    0x0000000000fc0000),
+    MMXDATA_INIT (.mmx_565_b,                    0x00000000000000f8),
+    MMXDATA_INIT (.mmx_mask_0,                   0xffffffffffff0000),
+    MMXDATA_INIT (.mmx_mask_1,                   0xffffffff0000ffff),
+    MMXDATA_INIT (.mmx_mask_2,                   0xffff0000ffffffff),
+    MMXDATA_INIT (.mmx_mask_3,                   0x0000ffffffffffff),
+    MMXDATA_INIT (.mmx_full_alpha,               0x00ff000000000000),
+    MMXDATA_INIT (.mmx_ffff0000ffff0000,         0xffff0000ffff0000),
+    MMXDATA_INIT (.mmx_0000ffff00000000,         0x0000ffff00000000),
+    MMXDATA_INIT (.mmx_000000000000ffff,         0x000000000000ffff),
+};
+
+#ifdef __GNUC__
+#    ifdef __ICC
+#        define MC(x) to_m64 (c.mmx_ ## x)
+#    else
+#        define MC(x) ((__m64)c.mmx_ ## x)
+#    endif
+#else
+#    define MC(x) c.mmx_ ## x
+#endif
+
+static force_inline __m64
+to_m64 (uint64_t x)
+{
+#ifdef __ICC
+    return _mm_cvtsi64_m64 (x);
+#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
+    __m64 res;
+
+    res.M64_MEMBER = x;
+    return res;
+#else                           /* __m64 is an integral type */
+    return (__m64)x;
+#endif
+}
+
+static force_inline uint64_t
+to_uint64 (__m64 x)
+{
+#ifdef __ICC
+    return _mm_cvtm64_si64 (x);
+#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
+    uint64_t res = x.M64_MEMBER;
+    return res;
+#else                           /* __m64 is an integral type */
+    return (uint64_t)x;
+#endif
+}
+
+static force_inline __m64
+shift (__m64 v,
+       int   s)
+{
+    if (s > 0)
+	return _mm_slli_si64 (v, s);
+    else if (s < 0)
+	return _mm_srli_si64 (v, -s);
+    else
+	return v;
+}
+
+static force_inline __m64
+negate (__m64 mask)
+{
+    return _mm_xor_si64 (mask, MC (4x00ff));
+}
+
+static force_inline __m64
+pix_multiply (__m64 a, __m64 b)
+{
+    __m64 res;
+
+    res = _mm_mullo_pi16 (a, b);
+    res = _mm_adds_pu16 (res, MC (4x0080));
+    res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
+    res = _mm_srli_pi16 (res, 8);
+
+    return res;
+}
+
+static force_inline __m64
+pix_add (__m64 a, __m64 b)
+{
+    return _mm_adds_pu8 (a, b);
+}
+
+static force_inline __m64
+expand_alpha (__m64 pixel)
+{
+    __m64 t1, t2;
+
+    t1 = shift (pixel, -48);
+    t2 = shift (t1, 16);
+    t1 = _mm_or_si64 (t1, t2);
+    t2 = shift (t1, 32);
+    t1 = _mm_or_si64 (t1, t2);
+
+    return t1;
+}
+
+static force_inline __m64
+expand_alpha_rev (__m64 pixel)
+{
+    __m64 t1, t2;
+
+    /* move alpha to low 16 bits and zero the rest */
+    t1 = shift (pixel,  48);
+    t1 = shift (t1, -48);
+
+    t2 = shift (t1, 16);
+    t1 = _mm_or_si64 (t1, t2);
+    t2 = shift (t1, 32);
+    t1 = _mm_or_si64 (t1, t2);
+
+    return t1;
+}
+
+static force_inline __m64
+invert_colors (__m64 pixel)
+{
+    __m64 x, y, z;
+
+    x = y = z = pixel;
+
+    x = _mm_and_si64 (x, MC (ffff0000ffff0000));
+    y = _mm_and_si64 (y, MC (000000000000ffff));
+    z = _mm_and_si64 (z, MC (0000ffff00000000));
+
+    y = shift (y, 32);
+    z = shift (z, -32);
+
+    x = _mm_or_si64 (x, y);
+    x = _mm_or_si64 (x, z);
+
+    return x;
+}
+
+static force_inline __m64
+over (__m64 src,
+      __m64 srca,
+      __m64 dest)
+{
+    return _mm_adds_pu8 (src, pix_multiply (dest, negate (srca)));
+}
+
+static force_inline __m64
+over_rev_non_pre (__m64 src, __m64 dest)
+{
+    __m64 srca = expand_alpha (src);
+    __m64 srcfaaa = _mm_or_si64 (srca, MC (full_alpha));
+
+    return over (pix_multiply (invert_colors (src), srcfaaa), srca, dest);
+}
+
+static force_inline __m64
+in (__m64 src, __m64 mask)
+{
+    return pix_multiply (src, mask);
+}
+
+static force_inline __m64
+in_over_full_src_alpha (__m64 src, __m64 mask, __m64 dest)
+{
+    src = _mm_or_si64 (src, MC (full_alpha));
+
+    return over (in (src, mask), mask, dest);
+}
+
+#ifndef _MSC_VER
+static force_inline __m64
+in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
+{
+    return over (in (src, mask), pix_multiply (srca, mask), dest);
+}
+
+#else
+
+#define in_over(src, srca, mask, dest)					\
+    over (in (src, mask), pix_multiply (srca, mask), dest)
+
+#endif
+
+static force_inline __m64
+load8888 (uint32_t v)
+{
+    return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ());
+}
+
+static force_inline __m64
+pack8888 (__m64 lo, __m64 hi)
+{
+    return _mm_packs_pu16 (lo, hi);
+}
+
+static force_inline uint32_t
+store8888 (__m64 v)
+{
+    return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()));
+}
+
+/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
+ *
+ *    00RR00GG00BB
+ *
+ * --- Expanding 565 in the low word ---
+ *
+ * m = (m << (36 - 11)) | (m << (16 - 5)) | m;
+ * m = m & (01f0003f001f);
+ * m = m * (008404100840);
+ * m = m >> 8;
+ *
+ * Note the trick here - the top word is shifted by another nibble to
+ * avoid it bumping into the middle word
+ */
+static force_inline __m64
+expand565 (__m64 pixel, int pos)
+{
+    __m64 p = pixel;
+    __m64 t1, t2;
+
+    /* move pixel to low 16 bit and zero the rest */
+    p = shift (shift (p, (3 - pos) * 16), -48);
+
+    t1 = shift (p, 36 - 11);
+    t2 = shift (p, 16 - 5);
+
+    p = _mm_or_si64 (t1, p);
+    p = _mm_or_si64 (t2, p);
+    p = _mm_and_si64 (p, MC (565_rgb));
+
+    pixel = _mm_mullo_pi16 (p, MC (565_unpack_multiplier));
+    return _mm_srli_pi16 (pixel, 8);
+}
+
+static force_inline __m64
+expand8888 (__m64 in, int pos)
+{
+    if (pos == 0)
+	return _mm_unpacklo_pi8 (in, _mm_setzero_si64 ());
+    else
+	return _mm_unpackhi_pi8 (in, _mm_setzero_si64 ());
+}
+
+static force_inline __m64
+expandx888 (__m64 in, int pos)
+{
+    return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha));
+}
+
+static force_inline __m64
+pack_565 (__m64 pixel, __m64 target, int pos)
+{
+    __m64 p = pixel;
+    __m64 t = target;
+    __m64 r, g, b;
+
+    r = _mm_and_si64 (p, MC (565_r));
+    g = _mm_and_si64 (p, MC (565_g));
+    b = _mm_and_si64 (p, MC (565_b));
+
+    r = shift (r, -(32 - 8) + pos * 16);
+    g = shift (g, -(16 - 3) + pos * 16);
+    b = shift (b, -(0  + 3) + pos * 16);
+
+    if (pos == 0)
+	t = _mm_and_si64 (t, MC (mask_0));
+    else if (pos == 1)
+	t = _mm_and_si64 (t, MC (mask_1));
+    else if (pos == 2)
+	t = _mm_and_si64 (t, MC (mask_2));
+    else if (pos == 3)
+	t = _mm_and_si64 (t, MC (mask_3));
+
+    p = _mm_or_si64 (r, t);
+    p = _mm_or_si64 (g, p);
+
+    return _mm_or_si64 (b, p);
+}
+
+#ifndef _MSC_VER
+
+static force_inline __m64
+pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
+{
+    x = pix_multiply (x, a);
+    y = pix_multiply (y, b);
+
+    return pix_add (x, y);
+}
+
+#else
+
+#define pix_add_mul(x, a, y, b)	 \
+    ( x = pix_multiply (x, a),	 \
+      y = pix_multiply (y, b),	/* scale y by b, matching the inline version */ \
+      pix_add (x, y) )
+
+#endif
+
+/* --------------- MMX code paths for fbcompose.c --------------------- */
+
+static force_inline uint32_t
+combine (const uint32_t *src, const uint32_t *mask)
+{
+    uint32_t ssrc = *src;
+
+    if (mask)
+    {
+	__m64 m = load8888 (*mask);
+	__m64 s = load8888 (ssrc);
+
+	m = expand_alpha (m);
+	s = pix_multiply (s, m);
+
+	ssrc = store8888 (s);
+    }
+
+    return ssrc;
+}
+
+static void
+mmx_combine_over_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
+{
+    const uint32_t *end = dest + width;
+
+    while (dest < end)
+    {
+	uint32_t ssrc = combine (src, mask);
+	uint32_t a = ssrc >> 24;
+
+	if (a == 0xff)
+	{
+	    *dest = ssrc;
+	}
+	else if (ssrc)
+	{
+	    __m64 s, sa;
+	    s = load8888 (ssrc);
+	    sa = expand_alpha (s);
+	    *dest = store8888 (over (s, sa, load8888 (*dest)));
+	}
+
+	++dest;
+	++src;
+	if (mask)
+	    ++mask;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_over_reverse_u (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dest,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
+{
+    const uint32_t *end = dest + width;
+
+    while (dest < end)
+    {
+	__m64 d, da;
+	uint32_t s = combine (src, mask);
+
+	d = load8888 (*dest);
+	da = expand_alpha (d);
+	*dest = store8888 (over (d, da, load8888 (s)));
+
+	++dest;
+	++src;
+	if (mask)
+	    mask++;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_in_u (pixman_implementation_t *imp,
+                  pixman_op_t              op,
+                  uint32_t *               dest,
+                  const uint32_t *         src,
+                  const uint32_t *         mask,
+                  int                      width)
+{
+    const uint32_t *end = dest + width;
+
+    while (dest < end)
+    {
+	__m64 x, a;
+
+	x = load8888 (combine (src, mask));
+	a = load8888 (*dest);
+	a = expand_alpha (a);
+	x = pix_multiply (x, a);
+
+	*dest = store8888 (x);
+
+	++dest;
+	++src;
+	if (mask)
+	    mask++;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_in_reverse_u (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          uint32_t *               dest,
+                          const uint32_t *         src,
+                          const uint32_t *         mask,
+                          int                      width)
+{
+    const uint32_t *end = dest + width;
+
+    while (dest < end)
+    {
+	__m64 x, a;
+
+	x = load8888 (*dest);
+	a = load8888 (combine (src, mask));
+	a = expand_alpha (a);
+	x = pix_multiply (x, a);
+	*dest = store8888 (x);
+
+	++dest;
+	++src;
+	if (mask)
+	    mask++;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_out_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
+{
+    const uint32_t *end = dest + width;
+
+    while (dest < end)
+    {
+	__m64 x, a;
+
+	x = load8888 (combine (src, mask));
+	a = load8888 (*dest);
+	a = expand_alpha (a);
+	a = negate (a);
+	x = pix_multiply (x, a);
+	*dest = store8888 (x);
+
+	++dest;
+	++src;
+	if (mask)
+	    mask++;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_out_reverse_u (pixman_implementation_t *imp,
+                           pixman_op_t              op,
+                           uint32_t *               dest,
+                           const uint32_t *         src,
+                           const uint32_t *         mask,
+                           int                      width)
+{
+    const uint32_t *end = dest + width;
+
+    while (dest < end)
+    {
+	__m64 x, a;
+
+	x = load8888 (*dest);
+	a = load8888 (combine (src, mask));
+	a = expand_alpha (a);
+	a = negate (a);
+	x = pix_multiply (x, a);
+
+	*dest = store8888 (x);
+
+	++dest;
+	++src;
+	if (mask)
+	    mask++;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_atop_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
+{
+    const uint32_t *end = dest + width;
+
+    while (dest < end)
+    {
+	__m64 s, da, d, sia;
+
+	s = load8888 (combine (src, mask));
+	d = load8888 (*dest);
+	sia = expand_alpha (s);
+	sia = negate (sia);
+	da = expand_alpha (d);
+	s = pix_add_mul (s, da, d, sia);
+	*dest = store8888 (s);
+
+	++dest;
+	++src;
+	if (mask)
+	    mask++;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_atop_reverse_u (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dest,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
+{
+    const uint32_t *end;
+
+    end = dest + width;
+
+    while (dest < end)
+    {
+	__m64 s, dia, d, sa;
+
+	s = load8888 (combine (src, mask));
+	d = load8888 (*dest);
+	sa = expand_alpha (s);
+	dia = expand_alpha (d);
+	dia = negate (dia);
+	s = pix_add_mul (s, dia, d, sa);
+	*dest = store8888 (s);
+
+	++dest;
+	++src;
+	if (mask)
+	    mask++;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_xor_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
+{
+    const uint32_t *end = dest + width;
+
+    while (dest < end)
+    {
+	__m64 s, dia, d, sia;
+
+	s = load8888 (combine (src, mask));
+	d = load8888 (*dest);
+	sia = expand_alpha (s);
+	dia = expand_alpha (d);
+	sia = negate (sia);
+	dia = negate (dia);
+	s = pix_add_mul (s, dia, d, sia);
+	*dest = store8888 (s);
+
+	++dest;
+	++src;
+	if (mask)
+	    mask++;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_add_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
+{
+    const uint32_t *end = dest + width;
+
+    while (dest < end)
+    {
+	__m64 s, d;
+
+	s = load8888 (combine (src, mask));
+	d = load8888 (*dest);
+	s = pix_add (s, d);
+	*dest = store8888 (s);
+
+	++dest;
+	++src;
+	if (mask)
+	    mask++;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_saturate_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t *               dest,
+                        const uint32_t *         src,
+                        const uint32_t *         mask,
+                        int                      width)
+{
+    const uint32_t *end = dest + width;
+
+    while (dest < end)
+    {
+	uint32_t s = combine (src, mask);
+	uint32_t d = *dest;
+	__m64 ms = load8888 (s);
+	__m64 md = load8888 (d);
+	uint32_t sa = s >> 24;
+	uint32_t da = ~d >> 24;
+
+	if (sa > da)
+	{
+	    __m64 msa = load8888 (DIV_UN8 (da, sa) << 24);
+	    msa = expand_alpha (msa);
+	    ms = pix_multiply (ms, msa);
+	}
+
+	md = pix_add (md, ms);
+	*dest = store8888 (md);
+
+	++src;
+	++dest;
+	if (mask)
+	    mask++;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_src_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
+{
+    const uint32_t *end = src + width;
+
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+
+	s = pix_multiply (s, a);
+	*dest = store8888 (s);
+
+	++src;
+	++mask;
+	++dest;
+    }
+    _mm_empty ();
+}
+
+static void
+mmx_combine_over_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dest,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
+{
+    /* Component-alpha OVER: in_over () combines the source, its expanded
+     * alpha and the per-pixel mask with the existing destination pixel.
+     */
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	__m64 vmask = load8888 (mask[i]);
+	__m64 vsrc = load8888 (src[i]);
+	__m64 vdst = load8888 (dest[i]);
+
+	dest[i] = store8888 (in_over (vsrc, expand_alpha (vsrc), vmask, vdst));
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_combine_over_reverse_ca (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               dest,
+                             const uint32_t *         src,
+                             const uint32_t *         mask,
+                             int                      width)
+{
+    /* Component-alpha OVER_REVERSE: the existing destination is passed to
+     * over () as the top layer, with in (source, mask) underneath it.
+     */
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	__m64 vmask = load8888 (mask[i]);
+	__m64 vsrc = load8888 (src[i]);
+	__m64 vdst = load8888 (dest[i]);
+
+	dest[i] = store8888 (over (vdst, expand_alpha (vdst), in (vsrc, vmask)));
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_combine_in_ca (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
+{
+    /* Component-alpha IN: the result is the source multiplied by the mask
+     * and then by the expanded alpha of the old destination pixel.
+     */
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	__m64 vmask = load8888 (mask[i]);
+	__m64 vsrc = load8888 (src[i]);
+	__m64 vdst = load8888 (dest[i]);
+	__m64 vdst_alpha = expand_alpha (vdst);
+
+	vsrc = pix_multiply (vsrc, vmask);
+	dest[i] = store8888 (pix_multiply (vsrc, vdst_alpha));
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_combine_in_reverse_ca (pixman_implementation_t *imp,
+                           pixman_op_t              op,
+                           uint32_t *               dest,
+                           const uint32_t *         src,
+                           const uint32_t *         mask,
+                           int                      width)
+{
+    /* Component-alpha IN_REVERSE: the destination is multiplied by
+     * (mask * source alpha), where the source alpha is expand_alpha () of
+     * the source pixel.  The source colour channels are not used directly.
+     */
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	__m64 vmask = load8888 (mask[i]);
+	__m64 vsrc = load8888 (src[i]);
+	__m64 vdst = load8888 (dest[i]);
+	__m64 weight = pix_multiply (vmask, expand_alpha (vsrc));
+
+	dest[i] = store8888 (pix_multiply (vdst, weight));
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_combine_out_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
+{
+    /* Component-alpha OUT: the result is the source multiplied by the mask
+     * and then by the negated, expanded alpha of the old destination pixel.
+     */
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	__m64 vmask = load8888 (mask[i]);
+	__m64 vsrc = load8888 (src[i]);
+	__m64 vdst = load8888 (dest[i]);
+	__m64 inv_dst_alpha = negate (expand_alpha (vdst));
+
+	vsrc = pix_multiply (vsrc, vmask);
+	vsrc = pix_multiply (vsrc, inv_dst_alpha);
+	dest[i] = store8888 (vsrc);
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_combine_out_reverse_ca (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dest,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
+{
+    /* Component-alpha OUT_REVERSE: the destination is multiplied by the
+     * negation of (mask * expanded source alpha).
+     */
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	__m64 vmask = load8888 (mask[i]);
+	__m64 vsrc = load8888 (src[i]);
+	__m64 vdst = load8888 (dest[i]);
+	__m64 weight;
+
+	weight = pix_multiply (vmask, expand_alpha (vsrc));
+	weight = negate (weight);
+	dest[i] = store8888 (pix_multiply (vdst, weight));
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_combine_atop_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dest,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
+{
+    /* Component-alpha ATOP: pix_add_mul () pairs the destination with the
+     * negated (mask * source alpha) and the masked source with dest alpha.
+     */
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	__m64 vmask = load8888 (mask[i]);
+	__m64 vsrc = load8888 (src[i]);
+	__m64 vdst = load8888 (dest[i]);
+	__m64 vdst_alpha = expand_alpha (vdst);
+	__m64 vsrc_alpha = expand_alpha (vsrc);
+	__m64 masked_src = pix_multiply (vsrc, vmask);
+	__m64 dst_weight = negate (pix_multiply (vmask, vsrc_alpha));
+
+	dest[i] = store8888 (
+	    pix_add_mul (vdst, dst_weight, masked_src, vdst_alpha));
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               dest,
+                             const uint32_t *         src,
+                             const uint32_t *         mask,
+                             int                      width)
+{
+    /* Component-alpha ATOP_REVERSE: pix_add_mul () pairs the destination with
+     * (mask * source alpha) and the masked source with negated dest alpha.
+     */
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	__m64 vmask = load8888 (mask[i]);
+	__m64 vsrc = load8888 (src[i]);
+	__m64 vdst = load8888 (dest[i]);
+	__m64 inv_dst_alpha = negate (expand_alpha (vdst));
+	__m64 vsrc_alpha = expand_alpha (vsrc);
+	__m64 masked_src = pix_multiply (vsrc, vmask);
+	__m64 dst_weight = pix_multiply (vmask, vsrc_alpha);
+
+	dest[i] = store8888 (
+	    pix_add_mul (vdst, dst_weight, masked_src, inv_dst_alpha));
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_combine_xor_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
+{
+    /* Component-alpha XOR: pix_add_mul () pairs the destination with the
+     * negated (mask * source alpha) and the masked source with the negated
+     * destination alpha.
+     */
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	__m64 vmask = load8888 (mask[i]);
+	__m64 vsrc = load8888 (src[i]);
+	__m64 vdst = load8888 (dest[i]);
+	__m64 inv_dst_alpha = negate (expand_alpha (vdst));
+	__m64 vsrc_alpha = expand_alpha (vsrc);
+	__m64 masked_src = pix_multiply (vsrc, vmask);
+	__m64 dst_weight = negate (pix_multiply (vmask, vsrc_alpha));
+
+	dest[i] = store8888 (
+	    pix_add_mul (vdst, dst_weight, masked_src, inv_dst_alpha));
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_combine_add_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
+{
+    /* Component-alpha ADD: the source is multiplied by the mask and then
+     * combined with the existing destination pixel through pix_add ().
+     */
+    int i;
+
+    for (i = 0; i < width; i++)
+    {
+	__m64 vmask = load8888 (mask[i]);
+	__m64 vsrc = load8888 (src[i]);
+	__m64 vdst = load8888 (dest[i]);
+
+	vsrc = pix_multiply (vsrc, vmask);
+	dest[i] = store8888 (pix_add (vsrc, vdst));
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+/* ------------- MMX code paths called from fbpict.c -------------------- */
+
+static void
+mmx_composite_over_n_8888 (pixman_implementation_t *imp,
+                           pixman_op_t              op,
+                           pixman_image_t *         src_image,
+                           pixman_image_t *         mask_image,
+                           pixman_image_t *         dst_image,
+                           int32_t                  src_x,
+                           int32_t                  src_y,
+                           int32_t                  mask_x,
+                           int32_t                  mask_y,
+                           int32_t                  dest_x,
+                           int32_t                  dest_y,
+                           int32_t                  width,
+                           int32_t                  height)
+{
+    uint32_t src;
+    uint32_t    *dst_line, *dst;
+    int32_t w;
+    int dst_stride;
+    __m64 vsrc, vsrca;
+    /* Composite a solid source colour with over () onto a width x height area of 32-bit pixels. */
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+    /* Early out: an all-zero source is treated as a no-op. */
+    if (src == 0)
+	return;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    /* The source is constant, so expand it and its alpha once, outside the loops. */
+    vsrc = load8888 (src);
+    vsrca = expand_alpha (vsrc);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	w = width;
+
+	CHECKPOINT ();
+	/* Head: single pixels until dst is 8-byte aligned. */
+	while (w && (unsigned long)dst & 7)
+	{
+	    *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
+
+	    w--;
+	    dst++;
+	}
+	/* Body: two pixels per aligned 64-bit load/store. */
+	while (w >= 2)
+	{
+	    __m64 vdest;
+	    __m64 dest0, dest1;
+
+	    vdest = *(__m64 *)dst;
+
+	    dest0 = over (vsrc, vsrca, expand8888 (vdest, 0));
+	    dest1 = over (vsrc, vsrca, expand8888 (vdest, 1));
+
+	    *(__m64 *)dst = pack8888 (dest0, dest1);
+
+	    dst += 2;
+	    w -= 2;
+	}
+
+	CHECKPOINT ();
+	/* Tail: at most one pixel is left after the two-pixel loop. */
+	while (w)
+	{
+	    *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
+
+	    w--;
+	    dst++;
+	}
+    }
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_composite_over_n_0565 (pixman_implementation_t *imp,
+                           pixman_op_t              op,
+                           pixman_image_t *         src_image,
+                           pixman_image_t *         mask_image,
+                           pixman_image_t *         dst_image,
+                           int32_t                  src_x,
+                           int32_t                  src_y,
+                           int32_t                  mask_x,
+                           int32_t                  mask_y,
+                           int32_t                  dest_x,
+                           int32_t                  dest_y,
+                           int32_t                  width,
+                           int32_t                  height)
+{
+    uint32_t src;
+    uint16_t    *dst_line, *dst;
+    int32_t w;
+    int dst_stride;
+    __m64 vsrc, vsrca;
+    /* Composite a solid source colour with over () onto a width x height area of 16-bit pixels. */
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+    /* Early out: an all-zero source is treated as a no-op. */
+    if (src == 0)
+	return;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    /* The source is constant, so expand it and its alpha once, outside the loops. */
+    vsrc = load8888 (src);
+    vsrca = expand_alpha (vsrc);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	w = width;
+
+	CHECKPOINT ();
+	/* Head: single 16-bit pixels until dst is 8-byte aligned. */
+	while (w && (unsigned long)dst & 7)
+	{
+	    uint64_t d = *dst;
+	    __m64 vdest = expand565 (to_m64 (d), 0);
+
+	    vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
+	    *dst = to_uint64 (vdest);
+
+	    w--;
+	    dst++;
+	}
+	/* Body: four 16-bit pixels per aligned 64-bit load/store, repacked one position at a time. */
+	while (w >= 4)
+	{
+	    __m64 vdest;
+
+	    vdest = *(__m64 *)dst;
+
+	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 0)), vdest, 0);
+	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 1)), vdest, 1);
+	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 2)), vdest, 2);
+	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 3)), vdest, 3);
+
+	    *(__m64 *)dst = vdest;
+
+	    dst += 4;
+	    w -= 4;
+	}
+
+	CHECKPOINT ();
+	/* Tail: up to three leftover pixels, one at a time. */
+	while (w)
+	{
+	    uint64_t d = *dst;
+	    __m64 vdest = expand565 (to_m64 (d), 0);
+
+	    vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
+	    *dst = to_uint64 (vdest);
+
+	    w--;
+	    dst++;
+	}
+    }
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
+                                   pixman_op_t              op,
+                                   pixman_image_t *         src_image,
+                                   pixman_image_t *         mask_image,
+                                   pixman_image_t *         dst_image,
+                                   int32_t                  src_x,
+                                   int32_t                  src_y,
+                                   int32_t                  mask_x,
+                                   int32_t                  mask_y,
+                                   int32_t                  dest_x,
+                                   int32_t                  dest_y,
+                                   int32_t                  width,
+                                   int32_t                  height)
+{
+    /* Composite a solid source through a 32-bit per-pixel mask onto a 32-bit
+     * destination using in_over (); width/height give the area in pixels. */
+    uint32_t src;
+    uint32_t *dst_line, *mask_line;
+    int dst_stride, mask_stride;
+    __m64 vsrc, vsrca;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+    /* Early out: an all-zero source is treated as a no-op. */
+    if (src == 0)
+	return;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+    vsrc = load8888 (src);
+    vsrca = expand_alpha (vsrc);
+
+    while (height--)
+    {
+	int twidth = width;
+	uint32_t *p = mask_line;
+	uint32_t *q = dst_line;
+	/* Head: single pixels until the destination pointer is 8-byte aligned. */
+	while (twidth && (unsigned long)q & 7)
+	{
+	    uint32_t m = *p;
+	    /* A zero mask pixel leaves the destination untouched. */
+	    if (m)
+	    {
+		__m64 vdest = load8888 (*q);
+		vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
+		*q = store8888 (vdest);
+	    }
+
+	    twidth--;
+	    p++;
+	    q++;
+	}
+	/* Body: two pixels per aligned 64-bit destination load/store. */
+	while (twidth >= 2)
+	{
+	    uint32_t m0, m1;
+	    m0 = *p;
+	    m1 = *(p + 1);
+	    /* Skip the pair entirely when both mask pixels are zero. */
+	    if (m0 | m1)
+	    {
+		__m64 dest0, dest1;
+		__m64 vdest = *(__m64 *)q;
+
+		dest0 = in_over (vsrc, vsrca, load8888 (m0),
+		                 expand8888 (vdest, 0));
+		dest1 = in_over (vsrc, vsrca, load8888 (m1),
+		                 expand8888 (vdest, 1));
+
+		*(__m64 *)q = pack8888 (dest0, dest1);
+	    }
+
+	    p += 2;
+	    q += 2;
+	    twidth -= 2;
+	}
+	/* Tail: at most one pixel is left after the two-pixel loop. */
+	while (twidth)
+	{
+	    uint32_t m = *p;
+
+	    if (m)
+	    {
+		__m64 vdest = load8888 (*q);
+		vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
+		*q = store8888 (vdest);
+	    }
+
+	    twidth--;
+	    p++;
+	    q++;
+	}
+
+	dst_line += dst_stride;
+	mask_line += mask_stride;
+    }
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    /* Composite a 32-bit source image onto a 32-bit destination with in_over () and a solid mask. */
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    uint32_t mask;
+    __m64 vmask;
+    int dst_stride, src_stride;
+    int32_t w;
+
+    CHECKPOINT ();
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+    mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format);
+    /* Keep only the mask's top (alpha) byte and replicate it into all four bytes. */
+    mask &= 0xff000000;
+    mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
+    vmask = load8888 (mask);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+	/* Head: single pixels until dst is 8-byte aligned. */
+	while (w && (unsigned long)dst & 7)
+	{
+	    __m64 s = load8888 (*src);
+	    __m64 d = load8888 (*dst);
+
+	    *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
+
+	    w--;
+	    dst++;
+	    src++;
+	}
+	/* Body: two pixels per 64-bit access; only dst was aligned above, src is read as-is. */
+	while (w >= 2)
+	{
+	    __m64 vs = *(__m64 *)src;
+	    __m64 vd = *(__m64 *)dst;
+	    __m64 vsrc0 = expand8888 (vs, 0);
+	    __m64 vsrc1 = expand8888 (vs, 1);
+
+	    *(__m64 *)dst = pack8888 (
+	        in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)),
+	        in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1)));
+
+	    w -= 2;
+	    dst += 2;
+	    src += 2;
+	}
+	/* Tail: at most one pixel is left after the two-pixel loop. */
+	while (w)
+	{
+	    __m64 s = load8888 (*src);
+	    __m64 d = load8888 (*dst);
+
+	    *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
+
+	    w--;
+	    dst++;
+	    src++;
+	}
+    }
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    uint32_t *dst_line, *dst;
+    uint32_t *src_line, *src;
+    uint32_t mask;
+    __m64 vmask;
+    int dst_stride, src_stride;
+    int32_t w;
+    __m64 srca;
+    /* Composite a 32-bit source whose top byte is ignored onto a 32-bit destination, with a solid mask. */
+    CHECKPOINT ();
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format);
+    /* Keep only the mask's top (alpha) byte and replicate it into all four bytes. */
+    mask &= 0xff000000;
+    mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
+    vmask = load8888 (mask);
+    srca = MC (4x00ff); /* constant source alpha for every pixel; presumably "fully opaque" -- confirm */
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+	/* Head: single pixels until dst is 8-byte aligned; the source top byte is forced to 0xff. */
+	while (w && (unsigned long)dst & 7)
+	{
+	    __m64 s = load8888 (*src | 0xff000000);
+	    __m64 d = load8888 (*dst);
+
+	    *dst = store8888 (in_over (s, srca, vmask, d));
+
+	    w--;
+	    dst++;
+	    src++;
+	}
+	/* Body: 16 pixels per iteration as eight 64-bit loads from each of dst and src. */
+	while (w >= 16)
+	{
+	    __m64 vd0 = *(__m64 *)(dst + 0);
+	    __m64 vd1 = *(__m64 *)(dst + 2);
+	    __m64 vd2 = *(__m64 *)(dst + 4);
+	    __m64 vd3 = *(__m64 *)(dst + 6);
+	    __m64 vd4 = *(__m64 *)(dst + 8);
+	    __m64 vd5 = *(__m64 *)(dst + 10);
+	    __m64 vd6 = *(__m64 *)(dst + 12);
+	    __m64 vd7 = *(__m64 *)(dst + 14);
+	    /* NOTE(review): only dst was aligned by the head loop; src is read without an alignment check. */
+	    __m64 vs0 = *(__m64 *)(src + 0);
+	    __m64 vs1 = *(__m64 *)(src + 2);
+	    __m64 vs2 = *(__m64 *)(src + 4);
+	    __m64 vs3 = *(__m64 *)(src + 6);
+	    __m64 vs4 = *(__m64 *)(src + 8);
+	    __m64 vs5 = *(__m64 *)(src + 10);
+	    __m64 vs6 = *(__m64 *)(src + 12);
+	    __m64 vs7 = *(__m64 *)(src + 14);
+
+	    vd0 = pack8888 (
+	        in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
+	        in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1)));
+
+	    vd1 = pack8888 (
+	        in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)),
+	        in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1)));
+
+	    vd2 = pack8888 (
+	        in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)),
+	        in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1)));
+
+	    vd3 = pack8888 (
+	        in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)),
+	        in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1)));
+
+	    vd4 = pack8888 (
+	        in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)),
+	        in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1)));
+
+	    vd5 = pack8888 (
+	        in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)),
+	        in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1)));
+
+	    vd6 = pack8888 (
+	        in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)),
+	        in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1)));
+
+	    vd7 = pack8888 (
+	        in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)),
+	        in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1)));
+	    /* Write the eight blended pairs back to the destination. */
+	    *(__m64 *)(dst + 0) = vd0;
+	    *(__m64 *)(dst + 2) = vd1;
+	    *(__m64 *)(dst + 4) = vd2;
+	    *(__m64 *)(dst + 6) = vd3;
+	    *(__m64 *)(dst + 8) = vd4;
+	    *(__m64 *)(dst + 10) = vd5;
+	    *(__m64 *)(dst + 12) = vd6;
+	    *(__m64 *)(dst + 14) = vd7;
+
+	    w -= 16;
+	    dst += 16;
+	    src += 16;
+	}
+	/* Tail: up to fifteen leftover pixels, one at a time. */
+	while (w)
+	{
+	    __m64 s = load8888 (*src | 0xff000000);
+	    __m64 d = load8888 (*dst);
+
+	    *dst = store8888 (in_over (s, srca, vmask, d));
+
+	    w--;
+	    dst++;
+	    src++;
+	}
+    }
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
+{
+    /* OVER of a 32-bit source image onto a 32-bit destination, handled one
+     * pixel at a time.  Pixels whose top byte (alpha) is 0xff are copied,
+     * all-zero pixels are skipped, and the rest are blended with over ().
+     */
+    uint32_t *dst_row, *src_row;
+    int dst_stride, src_stride;
+
+    CHECKPOINT ();
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_row, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_row, 1);
+
+    for (; height--; dst_row += dst_stride, src_row += src_stride)
+    {
+	uint32_t *out = dst_row;
+	uint32_t *cur = src_row;
+	int32_t left;
+
+	for (left = width; left--; out++)
+	{
+	    uint32_t pixel = *cur++;
+	    uint8_t alpha = pixel >> 24;
+
+	    if (alpha == 0xff)
+	    {
+		/* Opaque source: the destination is simply replaced. */
+		*out = pixel;
+	    }
+	    else if (pixel != 0)
+	    {
+		/* Translucent source: blend it over the old destination. */
+		__m64 vsrc = load8888 (pixel);
+		__m64 vsrca = expand_alpha (vsrc);
+
+		*out = store8888 (over (vsrc, vsrca, load8888 (*out)));
+	    }
+	}
+    }
+
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
+{
+    uint16_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    int32_t w;
+    /* Composite a 32-bit source image with over () onto a 16-bit destination. */
+    CHECKPOINT ();
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    /* The assertion below is compiled out (#if 0) and has no effect. */
+#if 0
+    /* FIXME */
+    assert (src_image->drawable == mask_image->drawable);
+#endif
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	CHECKPOINT ();
+	/* Head: single pixels until dst is 8-byte aligned. */
+	while (w && (unsigned long)dst & 7)
+	{
+	    __m64 vsrc = load8888 (*src);
+	    uint64_t d = *dst;
+	    __m64 vdest = expand565 (to_m64 (d), 0);
+
+	    vdest = pack_565 (
+		over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
+
+	    *dst = to_uint64 (vdest);
+
+	    w--;
+	    dst++;
+	    src++;
+	}
+
+	CHECKPOINT ();
+	/* Body: four source pixels blended into one aligned 64-bit group of 16-bit destination pixels. */
+	while (w >= 4)
+	{
+	    __m64 vsrc0, vsrc1, vsrc2, vsrc3;
+	    __m64 vdest;
+
+	    vsrc0 = load8888 (*(src + 0));
+	    vsrc1 = load8888 (*(src + 1));
+	    vsrc2 = load8888 (*(src + 2));
+	    vsrc3 = load8888 (*(src + 3));
+
+	    vdest = *(__m64 *)dst;
+
+	    vdest = pack_565 (over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)), vdest, 0);
+	    vdest = pack_565 (over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)), vdest, 1);
+	    vdest = pack_565 (over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)), vdest, 2);
+	    vdest = pack_565 (over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)), vdest, 3);
+
+	    *(__m64 *)dst = vdest;
+
+	    w -= 4;
+	    dst += 4;
+	    src += 4;
+	}
+
+	CHECKPOINT ();
+	/* Tail: up to three leftover pixels, one at a time. */
+	while (w)
+	{
+	    __m64 vsrc = load8888 (*src);
+	    uint64_t d = *dst;
+	    __m64 vdest = expand565 (to_m64 (d), 0);
+
+	    vdest = pack_565 (over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
+
+	    *dst = to_uint64 (vdest);
+
+	    w--;
+	    dst++;
+	    src++;
+	}
+    }
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+static void
+mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             pixman_image_t *         src_image,
+                             pixman_image_t *         mask_image,
+                             pixman_image_t *         dst_image,
+                             int32_t                  src_x,
+                             int32_t                  src_y,
+                             int32_t                  mask_x,
+                             int32_t                  mask_y,
+                             int32_t                  dest_x,
+                             int32_t                  dest_y,
+                             int32_t                  width,
+                             int32_t                  height)
+{
+    uint32_t src, srca;
+    uint32_t *dst_line, *dst;
+    uint8_t *mask_line, *mask;
+    int dst_stride, mask_stride;
+    int32_t w;
+    __m64 vsrc, vsrca;
+    uint64_t srcsrc;
+    /* Composite a solid source through an 8-bit mask onto a 32-bit destination using in_over (). */
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+    /* srca is the source's top (alpha) byte; an all-zero source is treated as a no-op. */
+    srca = src >> 24;
+    if (src == 0)
+	return;
+    /* Two copies of the source pixel, used for the direct 64-bit store in the body loop. */
+    srcsrc = (uint64_t)src << 32 | src;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    vsrc = load8888 (src);
+    vsrca = expand_alpha (vsrc);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	CHECKPOINT ();
+	/* Head: single pixels until dst is 8-byte aligned; zero mask bytes are skipped. */
+	while (w && (unsigned long)dst & 7)
+	{
+	    uint64_t m = *mask;
+
+	    if (m)
+	    {
+		__m64 vdest = in_over (vsrc, vsrca,
+				       expand_alpha_rev (to_m64 (m)),
+				       load8888 (*dst));
+
+		*dst = store8888 (vdest);
+	    }
+
+	    w--;
+	    mask++;
+	    dst++;
+	}
+
+	CHECKPOINT ();
+	/* Body: two pixels per aligned 64-bit destination access. */
+	while (w >= 2)
+	{
+	    uint64_t m0, m1;
+
+	    m0 = *mask;
+	    m1 = *(mask + 1);
+	    /* Source alpha 0xff and both mask bytes 0xff: store the source pair directly. */
+	    if (srca == 0xff && (m0 & m1) == 0xff)
+	    {
+		*(uint64_t *)dst = srcsrc;
+	    }
+	    else if (m0 | m1)
+	    {
+		__m64 vdest;
+		__m64 dest0, dest1;
+
+		vdest = *(__m64 *)dst;
+
+		dest0 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m0)),
+				 expand8888 (vdest, 0));
+		dest1 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m1)),
+				 expand8888 (vdest, 1));
+
+		*(__m64 *)dst = pack8888 (dest0, dest1);
+	    }
+
+	    mask += 2;
+	    dst += 2;
+	    w -= 2;
+	}
+
+	CHECKPOINT ();
+	/* Tail: at most one pixel is left after the two-pixel loop. */
+	while (w)
+	{
+	    uint64_t m = *mask;
+
+	    if (m)
+	    {
+		__m64 vdest = load8888 (*dst);
+
+		vdest = in_over (
+		    vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest);
+		*dst = store8888 (vdest);
+	    }
+
+	    w--;
+	    mask++;
+	    dst++;
+	}
+    }
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+}
+
+pixman_bool_t
+pixman_fill_mmx (uint32_t *bits,
+                 int       stride,
+                 int       bpp,
+                 int       x,
+                 int       y,
+                 int       width,
+                 int       height,
+                 uint32_t xor)
+{
+    uint64_t fill;
+    __m64 vfill;
+    uint32_t byte_width;
+    uint8_t     *byte_line;
+    /* Fill a width x height rectangle at (x, y) in bits with the value xor; FALSE means unsupported bpp. */
+#ifdef __GNUC__
+    __m64 v1, v2, v3, v4, v5, v6, v7;
+#endif
+    /* Only 8, 16 and 32 bits per pixel are handled. */
+    if (bpp != 16 && bpp != 32 && bpp != 8)
+	return FALSE;
+    /* stride arrives in uint32_t units: use it in pixels to find the start, then convert it to bytes. */
+    if (bpp == 8)
+    {
+	stride = stride * (int) sizeof (uint32_t) / 1;
+	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
+	byte_width = width;
+	stride *= 1;
+        xor = (xor & 0xff) * 0x01010101; /* replicate the low byte into all four bytes */
+    }
+    else if (bpp == 16)
+    {
+	stride = stride * (int) sizeof (uint32_t) / 2;
+	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+	byte_width = 2 * width;
+	stride *= 2;
+        xor = (xor & 0xffff) * 0x00010001; /* replicate the low 16 bits into both halves */
+    }
+    else
+    {
+	stride = stride * (int) sizeof (uint32_t) / 4;
+	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+	byte_width = 4 * width;
+	stride *= 4;
+    }
+    /* Widen the 32-bit pattern to 64 bits for the MMX stores. */
+    fill = ((uint64_t)xor << 32) | xor;
+    vfill = to_m64 (fill);
+    /* GCC only: copy the pattern into seven more MMX registers for the 64-byte store loop. */
+#ifdef __GNUC__
+    __asm__ (
+        "movq		%7,	%0\n"
+        "movq		%7,	%1\n"
+        "movq		%7,	%2\n"
+        "movq		%7,	%3\n"
+        "movq		%7,	%4\n"
+        "movq		%7,	%5\n"
+        "movq		%7,	%6\n"
+	: "=&y" (v1), "=&y" (v2), "=&y" (v3),
+	  "=&y" (v4), "=&y" (v5), "=&y" (v6), "=y" (v7)
+	: "y" (vfill));
+#endif
+
+    while (height--)
+    {
+	int w;
+	uint8_t *d = byte_line;
+
+	byte_line += stride;
+	w = byte_width;
+	/* Head: byte, 16-bit and 32-bit stores in turn until d is 8-byte aligned. */
+	while (w >= 1 && ((unsigned long)d & 1))
+	{
+	    *(uint8_t *)d = (xor & 0xff);
+	    w--;
+	    d++;
+	}
+
+	while (w >= 2 && ((unsigned long)d & 3))
+	{
+	    *(uint16_t *)d = xor;
+	    w -= 2;
+	    d += 2;
+	}
+
+	while (w >= 4 && ((unsigned long)d & 7))
+	{
+	    *(uint32_t *)d = xor;
+
+	    w -= 4;
+	    d += 4;
+	}
+	/* Bulk: 64 bytes per iteration as eight 64-bit stores. */
+	while (w >= 64)
+	{
+#ifdef __GNUC__
+	    __asm__ (
+	        "movq	%1,	  (%0)\n"
+	        "movq	%2,	 8(%0)\n"
+	        "movq	%3,	16(%0)\n"
+	        "movq	%4,	24(%0)\n"
+	        "movq	%5,	32(%0)\n"
+	        "movq	%6,	40(%0)\n"
+	        "movq	%7,	48(%0)\n"
+	        "movq	%8,	56(%0)\n"
+		:
+		: "r" (d),
+		  "y" (vfill), "y" (v1), "y" (v2), "y" (v3),
+		  "y" (v4), "y" (v5), "y" (v6), "y" (v7)
+		: "memory");
+#else
+	    *(__m64*) (d +  0) = vfill;
+	    *(__m64*) (d +  8) = vfill;
+	    *(__m64*) (d + 16) = vfill;
+	    *(__m64*) (d + 24) = vfill;
+	    *(__m64*) (d + 32) = vfill;
+	    *(__m64*) (d + 40) = vfill;
+	    *(__m64*) (d + 48) = vfill;
+	    *(__m64*) (d + 56) = vfill;
+#endif
+	    w -= 64;
+	    d += 64;
+	}
+	/* Tail: whatever is left, in 4-, 2- and 1-byte stores. */
+	while (w >= 4)
+	{
+	    *(uint32_t *)d = xor;
+
+	    w -= 4;
+	    d += 4;
+	}
+	while (w >= 2)
+	{
+	    *(uint16_t *)d = xor;
+	    w -= 2;
+	    d += 2;
+	}
+	while (w >= 1)
+	{
+	    *(uint8_t *)d = (xor & 0xff);
+	    w--;
+	    d++;
+	}
+
+    }
+    /* Leave MMX state before returning. */
+    _mm_empty ();
+    return TRUE;
+}
+
+/* SRC with a solid source and an a8 mask onto a 32 bpp destination.
+ * Each pixel becomes (src IN mask); the previous destination value is never
+ * part of the result, so nothing is loaded from dst.  Zero mask bytes give 0.
+ */
+static void
+mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            pixman_image_t *         src_image,
+                            pixman_image_t *         mask_image,
+                            pixman_image_t *         dst_image,
+                            int32_t                  src_x,
+                            int32_t                  src_y,
+                            int32_t                  mask_x,
+                            int32_t                  mask_y,
+                            int32_t                  dest_x,
+                            int32_t                  dest_y,
+                            int32_t                  width,
+                            int32_t                  height)
+{
+    uint32_t src, srca;
+    uint32_t    *dst_line, *dst;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    int32_t w;
+    __m64 vsrc;
+    uint64_t srcsrc;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+    srca = src >> 24;
+    if (src == 0)
+    {
+	pixman_fill_mmx (dst_image->bits.bits, dst_image->bits.rowstride,
+			 PIXMAN_FORMAT_BPP (dst_image->bits.format),
+	                 dest_x, dest_y, width, height, 0);
+	return;
+    }
+
+    /* Two copies of src, stored at once for opaque src and 0xff mask pairs. */
+    srcsrc = (uint64_t)src << 32 | src;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    vsrc = load8888 (src);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	CHECKPOINT ();
+
+	/* Head: single pixels until dst is 8-byte aligned. */
+	while (w && (unsigned long)dst & 7)
+	{
+	    uint64_t m = *mask;
+
+	    if (m)
+	    {
+		__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
+
+		*dst = store8888 (vdest);
+	    }
+	    else
+	    {
+		*dst = 0;
+	    }
+
+	    w--;
+	    mask++;
+	    dst++;
+	}
+
+	CHECKPOINT ();
+
+	while (w >= 2)
+	{
+	    uint64_t m0, m1;
+	    m0 = *mask;
+	    m1 = *(mask + 1);
+
+	    if (srca == 0xff && (m0 & m1) == 0xff)
+	    {
+		*(uint64_t *)dst = srcsrc;
+	    }
+	    else if (m0 | m1)
+	    {
+		__m64 dest0, dest1;
+
+		dest0 = in (vsrc, expand_alpha_rev (to_m64 (m0)));
+		dest1 = in (vsrc, expand_alpha_rev (to_m64 (m1)));
+
+		*(__m64 *)dst = pack8888 (dest0, dest1);
+	    }
+	    else
+	    {
+		*(uint64_t *)dst = 0;
+	    }
+
+	    mask += 2;
+	    dst += 2;
+	    w -= 2;
+	}
+
+	CHECKPOINT ();
+
+	while (w)
+	{
+	    uint64_t m = *mask;
+
+	    if (m)
+	    {
+		__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
+		*dst = store8888 (vdest);
+	    }
+	    else
+	    {
+		*dst = 0;
+	    }
+
+	    w--;
+	    mask++;
+	    dst++;
+	}
+    }
+
+    _mm_empty ();
+}
+
+static void /* OVER: solid source, a8 mask, 16 bpp destination */
+mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             pixman_image_t *         src_image,
+                             pixman_image_t *         mask_image,
+                             pixman_image_t *         dst_image,
+                             int32_t                  src_x,
+                             int32_t                  src_y,
+                             int32_t                  mask_x,
+                             int32_t                  mask_y,
+                             int32_t                  dest_x,
+                             int32_t                  dest_y,
+                             int32_t                  width,
+                             int32_t                  height)
+{
+    uint32_t src, srca;
+    uint16_t *dst_line, *dst;
+    uint8_t *mask_line, *mask;
+    int dst_stride, mask_stride;
+    int32_t w;
+    __m64 vsrc, vsrca, tmp;
+    uint64_t srcsrcsrcsrc, src16;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+    srca = src >> 24; /* alpha byte of the solid source */
+    if (src == 0) /* an all-zero source leaves the destination untouched */
+	return;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    vsrc = load8888 (src);
+    vsrca = expand_alpha (vsrc);
+
+    tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0); /* source packed into 16 bpp slot 0 */
+    src16 = to_uint64 (tmp);
+
+    srcsrcsrcsrc = /* four copies of the packed source, one 64-bit store's worth */
+	(uint64_t)src16 << 48 | (uint64_t)src16 << 32 |
+	(uint64_t)src16 << 16 | (uint64_t)src16;
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	CHECKPOINT ();
+
+	while (w && (unsigned long)dst & 7) /* head: one pixel at a time until dst is 8-byte aligned */
+	{
+	    uint64_t m = *mask;
+
+	    if (m) /* zero mask: destination pixel is left as is */
+	    {
+		uint64_t d = *dst;
+		__m64 vd = to_m64 (d);
+		__m64 vdest = in_over (
+		    vsrc, vsrca, expand_alpha_rev (to_m64 (m)), expand565 (vd, 0));
+
+		vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
+		*dst = to_uint64 (vd); /* truncated to the low 16 bits by the uint16_t store */
+	    }
+
+	    w--;
+	    mask++;
+	    dst++;
+	}
+
+	CHECKPOINT ();
+
+	while (w >= 4) /* body: four 16 bpp pixels (64 bits) per iteration */
+	{
+	    uint64_t m0, m1, m2, m3;
+	    m0 = *mask;
+	    m1 = *(mask + 1);
+	    m2 = *(mask + 2);
+	    m3 = *(mask + 3);
+
+	    if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff) /* opaque source, all four mask bytes 0xff */
+	    {
+		*(uint64_t *)dst = srcsrcsrcsrc;
+	    }
+	    else if (m0 | m1 | m2 | m3) /* at least one mask byte is non-zero */
+	    {
+		__m64 vdest;
+		__m64 vm0, vm1, vm2, vm3;
+
+		vdest = *(__m64 *)dst;
+
+		/* each step blends one slot and feeds the updated vdest to the next */
+		vm0 = to_m64 (m0);
+		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm0),
+					   expand565 (vdest, 0)), vdest, 0);
+		vm1 = to_m64 (m1);
+		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm1),
+					   expand565 (vdest, 1)), vdest, 1);
+		vm2 = to_m64 (m2);
+		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm2),
+					   expand565 (vdest, 2)), vdest, 2);
+		vm3 = to_m64 (m3);
+		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm3),
+					   expand565 (vdest, 3)), vdest, 3);
+
+		*(__m64 *)dst = vdest;
+	    }
+
+	    w -= 4;
+	    mask += 4;
+	    dst += 4;
+	}
+
+	CHECKPOINT ();
+
+	while (w) /* tail: the remaining 0..3 pixels */
+	{
+	    uint64_t m = *mask;
+
+	    if (m)
+	    {
+		uint64_t d = *dst;
+		__m64 vd = to_m64 (d);
+		__m64 vdest = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m)),
+				       expand565 (vd, 0));
+		vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
+		*dst = to_uint64 (vd);
+	    }
+
+	    w--;
+	    mask++;
+	    dst++;
+	}
+    }
+
+    _mm_empty (); /* leave MMX state so later x87 code is safe */
+}
+
+static void /* OVER of a 32 bpp "pixbuf" source onto a 16 bpp destination; the mask is not read */
+mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    uint16_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    int32_t w;
+
+    CHECKPOINT ();
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+#if 0
+    /* FIXME */
+    assert (src_image->drawable == mask_image->drawable);
+#endif
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	CHECKPOINT ();
+
+	while (w && (unsigned long)dst & 7) /* head: single pixels until dst is 8-byte aligned */
+	{
+	    __m64 vsrc = load8888 (*src);
+	    uint64_t d = *dst;
+	    __m64 vdest = expand565 (to_m64 (d), 0);
+
+	    vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0);
+
+	    *dst = to_uint64 (vdest); /* truncated to 16 bits by the store */
+
+	    w--;
+	    dst++;
+	    src++;
+	}
+
+	CHECKPOINT ();
+
+	while (w >= 4) /* body: four destination pixels (64 bits) per iteration */
+	{
+	    uint32_t s0, s1, s2, s3;
+	    unsigned char a0, a1, a2, a3;
+
+	    s0 = *src;
+	    s1 = *(src + 1);
+	    s2 = *(src + 2);
+	    s3 = *(src + 3);
+
+	    a0 = (s0 >> 24); /* top byte of each source pixel is its alpha */
+	    a1 = (s1 >> 24);
+	    a2 = (s2 >> 24);
+	    a3 = (s3 >> 24);
+
+	    if ((a0 & a1 & a2 & a3) == 0xFF) /* all four opaque: dst is overwritten, never read */
+	    {
+		__m64 vdest;
+		vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0);
+		vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1);
+		vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2);
+		vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3);
+
+		*(__m64 *)dst = vdest;
+	    }
+	    else if (s0 | s1 | s2 | s3) /* skip the group only when all four source words are zero */
+	    {
+		__m64 vdest = *(__m64 *)dst;
+
+		vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0);
+		vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1);
+		vdest = pack_565 (over_rev_non_pre (load8888 (s2), expand565 (vdest, 2)), vdest, 2);
+		vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3);
+
+		*(__m64 *)dst = vdest;
+	    }
+
+	    w -= 4;
+	    dst += 4;
+	    src += 4;
+	}
+
+	CHECKPOINT ();
+
+	while (w) /* tail: the remaining 0..3 pixels */
+	{
+	    __m64 vsrc = load8888 (*src);
+	    uint64_t d = *dst;
+	    __m64 vdest = expand565 (to_m64 (d), 0);
+
+	    vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0);
+
+	    *dst = to_uint64 (vdest);
+
+	    w--;
+	    dst++;
+	    src++;
+	}
+    }
+
+    _mm_empty (); /* leave MMX state */
+}
+
+static void /* OVER of a 32 bpp "pixbuf" source onto a 32 bpp destination; the mask is not read */
+mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    int32_t w;
+
+    CHECKPOINT ();
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+#if 0
+    /* FIXME */
+    assert (src_image->drawable == mask_image->drawable);
+#endif
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	while (w && (unsigned long)dst & 7) /* head: single pixels until dst is 8-byte aligned */
+	{
+	    __m64 s = load8888 (*src);
+	    __m64 d = load8888 (*dst);
+
+	    *dst = store8888 (over_rev_non_pre (s, d));
+
+	    w--;
+	    dst++;
+	    src++;
+	}
+
+	while (w >= 2) /* body: two pixels (64 bits of destination) per iteration */
+	{
+	    uint64_t s0, s1;
+	    unsigned char a0, a1;
+	    __m64 d0, d1;
+
+	    s0 = *src;
+	    s1 = *(src + 1);
+
+	    a0 = (s0 >> 24); /* bits 24..31 of each source pixel: its alpha */
+	    a1 = (s1 >> 24);
+
+	    if ((a0 & a1) == 0xFF) /* both opaque: dst is overwritten, never read */
+	    {
+		d0 = invert_colors (load8888 (s0));
+		d1 = invert_colors (load8888 (s1));
+
+		*(__m64 *)dst = pack8888 (d0, d1);
+	    }
+	    else if (s0 | s1) /* skip the pair only when both source words are zero */
+	    {
+		__m64 vdest = *(__m64 *)dst;
+
+		d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0));
+		d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1));
+
+		*(__m64 *)dst = pack8888 (d0, d1);
+	    }
+
+	    w -= 2;
+	    dst += 2;
+	    src += 2;
+	}
+
+	while (w) /* tail: at most one pixel remains */
+	{
+	    __m64 s = load8888 (*src);
+	    __m64 d = load8888 (*dst);
+
+	    *dst = store8888 (over_rev_non_pre (s, d));
+
+	    w--;
+	    dst++;
+	    src++;
+	}
+    }
+
+    _mm_empty (); /* leave MMX state */
+}
+
+/* OVER: solid source, component-alpha 32 bpp mask, 16 bpp (565) destination.
+ * Pixels whose mask word is zero are skipped; a zero source is a no-op. */
+static void
+mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
+                                   pixman_op_t              op,
+                                   pixman_image_t *         src_image,
+                                   pixman_image_t *         mask_image,
+                                   pixman_image_t *         dst_image,
+                                   int32_t                  src_x,
+                                   int32_t                  src_y,
+                                   int32_t                  mask_x,
+                                   int32_t                  mask_y,
+                                   int32_t                  dest_x,
+                                   int32_t                  dest_y,
+                                   int32_t                  width,
+                                   int32_t                  height)
+{
+    uint32_t src;
+    uint16_t    *dst_line;
+    uint32_t    *mask_line;
+    int dst_stride, mask_stride;
+    __m64 vsrc, vsrca;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+    if (src == 0)
+	return;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+    vsrc = load8888 (src);
+    vsrca = expand_alpha (vsrc);
+
+    while (height--)
+    {
+	int twidth = width;
+	uint32_t *p = mask_line;
+	uint16_t *q = dst_line;
+
+	while (twidth && ((unsigned long)q & 7))
+	{
+	    uint32_t m = *p;
+
+	    if (m)
+	    {
+		uint64_t d = *q;
+		__m64 vdest = expand565 (to_m64 (d), 0);
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
+		*q = to_uint64 (vdest);
+	    }
+
+	    twidth--;
+	    p++;
+	    q++;
+	}
+
+	while (twidth >= 4)
+	{
+	    uint32_t m0, m1, m2, m3;
+
+	    m0 = *p;
+	    m1 = *(p + 1);
+	    m2 = *(p + 2);
+	    m3 = *(p + 3);
+
+	    if ((m0 | m1 | m2 | m3))
+	    {
+		__m64 vdest = *(__m64 *)q;
+
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0);
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1);
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2);
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m3), expand565 (vdest, 3)), vdest, 3);
+
+		*(__m64 *)q = vdest;
+	    }
+	    twidth -= 4;
+	    p += 4;
+	    q += 4;
+	}
+
+	while (twidth)
+	{
+	    uint32_t m = *p;
+
+	    if (m)
+	    {
+		uint64_t d = *q;
+		__m64 vdest = expand565 (to_m64 (d), 0);
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
+		*q = to_uint64 (vdest);
+	    }
+
+	    twidth--;
+	    p++;
+	    q++;
+	}
+
+	mask_line += mask_stride;
+	dst_line += dst_stride;
+    }
+
+    _mm_empty ();
+}
+
+/* IN with a solid source and an a8 mask onto an a8 destination:
+ * dst = dst * mask * alpha (src), four pixels at a time where possible.
+ */
+static void
+mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        pixman_image_t *         src_image,
+                        pixman_image_t *         mask_image,
+                        pixman_image_t *         dst_image,
+                        int32_t                  src_x,
+                        int32_t                  src_y,
+                        int32_t                  mask_x,
+                        int32_t                  mask_y,
+                        int32_t                  dest_x,
+                        int32_t                  dest_y,
+                        int32_t                  width,
+                        int32_t                  height)
+{
+    uint8_t *dst_line, *dst;
+    uint8_t *mask_line, *mask;
+    int dst_stride, mask_stride;
+    int32_t w;
+    uint32_t src;
+    uint8_t sa;
+    __m64 vsrca;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+    sa = src >> 24;
+
+    vsrca = expand_alpha (load8888 (src));
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	/* 32-bit accesses follow: test the row pointers, not the image structs. */
+	if ((((unsigned long)dst & 3) == 0) &&
+	    (((unsigned long)mask & 3) == 0))
+	{
+	    while (w >= 4)
+	    {
+		__m64 vmask;
+		__m64 vdest;
+
+		vmask = load8888 (*(uint32_t *)mask);
+		vdest = load8888 (*(uint32_t *)dst);
+
+		*(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
+
+		dst += 4;
+		mask += 4;
+		w -= 4;
+	    }
+	}
+
+	while (w--)
+	{
+	    uint16_t tmp;
+	    uint8_t a;
+	    uint32_t m, d;
+
+	    a = *mask++;
+	    d = *dst;
+
+	    m = MUL_UN8 (sa, a, tmp);
+	    d = MUL_UN8 (m, d, tmp);
+
+	    *dst++ = d;
+	}
+    }
+
+    _mm_empty ();
+}
+
+/* IN for an a8 source onto an a8 destination: each destination byte is
+ * multiplied by the matching source byte (MUL_UN8 in the scalar loop).
+ */
+static void
+mmx_composite_in_8_8 (pixman_implementation_t *imp,
+                      pixman_op_t              op,
+                      pixman_image_t *         src_image,
+                      pixman_image_t *         mask_image,
+                      pixman_image_t *         dst_image,
+                      int32_t                  src_x,
+                      int32_t                  src_y,
+                      int32_t                  mask_x,
+                      int32_t                  mask_y,
+                      int32_t                  dest_x,
+                      int32_t                  dest_y,
+                      int32_t                  width,
+                      int32_t                  height)
+{
+    uint8_t     *dst_line, *dst;
+    uint8_t     *src_line, *src;
+    int src_stride, dst_stride;
+    int32_t w;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	/* 32-bit accesses follow: test the row pointers, not the image structs. */
+	if ((((unsigned long)dst & 3) == 0) &&
+	    (((unsigned long)src & 3) == 0))
+	{
+	    while (w >= 4)
+	    {
+		uint32_t *s = (uint32_t *)src;
+		uint32_t *d = (uint32_t *)dst;
+
+		*d = store8888 (in (load8888 (*s), load8888 (*d)));
+
+		w -= 4;
+		dst += 4;
+		src += 4;
+	    }
+	}
+
+	/* Scalar tail, or the whole row when either pointer is unaligned. */
+	while (w--)
+	{
+	    uint16_t tmp;
+	    uint8_t s = *src++;
+	    uint8_t d = *dst;
+
+	    *dst++ = MUL_UN8 (s, d, tmp);
+	}
+    }
+
+    _mm_empty ();
+}
+
+/* ADD with a solid source and an a8 mask onto an a8 destination:
+ * dst = saturate (dst + mask * alpha (src)); a zero source is a no-op. */
+static void
+mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
+			 pixman_op_t              op,
+			 pixman_image_t *         src_image,
+			 pixman_image_t *         mask_image,
+			 pixman_image_t *         dst_image,
+			 int32_t                  src_x,
+			 int32_t                  src_y,
+			 int32_t                  mask_x,
+			 int32_t                  mask_y,
+			 int32_t                  dest_x,
+			 int32_t                  dest_y,
+			 int32_t                  width,
+			 int32_t                  height)
+{
+    uint8_t     *dst_line, *dst;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    int32_t w;
+    uint32_t src;
+    uint8_t sa;
+    __m64 vsrca;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+    sa = src >> 24;
+    if (src == 0)
+	return;
+
+    vsrca = expand_alpha (load8888 (src));
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	/* 32-bit accesses follow: test the row pointers, not the image structs. */
+	if ((((unsigned long)mask & 3) == 0) &&
+	    (((unsigned long)dst  & 3) == 0))
+	{
+	    while (w >= 4)
+	    {
+		__m64 vmask = load8888 (*(uint32_t *)mask);
+		__m64 vdest = load8888 (*(uint32_t *)dst);
+
+		*(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+
+		w -= 4;
+		dst += 4;
+		mask += 4;
+	    }
+	}
+
+	while (w--)
+	{
+	    uint16_t tmp;
+	    uint16_t a;
+	    uint32_t m, d, r;
+
+	    a = *mask++;
+	    d = *dst;
+
+	    m = MUL_UN8 (sa, a, tmp);
+	    r = ADD_UN8 (m, d, tmp);
+
+	    *dst++ = r;
+	}
+    }
+
+    _mm_empty ();
+}
+
+/* ADD for an a8 source onto an a8 destination: dst = saturate (dst + src).
+ * Each row is handled in three phases: single bytes until dst is 8-byte
+ * aligned, eight bytes per _mm_adds_pu8, then single bytes for the rest.
+ */
+static void
+mmx_composite_add_8_8 (pixman_implementation_t *imp,
+		       pixman_op_t              op,
+		       pixman_image_t *         src_image,
+		       pixman_image_t *         mask_image,
+		       pixman_image_t *         dst_image,
+		       int32_t                  src_x,
+		       int32_t                  src_y,
+		       int32_t                  mask_x,
+		       int32_t                  mask_y,
+		       int32_t                  dest_x,
+		       int32_t                  dest_y,
+		       int32_t                  width,
+		       int32_t                  height)
+{
+    uint8_t *dst_row, *src_row;
+    int dst_stride, src_stride;
+
+    CHECKPOINT ();
+
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_row, 1);
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_row, 1);
+
+    for (; height--; dst_row += dst_stride, src_row += src_stride)
+    {
+	uint8_t *out = dst_row;
+	uint8_t *sp = src_row;
+	int32_t left = width;
+
+	/* Head: scalar adds until the destination is 8-byte aligned. */
+	while (left && ((unsigned long)out & 7))
+	{
+	    uint16_t sum = *out + *sp;
+
+	    /* A sum above 0xff has bit 8 set, so 0 - (sum >> 8) is all
+	     * ones and the OR clamps the stored byte to 0xff. */
+	    *out = sum | (0 - (sum >> 8));
+
+	    out++;
+	    sp++;
+	    left--;
+	}
+
+	/* Body: eight pixels per saturating byte-wise add. */
+	while (left >= 8)
+	{
+	    *(__m64*)out = _mm_adds_pu8 (*(__m64*)sp, *(__m64*)out);
+
+	    out += 8;
+	    sp += 8;
+	    left -= 8;
+	}
+
+	/* Tail: whatever is left, again one byte at a time. */
+	while (left)
+	{
+	    uint16_t sum = *out + *sp;
+
+	    *out = sum | (0 - (sum >> 8));
+
+	    out++;
+	    sp++;
+	    left--;
+	}
+    }
+
+    _mm_empty ();
+}
+
+/* ADD for 32 bpp onto 32 bpp: every byte of dst becomes the saturated sum
+ * of the source and destination bytes (_mm_adds_pu8).  One pixel at a time
+ * until dst is 8-byte aligned, then two pixels per 64-bit operation.
+ */
+static void
+mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             pixman_image_t *         src_image,
+                             pixman_image_t *         mask_image,
+                             pixman_image_t *         dst_image,
+                             int32_t                  src_x,
+                             int32_t                  src_y,
+                             int32_t                  mask_x,
+                             int32_t                  mask_y,
+                             int32_t                  dest_x,
+                             int32_t                  dest_y,
+                             int32_t                  width,
+                             int32_t                  height)
+{
+    uint32_t *dst_row, *src_row;
+    int dst_stride, src_stride;
+
+    CHECKPOINT ();
+
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_row, 1);
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_row, 1);
+
+    for (; height--; dst_row += dst_stride, src_row += src_stride)
+    {
+	uint32_t *out = dst_row;
+	uint32_t *sp = src_row;
+	int32_t left = width;
+
+	/* Head: single pixels until the destination is 8-byte aligned. */
+	while (left && ((unsigned long)out & 7))
+	{
+	    __m64 s = _mm_cvtsi32_si64 (*sp++);
+	    __m64 d = _mm_cvtsi32_si64 (*out);
+
+	    *out++ = _mm_cvtsi64_si32 (_mm_adds_pu8 (s, d));
+	    left--;
+	}
+
+	/* Body: two pixels per 64-bit saturating add. */
+	for (; left >= 2; left -= 2, out += 2, sp += 2)
+	{
+	    __m64 sum = _mm_adds_pu8 (*(__m64*)sp, *(__m64*)out);
+	    *(uint64_t*)out = to_uint64 (sum);
+	}
+
+	/* Tail: at most one pixel can remain. */
+	if (left)
+	{
+	    __m64 s = _mm_cvtsi32_si64 (*sp);
+	    __m64 d = _mm_cvtsi32_si64 (*out);
+
+	    *out = _mm_cvtsi64_si32 (_mm_adds_pu8 (s, d));
+	}
+    }
+
+    _mm_empty ();
+}
+
+static pixman_bool_t /* rectangle copy between equal-depth buffers; TRUE on success, FALSE if unsupported */
+pixman_blt_mmx (uint32_t *src_bits,
+                uint32_t *dst_bits,
+                int       src_stride,
+                int       dst_stride,
+                int       src_bpp,
+                int       dst_bpp,
+                int       src_x,
+                int       src_y,
+                int       dst_x,
+                int       dst_y,
+                int       width,
+                int       height)
+{
+    uint8_t *   src_bytes;
+    uint8_t *   dst_bytes;
+    int byte_width;
+
+    if (src_bpp != dst_bpp) /* no depth conversion here */
+	return FALSE;
+
+    if (src_bpp == 16)
+    {
+	src_stride = src_stride * (int) sizeof (uint32_t) / 2; /* presumably uint32_t units on entry; now 16-bit pixels */
+	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+	src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+	byte_width = 2 * width;
+	src_stride *= 2; /* strides in bytes from here on, matching the uint8_t row pointers */
+	dst_stride *= 2;
+    }
+    else if (src_bpp == 32)
+    {
+	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+	byte_width = 4 * width;
+	src_stride *= 4; /* strides in bytes from here on */
+	dst_stride *= 4;
+    }
+    else
+    {
+	return FALSE; /* only 16 and 32 bpp are handled */
+    }
+
+    while (height--)
+    {
+	int w; /* bytes still to copy in this row */
+	uint8_t *s = src_bytes;
+	uint8_t *d = dst_bytes;
+	src_bytes += src_stride;
+	dst_bytes += dst_stride;
+	w = byte_width;
+
+	while (w >= 2 && ((unsigned long)d & 3)) /* 16-bit copies until d is 4-byte aligned */
+	{
+	    *(uint16_t *)d = *(uint16_t *)s;
+	    w -= 2;
+	    s += 2;
+	    d += 2;
+	}
+
+	while (w >= 4 && ((unsigned long)d & 7)) /* 32-bit copies until d is 8-byte aligned */
+	{
+	    *(uint32_t *)d = *(uint32_t *)s;
+
+	    w -= 4;
+	    s += 4;
+	    d += 4;
+	}
+
+	while (w >= 64) /* 64 bytes per iteration: eight 8-byte loads, then eight 8-byte stores */
+	{
+#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
+	    __asm__ (
+	        "movq	  (%1),	  %%mm0\n"
+	        "movq	 8(%1),	  %%mm1\n"
+	        "movq	16(%1),	  %%mm2\n"
+	        "movq	24(%1),	  %%mm3\n"
+	        "movq	32(%1),	  %%mm4\n"
+	        "movq	40(%1),	  %%mm5\n"
+	        "movq	48(%1),	  %%mm6\n"
+	        "movq	56(%1),	  %%mm7\n"
+
+	        "movq	%%mm0,	  (%0)\n"
+	        "movq	%%mm1,	 8(%0)\n"
+	        "movq	%%mm2,	16(%0)\n"
+	        "movq	%%mm3,	24(%0)\n"
+	        "movq	%%mm4,	32(%0)\n"
+	        "movq	%%mm5,	40(%0)\n"
+	        "movq	%%mm6,	48(%0)\n"
+	        "movq	%%mm7,	56(%0)\n"
+		:
+		: "r" (d), "r" (s) /* %0 = destination, %1 = source */
+		: "memory",
+		  "%mm0", "%mm1", "%mm2", "%mm3",
+		  "%mm4", "%mm5", "%mm6", "%mm7");
+#else
+	    __m64 v0 = *(__m64 *)(s + 0); /* same copy through __m64 temporaries for other compilers */
+	    __m64 v1 = *(__m64 *)(s + 8);
+	    __m64 v2 = *(__m64 *)(s + 16);
+	    __m64 v3 = *(__m64 *)(s + 24);
+	    __m64 v4 = *(__m64 *)(s + 32);
+	    __m64 v5 = *(__m64 *)(s + 40);
+	    __m64 v6 = *(__m64 *)(s + 48);
+	    __m64 v7 = *(__m64 *)(s + 56);
+	    *(__m64 *)(d + 0)  = v0;
+	    *(__m64 *)(d + 8)  = v1;
+	    *(__m64 *)(d + 16) = v2;
+	    *(__m64 *)(d + 24) = v3;
+	    *(__m64 *)(d + 32) = v4;
+	    *(__m64 *)(d + 40) = v5;
+	    *(__m64 *)(d + 48) = v6;
+	    *(__m64 *)(d + 56) = v7;
+#endif
+
+	    w -= 64;
+	    s += 64;
+	    d += 64;
+	}
+	while (w >= 4) /* remaining whole 32-bit words */
+	{
+	    *(uint32_t *)d = *(uint32_t *)s;
+
+	    w -= 4;
+	    s += 4;
+	    d += 4;
+	}
+	if (w >= 2) /* a final 16-bit pixel, possible only for 16 bpp rows */
+	{
+	    *(uint16_t *)d = *(uint16_t *)s;
+	    w -= 2;
+	    s += 2;
+	    d += 2;
+	}
+    }
+
+    _mm_empty (); /* leave MMX state */
+
+    return TRUE;
+}
+
+static void /* composite entry point that forwards a plain rectangle copy to pixman_blt_mmx */
+mmx_composite_copy_area (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         pixman_image_t *         src_image,
+                         pixman_image_t *         mask_image,
+                         pixman_image_t *         dst_image,
+                         int32_t                  src_x,
+                         int32_t                  src_y,
+                         int32_t                  mask_x,
+                         int32_t                  mask_y,
+                         int32_t                  dest_x,
+                         int32_t                  dest_y,
+                         int32_t                  width,
+                         int32_t                  height)
+{
+    pixman_blt_mmx (src_image->bits.bits, /* return value (TRUE/FALSE) is not checked */
+                    dst_image->bits.bits,
+                    src_image->bits.rowstride,
+                    dst_image->bits.rowstride,
+                    PIXMAN_FORMAT_BPP (src_image->bits.format),
+                    PIXMAN_FORMAT_BPP (dst_image->bits.format),
+                    src_x, src_y, dest_x, dest_y, width, height); /* imp, op, mask_image, mask_x, mask_y unused */
+}
+
+#if 0 /* compiled out together with its (also disabled) fast-path table entries */
+static void /* OVER: 32 bpp source treated as opaque, a8 mask, 32 bpp destination */
+mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    uint32_t  *src, *src_line;
+    uint32_t  *dst, *dst_line;
+    uint8_t  *mask, *mask_line;
+    int src_stride, mask_stride, dst_stride;
+    int32_t w;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+    while (height--)
+    {
+	src = src_line;
+	src_line += src_stride;
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+
+	w = width;
+
+	while (w--) /* one pixel per iteration; no aligned multi-pixel path */
+	{
+	    uint64_t m = *mask;
+
+	    if (m) /* zero mask: destination pixel is left as is */
+	    {
+		__m64 s = load8888 (*src | 0xff000000); /* force the source alpha byte to 0xff */
+
+		if (m == 0xff) /* full coverage: plain copy of the opaque source pixel */
+		{
+		    *dst = store8888 (s);
+		}
+		else
+		{
+		    __m64 sa = expand_alpha (s);
+		    __m64 vm = expand_alpha_rev (to_m64 (m));
+		    __m64 vdest = in_over (s, sa, vm, load8888 (*dst));
+
+		    *dst = store8888 (vdest);
+		}
+	    }
+
+	    mask++;
+	    dst++;
+	    src++;
+	}
+    }
+
+    _mm_empty (); /* leave MMX state */
+}
+#endif
+
+static const pixman_fast_path_t mmx_fast_paths[] =
+{
+    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       r5g6b5,   mmx_composite_over_n_8_0565       ),
+    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       b5g6r5,   mmx_composite_over_n_8_0565       ),
+    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       a8r8g8b8, mmx_composite_over_n_8_8888       ),
+    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       x8r8g8b8, mmx_composite_over_n_8_8888       ),
+    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       a8b8g8r8, mmx_composite_over_n_8_8888       ),
+    PIXMAN_STD_FAST_PATH    (OVER, solid,    a8,       x8b8g8r8, mmx_composite_over_n_8_8888       ),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8r8g8b8, a8r8g8b8, mmx_composite_over_n_8888_8888_ca ),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8r8g8b8, x8r8g8b8, mmx_composite_over_n_8888_8888_ca ),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8r8g8b8, r5g6b5,   mmx_composite_over_n_8888_0565_ca ),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8b8g8r8, a8b8g8r8, mmx_composite_over_n_8888_8888_ca ),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8b8g8r8, x8b8g8r8, mmx_composite_over_n_8888_8888_ca ),
+    PIXMAN_STD_FAST_PATH_CA (OVER, solid,    a8b8g8r8, b5g6r5,   mmx_composite_over_n_8888_0565_ca ),
+    PIXMAN_STD_FAST_PATH    (OVER, pixbuf,   pixbuf,   a8r8g8b8, mmx_composite_over_pixbuf_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, pixbuf,   pixbuf,   x8r8g8b8, mmx_composite_over_pixbuf_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, pixbuf,   pixbuf,   r5g6b5,   mmx_composite_over_pixbuf_0565    ),
+    PIXMAN_STD_FAST_PATH    (OVER, rpixbuf,  rpixbuf,  a8b8g8r8, mmx_composite_over_pixbuf_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, rpixbuf,  rpixbuf,  x8b8g8r8, mmx_composite_over_pixbuf_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, rpixbuf,  rpixbuf,  b5g6r5,   mmx_composite_over_pixbuf_0565    ),
+    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, solid,    a8r8g8b8, mmx_composite_over_x888_n_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, solid,    x8r8g8b8, mmx_composite_over_x888_n_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, solid,    a8b8g8r8, mmx_composite_over_x888_n_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, solid,    x8b8g8r8, mmx_composite_over_x888_n_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, solid,    a8r8g8b8, mmx_composite_over_8888_n_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, solid,    x8r8g8b8, mmx_composite_over_8888_n_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, solid,    a8b8g8r8, mmx_composite_over_8888_n_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, solid,    x8b8g8r8, mmx_composite_over_8888_n_8888    ),
+#if 0
+    /* FIXME: This code is commented out since it's apparently
+     * not actually faster than the generic code.
+     */
+    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, a8,       x8r8g8b8, mmx_composite_over_x888_8_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, a8,       a8r8g8b8, mmx_composite_over_x888_8_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, x8b8r8g8, a8,       x8b8g8r8, mmx_composite_over_x888_8_8888    ),
+    PIXMAN_STD_FAST_PATH    (OVER, x8b8r8g8, a8,       a8r8g8b8, mmx_composite_over_x888_8_8888    ),
+#endif
+    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     a8r8g8b8, mmx_composite_over_n_8888         ),
+    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     x8r8g8b8, mmx_composite_over_n_8888         ),
+    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     r5g6b5,   mmx_composite_over_n_0565         ),
+    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
+    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
+
+    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     a8r8g8b8, mmx_composite_over_8888_8888      ),
+    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     x8r8g8b8, mmx_composite_over_8888_8888      ),
+    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     r5g6b5,   mmx_composite_over_8888_0565      ),
+    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     a8b8g8r8, mmx_composite_over_8888_8888      ),
+    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     x8b8g8r8, mmx_composite_over_8888_8888      ),
+    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     b5g6r5,   mmx_composite_over_8888_0565      ),
+
+    PIXMAN_STD_FAST_PATH    (ADD,  a8r8g8b8, null,     a8r8g8b8, mmx_composite_add_8888_8888       ),
+    PIXMAN_STD_FAST_PATH    (ADD,  a8b8g8r8, null,     a8b8g8r8, mmx_composite_add_8888_8888       ),
+    PIXMAN_STD_FAST_PATH    (ADD,  a8,       null,     a8,       mmx_composite_add_8_8		   ),
+    PIXMAN_STD_FAST_PATH    (ADD,  solid,    a8,       a8,       mmx_composite_add_n_8_8           ),
+
+    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       a8r8g8b8, mmx_composite_src_n_8_8888        ),
+    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       x8r8g8b8, mmx_composite_src_n_8_8888        ),
+    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       a8b8g8r8, mmx_composite_src_n_8_8888        ),
+    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       x8b8g8r8, mmx_composite_src_n_8_8888        ),
+    PIXMAN_STD_FAST_PATH    (SRC,  a8r8g8b8, null,     a8r8g8b8, mmx_composite_copy_area           ),
+    PIXMAN_STD_FAST_PATH    (SRC,  a8b8g8r8, null,     a8b8g8r8, mmx_composite_copy_area           ),
+    PIXMAN_STD_FAST_PATH    (SRC,  a8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
+    PIXMAN_STD_FAST_PATH    (SRC,  a8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
+    PIXMAN_STD_FAST_PATH    (SRC,  x8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
+    PIXMAN_STD_FAST_PATH    (SRC,  x8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
+    PIXMAN_STD_FAST_PATH    (SRC,  r5g6b5,   null,     r5g6b5,   mmx_composite_copy_area           ),
+    PIXMAN_STD_FAST_PATH    (SRC,  b5g6r5,   null,     b5g6r5,   mmx_composite_copy_area           ),
+
+    PIXMAN_STD_FAST_PATH    (IN,   a8,       null,     a8,       mmx_composite_in_8_8              ),
+    PIXMAN_STD_FAST_PATH    (IN,   solid,    a8,       a8,       mmx_composite_in_n_8_8            ),
+
+    { PIXMAN_OP_NONE },
+};
+
+/* Blit via pixman_blt_mmx; if it returns FALSE, retry through imp->delegate. */
+static pixman_bool_t
+mmx_blt (pixman_implementation_t *imp,
+         uint32_t *               src_bits,
+         uint32_t *               dst_bits,
+         int                      src_stride,
+         int                      dst_stride,
+         int                      src_bpp,
+         int                      dst_bpp,
+         int                      src_x,
+         int                      src_y,
+         int                      dst_x,
+         int                      dst_y,
+         int                      width,
+         int                      height)
+{
+    /* pixman_blt_mmx reported success: nothing more to do. */
+    if (pixman_blt_mmx (
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dst_x, dst_y, width, height))
+	return TRUE;
+
+    /* pixman_blt_mmx returned FALSE; pass the same arguments down the chain. */
+    return _pixman_implementation_blt (
+	imp->delegate,
+	src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+	src_x, src_y, dst_x, dst_y, width, height);
+}
+
+/* Fill via pixman_fill_mmx; if it returns FALSE, retry through imp->delegate. */
+static pixman_bool_t
+mmx_fill (pixman_implementation_t *imp,
+          uint32_t *               bits,
+          int                      stride,
+          int                      bpp,
+          int                      x,
+          int                      y,
+          int                      width,
+          int                      height,
+          uint32_t                 xor)
+{
+    if (pixman_fill_mmx (bits, stride, bpp, x, y, width, height, xor))
+	return TRUE;
+
+    /* pixman_fill_mmx returned FALSE; pass the same arguments down the chain. */
+    return _pixman_implementation_fill (
+	imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+}
+
+/* Create the MMX implementation, with the fast-path one as its delegate. */
+pixman_implementation_t *
+_pixman_implementation_create_mmx (void)
+{
+    pixman_implementation_t *fallback = _pixman_implementation_create_fast_path ();
+    pixman_implementation_t *mmx = _pixman_implementation_create (fallback, mmx_fast_paths);
+
+    mmx->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u;
+    mmx->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u;
+    mmx->combine_32[PIXMAN_OP_IN] = mmx_combine_in_u;
+    mmx->combine_32[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_u;
+    mmx->combine_32[PIXMAN_OP_OUT] = mmx_combine_out_u;
+    mmx->combine_32[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_u;
+    mmx->combine_32[PIXMAN_OP_ATOP] = mmx_combine_atop_u;
+    mmx->combine_32[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_u;
+    mmx->combine_32[PIXMAN_OP_XOR] = mmx_combine_xor_u;
+    mmx->combine_32[PIXMAN_OP_ADD] = mmx_combine_add_u;
+    mmx->combine_32[PIXMAN_OP_SATURATE] = mmx_combine_saturate_u;
+
+    mmx->combine_32_ca[PIXMAN_OP_SRC] = mmx_combine_src_ca;
+    mmx->combine_32_ca[PIXMAN_OP_OVER] = mmx_combine_over_ca;
+    mmx->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_ca;
+    mmx->combine_32_ca[PIXMAN_OP_IN] = mmx_combine_in_ca;
+    mmx->combine_32_ca[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_ca;
+    mmx->combine_32_ca[PIXMAN_OP_OUT] = mmx_combine_out_ca;
+    mmx->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_ca;
+    mmx->combine_32_ca[PIXMAN_OP_ATOP] = mmx_combine_atop_ca;
+    mmx->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_ca;
+    mmx->combine_32_ca[PIXMAN_OP_XOR] = mmx_combine_xor_ca;
+    mmx->combine_32_ca[PIXMAN_OP_ADD] = mmx_combine_add_ca;
+
+    mmx->blt = mmx_blt;
+    mmx->fill = mmx_fill;
+    return mmx;
+}
+
+#endif /* USE_MMX */
diff --git a/pixman/pixman/pixman-private.h b/pixman/pixman/pixman-private.h
index b6eb24835..1662d2c46 100644
--- a/pixman/pixman/pixman-private.h
+++ b/pixman/pixman/pixman-private.h
@@ -1,867 +1,912 @@
-#ifndef PACKAGE
-#  error config.h must be included before pixman-private.h
-#endif
-
-#ifndef PIXMAN_PRIVATE_H
-#define PIXMAN_PRIVATE_H
-
-#define PIXMAN_DISABLE_DEPRECATED
-#define PIXMAN_USE_INTERNAL_API
-
-#include "pixman.h"
-#include <time.h>
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "pixman-compiler.h"
-
-/*
- * Images
- */
-typedef struct image_common image_common_t;
-typedef struct solid_fill solid_fill_t;
-typedef struct gradient gradient_t;
-typedef struct linear_gradient linear_gradient_t;
-typedef struct horizontal_gradient horizontal_gradient_t;
-typedef struct vertical_gradient vertical_gradient_t;
-typedef struct conical_gradient conical_gradient_t;
-typedef struct radial_gradient radial_gradient_t;
-typedef struct bits_image bits_image_t;
-typedef struct circle circle_t;
-
-typedef void (*fetch_scanline_t) (pixman_image_t *image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  uint32_t       *buffer,
-				  const uint32_t *mask);
-
-typedef uint32_t (*fetch_pixel_32_t) (bits_image_t *image,
-				      int           x,
-				      int           y);
-
-typedef uint64_t (*fetch_pixel_64_t) (bits_image_t *image,
-				      int           x,
-				      int           y);
-
-typedef void (*store_scanline_t) (bits_image_t *  image,
-				  int             x,
-				  int             y,
-				  int             width,
-				  const uint32_t *values);
-
-typedef enum
-{
-    BITS,
-    LINEAR,
-    CONICAL,
-    RADIAL,
-    SOLID
-} image_type_t;
-
-typedef enum
-{
-    SOURCE_IMAGE_CLASS_UNKNOWN,
-    SOURCE_IMAGE_CLASS_HORIZONTAL,
-} source_image_class_t;
-
-typedef source_image_class_t (*classify_func_t) (pixman_image_t *image,
-						int             x,
-						int             y,
-						int             width,
-						int             height);
-typedef void (*property_changed_func_t) (pixman_image_t *image);
-
-struct image_common
-{
-    image_type_t                type;
-    int32_t                     ref_count;
-    pixman_region32_t           clip_region;
-    int32_t			alpha_count;	    /* How many times this image is being used as an alpha map */
-    pixman_bool_t               have_clip_region;   /* FALSE if there is no clip */
-    pixman_bool_t               client_clip;        /* Whether the source clip was
-						       set by a client */
-    pixman_bool_t               clip_sources;       /* Whether the clip applies when
-						     * the image is used as a source
-						     */
-    pixman_bool_t		dirty;
-    pixman_transform_t *        transform;
-    pixman_repeat_t             repeat;
-    pixman_filter_t             filter;
-    pixman_fixed_t *            filter_params;
-    int                         n_filter_params;
-    bits_image_t *              alpha_map;
-    int                         alpha_origin_x;
-    int                         alpha_origin_y;
-    pixman_bool_t               component_alpha;
-    classify_func_t             classify;
-    property_changed_func_t     property_changed;
-    fetch_scanline_t            get_scanline_32;
-    fetch_scanline_t            get_scanline_64;
-
-    pixman_image_destroy_func_t destroy_func;
-    void *                      destroy_data;
-
-    uint32_t			flags;
-    pixman_format_code_t	extended_format_code;
-};
-
-struct solid_fill
-{
-    image_common_t common;
-    pixman_color_t color;
-    
-    uint32_t	   color_32;
-    uint64_t	   color_64;
-};
-
-struct gradient
-{
-    image_common_t	    common;
-    int                     n_stops;
-    pixman_gradient_stop_t *stops;
-};
-
-struct linear_gradient
-{
-    gradient_t           common;
-    pixman_point_fixed_t p1;
-    pixman_point_fixed_t p2;
-};
-
-struct circle
-{
-    pixman_fixed_t x;
-    pixman_fixed_t y;
-    pixman_fixed_t radius;
-};
-
-struct radial_gradient
-{
-    gradient_t common;
-
-    circle_t   c1;
-    circle_t   c2;
-
-    circle_t   delta;
-    double     a;
-    double     inva;
-    double     mindr;
-};
-
-struct conical_gradient
-{
-    gradient_t           common;
-    pixman_point_fixed_t center;
-    double		 angle;
-};
-
-struct bits_image
-{
-    image_common_t             common;
-    pixman_format_code_t       format;
-    const pixman_indexed_t *   indexed;
-    int                        width;
-    int                        height;
-    uint32_t *                 bits;
-    uint32_t *                 free_me;
-    int                        rowstride;  /* in number of uint32_t's */
-
-    fetch_scanline_t           fetch_scanline_32;
-    fetch_pixel_32_t	       fetch_pixel_32;
-    store_scanline_t           store_scanline_32;
-
-    fetch_scanline_t           fetch_scanline_64;
-    fetch_pixel_64_t	       fetch_pixel_64;
-    store_scanline_t           store_scanline_64;
-
-    /* Used for indirect access to the bits */
-    pixman_read_memory_func_t  read_func;
-    pixman_write_memory_func_t write_func;
-};
-
-union pixman_image
-{
-    image_type_t       type;
-    image_common_t     common;
-    bits_image_t       bits;
-    gradient_t         gradient;
-    linear_gradient_t  linear;
-    conical_gradient_t conical;
-    radial_gradient_t  radial;
-    solid_fill_t       solid;
-};
-
-void
-_pixman_bits_image_setup_accessors (bits_image_t *image);
-
-void
-_pixman_image_get_scanline_generic_64  (pixman_image_t *image,
-                                        int             x,
-                                        int             y,
-                                        int             width,
-                                        uint32_t *      buffer,
-                                        const uint32_t *mask);
-
-source_image_class_t
-_pixman_image_classify (pixman_image_t *image,
-                        int             x,
-                        int             y,
-                        int             width,
-                        int             height);
-
-void
-_pixman_image_get_scanline_32 (pixman_image_t *image,
-                               int             x,
-                               int             y,
-                               int             width,
-                               uint32_t *      buffer,
-                               const uint32_t *mask);
-
-/* Even thought the type of buffer is uint32_t *, the function actually expects
- * a uint64_t *buffer.
- */
-void
-_pixman_image_get_scanline_64 (pixman_image_t *image,
-                               int             x,
-                               int             y,
-                               int             width,
-                               uint32_t *      buffer,
-                               const uint32_t *unused);
-
-void
-_pixman_image_store_scanline_32 (bits_image_t *  image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 const uint32_t *buffer);
-
-/* Even though the type of buffer is uint32_t *, the function
- * actually expects a uint64_t *buffer.
- */
-void
-_pixman_image_store_scanline_64 (bits_image_t *  image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 const uint32_t *buffer);
-
-pixman_image_t *
-_pixman_image_allocate (void);
-
-pixman_bool_t
-_pixman_init_gradient (gradient_t *                  gradient,
-                       const pixman_gradient_stop_t *stops,
-                       int                           n_stops);
-void
-_pixman_image_reset_clip_region (pixman_image_t *image);
-
-void
-_pixman_image_validate (pixman_image_t *image);
-
-uint32_t
-_pixman_image_get_solid (pixman_image_t *     image,
-                         pixman_format_code_t format);
-
-#define PIXMAN_IMAGE_GET_LINE(image, x, y, type, out_stride, line, mul)	\
-    do									\
-    {									\
-	uint32_t *__bits__;						\
-	int       __stride__;						\
-        								\
-	__bits__ = image->bits.bits;					\
-	__stride__ = image->bits.rowstride;				\
-	(out_stride) =							\
-	    __stride__ * (int) sizeof (uint32_t) / (int) sizeof (type);	\
-	(line) =							\
-	    ((type *) __bits__) + (out_stride) * (y) + (mul) * (x);	\
-    } while (0)
-
-/*
- * Gradient walker
- */
-typedef struct
-{
-    uint32_t                left_ag;
-    uint32_t                left_rb;
-    uint32_t                right_ag;
-    uint32_t                right_rb;
-    int32_t                 left_x;
-    int32_t                 right_x;
-    int32_t                 stepper;
-
-    pixman_gradient_stop_t *stops;
-    int                     num_stops;
-    unsigned int            spread;
-
-    int                     need_reset;
-} pixman_gradient_walker_t;
-
-void
-_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
-                              gradient_t *              gradient,
-                              unsigned int              spread);
-
-void
-_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
-                               pixman_fixed_32_32_t      pos);
-
-uint32_t
-_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
-                               pixman_fixed_32_32_t      x);
-
-/*
- * Edges
- */
-
-#define MAX_ALPHA(n)    ((1 << (n)) - 1)
-#define N_Y_FRAC(n)     ((n) == 1 ? 1 : (1 << ((n) / 2)) - 1)
-#define N_X_FRAC(n)     ((n) == 1 ? 1 : (1 << ((n) / 2)) + 1)
-
-#define STEP_Y_SMALL(n) (pixman_fixed_1 / N_Y_FRAC (n))
-#define STEP_Y_BIG(n)   (pixman_fixed_1 - (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
-
-#define Y_FRAC_FIRST(n) (STEP_Y_BIG (n) / 2)
-#define Y_FRAC_LAST(n)  (Y_FRAC_FIRST (n) + (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
-
-#define STEP_X_SMALL(n) (pixman_fixed_1 / N_X_FRAC (n))
-#define STEP_X_BIG(n)   (pixman_fixed_1 - (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
-
-#define X_FRAC_FIRST(n) (STEP_X_BIG (n) / 2)
-#define X_FRAC_LAST(n)  (X_FRAC_FIRST (n) + (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
-
-#define RENDER_SAMPLES_X(x, n)						\
-    ((n) == 1? 0 : (pixman_fixed_frac (x) +				\
-		    X_FRAC_FIRST (n)) / STEP_X_SMALL (n))
-
-void
-pixman_rasterize_edges_accessors (pixman_image_t *image,
-                                  pixman_edge_t * l,
-                                  pixman_edge_t * r,
-                                  pixman_fixed_t  t,
-                                  pixman_fixed_t  b);
-
-/*
- * Implementations
- */
-typedef struct pixman_implementation_t pixman_implementation_t;
-
-typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp,
-					  pixman_op_t              op,
-					  uint32_t *               dest,
-					  const uint32_t *         src,
-					  const uint32_t *         mask,
-					  int                      width);
-
-typedef void (*pixman_combine_64_func_t) (pixman_implementation_t *imp,
-					  pixman_op_t              op,
-					  uint64_t *               dest,
-					  const uint64_t *         src,
-					  const uint64_t *         mask,
-					  int                      width);
-
-typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp,
-					 pixman_op_t              op,
-					 pixman_image_t *         src,
-					 pixman_image_t *         mask,
-					 pixman_image_t *         dest,
-					 int32_t                  src_x,
-					 int32_t                  src_y,
-					 int32_t                  mask_x,
-					 int32_t                  mask_y,
-					 int32_t                  dest_x,
-					 int32_t                  dest_y,
-					 int32_t                  width,
-					 int32_t                  height);
-typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp,
-					    uint32_t *               src_bits,
-					    uint32_t *               dst_bits,
-					    int                      src_stride,
-					    int                      dst_stride,
-					    int                      src_bpp,
-					    int                      dst_bpp,
-					    int                      src_x,
-					    int                      src_y,
-					    int                      dst_x,
-					    int                      dst_y,
-					    int                      width,
-					    int                      height);
-typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
-					     uint32_t *               bits,
-					     int                      stride,
-					     int                      bpp,
-					     int                      x,
-					     int                      y,
-					     int                      width,
-					     int                      height,
-					     uint32_t                 xor);
-
-void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
-void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp);
-
-typedef struct
-{
-    pixman_op_t             op;
-    pixman_format_code_t    src_format;
-    uint32_t		    src_flags;
-    pixman_format_code_t    mask_format;
-    uint32_t		    mask_flags;
-    pixman_format_code_t    dest_format;
-    uint32_t		    dest_flags;
-    pixman_composite_func_t func;
-} pixman_fast_path_t;
-
-struct pixman_implementation_t
-{
-    pixman_implementation_t *	toplevel;
-    pixman_implementation_t *	delegate;
-    const pixman_fast_path_t *	fast_paths;
-    
-    pixman_blt_func_t		blt;
-    pixman_fill_func_t		fill;
-
-    pixman_combine_32_func_t	combine_32[PIXMAN_N_OPERATORS];
-    pixman_combine_32_func_t	combine_32_ca[PIXMAN_N_OPERATORS];
-    pixman_combine_64_func_t	combine_64[PIXMAN_N_OPERATORS];
-    pixman_combine_64_func_t	combine_64_ca[PIXMAN_N_OPERATORS];
-};
-
-pixman_implementation_t *
-_pixman_implementation_create (pixman_implementation_t *delegate,
-			       const pixman_fast_path_t *fast_paths);
-
-void
-_pixman_implementation_combine_32 (pixman_implementation_t *imp,
-                                   pixman_op_t              op,
-                                   uint32_t *               dest,
-                                   const uint32_t *         src,
-                                   const uint32_t *         mask,
-                                   int                      width);
-void
-_pixman_implementation_combine_64 (pixman_implementation_t *imp,
-                                   pixman_op_t              op,
-                                   uint64_t *               dest,
-                                   const uint64_t *         src,
-                                   const uint64_t *         mask,
-                                   int                      width);
-void
-_pixman_implementation_combine_32_ca (pixman_implementation_t *imp,
-                                      pixman_op_t              op,
-                                      uint32_t *               dest,
-                                      const uint32_t *         src,
-                                      const uint32_t *         mask,
-                                      int                      width);
-void
-_pixman_implementation_combine_64_ca (pixman_implementation_t *imp,
-                                      pixman_op_t              op,
-                                      uint64_t *               dest,
-                                      const uint64_t *         src,
-                                      const uint64_t *         mask,
-                                      int                      width);
-
-pixman_bool_t
-_pixman_implementation_blt (pixman_implementation_t *imp,
-                            uint32_t *               src_bits,
-                            uint32_t *               dst_bits,
-                            int                      src_stride,
-                            int                      dst_stride,
-                            int                      src_bpp,
-                            int                      dst_bpp,
-                            int                      src_x,
-                            int                      src_y,
-                            int                      dst_x,
-                            int                      dst_y,
-                            int                      width,
-                            int                      height);
-
-pixman_bool_t
-_pixman_implementation_fill (pixman_implementation_t *imp,
-                             uint32_t *               bits,
-                             int                      stride,
-                             int                      bpp,
-                             int                      x,
-                             int                      y,
-                             int                      width,
-                             int                      height,
-                             uint32_t                 xor);
-
-/* Specific implementations */
-pixman_implementation_t *
-_pixman_implementation_create_general (void);
-
-pixman_implementation_t *
-_pixman_implementation_create_fast_path (void);
-
-#ifdef USE_MMX
-pixman_implementation_t *
-_pixman_implementation_create_mmx (void);
-#endif
-
-#ifdef USE_SSE2
-pixman_implementation_t *
-_pixman_implementation_create_sse2 (void);
-#endif
-
-#ifdef USE_ARM_SIMD
-pixman_implementation_t *
-_pixman_implementation_create_arm_simd (void);
-#endif
-
-#ifdef USE_ARM_NEON
-pixman_implementation_t *
-_pixman_implementation_create_arm_neon (void);
-#endif
-
-#ifdef USE_VMX
-pixman_implementation_t *
-_pixman_implementation_create_vmx (void);
-#endif
-
-pixman_implementation_t *
-_pixman_choose_implementation (void);
-
-
-
-/*
- * Utilities
- */
-
-/* These "formats" all have depth 0, so they
- * will never clash with any real ones
- */
-#define PIXMAN_null             PIXMAN_FORMAT (0, 0, 0, 0, 0, 0)
-#define PIXMAN_solid            PIXMAN_FORMAT (0, 1, 0, 0, 0, 0)
-#define PIXMAN_pixbuf		PIXMAN_FORMAT (0, 2, 0, 0, 0, 0)
-#define PIXMAN_rpixbuf		PIXMAN_FORMAT (0, 3, 0, 0, 0, 0)
-#define PIXMAN_unknown		PIXMAN_FORMAT (0, 4, 0, 0, 0, 0)
-#define PIXMAN_any		PIXMAN_FORMAT (0, 5, 0, 0, 0, 0)
-
-#define PIXMAN_OP_any		(PIXMAN_N_OPERATORS + 1)
-
-#define FAST_PATH_ID_TRANSFORM			(1 <<  0)
-#define FAST_PATH_NO_ALPHA_MAP			(1 <<  1)
-#define FAST_PATH_NO_CONVOLUTION_FILTER		(1 <<  2)
-#define FAST_PATH_NO_PAD_REPEAT			(1 <<  3)
-#define FAST_PATH_NO_REFLECT_REPEAT		(1 <<  4)
-#define FAST_PATH_NO_ACCESSORS			(1 <<  5)
-#define FAST_PATH_NARROW_FORMAT			(1 <<  6)
-#define FAST_PATH_COMPONENT_ALPHA		(1 <<  8)
-#define FAST_PATH_SAMPLES_OPAQUE		(1 <<  7)
-#define FAST_PATH_UNIFIED_ALPHA			(1 <<  9)
-#define FAST_PATH_SCALE_TRANSFORM		(1 << 10)
-#define FAST_PATH_NEAREST_FILTER		(1 << 11)
-#define FAST_PATH_HAS_TRANSFORM			(1 << 12)
-#define FAST_PATH_IS_OPAQUE			(1 << 13)
-#define FAST_PATH_NO_NORMAL_REPEAT		(1 << 14)
-#define FAST_PATH_NO_NONE_REPEAT		(1 << 15)
-#define FAST_PATH_SAMPLES_COVER_CLIP		(1 << 16)
-#define FAST_PATH_X_UNIT_POSITIVE		(1 << 17)
-#define FAST_PATH_AFFINE_TRANSFORM		(1 << 18)
-#define FAST_PATH_Y_UNIT_ZERO			(1 << 19)
-#define FAST_PATH_BILINEAR_FILTER		(1 << 20)
-
-#define FAST_PATH_PAD_REPEAT						\
-    (FAST_PATH_NO_NONE_REPEAT		|				\
-     FAST_PATH_NO_NORMAL_REPEAT		|				\
-     FAST_PATH_NO_REFLECT_REPEAT)
-
-#define FAST_PATH_NORMAL_REPEAT						\
-    (FAST_PATH_NO_NONE_REPEAT		|				\
-     FAST_PATH_NO_PAD_REPEAT		|				\
-     FAST_PATH_NO_REFLECT_REPEAT)
-
-#define FAST_PATH_NONE_REPEAT						\
-    (FAST_PATH_NO_NORMAL_REPEAT		|				\
-     FAST_PATH_NO_PAD_REPEAT		|				\
-     FAST_PATH_NO_REFLECT_REPEAT)
-
-#define FAST_PATH_REFLECT_REPEAT					\
-    (FAST_PATH_NO_NONE_REPEAT		|				\
-     FAST_PATH_NO_NORMAL_REPEAT		|				\
-     FAST_PATH_NO_PAD_REPEAT)
-
-#define FAST_PATH_STANDARD_FLAGS					\
-    (FAST_PATH_NO_CONVOLUTION_FILTER	|				\
-     FAST_PATH_NO_ACCESSORS		|				\
-     FAST_PATH_NO_ALPHA_MAP		|				\
-     FAST_PATH_NARROW_FORMAT)
-
-#define FAST_PATH_STD_DEST_FLAGS					\
-    (FAST_PATH_NO_ACCESSORS		|				\
-     FAST_PATH_NO_ALPHA_MAP		|				\
-     FAST_PATH_NARROW_FORMAT)
-
-#define SOURCE_FLAGS(format)						\
-    (FAST_PATH_STANDARD_FLAGS |						\
-     ((PIXMAN_ ## format == PIXMAN_solid) ?				\
-      0 : (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_ID_TRANSFORM)))
-
-#define MASK_FLAGS(format, extra)					\
-    ((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra))
-
-#define FAST_PATH(op, src, src_flags, mask, mask_flags, dest, dest_flags, func) \
-    PIXMAN_OP_ ## op,							\
-    PIXMAN_ ## src,							\
-    src_flags,							        \
-    PIXMAN_ ## mask,						        \
-    mask_flags,							        \
-    PIXMAN_ ## dest,	                                                \
-    dest_flags,							        \
-    func
-
-#define PIXMAN_STD_FAST_PATH(op, src, mask, dest, func)			\
-    { FAST_PATH (							\
-	    op,								\
-	    src,  SOURCE_FLAGS (src),					\
-	    mask, MASK_FLAGS (mask, FAST_PATH_UNIFIED_ALPHA),		\
-	    dest, FAST_PATH_STD_DEST_FLAGS,				\
-	    func) }
-
-#define PIXMAN_STD_FAST_PATH_CA(op, src, mask, dest, func)		\
-    { FAST_PATH (							\
-	    op,								\
-	    src,  SOURCE_FLAGS (src),					\
-	    mask, MASK_FLAGS (mask, FAST_PATH_COMPONENT_ALPHA),		\
-	    dest, FAST_PATH_STD_DEST_FLAGS,				\
-	    func) }
-
-/* Memory allocation helpers */
-void *
-pixman_malloc_ab (unsigned int n, unsigned int b);
-
-void *
-pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c);
-
-pixman_bool_t
-pixman_multiply_overflows_int (unsigned int a, unsigned int b);
-
-pixman_bool_t
-pixman_addition_overflows_int (unsigned int a, unsigned int b);
-
-/* Compositing utilities */
-void
-pixman_expand (uint64_t *           dst,
-               const uint32_t *     src,
-               pixman_format_code_t format,
-               int                  width);
-
-void
-pixman_contract (uint32_t *      dst,
-                 const uint64_t *src,
-                 int             width);
-
-
-/* Region Helpers */
-pixman_bool_t
-pixman_region32_copy_from_region16 (pixman_region32_t *dst,
-                                    pixman_region16_t *src);
-
-pixman_bool_t
-pixman_region16_copy_from_region32 (pixman_region16_t *dst,
-                                    pixman_region32_t *src);
-
-
-/* Misc macros */
-
-#ifndef FALSE
-#   define FALSE 0
-#endif
-
-#ifndef TRUE
-#   define TRUE 1
-#endif
-
-#ifndef MIN
-#  define MIN(a, b) ((a < b) ? a : b)
-#endif
-
-#ifndef MAX
-#  define MAX(a, b) ((a > b) ? a : b)
-#endif
-
-/* Integer division that rounds towards -infinity */
-#define DIV(a, b)					   \
-    ((((a) < 0) == ((b) < 0)) ? (a) / (b) :                \
-     ((a) - (b) + 1 - (((b) < 0) << 1)) / (b))
-
-/* Modulus that produces the remainder wrt. DIV */
-#define MOD(a, b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b))
-
-#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v)))
-
-/* Conversion between 8888 and 0565 */
-
-#define CONVERT_8888_TO_0565(s)						\
-    ((((s) >> 3) & 0x001f) |						\
-     (((s) >> 5) & 0x07e0) |						\
-     (((s) >> 8) & 0xf800))
-
-#define CONVERT_0565_TO_0888(s)						\
-    (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) |			\
-     ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) |			\
-     ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000)))
-
-#define CONVERT_0565_TO_8888(s) (CONVERT_0565_TO_0888(s) | 0xff000000)
-
-/* Trivial versions that are useful in macros */
-#define CONVERT_8888_TO_8888(s) (s)
-#define CONVERT_0565_TO_0565(s) (s)
-
-#define PIXMAN_FORMAT_IS_WIDE(f)					\
-    (PIXMAN_FORMAT_A (f) > 8 ||						\
-     PIXMAN_FORMAT_R (f) > 8 ||						\
-     PIXMAN_FORMAT_G (f) > 8 ||						\
-     PIXMAN_FORMAT_B (f) > 8)
-
-#ifdef WORDS_BIGENDIAN
-#   define SCREEN_SHIFT_LEFT(x,n)	((x) << (n))
-#   define SCREEN_SHIFT_RIGHT(x,n)	((x) >> (n))
-#else
-#   define SCREEN_SHIFT_LEFT(x,n)	((x) >> (n))
-#   define SCREEN_SHIFT_RIGHT(x,n)	((x) << (n))
-#endif
-
-/*
- * Various debugging code
- */
-
-#undef DEBUG
-
-#define COMPILE_TIME_ASSERT(x)						\
-    do { typedef int compile_time_assertion [(x)?1:-1]; } while (0)
-
-/* Turn on debugging depending on what type of release this is
- */
-#if (((PIXMAN_VERSION_MICRO % 2) == 0) && ((PIXMAN_VERSION_MINOR % 2) == 1))
-
-/* Debugging gets turned on for development releases because these
- * are the things that end up in bleeding edge distributions such
- * as Rawhide etc.
- *
- * For performance reasons we don't turn it on for stable releases or
- * random git checkouts. (Random git checkouts are often used for
- * performance work).
- */
-
-#    define DEBUG
-
-#endif
-
-#ifdef DEBUG
-
-void
-_pixman_log_error (const char *function, const char *message);
-
-#define return_if_fail(expr)                                            \
-    do                                                                  \
-    {                                                                   \
-	if (!(expr))							\
-	{								\
-	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
-	    return;							\
-	}								\
-    }                                                                   \
-    while (0)
-
-#define return_val_if_fail(expr, retval)                                \
-    do                                                                  \
-    {                                                                   \
-	if (!(expr))                                                    \
-	{								\
-	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
-	    return (retval);						\
-	}								\
-    }                                                                   \
-    while (0)
-
-#define critical_if_fail(expr)						\
-    do									\
-    {									\
-	if (!(expr))							\
-	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
-    }									\
-    while (0)
-
-
-#else
-
-#define _pixman_log_error(f,m) do { } while (0)				\
-
-#define return_if_fail(expr)						\
-    do                                                                  \
-    {                                                                   \
-	if (!(expr))							\
-	    return;							\
-    }                                                                   \
-    while (0)
-
-#define return_val_if_fail(expr, retval)                                \
-    do                                                                  \
-    {                                                                   \
-	if (!(expr))							\
-	    return (retval);						\
-    }                                                                   \
-    while (0)
-
-#define critical_if_fail(expr)						\
-    do									\
-    {									\
-    }									\
-    while (0)
-#endif
-
-/*
- * Timers
- */
-
-#ifdef PIXMAN_TIMERS
-
-static inline uint64_t
-oil_profile_stamp_rdtsc (void)
-{
-    uint64_t ts;
-
-    __asm__ __volatile__ ("rdtsc\n" : "=A" (ts));
-    return ts;
-}
-
-#define OIL_STAMP oil_profile_stamp_rdtsc
-
-typedef struct pixman_timer_t pixman_timer_t;
-
-struct pixman_timer_t
-{
-    int             initialized;
-    const char *    name;
-    uint64_t        n_times;
-    uint64_t        total;
-    pixman_timer_t *next;
-};
-
-extern int timer_defined;
-
-void pixman_timer_register (pixman_timer_t *timer);
-
-#define TIMER_BEGIN(tname)                                              \
-    {                                                                   \
-	static pixman_timer_t timer ## tname;                           \
-	uint64_t              begin ## tname;                           \
-        								\
-	if (!timer ## tname.initialized)				\
-	{                                                               \
-	    timer ## tname.initialized = 1;				\
-	    timer ## tname.name = # tname;				\
-	    pixman_timer_register (&timer ## tname);			\
-	}                                                               \
-									\
-	timer ## tname.n_times++;					\
-	begin ## tname = OIL_STAMP ();
-
-#define TIMER_END(tname)                                                \
-    timer ## tname.total += OIL_STAMP () - begin ## tname;		\
-    }
-
-#endif /* PIXMAN_TIMERS */
-
-#endif /* PIXMAN_PRIVATE_H */
+#ifndef PACKAGE
+#  error config.h must be included before pixman-private.h
+#endif
+
+#ifndef PIXMAN_PRIVATE_H
+#define PIXMAN_PRIVATE_H
+
+#define PIXMAN_DISABLE_DEPRECATED
+#define PIXMAN_USE_INTERNAL_API
+
+#include "pixman.h"
+#include <time.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "pixman-compiler.h"
+
+/*
+ * Images
+ */
+typedef struct image_common image_common_t;
+typedef struct solid_fill solid_fill_t;
+typedef struct gradient gradient_t;
+typedef struct linear_gradient linear_gradient_t;
+typedef struct horizontal_gradient horizontal_gradient_t;
+typedef struct vertical_gradient vertical_gradient_t;
+typedef struct conical_gradient conical_gradient_t;
+typedef struct radial_gradient radial_gradient_t;
+typedef struct bits_image bits_image_t;
+typedef struct circle circle_t;
+
+typedef void (*fetch_scanline_t) (pixman_image_t *image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  uint32_t       *buffer,
+				  const uint32_t *mask);
+
+typedef uint32_t (*fetch_pixel_32_t) (bits_image_t *image,
+				      int           x,
+				      int           y);
+
+typedef uint64_t (*fetch_pixel_64_t) (bits_image_t *image,
+				      int           x,
+				      int           y);
+
+typedef void (*store_scanline_t) (bits_image_t *  image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  const uint32_t *values);
+
+typedef enum
+{
+    BITS,
+    LINEAR,
+    CONICAL,
+    RADIAL,
+    SOLID
+} image_type_t;
+
+typedef void (*property_changed_func_t) (pixman_image_t *image);
+
+struct image_common
+{
+    image_type_t                type;
+    int32_t                     ref_count;
+    pixman_region32_t           clip_region;
+    int32_t			alpha_count;	    /* How many times this image is being used as an alpha map */
+    pixman_bool_t               have_clip_region;   /* FALSE if there is no clip */
+    pixman_bool_t               client_clip;        /* Whether the source clip was
+						       set by a client */
+    pixman_bool_t               clip_sources;       /* Whether the clip applies when
+						     * the image is used as a source
+						     */
+    pixman_bool_t		dirty;
+    pixman_transform_t *        transform;
+    pixman_repeat_t             repeat;
+    pixman_filter_t             filter;
+    pixman_fixed_t *            filter_params;
+    int                         n_filter_params;
+    bits_image_t *              alpha_map;
+    int                         alpha_origin_x;
+    int                         alpha_origin_y;
+    pixman_bool_t               component_alpha;
+    property_changed_func_t     property_changed;
+
+    pixman_image_destroy_func_t destroy_func;
+    void *                      destroy_data;
+
+    uint32_t			flags;
+    pixman_format_code_t	extended_format_code;
+};
+
+struct solid_fill
+{
+    image_common_t common;
+    pixman_color_t color;
+    
+    uint32_t	   color_32;
+    uint64_t	   color_64;
+};
+
+struct gradient
+{
+    image_common_t	    common;
+    int                     n_stops;
+    pixman_gradient_stop_t *stops;
+};
+
+struct linear_gradient
+{
+    gradient_t           common;
+    pixman_point_fixed_t p1;
+    pixman_point_fixed_t p2;
+};
+
+struct circle
+{
+    pixman_fixed_t x;
+    pixman_fixed_t y;
+    pixman_fixed_t radius;
+};
+
+struct radial_gradient
+{
+    gradient_t common;
+
+    circle_t   c1;
+    circle_t   c2;
+
+    circle_t   delta;
+    double     a;
+    double     inva;
+    double     mindr;
+};
+
+struct conical_gradient
+{
+    gradient_t           common;
+    pixman_point_fixed_t center;
+    double		 angle;
+};
+
+struct bits_image
+{
+    image_common_t             common;
+    pixman_format_code_t       format;
+    const pixman_indexed_t *   indexed;
+    int                        width;
+    int                        height;
+    uint32_t *                 bits;
+    uint32_t *                 free_me;
+    int                        rowstride;  /* in number of uint32_t's */
+
+    fetch_scanline_t           get_scanline_32;
+    fetch_scanline_t           get_scanline_64;
+
+    fetch_scanline_t           fetch_scanline_32;
+    fetch_pixel_32_t	       fetch_pixel_32;
+    store_scanline_t           store_scanline_32;
+
+    fetch_scanline_t           fetch_scanline_64;
+    fetch_pixel_64_t	       fetch_pixel_64;
+    store_scanline_t           store_scanline_64;
+
+    /* Used for indirect access to the bits */
+    pixman_read_memory_func_t  read_func;
+    pixman_write_memory_func_t write_func;
+};
+
+union pixman_image
+{
+    image_type_t       type;
+    image_common_t     common;
+    bits_image_t       bits;
+    gradient_t         gradient;
+    linear_gradient_t  linear;
+    conical_gradient_t conical;
+    radial_gradient_t  radial;
+    solid_fill_t       solid;
+};
+
+typedef struct pixman_iter_t pixman_iter_t;
+typedef enum
+{
+    ITER_NARROW =		(1 << 0),
+
+    /* "Localized alpha" is when the alpha channel is used only to compute
+     * the alpha value of the destination. This means that the computation
+     * of the RGB values of the result is independent of the alpha value.
+     *
+     * For example, the OVER operator has localized alpha for the
+     * destination, because the RGB values of the result can be computed
+     * without knowing the destination alpha. Similarly, ADD has localized
+     * alpha for both source and destination because the RGB values of the
+     * result can be computed without knowing the alpha value of source or
+     * destination.
+     *
+     * When the destination is xRGB, this is useful knowledge, because then
+     * we can treat it as if it were ARGB, which means in some cases we can
+     * avoid copying it to a temporary buffer.
+     */
+    ITER_LOCALIZED_ALPHA =	(1 << 1),
+    ITER_IGNORE_ALPHA =		(1 << 2),
+    ITER_IGNORE_RGB =		(1 << 3)
+} iter_flags_t;
+
+struct pixman_iter_t
+{
+    uint32_t *(* get_scanline) (pixman_iter_t *iter, const uint32_t *mask);
+    void      (* write_back)   (pixman_iter_t *iter);
+
+    pixman_image_t *    image;
+    uint32_t *          buffer;
+    int                 x, y;
+    int                 width;
+};
+
+void
+_pixman_bits_image_setup_accessors (bits_image_t *image);
+
+void
+_pixman_bits_image_src_iter_init (pixman_image_t *image,
+				  pixman_iter_t *iter,
+				  int x, int y, int width, int height,
+				  uint8_t *buffer, iter_flags_t flags);
+void
+_pixman_bits_image_dest_iter_init (pixman_image_t *image,
+				   pixman_iter_t *iter,
+				   int x, int y, int width, int height,
+				   uint8_t *buffer, iter_flags_t flags);
+
+void
+_pixman_solid_fill_iter_init (pixman_image_t *image,
+			      pixman_iter_t  *iter,
+			      int x, int y, int width, int height,
+			      uint8_t *buffer, iter_flags_t flags);
+
+void
+_pixman_linear_gradient_iter_init (pixman_image_t *image,
+				   pixman_iter_t  *iter,
+				   int x, int y, int width, int height,
+				   uint8_t *buffer, iter_flags_t flags);
+
+void
+_pixman_radial_gradient_iter_init (pixman_image_t *image,
+				   pixman_iter_t *iter,
+				   int x, int y, int width, int height,
+				   uint8_t *buffer, iter_flags_t flags);
+
+void
+_pixman_conical_gradient_iter_init (pixman_image_t *image,
+				    pixman_iter_t *iter,
+				    int x, int y, int width, int height,
+				    uint8_t *buffer, iter_flags_t flags);
+
+pixman_image_t *
+_pixman_image_allocate (void);
+
+pixman_bool_t
+_pixman_init_gradient (gradient_t *                  gradient,
+                       const pixman_gradient_stop_t *stops,
+                       int                           n_stops);
+void
+_pixman_image_reset_clip_region (pixman_image_t *image);
+
+void
+_pixman_image_validate (pixman_image_t *image);
+
+#define PIXMAN_IMAGE_GET_LINE(image, x, y, type, out_stride, line, mul)	\
+    do									\
+    {									\
+	uint32_t *__bits__;						\
+	int       __stride__;						\
+        								\
+	__bits__ = image->bits.bits;					\
+	__stride__ = image->bits.rowstride;				\
+	(out_stride) =							\
+	    __stride__ * (int) sizeof (uint32_t) / (int) sizeof (type);	\
+	(line) =							\
+	    ((type *) __bits__) + (out_stride) * (y) + (mul) * (x);	\
+    } while (0)
+
+/*
+ * Gradient walker
+ */
+typedef struct
+{
+    uint32_t                left_ag;
+    uint32_t                left_rb;
+    uint32_t                right_ag;
+    uint32_t                right_rb;
+    int32_t                 left_x;
+    int32_t                 right_x;
+    int32_t                 stepper;
+
+    pixman_gradient_stop_t *stops;
+    int                     num_stops;
+    unsigned int            spread;
+
+    int                     need_reset;
+} pixman_gradient_walker_t;
+
+void
+_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
+                              gradient_t *              gradient,
+                              unsigned int              spread);
+
+void
+_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
+                               pixman_fixed_32_32_t      pos);
+
+uint32_t
+_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
+                               pixman_fixed_32_32_t      x);
+
+/*
+ * Edges
+ */
+
+#define MAX_ALPHA(n)    ((1 << (n)) - 1)
+#define N_Y_FRAC(n)     ((n) == 1 ? 1 : (1 << ((n) / 2)) - 1)
+#define N_X_FRAC(n)     ((n) == 1 ? 1 : (1 << ((n) / 2)) + 1)
+
+#define STEP_Y_SMALL(n) (pixman_fixed_1 / N_Y_FRAC (n))
+#define STEP_Y_BIG(n)   (pixman_fixed_1 - (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
+
+#define Y_FRAC_FIRST(n) (STEP_Y_BIG (n) / 2)
+#define Y_FRAC_LAST(n)  (Y_FRAC_FIRST (n) + (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
+
+#define STEP_X_SMALL(n) (pixman_fixed_1 / N_X_FRAC (n))
+#define STEP_X_BIG(n)   (pixman_fixed_1 - (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
+
+#define X_FRAC_FIRST(n) (STEP_X_BIG (n) / 2)
+#define X_FRAC_LAST(n)  (X_FRAC_FIRST (n) + (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
+
+#define RENDER_SAMPLES_X(x, n)						\
+    ((n) == 1? 0 : (pixman_fixed_frac (x) +				\
+		    X_FRAC_FIRST (n)) / STEP_X_SMALL (n))
+
+void
+pixman_rasterize_edges_accessors (pixman_image_t *image,
+                                  pixman_edge_t * l,
+                                  pixman_edge_t * r,
+                                  pixman_fixed_t  t,
+                                  pixman_fixed_t  b);
+
+/*
+ * Implementations
+ */
+typedef struct pixman_implementation_t pixman_implementation_t;
+
+typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp,
+					  pixman_op_t              op,
+					  uint32_t *               dest,
+					  const uint32_t *         src,
+					  const uint32_t *         mask,
+					  int                      width);
+
+typedef void (*pixman_combine_64_func_t) (pixman_implementation_t *imp,
+					  pixman_op_t              op,
+					  uint64_t *               dest,
+					  const uint64_t *         src,
+					  const uint64_t *         mask,
+					  int                      width);
+
+typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp,
+					 pixman_op_t              op,
+					 pixman_image_t *         src,
+					 pixman_image_t *         mask,
+					 pixman_image_t *         dest,
+					 int32_t                  src_x,
+					 int32_t                  src_y,
+					 int32_t                  mask_x,
+					 int32_t                  mask_y,
+					 int32_t                  dest_x,
+					 int32_t                  dest_y,
+					 int32_t                  width,
+					 int32_t                  height);
+typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp,
+					    uint32_t *               src_bits,
+					    uint32_t *               dst_bits,
+					    int                      src_stride,
+					    int                      dst_stride,
+					    int                      src_bpp,
+					    int                      dst_bpp,
+					    int                      src_x,
+					    int                      src_y,
+					    int                      dst_x,
+					    int                      dst_y,
+					    int                      width,
+					    int                      height);
+typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
+					     uint32_t *               bits,
+					     int                      stride,
+					     int                      bpp,
+					     int                      x,
+					     int                      y,
+					     int                      width,
+					     int                      height,
+					     uint32_t                 xor);
+typedef void (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
+                                         pixman_iter_t           *iter,
+                                         pixman_image_t          *image,
+                                         int                      x,
+                                         int                      y,
+                                         int                      width,
+                                         int                      height,
+                                         uint8_t                 *buffer,
+                                         iter_flags_t             flags);
+
+void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
+void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp);
+
+typedef struct
+{
+    pixman_op_t             op;
+    pixman_format_code_t    src_format;
+    uint32_t		    src_flags;
+    pixman_format_code_t    mask_format;
+    uint32_t		    mask_flags;
+    pixman_format_code_t    dest_format;
+    uint32_t		    dest_flags;
+    pixman_composite_func_t func;
+} pixman_fast_path_t;
+
+struct pixman_implementation_t
+{
+    pixman_implementation_t *	toplevel;
+    pixman_implementation_t *	delegate;
+    const pixman_fast_path_t *	fast_paths;
+
+    pixman_blt_func_t		blt;
+    pixman_fill_func_t		fill;
+    pixman_iter_init_func_t     src_iter_init;
+    pixman_iter_init_func_t     dest_iter_init;
+
+    pixman_combine_32_func_t	combine_32[PIXMAN_N_OPERATORS];
+    pixman_combine_32_func_t	combine_32_ca[PIXMAN_N_OPERATORS];
+    pixman_combine_64_func_t	combine_64[PIXMAN_N_OPERATORS];
+    pixman_combine_64_func_t	combine_64_ca[PIXMAN_N_OPERATORS];
+};
+
+uint32_t
+_pixman_image_get_solid (pixman_implementation_t *imp,
+			 pixman_image_t *         image,
+                         pixman_format_code_t     format);
+
+pixman_implementation_t *
+_pixman_implementation_create (pixman_implementation_t *delegate,
+			       const pixman_fast_path_t *fast_paths);
+
+void
+_pixman_implementation_combine_32 (pixman_implementation_t *imp,
+                                   pixman_op_t              op,
+                                   uint32_t *               dest,
+                                   const uint32_t *         src,
+                                   const uint32_t *         mask,
+                                   int                      width);
+void
+_pixman_implementation_combine_64 (pixman_implementation_t *imp,
+                                   pixman_op_t              op,
+                                   uint64_t *               dest,
+                                   const uint64_t *         src,
+                                   const uint64_t *         mask,
+                                   int                      width);
+void
+_pixman_implementation_combine_32_ca (pixman_implementation_t *imp,
+                                      pixman_op_t              op,
+                                      uint32_t *               dest,
+                                      const uint32_t *         src,
+                                      const uint32_t *         mask,
+                                      int                      width);
+void
+_pixman_implementation_combine_64_ca (pixman_implementation_t *imp,
+                                      pixman_op_t              op,
+                                      uint64_t *               dest,
+                                      const uint64_t *         src,
+                                      const uint64_t *         mask,
+                                      int                      width);
+
+pixman_bool_t
+_pixman_implementation_blt (pixman_implementation_t *imp,
+                            uint32_t *               src_bits,
+                            uint32_t *               dst_bits,
+                            int                      src_stride,
+                            int                      dst_stride,
+                            int                      src_bpp,
+                            int                      dst_bpp,
+                            int                      src_x,
+                            int                      src_y,
+                            int                      dst_x,
+                            int                      dst_y,
+                            int                      width,
+                            int                      height);
+
+pixman_bool_t
+_pixman_implementation_fill (pixman_implementation_t *imp,
+                             uint32_t *               bits,
+                             int                      stride,
+                             int                      bpp,
+                             int                      x,
+                             int                      y,
+                             int                      width,
+                             int                      height,
+                             uint32_t                 xor);
+
+void
+_pixman_implementation_src_iter_init (pixman_implementation_t       *imp,
+				      pixman_iter_t                 *iter,
+				      pixman_image_t                *image,
+				      int                            x,
+				      int                            y,
+				      int                            width,
+				      int                            height,
+				      uint8_t                       *buffer,
+				      iter_flags_t                   flags);
+
+void
+_pixman_implementation_dest_iter_init (pixman_implementation_t       *imp,
+				       pixman_iter_t                 *iter,
+				       pixman_image_t                *image,
+				       int                            x,
+				       int                            y,
+				       int                            width,
+				       int                            height,
+				       uint8_t                       *buffer,
+				       iter_flags_t                   flags);
+
+/* Specific implementations */
+pixman_implementation_t *
+_pixman_implementation_create_general (void);
+
+pixman_implementation_t *
+_pixman_implementation_create_fast_path (void);
+
+#ifdef USE_MMX
+pixman_implementation_t *
+_pixman_implementation_create_mmx (void);
+#endif
+
+#ifdef USE_SSE2
+pixman_implementation_t *
+_pixman_implementation_create_sse2 (void);
+#endif
+
+#ifdef USE_ARM_SIMD
+pixman_implementation_t *
+_pixman_implementation_create_arm_simd (void);
+#endif
+
+#ifdef USE_ARM_NEON
+pixman_implementation_t *
+_pixman_implementation_create_arm_neon (void);
+#endif
+
+#ifdef USE_VMX
+pixman_implementation_t *
+_pixman_implementation_create_vmx (void);
+#endif
+
+pixman_implementation_t *
+_pixman_choose_implementation (void);
+
+
+
+/*
+ * Utilities
+ */
+uint32_t *
+_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
+
+/* These "formats" all have depth 0, so they
+ * will never clash with any real ones
+ */
+#define PIXMAN_null             PIXMAN_FORMAT (0, 0, 0, 0, 0, 0)
+#define PIXMAN_solid            PIXMAN_FORMAT (0, 1, 0, 0, 0, 0)
+#define PIXMAN_pixbuf		PIXMAN_FORMAT (0, 2, 0, 0, 0, 0)
+#define PIXMAN_rpixbuf		PIXMAN_FORMAT (0, 3, 0, 0, 0, 0)
+#define PIXMAN_unknown		PIXMAN_FORMAT (0, 4, 0, 0, 0, 0)
+#define PIXMAN_any		PIXMAN_FORMAT (0, 5, 0, 0, 0, 0)
+
+#define PIXMAN_OP_any		(PIXMAN_N_OPERATORS + 1)
+
+#define FAST_PATH_ID_TRANSFORM			(1 <<  0)
+#define FAST_PATH_NO_ALPHA_MAP			(1 <<  1)
+#define FAST_PATH_NO_CONVOLUTION_FILTER		(1 <<  2)
+#define FAST_PATH_NO_PAD_REPEAT			(1 <<  3)
+#define FAST_PATH_NO_REFLECT_REPEAT		(1 <<  4)
+#define FAST_PATH_NO_ACCESSORS			(1 <<  5)
+#define FAST_PATH_NARROW_FORMAT			(1 <<  6)
+#define FAST_PATH_COMPONENT_ALPHA		(1 <<  8)
+#define FAST_PATH_SAMPLES_OPAQUE		(1 <<  7)
+#define FAST_PATH_UNIFIED_ALPHA			(1 <<  9)
+#define FAST_PATH_SCALE_TRANSFORM		(1 << 10)
+#define FAST_PATH_NEAREST_FILTER		(1 << 11)
+#define FAST_PATH_HAS_TRANSFORM			(1 << 12)
+#define FAST_PATH_IS_OPAQUE			(1 << 13)
+#define FAST_PATH_NO_NORMAL_REPEAT		(1 << 14)
+#define FAST_PATH_NO_NONE_REPEAT		(1 << 15)
+#define FAST_PATH_SAMPLES_COVER_CLIP		(1 << 16)
+#define FAST_PATH_X_UNIT_POSITIVE		(1 << 17)
+#define FAST_PATH_AFFINE_TRANSFORM		(1 << 18)
+#define FAST_PATH_Y_UNIT_ZERO			(1 << 19)
+#define FAST_PATH_BILINEAR_FILTER		(1 << 20)
+
+#define FAST_PATH_PAD_REPEAT						\
+    (FAST_PATH_NO_NONE_REPEAT		|				\
+     FAST_PATH_NO_NORMAL_REPEAT		|				\
+     FAST_PATH_NO_REFLECT_REPEAT)
+
+#define FAST_PATH_NORMAL_REPEAT						\
+    (FAST_PATH_NO_NONE_REPEAT		|				\
+     FAST_PATH_NO_PAD_REPEAT		|				\
+     FAST_PATH_NO_REFLECT_REPEAT)
+
+#define FAST_PATH_NONE_REPEAT						\
+    (FAST_PATH_NO_NORMAL_REPEAT		|				\
+     FAST_PATH_NO_PAD_REPEAT		|				\
+     FAST_PATH_NO_REFLECT_REPEAT)
+
+#define FAST_PATH_REFLECT_REPEAT					\
+    (FAST_PATH_NO_NONE_REPEAT		|				\
+     FAST_PATH_NO_NORMAL_REPEAT		|				\
+     FAST_PATH_NO_PAD_REPEAT)
+
+#define FAST_PATH_STANDARD_FLAGS					\
+    (FAST_PATH_NO_CONVOLUTION_FILTER	|				\
+     FAST_PATH_NO_ACCESSORS		|				\
+     FAST_PATH_NO_ALPHA_MAP		|				\
+     FAST_PATH_NARROW_FORMAT)
+
+#define FAST_PATH_STD_DEST_FLAGS					\
+    (FAST_PATH_NO_ACCESSORS		|				\
+     FAST_PATH_NO_ALPHA_MAP		|				\
+     FAST_PATH_NARROW_FORMAT)
+
+#define SOURCE_FLAGS(format)						\
+    (FAST_PATH_STANDARD_FLAGS |						\
+     ((PIXMAN_ ## format == PIXMAN_solid) ?				\
+      0 : (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_ID_TRANSFORM)))
+
+#define MASK_FLAGS(format, extra)					\
+    ((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra))
+
+#define FAST_PATH(op, src, src_flags, mask, mask_flags, dest, dest_flags, func) \
+    PIXMAN_OP_ ## op,							\
+    PIXMAN_ ## src,							\
+    src_flags,							        \
+    PIXMAN_ ## mask,						        \
+    mask_flags,							        \
+    PIXMAN_ ## dest,	                                                \
+    dest_flags,							        \
+    func
+
+#define PIXMAN_STD_FAST_PATH(op, src, mask, dest, func)			\
+    { FAST_PATH (							\
+	    op,								\
+	    src,  SOURCE_FLAGS (src),					\
+	    mask, MASK_FLAGS (mask, FAST_PATH_UNIFIED_ALPHA),		\
+	    dest, FAST_PATH_STD_DEST_FLAGS,				\
+	    func) }
+
+#define PIXMAN_STD_FAST_PATH_CA(op, src, mask, dest, func)		\
+    { FAST_PATH (							\
+	    op,								\
+	    src,  SOURCE_FLAGS (src),					\
+	    mask, MASK_FLAGS (mask, FAST_PATH_COMPONENT_ALPHA),		\
+	    dest, FAST_PATH_STD_DEST_FLAGS,				\
+	    func) }
+
+/* Memory allocation helpers */
+void *
+pixman_malloc_ab (unsigned int n, unsigned int b);
+
+void *
+pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c);
+
+pixman_bool_t
+pixman_multiply_overflows_int (unsigned int a, unsigned int b);
+
+pixman_bool_t
+pixman_addition_overflows_int (unsigned int a, unsigned int b);
+
+/* Compositing utilities */
+void
+pixman_expand (uint64_t *           dst,
+               const uint32_t *     src,
+               pixman_format_code_t format,
+               int                  width);
+
+void
+pixman_contract (uint32_t *      dst,
+                 const uint64_t *src,
+                 int             width);
+
+
+/* Region Helpers */
+pixman_bool_t
+pixman_region32_copy_from_region16 (pixman_region32_t *dst,
+                                    pixman_region16_t *src);
+
+pixman_bool_t
+pixman_region16_copy_from_region32 (pixman_region16_t *dst,
+                                    pixman_region32_t *src);
+
+
+/* Misc macros */
+
+#ifndef FALSE
+#   define FALSE 0
+#endif
+
+#ifndef TRUE
+#   define TRUE 1
+#endif
+
+#ifndef MIN /* smaller of a and b; operands are parenthesized, but each may be evaluated twice */
+#  define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifndef MAX /* larger of a and b; operands are parenthesized, but each may be evaluated twice */
+#  define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+/* Integer division that rounds towards -infinity */
+#define DIV(a, b)					   \
+    ((((a) < 0) == ((b) < 0)) ? (a) / (b) :                \
+     ((a) - (b) + 1 - (((b) < 0) << 1)) / (b))
+
+/* Modulus that produces the remainder wrt. DIV */
+#define MOD(a, b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b))
+
+#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v)))
+
+/* Conversion between 8888 and 0565 */
+
+#define CONVERT_8888_TO_0565(s)						\
+    ((((s) >> 3) & 0x001f) |						\
+     (((s) >> 5) & 0x07e0) |						\
+     (((s) >> 8) & 0xf800))
+
+#define CONVERT_0565_TO_0888(s)						\
+    (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) |			\
+     ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) |			\
+     ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000)))
+
+#define CONVERT_0565_TO_8888(s) (CONVERT_0565_TO_0888(s) | 0xff000000)
+
+/* Trivial versions that are useful in macros */
+#define CONVERT_8888_TO_8888(s) (s)
+#define CONVERT_0565_TO_0565(s) (s)
+
+#define PIXMAN_FORMAT_IS_WIDE(f)					\
+    (PIXMAN_FORMAT_A (f) > 8 ||						\
+     PIXMAN_FORMAT_R (f) > 8 ||						\
+     PIXMAN_FORMAT_G (f) > 8 ||						\
+     PIXMAN_FORMAT_B (f) > 8)
+
+#ifdef WORDS_BIGENDIAN
+#   define SCREEN_SHIFT_LEFT(x,n)	((x) << (n))
+#   define SCREEN_SHIFT_RIGHT(x,n)	((x) >> (n))
+#else
+#   define SCREEN_SHIFT_LEFT(x,n)	((x) >> (n))
+#   define SCREEN_SHIFT_RIGHT(x,n)	((x) << (n))
+#endif
+
+/*
+ * Various debugging code
+ */
+
+#undef DEBUG
+
+#define COMPILE_TIME_ASSERT(x)						\
+    do { typedef int compile_time_assertion [(x)?1:-1]; } while (0)
+
+/* Turn on debugging depending on what type of release this is
+ */
+#if (((PIXMAN_VERSION_MICRO % 2) == 0) && ((PIXMAN_VERSION_MINOR % 2) == 1))
+
+/* Debugging gets turned on for development releases because these
+ * are the things that end up in bleeding edge distributions such
+ * as Rawhide etc.
+ *
+ * For performance reasons we don't turn it on for stable releases or
+ * random git checkouts. (Random git checkouts are often used for
+ * performance work).
+ */
+
+#    define DEBUG
+
+#endif
+
+#ifdef DEBUG
+
+void
+_pixman_log_error (const char *function, const char *message);
+
+#define return_if_fail(expr)                                            \
+    do                                                                  \
+    {                                                                   \
+	if (!(expr))							\
+	{								\
+	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
+	    return;							\
+	}								\
+    }                                                                   \
+    while (0)
+
+#define return_val_if_fail(expr, retval)                                \
+    do                                                                  \
+    {                                                                   \
+	if (!(expr))                                                    \
+	{								\
+	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
+	    return (retval);						\
+	}								\
+    }                                                                   \
+    while (0)
+
+#define critical_if_fail(expr)						\
+    do									\
+    {									\
+	if (!(expr))							\
+	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
+    }									\
+    while (0)
+
+
+#else
+
+#define _pixman_log_error(f,m) do { } while (0) /* no-op stub used when DEBUG is not defined */
+
+#define return_if_fail(expr)						\
+    do                                                                  \
+    {                                                                   \
+	if (!(expr))							\
+	    return;							\
+    }                                                                   \
+    while (0)
+
+#define return_val_if_fail(expr, retval)                                \
+    do                                                                  \
+    {                                                                   \
+	if (!(expr))							\
+	    return (retval);						\
+    }                                                                   \
+    while (0)
+
+#define critical_if_fail(expr)						\
+    do									\
+    {									\
+    }									\
+    while (0)
+#endif
+
+/*
+ * Timers
+ */
+
+#ifdef PIXMAN_TIMERS
+
+static inline uint64_t
+oil_profile_stamp_rdtsc (void)
+{
+    uint32_t lo, hi; /* low / high halves of the counter, read from EAX / EDX */
+    /* Not "=A": on x86-64 that constraint names one register, not the EDX:EAX pair. */
+    __asm__ __volatile__ ("rdtsc\n" : "=a" (lo), "=d" (hi));
+    return ((uint64_t) hi << 32) | lo;
+}
+
+#define OIL_STAMP oil_profile_stamp_rdtsc
+
+typedef struct pixman_timer_t pixman_timer_t;
+
+struct pixman_timer_t
+{
+    int             initialized;
+    const char *    name;
+    uint64_t        n_times;
+    uint64_t        total;
+    pixman_timer_t *next;
+};
+
+extern int timer_defined;
+
+void pixman_timer_register (pixman_timer_t *timer);
+
+#define TIMER_BEGIN(tname)                                              \
+    {                                                                   \
+	static pixman_timer_t timer ## tname;                           \
+	uint64_t              begin ## tname;                           \
+        								\
+	if (!timer ## tname.initialized)				\
+	{                                                               \
+	    timer ## tname.initialized = 1;				\
+	    timer ## tname.name = # tname;				\
+	    pixman_timer_register (&timer ## tname);			\
+	}                                                               \
+									\
+	timer ## tname.n_times++;					\
+	begin ## tname = OIL_STAMP ();
+
+#define TIMER_END(tname)                                                \
+    timer ## tname.total += OIL_STAMP () - begin ## tname;		\
+    }
+
+#endif /* PIXMAN_TIMERS */
+
+#endif /* PIXMAN_PRIVATE_H */
diff --git a/pixman/pixman/pixman-radial-gradient.c b/pixman/pixman/pixman-radial-gradient.c
index 97c151865..6523b8259 100644
--- a/pixman/pixman/pixman-radial-gradient.c
+++ b/pixman/pixman/pixman-radial-gradient.c
@@ -1,447 +1,463 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- *
- * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
- * Copyright © 2000 SuSE, Inc.
- *             2005 Lars Knoll & Zack Rusin, Trolltech
- * Copyright © 2007 Red Hat, Inc.
- *
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#include <stdlib.h>
-#include <math.h>
-#include "pixman-private.h"
-
-static inline pixman_fixed_32_32_t
-dot (pixman_fixed_48_16_t x1,
-     pixman_fixed_48_16_t y1,
-     pixman_fixed_48_16_t z1,
-     pixman_fixed_48_16_t x2,
-     pixman_fixed_48_16_t y2,
-     pixman_fixed_48_16_t z2)
-{
-    /*
-     * Exact computation, assuming that the input values can
-     * be represented as pixman_fixed_16_16_t
-     */
-    return x1 * x2 + y1 * y2 + z1 * z2;
-}
-
-static inline double
-fdot (double x1,
-      double y1,
-      double z1,
-      double x2,
-      double y2,
-      double z2)
-{
-    /*
-     * Error can be unbound in some special cases.
-     * Using clever dot product algorithms (for example compensated
-     * dot product) would improve this but make the code much less
-     * obvious
-     */
-    return x1 * x2 + y1 * y2 + z1 * z2;
-}
-
-static uint32_t
-radial_compute_color (double                    a,
-		      double                    b,
-		      double                    c,
-		      double                    inva,
-		      double                    dr,
-		      double                    mindr,
-		      pixman_gradient_walker_t *walker,
-		      pixman_repeat_t           repeat)
-{
-    /*
-     * In this function error propagation can lead to bad results:
-     *  - det can have an unbound error (if b*b-a*c is very small),
-     *    potentially making it the opposite sign of what it should have been
-     *    (thus clearing a pixel that would have been colored or vice-versa)
-     *    or propagating the error to sqrtdet;
-     *    if det has the wrong sign or b is very small, this can lead to bad
-     *    results
-     *
-     *  - the algorithm used to compute the solutions of the quadratic
-     *    equation is not numerically stable (but saves one division compared
-     *    to the numerically stable one);
-     *    this can be a problem if a*c is much smaller than b*b
-     *
-     *  - the above problems are worse if a is small (as inva becomes bigger)
-     */
-    double det;
-
-    if (a == 0)
-    {
-	double t;
-
-	if (b == 0)
-	    return 0;
-
-	t = pixman_fixed_1 / 2 * c / b;
-	if (repeat == PIXMAN_REPEAT_NONE)
-	{
-	    if (0 <= t && t <= pixman_fixed_1)
-		return _pixman_gradient_walker_pixel (walker, t);
-	}
-	else
-	{
-	    if (t * dr > mindr)
-		return _pixman_gradient_walker_pixel (walker, t);
-	}
-
-	return 0;
-    }
-
-    det = fdot (b, a, 0, b, -c, 0);
-    if (det >= 0)
-    {
-	double sqrtdet, t0, t1;
-
-	sqrtdet = sqrt (det);
-	t0 = (b + sqrtdet) * inva;
-	t1 = (b - sqrtdet) * inva;
-
-	if (repeat == PIXMAN_REPEAT_NONE)
-	{
-	    if (0 <= t0 && t0 <= pixman_fixed_1)
-		return _pixman_gradient_walker_pixel (walker, t0);
-	    else if (0 <= t1 && t1 <= pixman_fixed_1)
-		return _pixman_gradient_walker_pixel (walker, t1);
-	}
-	else
-	{
-	    if (t0 * dr > mindr)
-		return _pixman_gradient_walker_pixel (walker, t0);
-	    else if (t1 * dr > mindr)
-		return _pixman_gradient_walker_pixel (walker, t1);
-	}
-    }
-
-    return 0;
-}
-
-static void
-radial_gradient_get_scanline_32 (pixman_image_t *image,
-                                 int             x,
-                                 int             y,
-                                 int             width,
-                                 uint32_t *      buffer,
-                                 const uint32_t *mask)
-{
-    /*
-     * Implementation of radial gradients following the PDF specification.
-     * See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference
-     * Manual (PDF 32000-1:2008 at the time of this writing).
-     * 
-     * In the radial gradient problem we are given two circles (c₁,r₁) and
-     * (c₂,r₂) that define the gradient itself.
-     *
-     * Mathematically the gradient can be defined as the family of circles
-     *
-     *     ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂)
-     *
-     * excluding those circles whose radius would be < 0. When a point
-     * belongs to more than one circle, the one with a bigger t is the only
-     * one that contributes to its color. When a point does not belong
-     * to any of the circles, it is transparent black, i.e. RGBA (0, 0, 0, 0).
-     * Further limitations on the range of values for t are imposed when
-     * the gradient is not repeated, namely t must belong to [0,1].
-     *
-     * The graphical result is the same as drawing the valid (radius > 0)
-     * circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient
-     * is not repeated) using SOURCE operatior composition.
-     *
-     * It looks like a cone pointing towards the viewer if the ending circle
-     * is smaller than the starting one, a cone pointing inside the page if
-     * the starting circle is the smaller one and like a cylinder if they
-     * have the same radius.
-     *
-     * What we actually do is, given the point whose color we are interested
-     * in, compute the t values for that point, solving for t in:
-     *
-     *     length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂
-     * 
-     * Let's rewrite it in a simpler way, by defining some auxiliary
-     * variables:
-     *
-     *     cd = c₂ - c₁
-     *     pd = p - c₁
-     *     dr = r₂ - r₁
-     *     lenght(t·cd - pd) = r₁ + t·dr
-     *
-     * which actually means
-     *
-     *     hypot(t·cdx - pdx, t·cdy - pdy) = r₁ + t·dr
-     *
-     * or
-     *
-     *     ⎷((t·cdx - pdx)² + (t·cdy - pdy)²) = r₁ + t·dr.
-     *
-     * If we impose (as stated earlier) that r₁ + t·dr >= 0, it becomes:
-     *
-     *     (t·cdx - pdx)² + (t·cdy - pdy)² = (r₁ + t·dr)²
-     *
-     * where we can actually expand the squares and solve for t:
-     *
-     *     t²cdx² - 2t·cdx·pdx + pdx² + t²cdy² - 2t·cdy·pdy + pdy² =
-     *       = r₁² + 2·r₁·t·dr + t²·dr²
-     *
-     *     (cdx² + cdy² - dr²)t² - 2(cdx·pdx + cdy·pdy + r₁·dr)t +
-     *         (pdx² + pdy² - r₁²) = 0
-     *
-     *     A = cdx² + cdy² - dr²
-     *     B = pdx·cdx + pdy·cdy + r₁·dr
-     *     C = pdx² + pdy² - r₁²
-     *     At² - 2Bt + C = 0
-     * 
-     * The solutions (unless the equation degenerates because of A = 0) are:
-     *
-     *     t = (B ± ⎷(B² - A·C)) / A
-     *
-     * The solution we are going to prefer is the bigger one, unless the
-     * radius associated to it is negative (or it falls outside the valid t
-     * range).
-     *
-     * Additional observations (useful for optimizations):
-     * A does not depend on p
-     *
-     * A < 0 <=> one of the two circles completely contains the other one
-     *   <=> for every p, the radiuses associated with the two t solutions
-     *       have opposite sign
-     */
-
-    gradient_t *gradient = (gradient_t *)image;
-    radial_gradient_t *radial = (radial_gradient_t *)image;
-    uint32_t *end = buffer + width;
-    pixman_gradient_walker_t walker;
-    pixman_vector_t v, unit;
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
-
-    if (image->common.transform)
-    {
-	if (!pixman_transform_point_3d (image->common.transform, &v))
-	    return;
-	
-	unit.vector[0] = image->common.transform->matrix[0][0];
-	unit.vector[1] = image->common.transform->matrix[1][0];
-	unit.vector[2] = image->common.transform->matrix[2][0];
-    }
-    else
-    {
-	unit.vector[0] = pixman_fixed_1;
-	unit.vector[1] = 0;
-	unit.vector[2] = 0;
-    }
-
-    if (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)
-    {
-	/*
-	 * Given:
-	 *
-	 * t = (B ± ⎷(B² - A·C)) / A
-	 *
-	 * where
-	 *
-	 * A = cdx² + cdy² - dr²
-	 * B = pdx·cdx + pdy·cdy + r₁·dr
-	 * C = pdx² + pdy² - r₁²
-	 * det = B² - A·C
-	 *
-	 * Since we have an affine transformation, we know that (pdx, pdy)
-	 * increase linearly with each pixel,
-	 *
-	 * pdx = pdx₀ + n·ux,
-	 * pdy = pdy₀ + n·uy,
-	 *
-	 * we can then express B, C and det through multiple differentiation.
-	 */
-	pixman_fixed_32_32_t b, db, c, dc, ddc;
-
-	/* warning: this computation may overflow */
-	v.vector[0] -= radial->c1.x;
-	v.vector[1] -= radial->c1.y;
-
-	/*
-	 * B and C are computed and updated exactly.
-	 * If fdot was used instead of dot, in the worst case it would
-	 * lose 11 bits of precision in each of the multiplication and
-	 * summing up would zero out all the bit that were preserved,
-	 * thus making the result 0 instead of the correct one.
-	 * This would mean a worst case of unbound relative error or
-	 * about 2^10 absolute error
-	 */
-	b = dot (v.vector[0], v.vector[1], radial->c1.radius,
-		 radial->delta.x, radial->delta.y, radial->delta.radius);
-	db = dot (unit.vector[0], unit.vector[1], 0,
-		  radial->delta.x, radial->delta.y, 0);
-
-	c = dot (v.vector[0], v.vector[1],
-		 -((pixman_fixed_48_16_t) radial->c1.radius),
-		 v.vector[0], v.vector[1], radial->c1.radius);
-	dc = dot (2 * (pixman_fixed_48_16_t) v.vector[0] + unit.vector[0],
-		  2 * (pixman_fixed_48_16_t) v.vector[1] + unit.vector[1],
-		  0,
-		  unit.vector[0], unit.vector[1], 0);
-	ddc = 2 * dot (unit.vector[0], unit.vector[1], 0,
-		       unit.vector[0], unit.vector[1], 0);
-
-	while (buffer < end)
-	{
-	    if (!mask || *mask++)
-	    {
-		*buffer = radial_compute_color (radial->a, b, c,
-						radial->inva,
-						radial->delta.radius,
-						radial->mindr,
-						&walker,
-						image->common.repeat);
-	    }
-
-	    b += db;
-	    c += dc;
-	    dc += ddc;
-	    ++buffer;
-	}
-    }
-    else
-    {
-	/* projective */
-	/* Warning:
-	 * error propagation guarantees are much looser than in the affine case
-	 */
-	while (buffer < end)
-	{
-	    if (!mask || *mask++)
-	    {
-		if (v.vector[2] != 0)
-		{
-		    double pdx, pdy, invv2, b, c;
-
-		    invv2 = 1. * pixman_fixed_1 / v.vector[2];
-
-		    pdx = v.vector[0] * invv2 - radial->c1.x;
-		    /*    / pixman_fixed_1 */
-
-		    pdy = v.vector[1] * invv2 - radial->c1.y;
-		    /*    / pixman_fixed_1 */
-
-		    b = fdot (pdx, pdy, radial->c1.radius,
-			      radial->delta.x, radial->delta.y,
-			      radial->delta.radius);
-		    /*  / pixman_fixed_1 / pixman_fixed_1 */
-
-		    c = fdot (pdx, pdy, -radial->c1.radius,
-			      pdx, pdy, radial->c1.radius);
-		    /*  / pixman_fixed_1 / pixman_fixed_1 */
-
-		    *buffer = radial_compute_color (radial->a, b, c,
-						    radial->inva,
-						    radial->delta.radius,
-						    radial->mindr,
-						    &walker,
-						    image->common.repeat);
-		}
-		else
-		{
-		    *buffer = 0;
-		}
-	    }
-
-	    ++buffer;
-
-	    v.vector[0] += unit.vector[0];
-	    v.vector[1] += unit.vector[1];
-	    v.vector[2] += unit.vector[2];
-	}
-    }
-}
-
-static void
-radial_gradient_property_changed (pixman_image_t *image)
-{
-    image->common.get_scanline_32 = radial_gradient_get_scanline_32;
-    image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64;
-}
-
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_radial_gradient (pixman_point_fixed_t *        inner,
-                                     pixman_point_fixed_t *        outer,
-                                     pixman_fixed_t                inner_radius,
-                                     pixman_fixed_t                outer_radius,
-                                     const pixman_gradient_stop_t *stops,
-                                     int                           n_stops)
-{
-    pixman_image_t *image;
-    radial_gradient_t *radial;
-
-    image = _pixman_image_allocate ();
-
-    if (!image)
-	return NULL;
-
-    radial = &image->radial;
-
-    if (!_pixman_init_gradient (&radial->common, stops, n_stops))
-    {
-	free (image);
-	return NULL;
-    }
-
-    image->type = RADIAL;
-
-    radial->c1.x = inner->x;
-    radial->c1.y = inner->y;
-    radial->c1.radius = inner_radius;
-    radial->c2.x = outer->x;
-    radial->c2.y = outer->y;
-    radial->c2.radius = outer_radius;
-
-    /* warning: this computations may overflow */
-    radial->delta.x = radial->c2.x - radial->c1.x;
-    radial->delta.y = radial->c2.y - radial->c1.y;
-    radial->delta.radius = radial->c2.radius - radial->c1.radius;
-
-    /* computed exactly, then cast to double -> every bit of the double
-       representation is correct (53 bits) */
-    radial->a = dot (radial->delta.x, radial->delta.y, -radial->delta.radius,
-		     radial->delta.x, radial->delta.y, radial->delta.radius);
-    if (radial->a != 0)
-	radial->inva = 1. * pixman_fixed_1 / radial->a;
-
-    radial->mindr = -1. * pixman_fixed_1 * radial->c1.radius;
-
-    image->common.property_changed = radial_gradient_property_changed;
-
-    return image;
-}
-
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ *
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ * Copyright © 2000 SuSE, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <stdlib.h>
+#include <math.h>
+#include "pixman-private.h"
+
+static inline pixman_fixed_32_32_t
+dot (pixman_fixed_48_16_t x1,
+     pixman_fixed_48_16_t y1,
+     pixman_fixed_48_16_t z1,
+     pixman_fixed_48_16_t x2,
+     pixman_fixed_48_16_t y2,
+     pixman_fixed_48_16_t z2)
+{
+    /*
+     * Exact computation, assuming that the input values can
+     * be represented as pixman_fixed_16_16_t
+     */
+    return x1 * x2 + y1 * y2 + z1 * z2;
+}
+
+static inline double
+fdot (double x1,
+      double y1,
+      double z1,
+      double x2,
+      double y2,
+      double z2)
+{
+    /*
+     * Error can be unbounded in some special cases.
+     * Using clever dot product algorithms (for example compensated
+     * dot product) would improve this but make the code much less
+     * obvious
+     */
+    return x1 * x2 + y1 * y2 + z1 * z2;
+}
+
+static uint32_t
+radial_compute_color (double                    a,
+		      double                    b,
+		      double                    c,
+		      double                    inva,
+		      double                    dr,
+		      double                    mindr,
+		      pixman_gradient_walker_t *walker,
+		      pixman_repeat_t           repeat)
+{
+    /*
+     * In this function error propagation can lead to bad results:
+     *  - det can have an unbounded error (if b*b-a*c is very small),
+     *    potentially making it the opposite sign of what it should have been
+     *    (thus clearing a pixel that would have been colored or vice-versa)
+     *    or propagating the error to sqrtdet;
+     *    if det has the wrong sign or b is very small, this can lead to bad
+     *    results
+     *
+     *  - the algorithm used to compute the solutions of the quadratic
+     *    equation is not numerically stable (but saves one division compared
+     *    to the numerically stable one);
+     *    this can be a problem if a*c is much smaller than b*b
+     *
+     *  - the above problems are worse if a is small (as inva becomes bigger)
+     */
+    double det;
+
+    if (a == 0)
+    {
+	double t;
+
+	if (b == 0)
+	    return 0;
+
+	t = pixman_fixed_1 / 2 * c / b;
+	if (repeat == PIXMAN_REPEAT_NONE)
+	{
+	    if (0 <= t && t <= pixman_fixed_1)
+		return _pixman_gradient_walker_pixel (walker, t);
+	}
+	else
+	{
+	    if (t * dr > mindr)
+		return _pixman_gradient_walker_pixel (walker, t);
+	}
+
+	return 0;
+    }
+
+    det = fdot (b, a, 0, b, -c, 0);
+    if (det >= 0)
+    {
+	double sqrtdet, t0, t1;
+
+	sqrtdet = sqrt (det);
+	t0 = (b + sqrtdet) * inva;
+	t1 = (b - sqrtdet) * inva;
+
+	if (repeat == PIXMAN_REPEAT_NONE)
+	{
+	    if (0 <= t0 && t0 <= pixman_fixed_1)
+		return _pixman_gradient_walker_pixel (walker, t0);
+	    else if (0 <= t1 && t1 <= pixman_fixed_1)
+		return _pixman_gradient_walker_pixel (walker, t1);
+	}
+	else
+	{
+	    if (t0 * dr > mindr)
+		return _pixman_gradient_walker_pixel (walker, t0);
+	    else if (t1 * dr > mindr)
+		return _pixman_gradient_walker_pixel (walker, t1);
+	}
+    }
+
+    return 0;
+}
+
+static uint32_t *
+radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+{
+    /*
+     * Implementation of radial gradients following the PDF specification.
+     * See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference
+     * Manual (PDF 32000-1:2008 at the time of this writing).
+     * 
+     * In the radial gradient problem we are given two circles (c₁,r₁) and
+     * (c₂,r₂) that define the gradient itself.
+     *
+     * Mathematically the gradient can be defined as the family of circles
+     *
+     *     ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂)
+     *
+     * excluding those circles whose radius would be < 0. When a point
+     * belongs to more than one circle, the one with a bigger t is the only
+     * one that contributes to its color. When a point does not belong
+     * to any of the circles, it is transparent black, i.e. RGBA (0, 0, 0, 0).
+     * Further limitations on the range of values for t are imposed when
+     * the gradient is not repeated, namely t must belong to [0,1].
+     *
+     * The graphical result is the same as drawing the valid (radius > 0)
+     * circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient
+     * is not repeated) using SOURCE operator composition.
+     *
+     * It looks like a cone pointing towards the viewer if the ending circle
+     * is smaller than the starting one, a cone pointing inside the page if
+     * the starting circle is the smaller one and like a cylinder if they
+     * have the same radius.
+     *
+     * What we actually do is, given the point whose color we are interested
+     * in, compute the t values for that point, solving for t in:
+     *
+     *     length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂
+     * 
+     * Let's rewrite it in a simpler way, by defining some auxiliary
+     * variables:
+     *
+     *     cd = c₂ - c₁
+     *     pd = p - c₁
+     *     dr = r₂ - r₁
+     *     length(t·cd - pd) = r₁ + t·dr
+     *
+     * which actually means
+     *
+     *     hypot(t·cdx - pdx, t·cdy - pdy) = r₁ + t·dr
+     *
+     * or
+     *
+     *     ⎷((t·cdx - pdx)² + (t·cdy - pdy)²) = r₁ + t·dr.
+     *
+     * If we impose (as stated earlier) that r₁ + t·dr >= 0, it becomes:
+     *
+     *     (t·cdx - pdx)² + (t·cdy - pdy)² = (r₁ + t·dr)²
+     *
+     * where we can actually expand the squares and solve for t:
+     *
+     *     t²cdx² - 2t·cdx·pdx + pdx² + t²cdy² - 2t·cdy·pdy + pdy² =
+     *       = r₁² + 2·r₁·t·dr + t²·dr²
+     *
+     *     (cdx² + cdy² - dr²)t² - 2(cdx·pdx + cdy·pdy + r₁·dr)t +
+     *         (pdx² + pdy² - r₁²) = 0
+     *
+     *     A = cdx² + cdy² - dr²
+     *     B = pdx·cdx + pdy·cdy + r₁·dr
+     *     C = pdx² + pdy² - r₁²
+     *     At² - 2Bt + C = 0
+     * 
+     * The solutions (unless the equation degenerates because of A = 0) are:
+     *
+     *     t = (B ± ⎷(B² - A·C)) / A
+     *
+     * The solution we are going to prefer is the bigger one, unless the
+     * radius associated to it is negative (or it falls outside the valid t
+     * range).
+     *
+     * Additional observations (useful for optimizations):
+     * A does not depend on p
+     *
+     * A < 0 <=> one of the two circles completely contains the other one
+     *   <=> for every p, the radiuses associated with the two t solutions
+     *       have opposite sign
+     */
+    pixman_image_t *image = iter->image;
+    int x = iter->x;
+    int y = iter->y;
+    int width = iter->width;
+    uint32_t *buffer = iter->buffer;
+
+    gradient_t *gradient = (gradient_t *)image;
+    radial_gradient_t *radial = (radial_gradient_t *)image;
+    uint32_t *end = buffer + width;
+    pixman_gradient_walker_t walker;
+    pixman_vector_t v, unit;
+
+    /* reference point is the center of the pixel */
+    v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
+    v.vector[2] = pixman_fixed_1;
+
+    _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
+
+    if (image->common.transform)
+    {
+	if (!pixman_transform_point_3d (image->common.transform, &v))
+	    return iter->buffer;
+	
+	unit.vector[0] = image->common.transform->matrix[0][0];
+	unit.vector[1] = image->common.transform->matrix[1][0];
+	unit.vector[2] = image->common.transform->matrix[2][0];
+    }
+    else
+    {
+	unit.vector[0] = pixman_fixed_1;
+	unit.vector[1] = 0;
+	unit.vector[2] = 0;
+    }
+
+    if (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)
+    {
+	/*
+	 * Given:
+	 *
+	 * t = (B ± ⎷(B² - A·C)) / A
+	 *
+	 * where
+	 *
+	 * A = cdx² + cdy² - dr²
+	 * B = pdx·cdx + pdy·cdy + r₁·dr
+	 * C = pdx² + pdy² - r₁²
+	 * det = B² - A·C
+	 *
+	 * Since we have an affine transformation, we know that (pdx, pdy)
+	 * increase linearly with each pixel,
+	 *
+	 * pdx = pdx₀ + n·ux,
+	 * pdy = pdy₀ + n·uy,
+	 *
+	 * we can then express B, C and det through multiple differentiation.
+	 */
+	pixman_fixed_32_32_t b, db, c, dc, ddc;
+
+	/* warning: this computation may overflow */
+	v.vector[0] -= radial->c1.x;
+	v.vector[1] -= radial->c1.y;
+
+	/*
+	 * B and C are computed and updated exactly.
+	 * If fdot was used instead of dot, in the worst case it would
+	 * lose 11 bits of precision in each of the multiplications and
+	 * summing up would zero out all the bits that were preserved,
+	 * thus making the result 0 instead of the correct one.
+	 * This would mean a worst case of unbounded relative error or
+	 * about 2^10 absolute error
+	 */
+	b = dot (v.vector[0], v.vector[1], radial->c1.radius,
+		 radial->delta.x, radial->delta.y, radial->delta.radius);
+	db = dot (unit.vector[0], unit.vector[1], 0,
+		  radial->delta.x, radial->delta.y, 0);
+
+	c = dot (v.vector[0], v.vector[1],
+		 -((pixman_fixed_48_16_t) radial->c1.radius),
+		 v.vector[0], v.vector[1], radial->c1.radius);
+	dc = dot (2 * (pixman_fixed_48_16_t) v.vector[0] + unit.vector[0],
+		  2 * (pixman_fixed_48_16_t) v.vector[1] + unit.vector[1],
+		  0,
+		  unit.vector[0], unit.vector[1], 0);
+	ddc = 2 * dot (unit.vector[0], unit.vector[1], 0,
+		       unit.vector[0], unit.vector[1], 0);
+
+	while (buffer < end)
+	{
+	    if (!mask || *mask++)
+	    {
+		*buffer = radial_compute_color (radial->a, b, c,
+						radial->inva,
+						radial->delta.radius,
+						radial->mindr,
+						&walker,
+						image->common.repeat);
+	    }
+
+	    b += db;
+	    c += dc;
+	    dc += ddc;
+	    ++buffer;
+	}
+    }
+    else
+    {
+	/* projective */
+	/* Warning:
+	 * error propagation guarantees are much looser than in the affine case
+	 */
+	while (buffer < end)
+	{
+	    if (!mask || *mask++)
+	    {
+		if (v.vector[2] != 0)
+		{
+		    double pdx, pdy, invv2, b, c;
+
+		    invv2 = 1. * pixman_fixed_1 / v.vector[2];
+
+		    pdx = v.vector[0] * invv2 - radial->c1.x;
+		    /*    / pixman_fixed_1 */
+
+		    pdy = v.vector[1] * invv2 - radial->c1.y;
+		    /*    / pixman_fixed_1 */
+
+		    b = fdot (pdx, pdy, radial->c1.radius,
+			      radial->delta.x, radial->delta.y,
+			      radial->delta.radius);
+		    /*  / pixman_fixed_1 / pixman_fixed_1 */
+
+		    c = fdot (pdx, pdy, -radial->c1.radius,
+			      pdx, pdy, radial->c1.radius);
+		    /*  / pixman_fixed_1 / pixman_fixed_1 */
+
+		    *buffer = radial_compute_color (radial->a, b, c,
+						    radial->inva,
+						    radial->delta.radius,
+						    radial->mindr,
+						    &walker,
+						    image->common.repeat);
+		}
+		else
+		{
+		    *buffer = 0;
+		}
+	    }
+
+	    ++buffer;
+
+	    v.vector[0] += unit.vector[0];
+	    v.vector[1] += unit.vector[1];
+	    v.vector[2] += unit.vector[2];
+	}
+    }
+
+    iter->y++;
+    return iter->buffer;
+}
+
+static uint32_t *
+radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+{
+    uint32_t *buffer = radial_get_scanline_narrow (iter, NULL);
+
+    pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
+
+    return buffer;
+}
+
+void
+_pixman_radial_gradient_iter_init (pixman_image_t *image,
+				   pixman_iter_t *iter,
+				   int x, int y, int width, int height,
+				   uint8_t *buffer, iter_flags_t flags)
+{
+    if (flags & ITER_NARROW)
+	iter->get_scanline = radial_get_scanline_narrow;
+    else
+	iter->get_scanline = radial_get_scanline_wide;
+}
+
+PIXMAN_EXPORT pixman_image_t *
+pixman_image_create_radial_gradient (pixman_point_fixed_t *        inner,
+                                     pixman_point_fixed_t *        outer,
+                                     pixman_fixed_t                inner_radius,
+                                     pixman_fixed_t                outer_radius,
+                                     const pixman_gradient_stop_t *stops,
+                                     int                           n_stops)
+{
+    pixman_image_t *image;
+    radial_gradient_t *radial;
+
+    image = _pixman_image_allocate ();
+
+    if (!image)
+	return NULL;
+
+    radial = &image->radial;
+
+    if (!_pixman_init_gradient (&radial->common, stops, n_stops))
+    {
+	free (image);
+	return NULL;
+    }
+
+    image->type = RADIAL;
+
+    radial->c1.x = inner->x;
+    radial->c1.y = inner->y;
+    radial->c1.radius = inner_radius;
+    radial->c2.x = outer->x;
+    radial->c2.y = outer->y;
+    radial->c2.radius = outer_radius;
+
+    /* warning: these computations may overflow */
+    radial->delta.x = radial->c2.x - radial->c1.x;
+    radial->delta.y = radial->c2.y - radial->c1.y;
+    radial->delta.radius = radial->c2.radius - radial->c1.radius;
+
+    /* computed exactly, then cast to double -> every bit of the double
+       representation is correct (53 bits) */
+    radial->a = dot (radial->delta.x, radial->delta.y, -radial->delta.radius,
+		     radial->delta.x, radial->delta.y, radial->delta.radius);
+    if (radial->a != 0)
+	radial->inva = 1. * pixman_fixed_1 / radial->a;
+
+    radial->mindr = -1. * pixman_fixed_1 * radial->c1.radius;
+
+    return image;
+}
+
diff --git a/pixman/pixman/pixman-solid-fill.c b/pixman/pixman/pixman-solid-fill.c
index afff3c479..67681f2c0 100644
--- a/pixman/pixman/pixman-solid-fill.c
+++ b/pixman/pixman/pixman-solid-fill.c
@@ -1,117 +1,92 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007, 2009 Red Hat, Inc.
- * Copyright © 2009 Soren Sandmann
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#include "pixman-private.h"
-
-static void
-solid_fill_get_scanline_32 (pixman_image_t *image,
-                            int             x,
-                            int             y,
-                            int             width,
-                            uint32_t *      buffer,
-                            const uint32_t *mask)
-{
-    uint32_t *end = buffer + width;
-    uint32_t color = image->solid.color_32;
-
-    while (buffer < end)
-	*(buffer++) = color;
-
-    return;
-}
-
-static void
-solid_fill_get_scanline_64 (pixman_image_t *image,
-			    int             x,
-			    int             y,
-			    int             width,
-			    uint32_t *      buffer,
-			    const uint32_t *mask)
-{
-    uint64_t *b = (uint64_t *)buffer;
-    uint64_t *e = b + width;
-    uint64_t color = image->solid.color_64;
-
-    while (b < e)
-	*(b++) = color;
-}
-
-static source_image_class_t
-solid_fill_classify (pixman_image_t *image,
-                     int             x,
-                     int             y,
-                     int             width,
-                     int             height)
-{
-    return SOURCE_IMAGE_CLASS_HORIZONTAL;
-}
-
-static void
-solid_fill_property_changed (pixman_image_t *image)
-{
-    image->common.get_scanline_32 = solid_fill_get_scanline_32;
-    image->common.get_scanline_64 = solid_fill_get_scanline_64;
-}
-
-static uint32_t
-color_to_uint32 (const pixman_color_t *color)
-{
-    return
-        (color->alpha >> 8 << 24) |
-        (color->red >> 8 << 16) |
-        (color->green & 0xff00) |
-        (color->blue >> 8);
-}
-
-static uint64_t
-color_to_uint64 (const pixman_color_t *color)
-{
-    return
-        ((uint64_t)color->alpha << 48) |
-        ((uint64_t)color->red << 32) |
-        ((uint64_t)color->green << 16) |
-        ((uint64_t)color->blue);
-}
-
-PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_solid_fill (pixman_color_t *color)
-{
-    pixman_image_t *img = _pixman_image_allocate ();
-
-    if (!img)
-	return NULL;
-
-    img->type = SOLID;
-    img->solid.color = *color;
-    img->solid.color_32 = color_to_uint32 (color);
-    img->solid.color_64 = color_to_uint64 (color);
-
-    img->common.classify = solid_fill_classify;
-    img->common.property_changed = solid_fill_property_changed;
-
-    return img;
-}
-
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007, 2009 Red Hat, Inc.
+ * Copyright © 2009 Soren Sandmann
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  SuSE makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include "pixman-private.h"
+
+void
+_pixman_solid_fill_iter_init (pixman_image_t *image,
+			      pixman_iter_t  *iter,
+			      int x, int y, int width, int height,
+			      uint8_t *buffer, iter_flags_t flags)
+{
+    if (flags & ITER_NARROW)
+    {
+	uint32_t *b = (uint32_t *)buffer;
+	uint32_t *e = b + width;
+	uint32_t color = image->solid.color_32;
+
+	while (b < e)
+	    *(b++) = color;
+    }
+    else
+    {
+	uint64_t *b = (uint64_t *)buffer;
+	uint64_t *e = b + width;
+	uint64_t color = image->solid.color_64;
+
+	while (b < e)
+	    *(b++) = color;
+    }
+
+    iter->get_scanline = _pixman_iter_get_scanline_noop;
+}
+
+static uint32_t
+color_to_uint32 (const pixman_color_t *color)
+{
+    return
+        (color->alpha >> 8 << 24) |
+        (color->red >> 8 << 16) |
+        (color->green & 0xff00) |
+        (color->blue >> 8);
+}
+
+static uint64_t
+color_to_uint64 (const pixman_color_t *color)
+{
+    return
+        ((uint64_t)color->alpha << 48) |
+        ((uint64_t)color->red << 32) |
+        ((uint64_t)color->green << 16) |
+        ((uint64_t)color->blue);
+}
+
+PIXMAN_EXPORT pixman_image_t *
+pixman_image_create_solid_fill (pixman_color_t *color)
+{
+    pixman_image_t *img = _pixman_image_allocate ();
+
+    if (!img)
+	return NULL;
+
+    img->type = SOLID;
+    img->solid.color = *color;
+    img->solid.color_32 = color_to_uint32 (color);
+    img->solid.color_64 = color_to_uint64 (color);
+
+    return img;
+}
+
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index 94ba54cbf..3c0a42f41 100644
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -2598,7 +2598,7 @@ sse2_composite_over_n_8888 (pixman_implementation_t *imp,
     __m128i xmm_src, xmm_alpha;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     if (src == 0)
 	return;
@@ -2681,7 +2681,7 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp,
     __m128i xmm_src, xmm_alpha;
     __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     if (src == 0)
 	return;
@@ -2776,7 +2776,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
 
     __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
     srca = src >> 24;
 
     if (src == 0)
@@ -2904,7 +2904,7 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 
     __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     if (src == 0)
 	return;
@@ -3036,7 +3036,7 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
     PIXMAN_IMAGE_GET_LINE (
 	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
-    mask = _pixman_image_get_solid (mask_image, PIXMAN_a8r8g8b8);
+    mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
 
     xmm_mask = create_mask_16_128 (mask >> 24);
 
@@ -3226,7 +3226,7 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
     PIXMAN_IMAGE_GET_LINE (
 	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
-    mask = _pixman_image_get_solid (mask_image, PIXMAN_a8r8g8b8);
+    mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
 
     xmm_mask = create_mask_16_128 (mask >> 24);
     xmm_alpha = mask_00ff;
@@ -3498,7 +3498,7 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
 
     __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -3782,7 +3782,7 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
     __m128i xmm_src, xmm_def;
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -3918,7 +3918,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
     __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -4318,7 +4318,7 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 
     __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     if (src == 0)
 	return;
@@ -4471,7 +4471,7 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
     PIXMAN_IMAGE_GET_LINE (
 	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     sa = src >> 24;
 
@@ -4570,7 +4570,7 @@ sse2_composite_in_n_8 (pixman_implementation_t *imp,
     PIXMAN_IMAGE_GET_LINE (
 	dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
 
@@ -4758,7 +4758,7 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
     PIXMAN_IMAGE_GET_LINE (
 	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     sa = src >> 24;
 
@@ -4855,7 +4855,7 @@ sse2_composite_add_n_8 (pixman_implementation_t *imp,
     PIXMAN_IMAGE_GET_LINE (
 	dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     src >>= 24;
 
@@ -5480,7 +5480,7 @@ sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
     int dst_stride;
     int32_t w;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
     if (src == 0)
 	return;
diff --git a/pixman/pixman/pixman-utils.c b/pixman/pixman/pixman-utils.c
index 3ef88b753..cb4e62199 100644
--- a/pixman/pixman/pixman-utils.c
+++ b/pixman/pixman/pixman-utils.c
@@ -167,6 +167,12 @@ pixman_contract (uint32_t *      dst,
     }
 }
 
+uint32_t *
+_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask)
+{
+    return iter->buffer;
+}
+
 #define N_TMP_BOXES (16)
 
 pixman_bool_t
diff --git a/pixman/test/Makefile.am b/pixman/test/Makefile.am
index 71e535374..8d8471d1c 100644
--- a/pixman/test/Makefile.am
+++ b/pixman/test/Makefile.am
@@ -92,6 +92,7 @@ TESTPROGRAMS_GTK =		\
 	clip-in			\
 	composite-test		\
 	gradient-test		\
+	radial-test		\
 	alpha-test		\
 	screen-test		\
 	convolution-test	\
@@ -102,6 +103,9 @@ INCLUDES += $(GTK_CFLAGS)
 gradient_test_LDADD = $(GTK_LDADD)
 gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
 
+radial_test_LDADD = $(GTK_LDADD)
+radial_test_SOURCES = radial-test.c utils.c utils.h $(GTK_UTILS)
+
 alpha_test_LDADD = $(GTK_LDADD)
 alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
 
diff --git a/pixman/test/alphamap.c b/pixman/test/alphamap.c
index ba3130e5d..554b309fb 100644
--- a/pixman/test/alphamap.c
+++ b/pixman/test/alphamap.c
@@ -1,259 +1,256 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include "utils.h"
-
-#define WIDTH 100
-#define HEIGHT 100
-
-static const pixman_format_code_t formats[] =
-{
-    PIXMAN_a8r8g8b8,
-    PIXMAN_a2r10g10b10,
-    PIXMAN_a4r4g4b4,
-    PIXMAN_a8
-};
-
-static const pixman_format_code_t alpha_formats[] =
-{
-    PIXMAN_null,
-    PIXMAN_a8,
-    PIXMAN_a2r10g10b10,
-    PIXMAN_a4r4g4b4
-};
-
-static const int origins[] =
-{
-    0, 10, -100
-};
-
-static const char *
-format_name (pixman_format_code_t format)
-{
-    if (format == PIXMAN_a8)
-	return "a8";
-    else if (format == PIXMAN_a2r10g10b10)
-	return "a2r10g10b10";
-    else if (format == PIXMAN_a8r8g8b8)
-	return "a8r8g8b8";
-    else if (format == PIXMAN_a4r4g4b4)
-	return "a4r4g4b4";
-    else if (format == PIXMAN_null)
-	return "none";
-    else
-	assert (0);
-
-    return "<unknown - bug in alphamap.c>";
-}
-
-static void
-on_destroy (pixman_image_t *image, void *data)
-{
-    uint32_t *bits = pixman_image_get_data (image);
-
-    fence_free (bits);
-}
-
-static pixman_image_t *
-make_image (pixman_format_code_t format)
-{
-    uint32_t *bits;
-    uint8_t bpp = PIXMAN_FORMAT_BPP (format) / 8;
-    pixman_image_t *image;
-
-    bits = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * bpp);
-
-    image = pixman_image_create_bits (format, WIDTH, HEIGHT, bits, WIDTH * bpp);
-
-    if (image && bits)
-	pixman_image_set_destroy_function (image, on_destroy, NULL);
-
-    return image;
-}
-
-static pixman_image_t *
-create_image (pixman_format_code_t format, pixman_format_code_t alpha_format,
-	      int alpha_origin_x, int alpha_origin_y)
-{
-    pixman_image_t *image = make_image (format);
-
-    if (alpha_format != PIXMAN_null)
-    {
-	pixman_image_t *alpha = make_image (alpha_format);
-
-	pixman_image_set_alpha_map (image, alpha,
-				    alpha_origin_x, alpha_origin_y);
-	pixman_image_unref (alpha);
-    }
-
-    return image;
-}
-
-static uint8_t
-get_alpha (pixman_image_t *image, int x, int y, int orig_x, int orig_y)
-{
-    uint8_t *bits;
-    uint8_t r;
-
-    if (image->common.alpha_map)
-    {
-	if (x - orig_x >= 0 && x - orig_x < WIDTH &&
-	    y - orig_y >= 0 && y - orig_y < HEIGHT)
-	{
-	    image = (pixman_image_t *)image->common.alpha_map;
-
-	    x -= orig_x;
-	    y -= orig_y;
-	}
-	else
-	{
-	    return 0;
-	}
-    }
-
-    bits = (uint8_t *)image->bits.bits;
-
-    if (image->bits.format == PIXMAN_a8)
-    {
-	r = bits[y * WIDTH + x];
-    }
-    else if (image->bits.format == PIXMAN_a2r10g10b10)
-    {
-	r = ((uint32_t *)bits)[y * WIDTH + x] >> 30;
-	r |= r << 2;
-	r |= r << 4;
-    }
-    else if (image->bits.format == PIXMAN_a8r8g8b8)
-    {
-	r = ((uint32_t *)bits)[y * WIDTH + x] >> 24;
-    }
-    else if (image->bits.format == PIXMAN_a4r4g4b4)
-    {
-	r = ((uint16_t *)bits)[y * WIDTH + x] >> 12;
-	r |= r << 4;
-    }
-    else
-    {
-	assert (0);
-    }
-
-    return r;
-}
-
-#define ARRAY_LENGTH(A) ((int) (sizeof (A) / sizeof ((A) [0])))
-
-static int
-run_test (int s, int d, int sa, int da, int soff, int doff)
-{
-    pixman_format_code_t sf = formats[s];
-    pixman_format_code_t df = formats[d];
-    pixman_format_code_t saf = alpha_formats[sa];
-    pixman_format_code_t daf = alpha_formats[da];
-    pixman_image_t *src, *dst, *orig_dst;
-    pixman_transform_t t1;
-    int j, k;
-    int n_alpha_bits;
-
-    soff = origins[soff];
-    doff = origins[doff];
-
-    n_alpha_bits = PIXMAN_FORMAT_A (df);
-    if (daf != PIXMAN_null)
-	n_alpha_bits = PIXMAN_FORMAT_A (daf);
-
-
-    src = create_image (sf, saf, soff, soff);
-    orig_dst = create_image (df, daf, doff, doff);
-    dst = create_image (df, daf, doff, doff);
-
-    /* Transformations on destinations should be ignored, so just set some
-     * random one.
-     */
-    pixman_transform_init_identity (&t1);
-    pixman_transform_scale (&t1, NULL, pixman_int_to_fixed (100), pixman_int_to_fixed (11));
-    pixman_transform_rotate (&t1, NULL, pixman_double_to_fixed (0.5), pixman_double_to_fixed (0.11));
-    pixman_transform_translate (&t1, NULL, pixman_int_to_fixed (11), pixman_int_to_fixed (17));
-
-#if 0
-    /* Unfortunately, this is actually broken at the moment, so we can't
-     * actually turn it on
-     */
-    pixman_image_set_transform (dst, &t1);
-#endif
-
-    pixman_image_composite (PIXMAN_OP_SRC, orig_dst, NULL, dst,
-			    0, 0, 0, 0, 0, 0, WIDTH, HEIGHT);
-
-    pixman_image_composite (PIXMAN_OP_ADD, src, NULL, dst,
-			    0, 0, 0, 0, 0, 0, WIDTH, HEIGHT);
-
-    for (j = MAX (doff, 0); j < MIN (HEIGHT, HEIGHT + doff); ++j)
-    {
-	for (k = MAX (doff, 0); k < MIN (WIDTH, WIDTH + doff); ++k)
-	{
-	    uint8_t sa, da, oda, ref;
-
-	    sa = get_alpha (src, k, j, soff, soff);
-	    da = get_alpha (dst, k, j, doff, doff);
-	    oda = get_alpha (orig_dst, k, j, doff, doff);
-
-	    if (sa + oda > 255)
-		ref = 255;
-	    else
-		ref = sa + oda;
-
-	    if (da >> (8 - n_alpha_bits) != ref >> (8 - n_alpha_bits))
-	    {
-		printf ("\nWrong alpha value at (%d, %d). Should be 0x%x; got 0x%x. Source was 0x%x, original dest was 0x%x\n",
-			k, j, ref, da, sa, oda);
-
-		printf ("src: %s, alpha: %s, origin %d %d\ndst: %s, alpha: %s, origin: %d %d\n\n",
-			format_name (sf),
-			format_name (saf),
-			soff, soff,
-			format_name (df),
-			format_name (daf),
-			doff, doff);
-		return 1;
-	    }
-	}
-    }
-
-    pixman_image_set_alpha_map (src, NULL, 0, 0);
-    pixman_image_set_alpha_map (dst, NULL, 0, 0);
-    pixman_image_set_alpha_map (orig_dst, NULL, 0, 0);
-
-    pixman_image_unref (src);
-    pixman_image_unref (dst);
-    pixman_image_unref (orig_dst);
-
-    return 0;
-}
-
-int
-main (int argc, char **argv)
-{
-    int i, j, a, b, x, y;
-
-    for (i = 0; i < ARRAY_LENGTH (formats); ++i)
-    {
-	for (j = 0; j < ARRAY_LENGTH (formats); ++j)
-	{
-	    for (a = 0; a < ARRAY_LENGTH (alpha_formats); ++a)
-	    {
-		for (b = 0; b < ARRAY_LENGTH (alpha_formats); ++b)
-		{
-		    for (x = 0; x < ARRAY_LENGTH (origins); ++x)
-		    {
-			for (y = 0; y < ARRAY_LENGTH (origins); ++y)
-			{
-			    if (run_test (i, j, a, b, x, y) != 0)
-				return 1;
-			}
-		    }
-		}
-	    }
-	}
-    }
-
-    return 0;
-}
+#include <stdio.h>
+#include <stdlib.h>
+#include "utils.h"
+
+#define WIDTH 100
+#define HEIGHT 100
+
+static const pixman_format_code_t formats[] =
+{
+    PIXMAN_a8r8g8b8,
+    PIXMAN_a2r10g10b10,
+    PIXMAN_a4r4g4b4,
+    PIXMAN_a8
+};
+
+static const pixman_format_code_t alpha_formats[] =
+{
+    PIXMAN_null,
+    PIXMAN_a8,
+    PIXMAN_a2r10g10b10,
+    PIXMAN_a4r4g4b4
+};
+
+static const int origins[] =
+{
+    0, 10, -100
+};
+
+static const char *
+format_name (pixman_format_code_t format)
+{
+    if (format == PIXMAN_a8)
+	return "a8";
+    else if (format == PIXMAN_a2r10g10b10)
+	return "a2r10g10b10";
+    else if (format == PIXMAN_a8r8g8b8)
+	return "a8r8g8b8";
+    else if (format == PIXMAN_a4r4g4b4)
+	return "a4r4g4b4";
+    else if (format == PIXMAN_null)
+	return "none";
+    else
+	assert (0);
+
+    return "<unknown - bug in alphamap.c>";
+}
+
+static void
+on_destroy (pixman_image_t *image, void *data)
+{
+    uint32_t *bits = pixman_image_get_data (image);
+
+    fence_free (bits);
+}
+
+static pixman_image_t *
+make_image (pixman_format_code_t format)
+{
+    uint32_t *bits;
+    uint8_t bpp = PIXMAN_FORMAT_BPP (format) / 8;
+    pixman_image_t *image;
+
+    bits = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * bpp);
+
+    image = pixman_image_create_bits (format, WIDTH, HEIGHT, bits, WIDTH * bpp);
+
+    if (image && bits)
+	pixman_image_set_destroy_function (image, on_destroy, NULL);
+
+    return image;
+}
+
+static pixman_image_t *
+create_image (pixman_format_code_t format, pixman_format_code_t alpha_format,
+	      int alpha_origin_x, int alpha_origin_y)
+{
+    pixman_image_t *image = make_image (format);
+
+    if (alpha_format != PIXMAN_null)
+    {
+	pixman_image_t *alpha = make_image (alpha_format);
+
+	pixman_image_set_alpha_map (image, alpha,
+				    alpha_origin_x, alpha_origin_y);
+	pixman_image_unref (alpha);
+    }
+
+    return image;
+}
+
+static uint8_t
+get_alpha (pixman_image_t *image, int x, int y, int orig_x, int orig_y)
+{
+    uint8_t *bits;
+    uint8_t r;
+
+    if (image->common.alpha_map)
+    {
+	if (x - orig_x >= 0 && x - orig_x < WIDTH &&
+	    y - orig_y >= 0 && y - orig_y < HEIGHT)
+	{
+	    image = (pixman_image_t *)image->common.alpha_map;
+
+	    x -= orig_x;
+	    y -= orig_y;
+	}
+	else
+	{
+	    return 0;
+	}
+    }
+
+    bits = (uint8_t *)image->bits.bits;
+
+    if (image->bits.format == PIXMAN_a8)
+    {
+	r = bits[y * WIDTH + x];
+    }
+    else if (image->bits.format == PIXMAN_a2r10g10b10)
+    {
+	r = ((uint32_t *)bits)[y * WIDTH + x] >> 30;
+	r |= r << 2;
+	r |= r << 4;
+    }
+    else if (image->bits.format == PIXMAN_a8r8g8b8)
+    {
+	r = ((uint32_t *)bits)[y * WIDTH + x] >> 24;
+    }
+    else if (image->bits.format == PIXMAN_a4r4g4b4)
+    {
+	r = ((uint16_t *)bits)[y * WIDTH + x] >> 12;
+	r |= r << 4;
+    }
+    else
+    {
+	assert (0);
+    }
+
+    return r;
+}
+
+#define ARRAY_LENGTH(A) ((int) (sizeof (A) / sizeof ((A) [0])))
+
+static int
+run_test (int s, int d, int sa, int da, int soff, int doff)
+{
+    pixman_format_code_t sf = formats[s];
+    pixman_format_code_t df = formats[d];
+    pixman_format_code_t saf = alpha_formats[sa];
+    pixman_format_code_t daf = alpha_formats[da];
+    pixman_image_t *src, *dst, *orig_dst;
+    pixman_transform_t t1;
+    int j, k;
+    int n_alpha_bits;
+
+    soff = origins[soff];
+    doff = origins[doff];
+
+    n_alpha_bits = PIXMAN_FORMAT_A (df);
+    if (daf != PIXMAN_null)
+	n_alpha_bits = PIXMAN_FORMAT_A (daf);
+
+
+    src = create_image (sf, saf, soff, soff);
+    orig_dst = create_image (df, daf, doff, doff);
+    dst = create_image (df, daf, doff, doff);
+
+    /* Transformations, repeats and filters on destinations should be ignored,
+     * so just set some random ones.
+     */
+    pixman_transform_init_identity (&t1);
+    pixman_transform_scale (&t1, NULL, pixman_int_to_fixed (100), pixman_int_to_fixed (11));
+    pixman_transform_rotate (&t1, NULL, pixman_double_to_fixed (0.5), pixman_double_to_fixed (0.11));
+    pixman_transform_translate (&t1, NULL, pixman_int_to_fixed (11), pixman_int_to_fixed (17));
+
+    pixman_image_set_transform (dst, &t1);
+    pixman_image_set_filter (dst, PIXMAN_FILTER_BILINEAR, NULL, 0);
+    pixman_image_set_repeat (dst, PIXMAN_REPEAT_REFLECT);
+
+    pixman_image_composite (PIXMAN_OP_SRC, orig_dst, NULL, dst,
+			    0, 0, 0, 0, 0, 0, WIDTH, HEIGHT);
+
+    pixman_image_composite (PIXMAN_OP_ADD, src, NULL, dst,
+			    0, 0, 0, 0, 0, 0, WIDTH, HEIGHT);
+
+    for (j = MAX (doff, 0); j < MIN (HEIGHT, HEIGHT + doff); ++j)
+    {
+	for (k = MAX (doff, 0); k < MIN (WIDTH, WIDTH + doff); ++k)
+	{
+	    uint8_t sa, da, oda, ref;
+
+	    sa = get_alpha (src, k, j, soff, soff);
+	    da = get_alpha (dst, k, j, doff, doff);
+	    oda = get_alpha (orig_dst, k, j, doff, doff);
+
+	    if (sa + oda > 255)
+		ref = 255;
+	    else
+		ref = sa + oda;
+
+	    if (da >> (8 - n_alpha_bits) != ref >> (8 - n_alpha_bits))
+	    {
+		printf ("\nWrong alpha value at (%d, %d). Should be 0x%x; got 0x%x. Source was 0x%x, original dest was 0x%x\n",
+			k, j, ref, da, sa, oda);
+
+		printf ("src: %s, alpha: %s, origin %d %d\ndst: %s, alpha: %s, origin: %d %d\n\n",
+			format_name (sf),
+			format_name (saf),
+			soff, soff,
+			format_name (df),
+			format_name (daf),
+			doff, doff);
+		return 1;
+	    }
+	}
+    }
+
+    pixman_image_set_alpha_map (src, NULL, 0, 0);
+    pixman_image_set_alpha_map (dst, NULL, 0, 0);
+    pixman_image_set_alpha_map (orig_dst, NULL, 0, 0);
+
+    pixman_image_unref (src);
+    pixman_image_unref (dst);
+    pixman_image_unref (orig_dst);
+
+    return 0;
+}
+
+int
+main (int argc, char **argv)
+{
+    int i, j, a, b, x, y;
+
+    for (i = 0; i < ARRAY_LENGTH (formats); ++i)
+    {
+	for (j = 0; j < ARRAY_LENGTH (formats); ++j)
+	{
+	    for (a = 0; a < ARRAY_LENGTH (alpha_formats); ++a)
+	    {
+		for (b = 0; b < ARRAY_LENGTH (alpha_formats); ++b)
+		{
+		    for (x = 0; x < ARRAY_LENGTH (origins); ++x)
+		    {
+			for (y = 0; y < ARRAY_LENGTH (origins); ++y)
+			{
+			    if (run_test (i, j, a, b, x, y) != 0)
+				return 1;
+			}
+		    }
+		}
+	    }
+	}
+    }
+
+    return 0;
+}
diff --git a/pixman/test/radial-test.c b/pixman/test/radial-test.c
new file mode 100644
index 000000000..5d716c339
--- /dev/null
+++ b/pixman/test/radial-test.c
@@ -0,0 +1,198 @@
+#include "utils.h"
+#include "gtk-utils.h"
+
+#define NUM_GRADIENTS 7
+#define NUM_STOPS 3
+#define NUM_REPEAT 4
+#define SIZE 128
+#define WIDTH (SIZE * NUM_GRADIENTS)
+#define HEIGHT (SIZE * NUM_REPEAT)
+
+/*
+ * We want to test all the possible relative positions of the start
+ * and end circle:
+ *
+ *  - The start circle can be smaller/equal/bigger than the end
+ *    circle. A radial gradient can be classified in one of these
+ *    three cases depending on the sign of dr.
+ *
+ *  - The smaller circle can be completely inside/internally
+ *    tangent/outside (at least in part) of the bigger circle. This
+ *    classification is the same as the one which can be computed by
+ *    examining the sign of a = (dx^2 + dy^2 - dr^2).
+ *
+ *  - If the two circles have the same size, neither can be inside or
+ *    internally tangent
+ *
+ * This test draws radial gradients whose circles always have the same
+ * centers (0, 0) and (1, 0), but with different radiuses. From left
+ * to right:
+ *
+ * - Small start circle completely inside the end circle
+ *     0.25 -> 1.75; dr =  1.5 > 0; a = 1 - 1.50^2 < 0
+ *
+ * - Small start circle internally tangent to the end circle
+ *     0.50 -> 1.50; dr =  1.0 > 0; a = 1 - 1.00^2 = 0
+ *
+ * - Small start circle outside of the end circle
+ *     0.50 -> 1.00; dr =  0.5 > 0; a = 1 - 0.50^2 > 0
+ *
+ * - Start circle with the same size as the end circle
+ *     1.00 -> 1.00; dr =  0.0 = 0; a = 1 - 0.00^2 > 0
+ *
+ * - Small end circle outside of the start circle
+ *     1.00 -> 0.50; dr = -0.5 < 0; a = 1 - 0.50^2 > 0
+ *
+ * - Small end circle internally tangent to the start circle
+ *     1.50 -> 0.50; dr = -1.0 < 0; a = 1 - 1.00^2 = 0
+ *
+ * - Small end circle completely inside the start circle
+ *     1.75 -> 0.25; dr = -1.5 < 0; a = 1 - 1.50^2 < 0
+ *
+ */
+
+const static double radiuses[NUM_GRADIENTS] = {
+    0.25,
+    0.50,
+    0.50,
+    1.00,
+    1.00,
+    1.50,
+    1.75
+};
+
+#define double_to_color(x)					\
+    (((uint32_t) ((x)*65536)) - (((uint32_t) ((x)*65536)) >> 16))
+
+#define PIXMAN_STOP(offset,r,g,b,a)		\
+    { pixman_double_to_fixed (offset),		\
+	{					\
+	double_to_color (r),			\
+	double_to_color (g),			\
+	double_to_color (b),			\
+	double_to_color (a)			\
+	}					\
+    }
+
+static const pixman_gradient_stop_t stops[NUM_STOPS] = {
+    PIXMAN_STOP (0.0,        1, 0, 0, 0.75),
+    PIXMAN_STOP (0.70710678, 0, 1, 0, 0),
+    PIXMAN_STOP (1.0,        0, 0, 1, 1)
+};
+
+static pixman_image_t *
+create_radial (int index)
+{
+    pixman_point_fixed_t p0, p1;
+    pixman_fixed_t r0, r1;
+    double x0, x1, radius0, radius1, left, right, center;
+
+    x0 = 0;
+    x1 = 1;
+    radius0 = radiuses[index];
+    radius1 = radiuses[NUM_GRADIENTS - index - 1];
+
+    /* center the gradient */
+    left = MIN (x0 - radius0, x1 - radius1);
+    right = MAX (x0 + radius0, x1 + radius1);
+    center = (left + right) * 0.5;
+    x0 -= center;
+    x1 -= center;
+
+    /* scale to make it fit within a 1x1 rect centered in (0,0) */
+    x0 *= 0.25;
+    x1 *= 0.25;
+    radius0 *= 0.25;
+    radius1 *= 0.25;
+
+    p0.x = pixman_double_to_fixed (x0);
+    p0.y = pixman_double_to_fixed (0);
+
+    p1.x = pixman_double_to_fixed (x1);
+    p1.y = pixman_double_to_fixed (0);
+
+    r0 = pixman_double_to_fixed (radius0);
+    r1 = pixman_double_to_fixed (radius1);
+
+    return pixman_image_create_radial_gradient (&p0, &p1,
+						r0, r1,
+						stops, NUM_STOPS);
+}
+
+static const pixman_repeat_t repeat[NUM_REPEAT] = {
+    PIXMAN_REPEAT_NONE,
+    PIXMAN_REPEAT_NORMAL,
+    PIXMAN_REPEAT_REFLECT,
+    PIXMAN_REPEAT_PAD
+};
+
+int
+main (int argc, char **argv)
+{
+    pixman_transform_t transform;
+    pixman_image_t *src_img, *dest_img;
+    int i, j;
+
+    enable_fp_exceptions ();
+
+    dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
+					 WIDTH, HEIGHT,
+					 NULL, 0);
+
+    pixman_transform_init_identity (&transform);
+
+    /*
+     * The create_radial() function returns gradients centered in the
+     * origin and whose interesting part fits a 1x1 square. We want to
+     * paint these gradients on a SIZExSIZE square and to make things
+     * easier we want the origin in the top-left corner of the square
+     * we want to see.
+     */
+    pixman_transform_translate (NULL, &transform,
+				pixman_double_to_fixed (0.5),
+				pixman_double_to_fixed (0.5));
+
+    pixman_transform_scale (NULL, &transform,
+			    pixman_double_to_fixed (SIZE),
+			    pixman_double_to_fixed (SIZE));
+
+    /*
+     * Gradients are evaluated at the center of each pixel, so we need
+     * to translate by half a pixel to trigger some interesting
+     * cornercases. In particular, the original implementation of PDF
+     * radial gradients tried to divide by 0 when using this transform
+     * on the "tangent circles" cases.
+     */
+    pixman_transform_translate (NULL, &transform,
+				pixman_double_to_fixed (0.5),
+				pixman_double_to_fixed (0.5));
+
+    for (i = 0; i < NUM_GRADIENTS; i++)
+    {
+	src_img = create_radial (i);
+	pixman_image_set_transform (src_img, &transform);
+
+	for (j = 0; j < NUM_REPEAT; j++)
+	{
+	    pixman_image_set_repeat (src_img, repeat[j]);
+
+	    pixman_image_composite32 (PIXMAN_OP_OVER,
+				      src_img,
+				      NULL,
+				      dest_img,
+				      0, 0,
+				      0, 0,
+				      i * SIZE, j * SIZE,
+				      SIZE, SIZE);
+
+	}
+
+	pixman_image_unref (src_img);
+    }
+
+    show_image (dest_img);
+
+    pixman_image_unref (dest_img);
+
+    return 0;
+}