aboutsummaryrefslogtreecommitdiff
path: root/pixman/pixman/pixman-mmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'pixman/pixman/pixman-mmx.c')
-rw-r--r-- pixman/pixman/pixman-mmx.c 82
1 files changed, 38 insertions, 44 deletions
diff --git a/pixman/pixman/pixman-mmx.c b/pixman/pixman/pixman-mmx.c
index 39e85c586..ed5107321 100644
--- a/pixman/pixman/pixman-mmx.c
+++ b/pixman/pixman/pixman-mmx.c
@@ -309,6 +309,29 @@ negate (__m64 mask)
return _mm_xor_si64 (mask, MC (4x00ff));
}
+/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1
+ * and maps its result to the same range.
+ *
+ * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner:
+ * Notation, Notation, Notation", the first of which is
+ *
+ * prod(a, b) = (a * b + 128) / 255.
+ *
+ * By approximating the division by 255 as a multiplication by 257/65536, it
+ * can be replaced by a multiply and a right shift. This is what we use in
+ * pix_multiply(), except that we call _mm_mulhi_pu16() with 257 (part of SSE1
+ * or Extended 3DNow!, and unavailable at the time of the book's publication)
+ * to perform the multiplication and right shift in a single operation.
+ *
+ * prod(a, b) = ((a * b + 128) * 257) >> 16.
+ *
+ * A third way (how pix_multiply() was implemented prior to 14208344) also
+ * exists; it performs the multiplication by 257 with adds and shifts.
+ *
+ * Where temp = a * b + 128
+ *
+ * prod(a, b) = (temp + (temp >> 8)) >> 8.
+ */
static force_inline __m64
pix_multiply (__m64 a, __m64 b)
{
@@ -3926,52 +3949,23 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
return iter->buffer;
}
-typedef struct
-{
- pixman_format_code_t format;
- pixman_iter_get_scanline_t get_scanline;
-} fetcher_info_t;
-
-static const fetcher_info_t fetchers[] =
-{
- { PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 },
- { PIXMAN_r5g6b5, mmx_fetch_r5g6b5 },
- { PIXMAN_a8, mmx_fetch_a8 },
- { PIXMAN_null }
-};
-
-static pixman_bool_t
-mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
-{
- pixman_image_t *image = iter->image;
-
-#define FLAGS \
+#define IMAGE_FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
- if ((iter->iter_flags & ITER_NARROW) &&
- (iter->image_flags & FLAGS) == FLAGS)
- {
- const fetcher_info_t *f;
-
- for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
- {
- if (image->common.extended_format_code == f->format)
- {
- uint8_t *b = (uint8_t *)image->bits.bits;
- int s = image->bits.rowstride * 4;
-
- iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
- iter->stride = s;
-
- iter->get_scanline = f->get_scanline;
- return TRUE;
- }
- }
- }
-
- return FALSE;
-}
+static const pixman_iter_info_t mmx_iters[] =
+{
+ { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL
+ },
+ { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL
+ },
+ { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
+ _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL
+ },
+ { PIXMAN_null },
+};
static const pixman_fast_path_t mmx_fast_paths[] =
{
@@ -4101,7 +4095,7 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback)
imp->blt = mmx_blt;
imp->fill = mmx_fill;
- imp->src_iter_init = mmx_src_iter_init;
+ imp->iter_info = mmx_iters;
return imp;
}