aboutsummaryrefslogtreecommitdiff
path: root/pixman
diff options
context:
space:
mode:
Diffstat (limited to 'pixman')
-rw-r--r-- pixman/Makefile.am 2
-rw-r--r-- pixman/configure.ac 2
-rw-r--r-- pixman/pixman/pixman-arm-common.h 24
-rw-r--r-- pixman/pixman/pixman-fast-path.h 234
-rw-r--r-- pixman/pixman/pixman-sse2.c 11
5 files changed, 238 insertions, 35 deletions
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 658a375c3..ff87e26a3 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -19,7 +19,7 @@ RELEASE_CAIRO_URL = http://cairographics.org/$(RELEASE_OR_SNAPSHOT)s
RELEASE_XORG_URL = http://xorg.freedesktop.org/archive/individual/lib
RELEASE_XORG_HOST = $(USERNAME)@xorg.freedesktop.org
RELEASE_XORG_DIR = /srv/xorg.freedesktop.org/archive/individual/lib
-RELEASE_ANNOUNCE_LIST = cairo-announce@cairographics.org, xorg-announce@lists.freedesktop.org
+RELEASE_ANNOUNCE_LIST = cairo-announce@cairographics.org, xorg-announce@lists.freedesktop.org, pixman@lists.freedesktop.org
tar_gz = $(PACKAGE)-$(VERSION).tar.gz
tar_bz2 = $(PACKAGE)-$(VERSION).tar.bz2
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 8ce9e1923..4c62102be 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
m4_define([pixman_major], 0)
m4_define([pixman_minor], 23)
-m4_define([pixman_micro], 1)
+m4_define([pixman_micro], 3)
m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
diff --git a/pixman/pixman/pixman-arm-common.h b/pixman/pixman/pixman-arm-common.h
index f7a10c49e..f1d212c84 100644
--- a/pixman/pixman/pixman-arm-common.h
+++ b/pixman/pixman/pixman-arm-common.h
@@ -348,13 +348,17 @@ scaled_bilinear_scanline_##cputype##_##name##_##op ( \
\
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \
+ src_type, uint32_t, dst_type, COVER, FLAG_NONE) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \
+ src_type, uint32_t, dst_type, NONE, FLAG_NONE) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint32_t, dst_type, PAD, FALSE, FALSE)
+ src_type, uint32_t, dst_type, PAD, FLAG_NONE) \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
+ src_type, uint32_t, dst_type, NORMAL, \
+ FLAG_NONE)
#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, cputype, name, op, \
@@ -393,12 +397,20 @@ scaled_bilinear_scanline_##cputype##_##name##_##op ( \
\
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint8_t, dst_type, COVER, TRUE, FALSE) \
+ src_type, uint8_t, dst_type, COVER, \
+ FLAG_HAVE_NON_SOLID_MASK) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+ src_type, uint8_t, dst_type, NONE, \
+ FLAG_HAVE_NON_SOLID_MASK) \
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
scaled_bilinear_scanline_##cputype##_##name##_##op, \
- src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+ src_type, uint8_t, dst_type, PAD, \
+ FLAG_HAVE_NON_SOLID_MASK) \
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
+ src_type, uint8_t, dst_type, NORMAL, \
+ FLAG_HAVE_NON_SOLID_MASK)
+
#endif
diff --git a/pixman/pixman/pixman-fast-path.h b/pixman/pixman/pixman-fast-path.h
index fcbaa955d..e94591a27 100644
--- a/pixman/pixman/pixman-fast-path.h
+++ b/pixman/pixman/pixman-fast-path.h
@@ -30,6 +30,29 @@
#define PIXMAN_REPEAT_COVER -1
+/* Flags describing the input parameters of the fast path macro template.
+ * Turning a flag on tells the template either that
+ * "some property X is available, so the template can use it" or that
+ * "some property X should be handled by the template".
+ *
+ * FLAG_HAVE_SOLID_MASK
+ * The input mask is solid, and the template should handle it as such.
+ *
+ * FLAG_HAVE_NON_SOLID_MASK
+ * The input mask is a bits (non-solid) mask, and the template should handle it as such.
+ *
+ * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
+ * exclusive. (Turning both flags on is not allowed.)
+ */
+#define FLAG_NONE (0)
+#define FLAG_HAVE_SOLID_MASK (1 << 1)
+#define FLAG_HAVE_NON_SOLID_MASK (1 << 2)
+
+/* To avoid repeated scanline function calls that are too short, source
+ * scanlines narrower than the constant below are extended.
+ */
+#define REPEAT_NORMAL_MIN_WIDTH 64
+
static force_inline pixman_bool_t
repeat (pixman_repeat_t repeat, int *c, int size)
{
@@ -651,7 +674,7 @@ bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
* multiplication instructions.
*/
#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+ dst_type_t, repeat_mode, flags) \
static void \
fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
@@ -672,20 +695,23 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
const mask_type_t *mask = &solid_mask; \
int src_stride, mask_stride, dst_stride; \
\
+ int src_width; \
+ pixman_fixed_t src_width_fixed; \
+ int max_x; \
+ pixman_bool_t need_src_extension; \
+ \
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
- if (have_mask) \
+ if (flags & FLAG_HAVE_SOLID_MASK) \
{ \
- if (mask_is_solid) \
- { \
- solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
- mask_stride = 0; \
- } \
- else \
- { \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
- mask_stride, mask_line, 1); \
- } \
+ solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
+ mask_stride = 0; \
+ } \
+ else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+ { \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
+ mask_stride, mask_line, 1); \
} \
+ \
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
* transformed from destination space to source space */ \
PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
@@ -722,13 +748,37 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
v.vector[0] += left_pad * unit_x; \
} \
\
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ vx = v.vector[0]; \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
+ max_x = pixman_fixed_to_int (vx + (width - 1) * unit_x) + 1; \
+ \
+ if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
+ { \
+ src_width = 0; \
+ \
+ while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
+ src_width += src_image->bits.width; \
+ \
+ need_src_extension = TRUE; \
+ } \
+ else \
+ { \
+ src_width = src_image->bits.width; \
+ need_src_extension = FALSE; \
+ } \
+ \
+ src_width_fixed = pixman_int_to_fixed (src_width); \
+ } \
+ \
while (--height >= 0) \
{ \
int weight1, weight2; \
dst = dst_line; \
dst_line += dst_stride; \
vx = v.vector[0]; \
- if (have_mask && !mask_is_solid) \
+ if (flags & FLAG_HAVE_NON_SOLID_MASK) \
{ \
mask = mask_line; \
mask_line += mask_stride; \
@@ -766,7 +816,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
scanline_func (dst, mask, \
buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
dst += left_pad; \
- if (have_mask && !mask_is_solid) \
+ if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_pad; \
} \
if (width > 0) \
@@ -774,7 +824,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
scanline_func (dst, mask, \
src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
dst += width; \
- if (have_mask && !mask_is_solid) \
+ if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += width; \
} \
if (right_pad > 0) \
@@ -821,7 +871,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
scanline_func (dst, mask, \
buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
dst += left_pad; \
- if (have_mask && !mask_is_solid) \
+ if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_pad; \
} \
if (left_tz > 0) \
@@ -834,7 +884,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
buf1, buf2, left_tz, weight1, weight2, \
pixman_fixed_frac (vx), unit_x, 0, FALSE); \
dst += left_tz; \
- if (have_mask && !mask_is_solid) \
+ if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_tz; \
vx += left_tz * unit_x; \
} \
@@ -843,7 +893,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
scanline_func (dst, mask, \
src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
dst += width; \
- if (have_mask && !mask_is_solid) \
+ if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += width; \
vx += width * unit_x; \
} \
@@ -857,7 +907,7 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
buf1, buf2, right_tz, weight1, weight2, \
pixman_fixed_frac (vx), unit_x, 0, FALSE); \
dst += right_tz; \
- if (have_mask && !mask_is_solid) \
+ if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += right_tz; \
} \
if (right_pad > 0) \
@@ -868,6 +918,106 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
} \
} \
+ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ int32_t num_pixels; \
+ int32_t width_remain; \
+ src_type_t * src_line_top; \
+ src_type_t * src_line_bottom; \
+ src_type_t buf1[2]; \
+ src_type_t buf2[2]; \
+ src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \
+ src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \
+ int i, j; \
+ \
+ repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
+ repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
+ src_line_top = src_first_line + src_stride * y1; \
+ src_line_bottom = src_first_line + src_stride * y2; \
+ \
+ if (need_src_extension) \
+ { \
+ for (i=0; i<src_width;) \
+ { \
+ for (j=0; j<src_image->bits.width; j++, i++) \
+ { \
+ extended_src_line0[i] = src_line_top[j]; \
+ extended_src_line1[i] = src_line_bottom[j]; \
+ } \
+ } \
+ \
+ src_line_top = &extended_src_line0[0]; \
+ src_line_bottom = &extended_src_line1[0]; \
+ } \
+ \
+ /* Top & Bottom wrap around buffer */ \
+ buf1[0] = src_line_top[src_width - 1]; \
+ buf1[1] = src_line_top[0]; \
+ buf2[0] = src_line_bottom[src_width - 1]; \
+ buf2[1] = src_line_bottom[0]; \
+ \
+ width_remain = width; \
+ \
+ while (width_remain > 0) \
+ { \
+ /* Wrap vx by src_width_fixed to bring it back into the source scanline range */ \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
+ \
+ /* Wrap around part */ \
+ if (pixman_fixed_to_int (vx) == src_width - 1) \
+ { \
+ /* for positive unit_x \
+ * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
+ * \
+ * vx is in range [0, src_width_fixed - pixman_fixed_e] \
+ * So we are safe from overflow. \
+ */ \
+ num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
+ \
+ if (num_pixels > width_remain) \
+ num_pixels = width_remain; \
+ \
+ scanline_func (dst, mask, buf1, buf2, num_pixels, \
+ weight1, weight2, pixman_fixed_frac(vx), \
+ unit_x, src_width_fixed, FALSE); \
+ \
+ width_remain -= num_pixels; \
+ vx += num_pixels * unit_x; \
+ dst += num_pixels; \
+ \
+ if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+ mask += num_pixels; \
+ \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
+ } \
+ \
+ /* Normal scanline composite */ \
+ if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
+ { \
+ /* for positive unit_x \
+ * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
+ * \
+ * vx is in range [0, src_width_fixed - pixman_fixed_e] \
+ * So we are safe from overflow here. \
+ */ \
+ num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
+ / unit_x) + 1; \
+ \
+ if (num_pixels > width_remain) \
+ num_pixels = width_remain; \
+ \
+ scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \
+ weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \
+ \
+ width_remain -= num_pixels; \
+ vx += num_pixels * unit_x; \
+ dst += num_pixels; \
+ \
+ if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+ mask += num_pixels; \
+ } \
+ } \
+ } \
else \
{ \
scanline_func (dst, mask, src_first_line + src_stride * y1, \
@@ -879,9 +1029,9 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
- dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+ dst_type_t, repeat_mode, flags) \
FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
- dst_type_t, repeat_mode, have_mask, mask_is_solid)
+ dst_type_t, repeat_mode, flags)
#define SCALED_BILINEAR_FLAGS \
(FAST_PATH_SCALE_TRANSFORM | \
@@ -921,6 +1071,17 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
}
+#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_BILINEAR_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
+ }
+
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
@@ -952,6 +1113,17 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
}
+#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_BILINEAR_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
+ }
+
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
@@ -983,20 +1155,34 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
}
+#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_BILINEAR_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
+ }
+
/* Prefer the use of 'cover' variant, because it is faster */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func)
+ SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \
+ SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
#endif
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index 79ef6880b..3d51c2fda 100644
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -5385,15 +5385,20 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
- COVER, FALSE, FALSE)
+ COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
- PAD, FALSE, FALSE)
+ PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
- NONE, FALSE, FALSE)
+ NONE, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ NORMAL, FLAG_NONE)
+
static const pixman_fast_path_t sse2_fast_paths[] =
{