diff options
| -rw-r--r-- | mesalib/src/mapi/glapi/gen/gl_x86_asm.py | 2 | ||||
| -rw-r--r-- | mesalib/src/mapi/glapi/glapi_x86.S | 2 | ||||
| -rw-r--r-- | mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c | 22 | ||||
| -rw-r--r-- | pixman/pixman/pixman-fast-path.h | 898 | ||||
| -rw-r--r-- | pixman/test/utils.c | 2 | 
5 files changed, 470 insertions, 456 deletions
| diff --git a/mesalib/src/mapi/glapi/gen/gl_x86_asm.py b/mesalib/src/mapi/glapi/gen/gl_x86_asm.py index 324e13b79..ede034e58 100644 --- a/mesalib/src/mapi/glapi/gen/gl_x86_asm.py +++ b/mesalib/src/mapi/glapi/gen/gl_x86_asm.py @@ -73,7 +73,7 @@ class PrintGenericStubs(gl_XML.gl_print_base):  		print '#define GL_OFFSET(x) CODEPTR(REGOFF(4 * x, EAX))'
  		print ''
  		print '#if defined(GNU_ASSEMBLER) && !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__)'
 -		print '#define GLOBL_FN(x) GLOBL x ; .type x, function'
 +		print '#define GLOBL_FN(x) GLOBL x ; .type x, @function'
  		print '#else'
  		print '#define GLOBL_FN(x) GLOBL x'
  		print '#endif'
 diff --git a/mesalib/src/mapi/glapi/glapi_x86.S b/mesalib/src/mapi/glapi/glapi_x86.S index 78e1ea668..6246e660f 100644 --- a/mesalib/src/mapi/glapi/glapi_x86.S +++ b/mesalib/src/mapi/glapi/glapi_x86.S @@ -46,7 +46,7 @@  #define GL_OFFSET(x) CODEPTR(REGOFF(4 * x, EAX))
  #if defined(GNU_ASSEMBLER) && !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__)
 -#define GLOBL_FN(x) GLOBL x ; .type x, function
 +#define GLOBL_FN(x) GLOBL x ; .type x, @function
  #else
  #define GLOBL_FN(x) GLOBL x
  #endif
 diff --git a/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c b/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c index b24729440..87c69e4dd 100644 --- a/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -751,10 +751,12 @@ compile_instruction(  /**
   * Emit the TGSI instructions to adjust the WPOS pixel center convention
 + * Basically, add (adjX, adjY) to the fragment position.
   */
  static void
  emit_adjusted_wpos( struct st_translate *t,
 -                    const struct gl_program *program, GLfloat value)
 +                    const struct gl_program *program,
 +                    GLfloat adjX, GLfloat adjY)
  {
     struct ureg_program *ureg = t->ureg;
     struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
 @@ -764,7 +766,7 @@ emit_adjusted_wpos( struct st_translate *t,      * The shader might also use gl_FragCoord.w and .z.
      */
     ureg_ADD(ureg, wpos_temp, wpos_input,
 -            ureg_imm4f(ureg, value, value, 0.0f, 0.0f));
 +            ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
     t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
  }
 @@ -848,9 +850,12 @@ emit_wpos(struct st_context *st,     boolean invert = FALSE;
     if (fp->OriginUpperLeft) {
 +      /* Fragment shader wants origin in upper-left */
        if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
 +         /* the driver supports upper-left origin */
        }
        else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
 +         /* the driver supports lower-left origin, need to invert Y */
           ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
           invert = TRUE;
        }
 @@ -858,28 +863,37 @@ emit_wpos(struct st_context *st,           assert(0);
     }
     else {
 +      /* Fragment shader wants origin in lower-left */
        if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
 +         /* the driver supports lower-left origin */
           ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
        else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
 +         /* the driver supports upper-left origin, need to invert Y */
           invert = TRUE;
        else
           assert(0);
     }
     if (fp->PixelCenterInteger) {
 +      /* Fragment shader wants pixel center integer */
        if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
 +         /* the driver supports pixel center integer */
           ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
        else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
 -         emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f);
 +         /* the driver supports pixel center half integer, need to bias X,Y */
 +         emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
        else
           assert(0);
     }
     else {
 +      /* Fragment shader wants pixel center half integer */
        if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
 +         /* the driver supports pixel center half integer */
        }
        else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
 +         /* the driver supports pixel center integer, need to bias X,Y */
           ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
 -         emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f);
 +         emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
        }
        else
           assert(0);
 diff --git a/pixman/pixman/pixman-fast-path.h b/pixman/pixman/pixman-fast-path.h index b46937a28..bb7032d86 100644 --- a/pixman/pixman/pixman-fast-path.h +++ b/pixman/pixman/pixman-fast-path.h @@ -1,449 +1,449 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission.  SuSE makes no representations about the - * suitability of this software for any purpose.  It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author:  Keith Packard, SuSE, Inc. - */ - -#ifndef PIXMAN_FAST_PATH_H__ -#define PIXMAN_FAST_PATH_H__ - -#include "pixman-private.h" - -#define PIXMAN_REPEAT_COVER -1 - -static force_inline pixman_bool_t -repeat (pixman_repeat_t repeat, int *c, int size) -{ -    if (repeat == PIXMAN_REPEAT_NONE) -    { -	if (*c < 0 || *c >= size) -	    return FALSE; -    } -    else if (repeat == PIXMAN_REPEAT_NORMAL) -    { -	while (*c >= size) -	    *c -= size; -	while (*c < 0) -	    *c += size; -    } -    else if (repeat == PIXMAN_REPEAT_PAD) -    { -	*c = CLIP (*c, 0, size - 1); -    } -    else /* REFLECT */ -    { -	*c = MOD (*c, size * 2); -	if (*c >= size) -	    *c = size * 2 - *c - 1; -    } -    return TRUE; -} - -/* - * For each scanline fetched from source image with PAD repeat: - * - calculate how many pixels need to be padded on the left side - * - calculate how many pixels need to be padded on the right side - * - update width to only count pixels which are fetched from the image - * All this information is returned via 'width', 'left_pad', 'right_pad' - * arguments. The code is assuming that 'unit_x' is positive. - * - * Note: 64-bit math is used in order to avoid potential overflows, which - *       is probably excessive in many cases. This particular function - *       may need its own correctness test and performance tuning. - */ -static force_inline void -pad_repeat_get_scanline_bounds (int32_t         source_image_width, -				pixman_fixed_t  vx, -				pixman_fixed_t  unit_x, -				int32_t *       width, -				int32_t *       left_pad, -				int32_t *       right_pad) -{ -    int64_t max_vx = (int64_t) source_image_width << 16; -    int64_t tmp; -    if (vx < 0) -    { -	tmp = ((int64_t) unit_x - 1 - vx) / unit_x; -	if (tmp > *width) -	{ -	    *left_pad = *width; -	    *width = 0; -	} -	else -	{ -	    *left_pad = (int32_t) tmp; -	    *width -= (int32_t) tmp; -	} -    } -    else -    { -	*left_pad = 0; -    } -    tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; -    if (tmp < 0) -    { -	*right_pad = *width; -	*width = 0; -    } -    else if (tmp >= *width) -    { -	*right_pad = 0; -    } -    else -    { -	*right_pad = *width - (int32_t) tmp; -	*width = (int32_t) tmp; -    } -} - -/* A macroified version of specialized nearest scalers for some - * common 8888 and 565 formats. It supports SRC and OVER ops. - * - * There are two repeat versions, one that handles repeat normal, - * and one without repeat handling that only works if the src region - * used is completely covered by the pre-repeated source samples. - * - * The loops are unrolled to process two pixels per iteration for better - * performance on most CPU architectures (superscalar processors - * can issue several operations simultaneously, other processors can hide - * instructions latencies by pipelining operations). Unrolling more - * does not make much sense because the compiler will start running out - * of spare registers soon. - */ - -#define GET_8888_ALPHA(s) ((s) >> 24) - /* This is not actually used since we don't have an OVER with -    565 source, but it is needed to build. */ -#define GET_0565_ALPHA(s) 0xff - -#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,			\ -			      src_type_t, dst_type_t, OP, repeat_mode)				\ -static force_inline void									\ -scanline_func_name (dst_type_t     *dst,							\ -		    src_type_t     *src,							\ -		    int32_t         w,								\ -		    pixman_fixed_t  vx,								\ -		    pixman_fixed_t  unit_x,							\ -		    pixman_fixed_t  max_vx)							\ -{												\ -	uint32_t   d;										\ -	src_type_t s1, s2;									\ -	uint8_t    a1, a2;									\ -	int        x1, x2;									\ -												\ -	if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)		\ -	    abort();										\ -												\ -	while ((w -= 2) >= 0)									\ -	{											\ -	    x1 = vx >> 16;									\ -	    vx += unit_x;									\ -	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\ -	    {											\ -		/* This works because we know that unit_x is positive */			\ -		while (vx >= max_vx)								\ -		    vx -= max_vx;								\ -	    }											\ -	    s1 = src[x1];									\ -												\ -	    x2 = vx >> 16;									\ -	    vx += unit_x;									\ -	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\ -	    {											\ -		/* This works because we know that unit_x is positive */			\ -		while (vx >= max_vx)								\ -		    vx -= max_vx;								\ -	    }											\ -	    s2 = src[x2];									\ -												\ -	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\ -	    {											\ -		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\ -		a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);						\ -												\ -		if (a1 == 0xff)									\ -		{										\ -		    *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\ -		}										\ -		else if (s1)									\ -		{										\ -		    d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst);				\ -		    s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);				\ -		    a1 ^= 0xff;									\ -		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\ -		    *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);					\ -		}										\ -		dst++;										\ -												\ -		if (a2 == 0xff)									\ -		{										\ -		    *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);			\ -		}										\ -		else if (s2)									\ -		{										\ -		    d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);				\ -		    s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2);				\ -		    a2 ^= 0xff;									\ -		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);					\ -		    *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);					\ -		}										\ -		dst++;										\ -	    }											\ -	    else /* PIXMAN_OP_SRC */								\ -	    {											\ -		*dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\ -		*dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);			\ -	    }											\ -	}											\ -												\ -	if (w & 1)										\ -	{											\ -	    x1 = vx >> 16;									\ -	    s1 = src[x1];									\ -												\ -	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\ -	    {											\ -		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\ -												\ -		if (a1 == 0xff)									\ -		{										\ -		    *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\ -		}										\ -		else if (s1)									\ -		{										\ -		    d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);				\ -		    s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);				\ -		    a1 ^= 0xff;									\ -		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\ -		    *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);					\ -		}										\ -		dst++;										\ -	    }											\ -	    else /* PIXMAN_OP_SRC */								\ -	    {											\ -		*dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\ -	    }											\ -	}											\ -} - -#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, dst_type_t,	\ -				  repeat_mode)							\ -static void											\ -fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,		\ -						   pixman_op_t              op,			\ -						   pixman_image_t *         src_image,		\ -						   pixman_image_t *         mask_image,		\ -						   pixman_image_t *         dst_image,		\ -						   int32_t                  src_x,		\ -						   int32_t                  src_y,		\ -						   int32_t                  mask_x,		\ -						   int32_t                  mask_y,		\ -						   int32_t                  dst_x,		\ -						   int32_t                  dst_y,		\ -						   int32_t                  width,		\ -						   int32_t                  height)		\ -{												\ -    dst_type_t *dst_line;									\ -    src_type_t *src_first_line;									\ -    int       y;										\ -    pixman_fixed_t max_vx = 0; /* suppress uninitialized variable warning */			\ -    pixman_fixed_t max_vy;									\ -    pixman_vector_t v;										\ -    pixman_fixed_t vx, vy;									\ -    pixman_fixed_t unit_x, unit_y;								\ -    int32_t left_pad, right_pad;								\ -												\ -    src_type_t *src;										\ -    dst_type_t *dst;										\ -    int       src_stride, dst_stride;								\ -												\ -    PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1);	\ -    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\ -     * transformed from destination space to source space */					\ -    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\ -												\ -    /* reference point is the center of the pixel */						\ -    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\ -    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\ -    v.vector[2] = pixman_fixed_1;								\ -												\ -    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\ -	return;											\ -												\ -    unit_x = src_image->common.transform->matrix[0][0];						\ -    unit_y = src_image->common.transform->matrix[1][1];						\ -												\ -    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */			\ -    v.vector[0] -= pixman_fixed_e;								\ -    v.vector[1] -= pixman_fixed_e;								\ -												\ -    vx = v.vector[0];										\ -    vy = v.vector[1];										\ -												\ -    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\ -    {												\ -	/* Clamp repeating positions inside the actual samples */				\ -	max_vx = src_image->bits.width << 16;							\ -	max_vy = src_image->bits.height << 16;							\ -												\ -	repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);						\ -	repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\ -    }												\ -												\ -    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\ -	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\ -    {												\ -	pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,			\ -					&width, &left_pad, &right_pad);				\ -	vx += left_pad * unit_x;								\ -    }												\ -												\ -    while (--height >= 0)									\ -    {												\ -	dst = dst_line;										\ -	dst_line += dst_stride;									\ -												\ -	y = vy >> 16;										\ -	vy += unit_y;										\ -	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\ -	    repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\ -	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\ -	{											\ -	    repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);				\ -	    src = src_first_line + src_stride * y;						\ -	    if (left_pad > 0)									\ -	    {											\ -		scanline_func (dst, src, left_pad, 0, 0, 0);					\ -	    }											\ -	    if (width > 0)									\ -	    {											\ -		scanline_func (dst + left_pad, src, width, vx, unit_x, 0);			\ -	    }											\ -	    if (right_pad > 0)									\ -	    {											\ -		scanline_func (dst + left_pad + width, src + src_image->bits.width - 1,		\ -			        right_pad, 0, 0, 0);						\ -	    }											\ -	}											\ -	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\ -	{											\ -	    static src_type_t zero[1] = { 0 };							\ -	    if (y < 0 || y >= src_image->bits.height)						\ -	    {											\ -		scanline_func (dst, zero, left_pad + width + right_pad, 0, 0, 0);		\ -		continue;									\ -	    }											\ -	    src = src_first_line + src_stride * y;						\ -	    if (left_pad > 0)									\ -	    {											\ -		scanline_func (dst, zero, left_pad, 0, 0, 0);					\ -	    }											\ -	    if (width > 0)									\ -	    {											\ -		scanline_func (dst + left_pad, src, width, vx, unit_x, 0);			\ -	    }											\ -	    if (right_pad > 0)									\ -	    {											\ -		scanline_func (dst + left_pad + width, zero, right_pad, 0, 0, 0);		\ -	    }											\ -	}											\ -	else											\ -	{											\ -	    src = src_first_line + src_stride * y;						\ -	    scanline_func (dst, src, width, vx, unit_x, max_vx);				\ -	}											\ -    }												\ -} - -/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ -#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,		\ -			      repeat_mode)							\ -	FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, dst_type_t,	\ -			      repeat_mode)							\ - -#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,				\ -		     src_type_t, dst_type_t, OP, repeat_mode)				\ -    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\ -			  SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,		\ -			  OP, repeat_mode)						\ -    FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name ## _ ## OP,				\ -			  scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\ -			  src_type_t, dst_type_t, repeat_mode) - - -#define SCALED_NEAREST_FLAGS						\ -    (FAST_PATH_SCALE_TRANSFORM	|					\ -     FAST_PATH_NO_ALPHA_MAP	|					\ -     FAST_PATH_NEAREST_FILTER	|					\ -     FAST_PATH_NO_ACCESSORS	|					\ -     FAST_PATH_NARROW_FORMAT) - -#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)			\ -    {   PIXMAN_OP_ ## op,						\ -	PIXMAN_ ## s,							\ -	(SCALED_NEAREST_FLAGS		|				\ -	 FAST_PATH_NORMAL_REPEAT	|				\ -	 FAST_PATH_X_UNIT_POSITIVE),					\ -	PIXMAN_null, 0,							\ -	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\ -	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\ -    } - -#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)			\ -    {   PIXMAN_OP_ ## op,						\ -	PIXMAN_ ## s,							\ -	(SCALED_NEAREST_FLAGS		|				\ -	 FAST_PATH_PAD_REPEAT		|				\ -	 FAST_PATH_X_UNIT_POSITIVE),					\ -	PIXMAN_null, 0,							\ -	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\ -	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\ -    } - -#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)			\ -    {   PIXMAN_OP_ ## op,						\ -	PIXMAN_ ## s,							\ -	(SCALED_NEAREST_FLAGS		|				\ -	 FAST_PATH_NONE_REPEAT		|				\ -	 FAST_PATH_X_UNIT_POSITIVE),					\ -	PIXMAN_null, 0,							\ -	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\ -	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\ -    } - -#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)			\ -    {   PIXMAN_OP_ ## op,						\ -	PIXMAN_ ## s,							\ -	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\ -	PIXMAN_null, 0,							\ -	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\ -	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\ -    } - -/* Prefer the use of 'cover' variant, because it is faster */ -#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)				\ -    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),			\ -    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),			\ -    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),				\ -    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) - -#endif +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
 +/*
 + * Copyright © 2000 SuSE, Inc.
 + * Copyright © 2007 Red Hat, Inc.
 + *
 + * Permission to use, copy, modify, distribute, and sell this software and its
 + * documentation for any purpose is hereby granted without fee, provided that
 + * the above copyright notice appear in all copies and that both that
 + * copyright notice and this permission notice appear in supporting
 + * documentation, and that the name of SuSE not be used in advertising or
 + * publicity pertaining to distribution of the software without specific,
 + * written prior permission.  SuSE makes no representations about the
 + * suitability of this software for any purpose.  It is provided "as is"
 + * without express or implied warranty.
 + *
 + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
 + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 + *
 + * Author:  Keith Packard, SuSE, Inc.
 + */
 +
 +#ifndef PIXMAN_FAST_PATH_H__
 +#define PIXMAN_FAST_PATH_H__
 +
 +#include "pixman-private.h"
 +
 +#define PIXMAN_REPEAT_COVER -1
 +
 +static force_inline pixman_bool_t
 +repeat (pixman_repeat_t repeat, int *c, int size)
 +{
 +    if (repeat == PIXMAN_REPEAT_NONE)
 +    {
 +	if (*c < 0 || *c >= size)
 +	    return FALSE;
 +    }
 +    else if (repeat == PIXMAN_REPEAT_NORMAL)
 +    {
 +	while (*c >= size)
 +	    *c -= size;
 +	while (*c < 0)
 +	    *c += size;
 +    }
 +    else if (repeat == PIXMAN_REPEAT_PAD)
 +    {
 +	*c = CLIP (*c, 0, size - 1);
 +    }
 +    else /* REFLECT */
 +    {
 +	*c = MOD (*c, size * 2);
 +	if (*c >= size)
 +	    *c = size * 2 - *c - 1;
 +    }
 +    return TRUE;
 +}
 +
 +/*
 + * For each scanline fetched from source image with PAD repeat:
 + * - calculate how many pixels need to be padded on the left side
 + * - calculate how many pixels need to be padded on the right side
 + * - update width to only count pixels which are fetched from the image
 + * All this information is returned via 'width', 'left_pad', 'right_pad'
 + * arguments. The code is assuming that 'unit_x' is positive.
 + *
 + * Note: 64-bit math is used in order to avoid potential overflows, which
 + *       is probably excessive in many cases. This particular function
 + *       may need its own correctness test and performance tuning.
 + */
 +static force_inline void
 +pad_repeat_get_scanline_bounds (int32_t         source_image_width,
 +				pixman_fixed_t  vx,
 +				pixman_fixed_t  unit_x,
 +				int32_t *       width,
 +				int32_t *       left_pad,
 +				int32_t *       right_pad)
 +{
 +    int64_t max_vx = (int64_t) source_image_width << 16;
 +    int64_t tmp;
 +    if (vx < 0)
 +    {
 +	tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
 +	if (tmp > *width)
 +	{
 +	    *left_pad = *width;
 +	    *width = 0;
 +	}
 +	else
 +	{
 +	    *left_pad = (int32_t) tmp;
 +	    *width -= (int32_t) tmp;
 +	}
 +    }
 +    else
 +    {
 +	*left_pad = 0;
 +    }
 +    tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
 +    if (tmp < 0)
 +    {
 +	*right_pad = *width;
 +	*width = 0;
 +    }
 +    else if (tmp >= *width)
 +    {
 +	*right_pad = 0;
 +    }
 +    else
 +    {
 +	*right_pad = *width - (int32_t) tmp;
 +	*width = (int32_t) tmp;
 +    }
 +}
 +
 +/* A macroified version of specialized nearest scalers for some
 + * common 8888 and 565 formats. It supports SRC and OVER ops.
 + *
 + * There are two repeat versions, one that handles repeat normal,
 + * and one without repeat handling that only works if the src region
 + * used is completely covered by the pre-repeated source samples.
 + *
 + * The loops are unrolled to process two pixels per iteration for better
 + * performance on most CPU architectures (superscalar processors
 + * can issue several operations simultaneously, other processors can hide
 + * instructions latencies by pipelining operations). Unrolling more
 + * does not make much sense because the compiler will start running out
 + * of spare registers soon.
 + */
 +
 +#define GET_8888_ALPHA(s) ((s) >> 24)
 + /* This is not actually used since we don't have an OVER with
 +    565 source, but it is needed to build. */
 +#define GET_0565_ALPHA(s) 0xff
 +
 +#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,			\
 +			      src_type_t, dst_type_t, OP, repeat_mode)				\
 +static force_inline void									\
 +scanline_func_name (dst_type_t     *dst,							\
 +		    src_type_t     *src,							\
 +		    int32_t         w,								\
 +		    pixman_fixed_t  vx,								\
 +		    pixman_fixed_t  unit_x,							\
 +		    pixman_fixed_t  max_vx)							\
 +{												\
 +	uint32_t   d;										\
 +	src_type_t s1, s2;									\
 +	uint8_t    a1, a2;									\
 +	int        x1, x2;									\
 +												\
 +	if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)		\
 +	    abort();										\
 +												\
 +	while ((w -= 2) >= 0)									\
 +	{											\
 +	    x1 = vx >> 16;									\
 +	    vx += unit_x;									\
 +	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
 +	    {											\
 +		/* This works because we know that unit_x is positive */			\
 +		while (vx >= max_vx)								\
 +		    vx -= max_vx;								\
 +	    }											\
 +	    s1 = src[x1];									\
 +												\
 +	    x2 = vx >> 16;									\
 +	    vx += unit_x;									\
 +	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
 +	    {											\
 +		/* This works because we know that unit_x is positive */			\
 +		while (vx >= max_vx)								\
 +		    vx -= max_vx;								\
 +	    }											\
 +	    s2 = src[x2];									\
 +												\
 +	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
 +	    {											\
 +		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
 +		a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);						\
 +												\
 +		if (a1 == 0xff)									\
 +		{										\
 +		    *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\
 +		}										\
 +		else if (s1)									\
 +		{										\
 +		    d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst);				\
 +		    s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);				\
 +		    a1 ^= 0xff;									\
 +		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
 +		    *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);					\
 +		}										\
 +		dst++;										\
 +												\
 +		if (a2 == 0xff)									\
 +		{										\
 +		    *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);			\
 +		}										\
 +		else if (s2)									\
 +		{										\
 +		    d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);				\
 +		    s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2);				\
 +		    a2 ^= 0xff;									\
 +		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);					\
 +		    *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);					\
 +		}										\
 +		dst++;										\
 +	    }											\
 +	    else /* PIXMAN_OP_SRC */								\
 +	    {											\
 +		*dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\
 +		*dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);			\
 +	    }											\
 +	}											\
 +												\
 +	if (w & 1)										\
 +	{											\
 +	    x1 = vx >> 16;									\
 +	    s1 = src[x1];									\
 +												\
 +	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
 +	    {											\
 +		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
 +												\
 +		if (a1 == 0xff)									\
 +		{										\
 +		    *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\
 +		}										\
 +		else if (s1)									\
 +		{										\
 +		    d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);				\
 +		    s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);				\
 +		    a1 ^= 0xff;									\
 +		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
 +		    *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);					\
 +		}										\
 +		dst++;										\
 +	    }											\
 +	    else /* PIXMAN_OP_SRC */								\
 +	    {											\
 +		*dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\
 +	    }											\
 +	}											\
 +}
 +
 +#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, dst_type_t,	\
 +				  repeat_mode)							\
 +static void											\
 +fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,		\
 +						   pixman_op_t              op,			\
 +						   pixman_image_t *         src_image,		\
 +						   pixman_image_t *         mask_image,		\
 +						   pixman_image_t *         dst_image,		\
 +						   int32_t                  src_x,		\
 +						   int32_t                  src_y,		\
 +						   int32_t                  mask_x,		\
 +						   int32_t                  mask_y,		\
 +						   int32_t                  dst_x,		\
 +						   int32_t                  dst_y,		\
 +						   int32_t                  width,		\
 +						   int32_t                  height)		\
 +{												\
 +    dst_type_t *dst_line;									\
 +    src_type_t *src_first_line;									\
 +    int       y;										\
 +    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
 +    pixman_fixed_t max_vy;									\
 +    pixman_vector_t v;										\
 +    pixman_fixed_t vx, vy;									\
 +    pixman_fixed_t unit_x, unit_y;								\
 +    int32_t left_pad, right_pad;								\
 +												\
 +    src_type_t *src;										\
 +    dst_type_t *dst;										\
 +    int       src_stride, dst_stride;								\
 +												\
 +    PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1);	\
 +    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
 +     * transformed from destination space to source space */					\
 +    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
 +												\
 +    /* reference point is the center of the pixel */						\
 +    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
 +    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
 +    v.vector[2] = pixman_fixed_1;								\
 +												\
 +    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
 +	return;											\
 +												\
 +    unit_x = src_image->common.transform->matrix[0][0];						\
 +    unit_y = src_image->common.transform->matrix[1][1];						\
 +												\
 +    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */			\
 +    v.vector[0] -= pixman_fixed_e;								\
 +    v.vector[1] -= pixman_fixed_e;								\
 +												\
 +    vx = v.vector[0];										\
 +    vy = v.vector[1];										\
 +												\
 +    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
 +    {												\
 +	/* Clamp repeating positions inside the actual samples */				\
 +	max_vx = src_image->bits.width << 16;							\
 +	max_vy = src_image->bits.height << 16;							\
 +												\
 +	repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);						\
 +	repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
 +    }												\
 +												\
 +    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
 +	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
 +    {												\
 +	pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,			\
 +					&width, &left_pad, &right_pad);				\
 +	vx += left_pad * unit_x;								\
 +    }												\
 +												\
 +    while (--height >= 0)									\
 +    {												\
 +	dst = dst_line;										\
 +	dst_line += dst_stride;									\
 +												\
 +	y = vy >> 16;										\
 +	vy += unit_y;										\
 +	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
 +	    repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
 +	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
 +	{											\
 +	    repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);				\
 +	    src = src_first_line + src_stride * y;						\
 +	    if (left_pad > 0)									\
 +	    {											\
 +		scanline_func (dst, src, left_pad, 0, 0, 0);					\
 +	    }											\
 +	    if (width > 0)									\
 +	    {											\
 +		scanline_func (dst + left_pad, src, width, vx, unit_x, 0);			\
 +	    }											\
 +	    if (right_pad > 0)									\
 +	    {											\
 +		scanline_func (dst + left_pad + width, src + src_image->bits.width - 1,		\
 +			        right_pad, 0, 0, 0);						\
 +	    }											\
 +	}											\
 +	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
 +	{											\
 +	    static src_type_t zero[1] = { 0 };							\
 +	    if (y < 0 || y >= src_image->bits.height)						\
 +	    {											\
 +		scanline_func (dst, zero, left_pad + width + right_pad, 0, 0, 0);		\
 +		continue;									\
 +	    }											\
 +	    src = src_first_line + src_stride * y;						\
 +	    if (left_pad > 0)									\
 +	    {											\
 +		scanline_func (dst, zero, left_pad, 0, 0, 0);					\
 +	    }											\
 +	    if (width > 0)									\
 +	    {											\
 +		scanline_func (dst + left_pad, src, width, vx, unit_x, 0);			\
 +	    }											\
 +	    if (right_pad > 0)									\
 +	    {											\
 +		scanline_func (dst + left_pad + width, zero, right_pad, 0, 0, 0);		\
 +	    }											\
 +	}											\
 +	else											\
 +	{											\
 +	    src = src_first_line + src_stride * y;						\
 +	    scanline_func (dst, src, width, vx, unit_x, max_vx);				\
 +	}											\
 +    }												\
 +}
 +
 +/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
 +#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,		\
 +			      repeat_mode)							\
 +	FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, dst_type_t,	\
 +			      repeat_mode)							\
 +
 +#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,				\
 +		     src_type_t, dst_type_t, OP, repeat_mode)				\
 +    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
 +			  SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,		\
 +			  OP, repeat_mode)						\
 +    FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name ## _ ## OP,				\
 +			  scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
 +			  src_type_t, dst_type_t, repeat_mode)
 +
 +
 +#define SCALED_NEAREST_FLAGS						\
 +    (FAST_PATH_SCALE_TRANSFORM	|					\
 +     FAST_PATH_NO_ALPHA_MAP	|					\
 +     FAST_PATH_NEAREST_FILTER	|					\
 +     FAST_PATH_NO_ACCESSORS	|					\
 +     FAST_PATH_NARROW_FORMAT)
 +
 +#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)			\
 +    {   PIXMAN_OP_ ## op,						\
 +	PIXMAN_ ## s,							\
 +	(SCALED_NEAREST_FLAGS		|				\
 +	 FAST_PATH_NORMAL_REPEAT	|				\
 +	 FAST_PATH_X_UNIT_POSITIVE),					\
 +	PIXMAN_null, 0,							\
 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
 +	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
 +    }
 +
 +#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)			\
 +    {   PIXMAN_OP_ ## op,						\
 +	PIXMAN_ ## s,							\
 +	(SCALED_NEAREST_FLAGS		|				\
 +	 FAST_PATH_PAD_REPEAT		|				\
 +	 FAST_PATH_X_UNIT_POSITIVE),					\
 +	PIXMAN_null, 0,							\
 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
 +	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
 +    }
 +
 +#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)			\
 +    {   PIXMAN_OP_ ## op,						\
 +	PIXMAN_ ## s,							\
 +	(SCALED_NEAREST_FLAGS		|				\
 +	 FAST_PATH_NONE_REPEAT		|				\
 +	 FAST_PATH_X_UNIT_POSITIVE),					\
 +	PIXMAN_null, 0,							\
 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
 +	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
 +    }
 +
 +#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)			\
 +    {   PIXMAN_OP_ ## op,						\
 +	PIXMAN_ ## s,							\
 +	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
 +	PIXMAN_null, 0,							\
 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
 +	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
 +    }
 +
 +/* Prefer the use of 'cover' variant, because it is faster */
 +#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)				\
 +    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),			\
 +    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),			\
 +    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),				\
 +    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
 +
 +#endif
 diff --git a/pixman/test/utils.c b/pixman/test/utils.c index 883095250..a896e5be1 100644 --- a/pixman/test/utils.c +++ b/pixman/test/utils.c @@ -295,7 +295,7 @@ fence_free (void *data)  #else
  void *
 -fence_malloc (uint32_t len)
 +fence_malloc (int64_t len)
  {
      return malloc (len);
  }
 | 
